LCOV - code coverage report
Current view: top level - lib/blob - blobstore.c (source / functions) Hit Total Coverage
Test: ut_cov_unit.info Lines: 4116 5144 80.0 %
Date: 2024-07-15 21:05:59 Functions: 339 360 94.2 %

          Line data    Source code
       1             : /*   SPDX-License-Identifier: BSD-3-Clause
       2             :  *   Copyright (C) 2017 Intel Corporation.
       3             :  *   All rights reserved.
       4             :  *   Copyright (c) 2021-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
       5             :  */
       6             : 
       7             : #include "spdk/stdinc.h"
       8             : 
       9             : #include "spdk/blob.h"
      10             : #include "spdk/crc32.h"
      11             : #include "spdk/env.h"
      12             : #include "spdk/queue.h"
      13             : #include "spdk/thread.h"
      14             : #include "spdk/bit_array.h"
      15             : #include "spdk/bit_pool.h"
      16             : #include "spdk/likely.h"
      17             : #include "spdk/util.h"
      18             : #include "spdk/string.h"
      19             : 
      20             : #include "spdk_internal/assert.h"
      21             : #include "spdk/log.h"
      22             : 
      23             : #include "blobstore.h"
      24             : 
      25             : #define BLOB_CRC32C_INITIAL    0xffffffffUL
      26             : 
      27             : static int bs_register_md_thread(struct spdk_blob_store *bs);
      28             : static int bs_unregister_md_thread(struct spdk_blob_store *bs);
      29             : static void blob_close_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno);
      30             : static void blob_insert_cluster_on_md_thread(struct spdk_blob *blob, uint32_t cluster_num,
      31             :                 uint64_t cluster, uint32_t extent, struct spdk_blob_md_page *page,
      32             :                 spdk_blob_op_complete cb_fn, void *cb_arg);
      33             : static void blob_free_cluster_on_md_thread(struct spdk_blob *blob, uint32_t cluster_num,
      34             :                 uint32_t extent_page, struct spdk_blob_md_page *page, spdk_blob_op_complete cb_fn, void *cb_arg);
      35             : 
      36             : static int blob_set_xattr(struct spdk_blob *blob, const char *name, const void *value,
      37             :                           uint16_t value_len, bool internal);
      38             : static int blob_get_xattr_value(struct spdk_blob *blob, const char *name,
      39             :                                 const void **value, size_t *value_len, bool internal);
      40             : static int blob_remove_xattr(struct spdk_blob *blob, const char *name, bool internal);
      41             : 
      42             : static void blob_write_extent_page(struct spdk_blob *blob, uint32_t extent, uint64_t cluster_num,
      43             :                                    struct spdk_blob_md_page *page, spdk_blob_op_complete cb_fn, void *cb_arg);
      44             : static void blob_freeze_io(struct spdk_blob *blob, spdk_blob_op_complete cb_fn, void *cb_arg);
      45             : 
      46             : static void bs_shallow_copy_cluster_find_next(void *cb_arg);
      47             : 
      48             : /*
      49             :  * External snapshots require a channel per thread per esnap bdev.  The tree
      50             :  * is populated lazily as blob IOs are handled by the back_bs_dev. When this
      51             :  * channel is destroyed, all the channels in the tree are destroyed.
      52             :  */
      53             : 
      54             : struct blob_esnap_channel {
      55             :         RB_ENTRY(blob_esnap_channel)    node;
      56             :         spdk_blob_id                    blob_id;
      57             :         struct spdk_io_channel          *channel;
      58             : };
      59             : 
      60             : static int blob_esnap_channel_compare(struct blob_esnap_channel *c1, struct blob_esnap_channel *c2);
      61             : static void blob_esnap_destroy_bs_dev_channels(struct spdk_blob *blob, bool abort_io,
      62             :                 spdk_blob_op_with_handle_complete cb_fn, void *cb_arg);
      63             : static void blob_esnap_destroy_bs_channel(struct spdk_bs_channel *ch);
      64             : static void blob_set_back_bs_dev_frozen(void *_ctx, int bserrno);
      65       10245 : RB_GENERATE_STATIC(blob_esnap_channel_tree, blob_esnap_channel, node, blob_esnap_channel_compare)
      66             : 
      67             : static inline bool
      68       49582 : blob_is_esnap_clone(const struct spdk_blob *blob)
      69             : {
      70       49582 :         assert(blob != NULL);
      71       49582 :         return !!(blob->invalid_flags & SPDK_BLOB_EXTERNAL_SNAPSHOT);
      72             : }
      73             : 
      74             : static int
      75        2289 : blob_id_cmp(struct spdk_blob *blob1, struct spdk_blob *blob2)
      76             : {
      77        2289 :         assert(blob1 != NULL && blob2 != NULL);
      78        2289 :         return (blob1->id < blob2->id ? -1 : blob1->id > blob2->id);
      79             : }
      80             : 
      81       14741 : RB_GENERATE_STATIC(spdk_blob_tree, spdk_blob, link, blob_id_cmp);
      82             : 
      83             : static void
      84       37031 : blob_verify_md_op(struct spdk_blob *blob)
      85             : {
      86       37031 :         assert(blob != NULL);
      87       37031 :         assert(spdk_get_thread() == blob->bs->md_thread);
      88       37031 :         assert(blob->state != SPDK_BLOB_STATE_LOADING);
      89       37031 : }
      90             : 
      91             : static struct spdk_blob_list *
      92        3828 : bs_get_snapshot_entry(struct spdk_blob_store *bs, spdk_blob_id blobid)
      93             : {
      94        3828 :         struct spdk_blob_list *snapshot_entry = NULL;
      95             : 
      96        4808 :         TAILQ_FOREACH(snapshot_entry, &bs->snapshots, link) {
      97        1756 :                 if (snapshot_entry->id == blobid) {
      98         776 :                         break;
      99             :                 }
     100             :         }
     101             : 
     102        3828 :         return snapshot_entry;
     103             : }
     104             : 
     105             : static void
     106        2904 : bs_claim_md_page(struct spdk_blob_store *bs, uint32_t page)
     107             : {
     108        2904 :         assert(spdk_spin_held(&bs->used_lock));
     109        2904 :         assert(page < spdk_bit_array_capacity(bs->used_md_pages));
     110        2904 :         assert(spdk_bit_array_get(bs->used_md_pages, page) == false);
     111             : 
     112        2904 :         spdk_bit_array_set(bs->used_md_pages, page);
     113        2904 : }
     114             : 
     115             : static void
     116        2200 : bs_release_md_page(struct spdk_blob_store *bs, uint32_t page)
     117             : {
     118        2200 :         assert(spdk_spin_held(&bs->used_lock));
     119        2200 :         assert(page < spdk_bit_array_capacity(bs->used_md_pages));
     120        2200 :         assert(spdk_bit_array_get(bs->used_md_pages, page) == true);
     121             : 
     122        2200 :         spdk_bit_array_clear(bs->used_md_pages, page);
     123        2200 : }
     124             : 
     125             : static uint32_t
     126        8220 : bs_claim_cluster(struct spdk_blob_store *bs)
     127             : {
     128             :         uint32_t cluster_num;
     129             : 
     130        8220 :         assert(spdk_spin_held(&bs->used_lock));
     131             : 
     132        8220 :         cluster_num = spdk_bit_pool_allocate_bit(bs->used_clusters);
     133        8220 :         if (cluster_num == UINT32_MAX) {
     134           0 :                 return UINT32_MAX;
     135             :         }
     136             : 
     137        8220 :         SPDK_DEBUGLOG(blob, "Claiming cluster %u\n", cluster_num);
     138        8220 :         bs->num_free_clusters--;
     139             : 
     140        8220 :         return cluster_num;
     141             : }
     142             : 
     143             : static void
     144        2399 : bs_release_cluster(struct spdk_blob_store *bs, uint32_t cluster_num)
     145             : {
     146        2399 :         assert(spdk_spin_held(&bs->used_lock));
     147        2399 :         assert(cluster_num < spdk_bit_pool_capacity(bs->used_clusters));
     148        2399 :         assert(spdk_bit_pool_is_allocated(bs->used_clusters, cluster_num) == true);
     149        2399 :         assert(bs->num_free_clusters < bs->total_clusters);
     150             : 
     151        2399 :         SPDK_DEBUGLOG(blob, "Releasing cluster %u\n", cluster_num);
     152             : 
     153        2399 :         spdk_bit_pool_free_bit(bs->used_clusters, cluster_num);
     154        2399 :         bs->num_free_clusters++;
     155        2399 : }
     156             : 
     157             : static int
     158        8220 : blob_insert_cluster(struct spdk_blob *blob, uint32_t cluster_num, uint64_t cluster)
     159             : {
     160        8220 :         uint64_t *cluster_lba = &blob->active.clusters[cluster_num];
     161             : 
     162        8220 :         blob_verify_md_op(blob);
     163             : 
     164        8220 :         if (*cluster_lba != 0) {
     165           4 :                 return -EEXIST;
     166             :         }
     167             : 
     168        8216 :         *cluster_lba = bs_cluster_to_lba(blob->bs, cluster);
     169        8216 :         blob->active.num_allocated_clusters++;
     170             : 
     171        8216 :         return 0;
     172             : }
     173             : 
     174             : static int
     175        8220 : bs_allocate_cluster(struct spdk_blob *blob, uint32_t cluster_num,
     176             :                     uint64_t *cluster, uint32_t *lowest_free_md_page, bool update_map)
     177             : {
     178        8220 :         uint32_t *extent_page = 0;
     179             : 
     180        8220 :         assert(spdk_spin_held(&blob->bs->used_lock));
     181             : 
     182        8220 :         *cluster = bs_claim_cluster(blob->bs);
     183        8220 :         if (*cluster == UINT32_MAX) {
     184             :                 /* No more free clusters. Cannot satisfy the request */
     185           0 :                 return -ENOSPC;
     186             :         }
     187             : 
     188        8220 :         if (blob->use_extent_table) {
     189        4168 :                 extent_page = bs_cluster_to_extent_page(blob, cluster_num);
     190        4168 :                 if (*extent_page == 0) {
     191             :                         /* Extent page shall never occupy md_page so start the search from 1 */
     192         728 :                         if (*lowest_free_md_page == 0) {
     193         726 :                                 *lowest_free_md_page = 1;
     194             :                         }
     195             :                         /* No extent_page is allocated for the cluster */
     196         728 :                         *lowest_free_md_page = spdk_bit_array_find_first_clear(blob->bs->used_md_pages,
     197             :                                                *lowest_free_md_page);
     198         728 :                         if (*lowest_free_md_page == UINT32_MAX) {
     199             :                                 /* No more free md pages. Cannot satisfy the request */
     200           0 :                                 bs_release_cluster(blob->bs, *cluster);
     201           0 :                                 return -ENOSPC;
     202             :                         }
     203         728 :                         bs_claim_md_page(blob->bs, *lowest_free_md_page);
     204             :                 }
     205             :         }
     206             : 
     207        8220 :         SPDK_DEBUGLOG(blob, "Claiming cluster %" PRIu64 " for blob 0x%" PRIx64 "\n", *cluster,
     208             :                       blob->id);
     209             : 
     210        8220 :         if (update_map) {
     211        7404 :                 blob_insert_cluster(blob, cluster_num, *cluster);
     212        7404 :                 if (blob->use_extent_table && *extent_page == 0) {
     213         644 :                         *extent_page = *lowest_free_md_page;
     214             :                 }
     215             :         }
     216             : 
     217        8220 :         return 0;
     218             : }
     219             : 
     220             : static void
     221        5582 : blob_xattrs_init(struct spdk_blob_xattr_opts *xattrs)
     222             : {
     223        5582 :         xattrs->count = 0;
     224        5582 :         xattrs->names = NULL;
     225        5582 :         xattrs->ctx = NULL;
     226        5582 :         xattrs->get_value = NULL;
     227        5582 : }
     228             : 
     229             : void
     230        3688 : spdk_blob_opts_init(struct spdk_blob_opts *opts, size_t opts_size)
     231             : {
     232        3688 :         if (!opts) {
     233           0 :                 SPDK_ERRLOG("opts should not be NULL\n");
     234           0 :                 return;
     235             :         }
     236             : 
     237        3688 :         if (!opts_size) {
     238           0 :                 SPDK_ERRLOG("opts_size should not be zero value\n");
     239           0 :                 return;
     240             :         }
     241             : 
     242        3688 :         memset(opts, 0, opts_size);
     243        3688 :         opts->opts_size = opts_size;
     244             : 
     245             : #define FIELD_OK(field) \
     246             :         offsetof(struct spdk_blob_opts, field) + sizeof(opts->field) <= opts_size
     247             : 
     248             : #define SET_FIELD(field, value) \
     249             :         if (FIELD_OK(field)) { \
     250             :                 opts->field = value; \
     251             :         } \
     252             : 
     253        3688 :         SET_FIELD(num_clusters, 0);
     254        3688 :         SET_FIELD(thin_provision, false);
     255        3688 :         SET_FIELD(clear_method, BLOB_CLEAR_WITH_DEFAULT);
     256             : 
     257        3688 :         if (FIELD_OK(xattrs)) {
     258        3688 :                 blob_xattrs_init(&opts->xattrs);
     259             :         }
     260             : 
     261        3688 :         SET_FIELD(use_extent_table, true);
     262             : 
     263             : #undef FIELD_OK
     264             : #undef SET_FIELD
     265             : }
     266             : 
     267             : void
     268        3478 : spdk_blob_open_opts_init(struct spdk_blob_open_opts *opts, size_t opts_size)
     269             : {
     270        3478 :         if (!opts) {
     271           0 :                 SPDK_ERRLOG("opts should not be NULL\n");
     272           0 :                 return;
     273             :         }
     274             : 
     275        3478 :         if (!opts_size) {
     276           0 :                 SPDK_ERRLOG("opts_size should not be zero value\n");
     277           0 :                 return;
     278             :         }
     279             : 
     280        3478 :         memset(opts, 0, opts_size);
     281        3478 :         opts->opts_size = opts_size;
     282             : 
     283             : #define FIELD_OK(field) \
     284             :         offsetof(struct spdk_blob_open_opts, field) + sizeof(opts->field) <= opts_size
     285             : 
     286             : #define SET_FIELD(field, value) \
     287             :         if (FIELD_OK(field)) { \
     288             :                 opts->field = value; \
     289             :         } \
     290             : 
     291        3478 :         SET_FIELD(clear_method, BLOB_CLEAR_WITH_DEFAULT);
     292             : 
     293             : #undef FIELD_OK
     294             : #undef SET_FILED
     295             : }
     296             : 
     297             : static struct spdk_blob *
     298        5368 : blob_alloc(struct spdk_blob_store *bs, spdk_blob_id id)
     299             : {
     300             :         struct spdk_blob *blob;
     301             : 
     302        5368 :         blob = calloc(1, sizeof(*blob));
     303        5368 :         if (!blob) {
     304           0 :                 return NULL;
     305             :         }
     306             : 
     307        5368 :         blob->id = id;
     308        5368 :         blob->bs = bs;
     309             : 
     310        5368 :         blob->parent_id = SPDK_BLOBID_INVALID;
     311             : 
     312        5368 :         blob->state = SPDK_BLOB_STATE_DIRTY;
     313        5368 :         blob->extent_rle_found = false;
     314        5368 :         blob->extent_table_found = false;
     315        5368 :         blob->active.num_pages = 1;
     316        5368 :         blob->active.pages = calloc(1, sizeof(*blob->active.pages));
     317        5368 :         if (!blob->active.pages) {
     318           0 :                 free(blob);
     319           0 :                 return NULL;
     320             :         }
     321             : 
     322        5368 :         blob->active.pages[0] = bs_blobid_to_page(id);
     323             : 
     324        5368 :         TAILQ_INIT(&blob->xattrs);
     325        5368 :         TAILQ_INIT(&blob->xattrs_internal);
     326        5368 :         TAILQ_INIT(&blob->pending_persists);
     327        5368 :         TAILQ_INIT(&blob->persists_to_complete);
     328             : 
     329        5368 :         return blob;
     330             : }
     331             : 
     332             : static void
     333       10736 : xattrs_free(struct spdk_xattr_tailq *xattrs)
     334             : {
     335             :         struct spdk_xattr       *xattr, *xattr_tmp;
     336             : 
     337       12502 :         TAILQ_FOREACH_SAFE(xattr, xattrs, link, xattr_tmp) {
     338        1766 :                 TAILQ_REMOVE(xattrs, xattr, link);
     339        1766 :                 free(xattr->name);
     340        1766 :                 free(xattr->value);
     341        1766 :                 free(xattr);
     342             :         }
     343       10736 : }
     344             : 
     345             : static void
     346        1116 : blob_back_bs_dev_unref(struct spdk_blob *blob)
     347             : {
     348        1116 :         struct spdk_blob **le_prev = blob->back_bs_dev_link.le_prev;
     349        1116 :         struct spdk_blob *le_next = blob->back_bs_dev_link.le_next;
     350             : 
     351        1116 :         if (!le_next && !le_prev) {
     352             :                 /* If this is the last reference to the back_bs_dev, destroy it. */
     353        1112 :                 blob->back_bs_dev->destroy(blob->back_bs_dev);
     354             :         } else {
     355             :                 /* Remove the reference to back_bs_dev. This is a headless list, in
     356             :                  * which le_prev of first item is NULL. So, we remove the link manually
     357             :                  * instead of using LIST_REMOVE, which assumes le_prev isn't NULL.
     358             :                  */
     359           4 :                 if (le_prev) {
     360           0 :                         *le_prev = le_next;
     361             :                 }
     362             : 
     363           4 :                 if (le_next) {
     364           4 :                         le_next->back_bs_dev_link.le_prev = le_prev;
     365             :                 }
     366             :         }
     367             : 
     368        1116 :         blob->back_bs_dev = NULL;
     369        1116 : }
     370             : 
     371             : static void
     372        5368 : blob_free(struct spdk_blob *blob)
     373             : {
     374        5368 :         assert(blob != NULL);
     375        5368 :         assert(TAILQ_EMPTY(&blob->pending_persists));
     376        5368 :         assert(TAILQ_EMPTY(&blob->persists_to_complete));
     377             : 
     378        5368 :         free(blob->active.extent_pages);
     379        5368 :         free(blob->clean.extent_pages);
     380        5368 :         free(blob->active.clusters);
     381        5368 :         free(blob->clean.clusters);
     382        5368 :         free(blob->active.pages);
     383        5368 :         free(blob->clean.pages);
     384             : 
     385        5368 :         xattrs_free(&blob->xattrs);
     386        5368 :         xattrs_free(&blob->xattrs_internal);
     387             : 
     388        5368 :         if (blob->back_bs_dev) {
     389        1088 :                 blob_back_bs_dev_unref(blob);
     390             :         }
     391             : 
     392        5368 :         free(blob);
     393        5368 : }
     394             : 
     395             : static void
     396         328 : blob_back_bs_destroy_esnap_done(void *ctx, struct spdk_blob *blob, int bserrno)
     397             : {
     398         328 :         struct spdk_bs_dev      *bs_dev = ctx;
     399             : 
     400         328 :         if (bserrno != 0) {
     401             :                 /*
     402             :                  * This is probably due to a memory allocation failure when creating the
     403             :                  * blob_esnap_destroy_ctx before iterating threads.
     404             :                  */
     405           0 :                 SPDK_ERRLOG("blob 0x%" PRIx64 ": Unable to destroy bs dev channels: error %d\n",
     406             :                             blob->id, bserrno);
     407           0 :                 assert(false);
     408             :         }
     409             : 
     410         328 :         if (bs_dev == NULL) {
     411             :                 /*
     412             :                  * This check exists to make scanbuild happy.
     413             :                  *
     414             :                  * blob->back_bs_dev for an esnap is NULL during the first iteration of blobs while
     415             :                  * the blobstore is being loaded. It could also be NULL if there was an error
     416             :                  * opening the esnap device. In each of these cases, no channels could have been
     417             :                  * created because back_bs_dev->create_channel() would have led to a NULL pointer
     418             :                  * deref.
     419             :                  */
     420           0 :                 assert(false);
     421             :                 return;
     422             :         }
     423             : 
     424         328 :         SPDK_DEBUGLOG(blob_esnap, "blob 0x%" PRIx64 ": calling destroy on back_bs_dev\n", blob->id);
     425         328 :         bs_dev->destroy(bs_dev);
     426             : }
     427             : 
     428             : static void
     429         328 : blob_back_bs_destroy(struct spdk_blob *blob)
     430             : {
     431         328 :         SPDK_DEBUGLOG(blob_esnap, "blob 0x%" PRIx64 ": preparing to destroy back_bs_dev\n",
     432             :                       blob->id);
     433             : 
     434         328 :         blob_esnap_destroy_bs_dev_channels(blob, false, blob_back_bs_destroy_esnap_done,
     435         328 :                                            blob->back_bs_dev);
     436         328 :         blob->back_bs_dev = NULL;
     437         328 : }
     438             : 
     439             : struct blob_parent {
     440             :         union {
     441             :                 struct {
     442             :                         spdk_blob_id id;
     443             :                         struct spdk_blob *blob;
     444             :                 } snapshot;
     445             : 
     446             :                 struct {
     447             :                         void *id;
     448             :                         uint32_t id_len;
     449             :                         struct spdk_bs_dev *back_bs_dev;
     450             :                 } esnap;
     451             :         } u;
     452             : };
     453             : 
     454             : typedef int (*set_parent_refs_cb)(struct spdk_blob *blob, struct blob_parent *parent);
     455             : 
     456             : struct set_bs_dev_ctx {
     457             :         struct spdk_blob        *blob;
     458             :         struct spdk_bs_dev      *back_bs_dev;
     459             : 
     460             :         /*
     461             :          * This callback is used during a set parent operation to change the references
     462             :          * to the parent of the blob.
     463             :          */
     464             :         set_parent_refs_cb      parent_refs_cb_fn;
     465             :         struct blob_parent      *parent_refs_cb_arg;
     466             : 
     467             :         spdk_blob_op_complete   cb_fn;
     468             :         void                    *cb_arg;
     469             :         int                     bserrno;
     470             : };
     471             : 
     472             : static void
     473          28 : blob_set_back_bs_dev(struct spdk_blob *blob, struct spdk_bs_dev *back_bs_dev,
     474             :                      set_parent_refs_cb parent_refs_cb_fn, struct blob_parent *parent_refs_cb_arg,
     475             :                      spdk_blob_op_complete cb_fn, void *cb_arg)
     476             : {
     477             :         struct set_bs_dev_ctx   *ctx;
     478             : 
     479          28 :         ctx = calloc(1, sizeof(*ctx));
     480          28 :         if (ctx == NULL) {
     481           0 :                 SPDK_ERRLOG("blob 0x%" PRIx64 ": out of memory while setting back_bs_dev\n",
     482             :                             blob->id);
     483           0 :                 cb_fn(cb_arg, -ENOMEM);
     484           0 :                 return;
     485             :         }
     486             : 
     487          28 :         ctx->parent_refs_cb_fn = parent_refs_cb_fn;
     488          28 :         ctx->parent_refs_cb_arg = parent_refs_cb_arg;
     489          28 :         ctx->cb_fn = cb_fn;
     490          28 :         ctx->cb_arg = cb_arg;
     491          28 :         ctx->back_bs_dev = back_bs_dev;
     492          28 :         ctx->blob = blob;
     493             : 
     494          28 :         blob_freeze_io(blob, blob_set_back_bs_dev_frozen, ctx);
     495             : }
     496             : 
     497             : struct freeze_io_ctx {
     498             :         struct spdk_bs_cpl cpl;
     499             :         struct spdk_blob *blob;
     500             : };
     501             : 
     502             : static void
     503         530 : blob_io_sync(struct spdk_io_channel_iter *i)
     504             : {
     505         530 :         spdk_for_each_channel_continue(i, 0);
     506         530 : }
     507             : 
     508             : static void
     509         518 : blob_execute_queued_io(struct spdk_io_channel_iter *i)
     510             : {
     511         518 :         struct spdk_io_channel *_ch = spdk_io_channel_iter_get_channel(i);
     512         518 :         struct spdk_bs_channel *ch = spdk_io_channel_get_ctx(_ch);
     513         518 :         struct freeze_io_ctx *ctx = spdk_io_channel_iter_get_ctx(i);
     514             :         struct spdk_bs_request_set      *set;
     515             :         struct spdk_bs_user_op_args     *args;
     516             :         spdk_bs_user_op_t *op, *tmp;
     517             : 
     518         522 :         TAILQ_FOREACH_SAFE(op, &ch->queued_io, link, tmp) {
     519           4 :                 set = (struct spdk_bs_request_set *)op;
     520           4 :                 args = &set->u.user_op;
     521             : 
     522           4 :                 if (args->blob == ctx->blob) {
     523           4 :                         TAILQ_REMOVE(&ch->queued_io, op, link);
     524           4 :                         bs_user_op_execute(op);
     525             :                 }
     526             :         }
     527             : 
     528         518 :         spdk_for_each_channel_continue(i, 0);
     529         518 : }
     530             : 
     531             : static void
     532        1016 : blob_io_cpl(struct spdk_io_channel_iter *i, int status)
     533             : {
     534        1016 :         struct freeze_io_ctx *ctx = spdk_io_channel_iter_get_ctx(i);
     535             : 
     536        1016 :         ctx->cpl.u.blob_basic.cb_fn(ctx->cpl.u.blob_basic.cb_arg, 0);
     537             : 
     538        1016 :         free(ctx);
     539        1016 : }
     540             : 
     541             : static void
     542         514 : blob_freeze_io(struct spdk_blob *blob, spdk_blob_op_complete cb_fn, void *cb_arg)
     543             : {
     544             :         struct freeze_io_ctx *ctx;
     545             : 
     546         514 :         blob_verify_md_op(blob);
     547             : 
     548         514 :         ctx = calloc(1, sizeof(*ctx));
     549         514 :         if (!ctx) {
     550           0 :                 cb_fn(cb_arg, -ENOMEM);
     551           0 :                 return;
     552             :         }
     553             : 
     554         514 :         ctx->cpl.type = SPDK_BS_CPL_TYPE_BS_BASIC;
     555         514 :         ctx->cpl.u.blob_basic.cb_fn = cb_fn;
     556         514 :         ctx->cpl.u.blob_basic.cb_arg = cb_arg;
     557         514 :         ctx->blob = blob;
     558             : 
     559             :         /* Freeze I/O on blob */
     560         514 :         blob->frozen_refcnt++;
     561             : 
     562         514 :         spdk_for_each_channel(blob->bs, blob_io_sync, ctx, blob_io_cpl);
     563             : }
     564             : 
     565             : static void
     566         502 : blob_unfreeze_io(struct spdk_blob *blob, spdk_blob_op_complete cb_fn, void *cb_arg)
     567             : {
                               /* Drop one freeze reference on 'blob' and then visit every channel of
                                * blob->bs with blob_execute_queued_io.
                                *
                                * blob   - blob whose frozen_refcnt is decremented; it must currently be
                                *          frozen (asserted below).
                                * cb_fn  - completion callback; stored in the context together with
                                *          cb_arg, and called directly with -ENOMEM if the context cannot
                                *          be allocated.
                                * cb_arg - opaque argument handed back to cb_fn.
                                *
                                * NOTE(review): ctx is handed to spdk_for_each_channel and is not freed
                                * in this function; presumably blob_io_cpl releases it and invokes cb_fn
                                * - confirm against blob_io_cpl.
                                */
     568             :         struct freeze_io_ctx *ctx;
     569             : 
     570         502 :         blob_verify_md_op(blob);
     571             : 
     572         502 :         ctx = calloc(1, sizeof(*ctx));
     573         502 :         if (!ctx) {
                                       /* Nothing has been modified yet: frozen_refcnt is left untouched. */
     574           0 :                 cb_fn(cb_arg, -ENOMEM);
     575           0 :                 return;
     576             :         }
     577             : 
                               /* NOTE(review): the type tag is BS_BASIC while the blob_basic union
                                * member is filled in - confirm both share the same layout.
                                */
     578         502 :         ctx->cpl.type = SPDK_BS_CPL_TYPE_BS_BASIC;
     579         502 :         ctx->cpl.u.blob_basic.cb_fn = cb_fn;
     580         502 :         ctx->cpl.u.blob_basic.cb_arg = cb_arg;
     581         502 :         ctx->blob = blob;
     582             : 
     583         502 :         assert(blob->frozen_refcnt > 0);
     584             : 
                               /* Unfreeze I/O on blob: release the reference before walking the channels. */
     585         502 :         blob->frozen_refcnt--;
     586             : 
     587         502 :         spdk_for_each_channel(blob->bs, blob_execute_queued_io, ctx, blob_io_cpl);
     588             : }
     589             : 
     590             : static int
     591        8474 : blob_mark_clean(struct spdk_blob *blob)
     592             : {
                               /* Make the blob's current 'active' metadata arrays its 'clean' arrays.
                                *
                                * Fresh copies of active.extent_pages, active.clusters and active.pages
                                * are allocated first. Only when every allocation has succeeded are the
                                * old clean arrays freed, the existing active arrays moved into 'clean',
                                * and the copies installed as the new 'active' arrays.
                                *
                                * Returns 0 on success or -ENOMEM if a copy cannot be allocated; on
                                * -ENOMEM the blob is left unmodified.
                                */
     593        8474 :         uint32_t *extent_pages = NULL;
     594        8474 :         uint64_t *clusters = NULL;
     595        8474 :         uint32_t *pages = NULL;
     596             : 
     597        8474 :         assert(blob != NULL);
     598             : 
                               /* A count of zero leaves the corresponding copy pointer NULL. */
     599        8474 :         if (blob->active.num_extent_pages) {
     600        2859 :                 assert(blob->active.extent_pages);
     601        2859 :                 extent_pages = calloc(blob->active.num_extent_pages, sizeof(*blob->active.extent_pages));
     602        2859 :                 if (!extent_pages) {
     603           0 :                         return -ENOMEM;
     604             :                 }
     605        2859 :                 memcpy(extent_pages, blob->active.extent_pages,
     606        2859 :                        blob->active.num_extent_pages * sizeof(*extent_pages));
     607             :         }
     608             : 
     609        8474 :         if (blob->active.num_clusters) {
     610        5946 :                 assert(blob->active.clusters);
     611        5946 :                 clusters = calloc(blob->active.num_clusters, sizeof(*blob->active.clusters));
     612        5946 :                 if (!clusters) {
                                               /* Release the copy made so far; free(NULL) is harmless. */
     613           0 :                         free(extent_pages);
     614           0 :                         return -ENOMEM;
     615             :                 }
     616        5946 :                 memcpy(clusters, blob->active.clusters, blob->active.num_clusters * sizeof(*blob->active.clusters));
     617             :         }
     618             : 
     619        8474 :         if (blob->active.num_pages) {
     620        6986 :                 assert(blob->active.pages);
     621        6986 :                 pages = calloc(blob->active.num_pages, sizeof(*blob->active.pages));
     622        6986 :                 if (!pages) {
     623           0 :                         free(extent_pages);
     624           0 :                         free(clusters);
     625           0 :                         return -ENOMEM;
     626             :                 }
     627        6986 :                 memcpy(pages, blob->active.pages, blob->active.num_pages * sizeof(*blob->active.pages));
     628             :         }
     629             : 
                               /* Point of no return: all copies exist, so drop the previous clean arrays. */
     630        8474 :         free(blob->clean.extent_pages);
     631        8474 :         free(blob->clean.clusters);
     632        8474 :         free(blob->clean.pages);
     633             : 
                               /* 'clean' takes over the arrays that 'active' has been using. */
     634        8474 :         blob->clean.num_extent_pages = blob->active.num_extent_pages;
     635        8474 :         blob->clean.extent_pages = blob->active.extent_pages;
     636        8474 :         blob->clean.num_clusters = blob->active.num_clusters;
     637        8474 :         blob->clean.clusters = blob->active.clusters;
     638        8474 :         blob->clean.num_allocated_clusters = blob->active.num_allocated_clusters;
     639        8474 :         blob->clean.num_pages = blob->active.num_pages;
     640        8474 :         blob->clean.pages = blob->active.pages;
     641             : 
                               /* 'active' continues with the copies, each sized to exactly num_* elements.
                                * NOTE(review): active.cluster_array_size and
                                * active.extent_pages_array_size are not updated here - confirm that
                                * they cannot exceed the size of the new arrays.
                                */
     642        8474 :         blob->active.extent_pages = extent_pages;
     643        8474 :         blob->active.clusters = clusters;
     644        8474 :         blob->active.pages = pages;
     645             : 
     646             :         /* If the metadata was dirtied again while the metadata was being written to disk,
     647             :          *  we do not want to revert the DIRTY state back to CLEAN here.
                                *  Only a blob that is still in the LOADING state is moved to CLEAN;
                                *  every other state is left as it is.
     648             :          */
     649        8474 :         if (blob->state == SPDK_BLOB_STATE_LOADING) {
     650        3410 :                 blob->state = SPDK_BLOB_STATE_CLEAN;
     651             :         }
     652             : 
     653        8474 :         return 0;
     654             : }
     655             : 
     656             : static int
     657        1284 : blob_deserialize_xattr(struct spdk_blob *blob,
     658             :                        struct spdk_blob_md_descriptor_xattr *desc_xattr, bool internal)
     659             : {
                               /* Build an in-memory spdk_xattr from an xattr descriptor and append it
                                * to one of the blob's xattr lists.
                                *
                                * blob       - blob that receives the new xattr.
                                * desc_xattr - descriptor to decode; the value bytes are read from
                                *              directly behind the name bytes.
                                * internal   - true appends to blob->xattrs_internal, false to
                                *              blob->xattrs.
                                *
                                * Returns 0 on success, -EINVAL if the descriptor length does not match
                                * its name/value lengths, or -ENOMEM on allocation failure (nothing is
                                * linked into the blob in that case).
                                */
     660             :         struct spdk_xattr                       *xattr;
     661             : 
                               /* The payload must consist of exactly the two length fields followed by
                                * the name and the value.
                                */
     662        1284 :         if (desc_xattr->length != sizeof(desc_xattr->name_length) +
     663             :             sizeof(desc_xattr->value_length) +
     664        1284 :             desc_xattr->name_length + desc_xattr->value_length) {
     665           0 :                 return -EINVAL;
     666             :         }
     667             : 
     668        1284 :         xattr = calloc(1, sizeof(*xattr));
     669        1284 :         if (xattr == NULL) {
     670           0 :                 return -ENOMEM;
     671             :         }
     672             : 
                               /* One extra byte for the NUL terminator written below. */
     673        1284 :         xattr->name = malloc(desc_xattr->name_length + 1);
     674        1284 :         if (xattr->name == NULL) {
     675           0 :                 free(xattr);
     676           0 :                 return -ENOMEM;
     677             :         }
     678             : 
                               /* NOTE(review): when value_length is 0 this is malloc(0), which may
                                * legitimately return NULL and would then be reported as -ENOMEM -
                                * confirm whether empty values can occur.
                                */
     679        1284 :         xattr->value = malloc(desc_xattr->value_length);
     680        1284 :         if (xattr->value == NULL) {
     681           0 :                 free(xattr->name);
     682           0 :                 free(xattr);
     683           0 :                 return -ENOMEM;
     684             :         }
     685             : 
     686        1284 :         memcpy(xattr->name, desc_xattr->name, desc_xattr->name_length);
     687        1284 :         xattr->name[desc_xattr->name_length] = '\0';
     688        1284 :         xattr->value_len = desc_xattr->value_length;
                               /* The value starts name_length bytes after the start of the name. */
     689        1284 :         memcpy(xattr->value,
     690        1284 :                (void *)((uintptr_t)desc_xattr->name + desc_xattr->name_length),
     691        1284 :                desc_xattr->value_length);
     692             : 
     693        1284 :         TAILQ_INSERT_TAIL(internal ? &blob->xattrs_internal : &blob->xattrs, xattr, link);
     694             : 
     695        1284 :         return 0;
     696             : }
     697             : 
     698             : 
     699             : static int
     700        4588 : blob_parse_page(const struct spdk_blob_md_page *page, struct spdk_blob *blob)
     701             : {
                               /* Walk the descriptors stored in one metadata page and apply each of
                                * them to 'blob'.
                                *
                                * Descriptors are visited in order, each one located sizeof(*desc) +
                                * desc->length bytes after the previous one. The walk stops at a PADDING
                                * descriptor of length 0 or when no further descriptor header fits in
                                * page->descriptors.
                                *
                                * Handled types: FLAGS, EXTENT_RLE, EXTENT_TABLE, EXTENT_PAGE, XATTR and
                                * XATTR_INTERNAL. PADDING with a non-zero length and unrecognized types
                                * are skipped.
                                *
                                * Returns 0 on success, -EINVAL when a descriptor is malformed or
                                * inconsistent with the blob, -ENOMEM when an array cannot be grown, or
                                * the error returned by blob_deserialize_xattr. On error the blob may
                                * already have been partially updated by earlier descriptors.
                                */
     702             :         struct spdk_blob_md_descriptor *desc;
     703        4588 :         size_t  cur_desc = 0;
     704             :         void *tmp;
     705             : 
     706        4588 :         desc = (struct spdk_blob_md_descriptor *)page->descriptors;
     707       13476 :         while (cur_desc < sizeof(page->descriptors)) {
     708       13476 :                 if (desc->type == SPDK_MD_DESCRIPTOR_TYPE_PADDING) {
     709        4540 :                         if (desc->length == 0) {
     710             :                                 /* If padding and length are 0, this terminates the page */
     711        4540 :                                 break;
     712             :                         }
     713        8936 :                 } else if (desc->type == SPDK_MD_DESCRIPTOR_TYPE_FLAGS) {
     714             :                         struct spdk_blob_md_descriptor_flags    *desc_flags;
     715             : 
     716        3442 :                         desc_flags = (struct spdk_blob_md_descriptor_flags *)desc;
     717             : 
     718        3442 :                         if (desc_flags->length != sizeof(*desc_flags) - sizeof(*desc)) {
     719           0 :                                 return -EINVAL;
     720             :                         }
     721             : 
                                               /* Each "(flags | MASK) != MASK" test below is true when a bit
                                                * outside the corresponding mask is set.
                                                */
     722        3442 :                         if ((desc_flags->invalid_flags | SPDK_BLOB_INVALID_FLAGS_MASK) !=
     723             :                             SPDK_BLOB_INVALID_FLAGS_MASK) {
     724           8 :                                 return -EINVAL;
     725             :                         }
     726             : 
                                               /* Bits outside the data_ro mask make the blob read-only for
                                                * both data and metadata.
                                                */
     727        3434 :                         if ((desc_flags->data_ro_flags | SPDK_BLOB_DATA_RO_FLAGS_MASK) !=
     728             :                             SPDK_BLOB_DATA_RO_FLAGS_MASK) {
     729          12 :                                 blob->data_ro = true;
     730          12 :                                 blob->md_ro = true;
     731             :                         }
     732             : 
                                               /* Bits outside the md_ro mask only make the metadata read-only. */
     733        3434 :                         if ((desc_flags->md_ro_flags | SPDK_BLOB_MD_RO_FLAGS_MASK) !=
     734             :                             SPDK_BLOB_MD_RO_FLAGS_MASK) {
     735          12 :                                 blob->md_ro = true;
     736             :                         }
     737             : 
                                               /* An explicit READ_ONLY flag also sets both read-only markers. */
     738        3434 :                         if ((desc_flags->data_ro_flags & SPDK_BLOB_READ_ONLY)) {
     739         566 :                                 blob->data_ro = true;
     740         566 :                                 blob->md_ro = true;
     741             :                         }
     742             : 
     743        3434 :                         blob->invalid_flags = desc_flags->invalid_flags;
     744        3434 :                         blob->data_ro_flags = desc_flags->data_ro_flags;
     745        3434 :                         blob->md_ro_flags = desc_flags->md_ro_flags;
     746             : 
     747        5494 :                 } else if (desc->type == SPDK_MD_DESCRIPTOR_TYPE_EXTENT_RLE) {
     748             :                         struct spdk_blob_md_descriptor_extent_rle       *desc_extent_rle;
     749             :                         unsigned int                            i, j;
                                               /* Start from the clusters already parsed so the array is
                                                * sized for old plus new entries.
                                                */
     750        1396 :                         unsigned int                            cluster_count = blob->active.num_clusters;
     751             : 
     752        1396 :                         if (blob->extent_table_found) {
     753             :                                 /* Extent Table already present in the md,
     754             :                                  * both descriptors should never be at the same time. */
     755           0 :                                 return -EINVAL;
     756             :                         }
     757        1396 :                         blob->extent_rle_found = true;
     758             : 
     759        1396 :                         desc_extent_rle = (struct spdk_blob_md_descriptor_extent_rle *)desc;
     760             : 
                                               /* The payload must be a non-empty whole number of extent entries. */
     761        1396 :                         if (desc_extent_rle->length == 0 ||
     762        1396 :                             (desc_extent_rle->length % sizeof(desc_extent_rle->extents[0]) != 0)) {
     763           0 :                                 return -EINVAL;
     764             :                         }
     765             : 
                                               /* First pass: count the clusters described by the runs and
                                                * check that every cluster of a run with a non-zero
                                                * cluster_idx is marked allocated in bs->used_clusters.
                                                */
     766        2970 :                         for (i = 0; i < desc_extent_rle->length / sizeof(desc_extent_rle->extents[0]); i++) {
     767       21282 :                                 for (j = 0; j < desc_extent_rle->extents[i].length; j++) {
     768       19708 :                                         if (desc_extent_rle->extents[i].cluster_idx != 0) {
     769        6692 :                                                 if (!spdk_bit_pool_is_allocated(blob->bs->used_clusters,
     770        6692 :                                                                                 desc_extent_rle->extents[i].cluster_idx + j)) {
     771           0 :                                                         return -EINVAL;
     772             :                                                 }
     773             :                                         }
     774       19708 :                                         cluster_count++;
     775             :                                 }
     776             :                         }
     777             : 
     778        1396 :                         if (cluster_count == 0) {
     779           0 :                                 return -EINVAL;
     780             :                         }
     781        1396 :                         tmp = realloc(blob->active.clusters, cluster_count * sizeof(*blob->active.clusters));
     782        1396 :                         if (tmp == NULL) {
     783           0 :                                 return -ENOMEM;
     784             :                         }
     785        1396 :                         blob->active.clusters = tmp;
     786        1396 :                         blob->active.cluster_array_size = cluster_count;
     787             : 
                                               /* Second pass: append one entry per cluster. A non-zero
                                                * cluster_idx is stored as the value of bs_cluster_to_lba();
                                                * a zero cluster_idx is stored as 0 and is only accepted
                                                * for thin provisioned blobs.
                                                */
     788        2970 :                         for (i = 0; i < desc_extent_rle->length / sizeof(desc_extent_rle->extents[0]); i++) {
     789       21282 :                                 for (j = 0; j < desc_extent_rle->extents[i].length; j++) {
     790       19708 :                                         if (desc_extent_rle->extents[i].cluster_idx != 0) {
     791       13384 :                                                 blob->active.clusters[blob->active.num_clusters++] = bs_cluster_to_lba(blob->bs,
     792        6692 :                                                                 desc_extent_rle->extents[i].cluster_idx + j);
     793        6692 :                                                 blob->active.num_allocated_clusters++;
     794       13016 :                                         } else if (spdk_blob_is_thin_provisioned(blob)) {
     795       13016 :                                                 blob->active.clusters[blob->active.num_clusters++] = 0;
     796             :                                         } else {
     797           0 :                                                 return -EINVAL;
     798             :                                         }
     799             :                                 }
     800             :                         }
     801        4098 :                 } else if (desc->type == SPDK_MD_DESCRIPTOR_TYPE_EXTENT_TABLE) {
     802             :                         struct spdk_blob_md_descriptor_extent_table *desc_extent_table;
     803        1768 :                         uint32_t num_extent_pages = blob->active.num_extent_pages;
     804             :                         uint32_t i, j;
     805             :                         size_t extent_pages_length;
     806             : 
     807        1768 :                         desc_extent_table = (struct spdk_blob_md_descriptor_extent_table *)desc;
                                               /* NOTE(review): this subtraction runs before 'length' is
                                                * validated; for a length smaller than sizeof(num_clusters)
                                                * it presumably wraps, and only length == 0 is rejected
                                                * explicitly below - confirm the modulo test covers the
                                                * remaining cases.
                                                */
     808        1768 :                         extent_pages_length = desc_extent_table->length - sizeof(desc_extent_table->num_clusters);
     809             : 
     810        1768 :                         if (blob->extent_rle_found) {
     811             :                                 /* This means that Extent RLE is present in MD,
     812             :                                  * both should never be at the same time. */
     813           0 :                                 return -EINVAL;
     814        1768 :                         } else if (blob->extent_table_found &&
     815           0 :                                    desc_extent_table->num_clusters != blob->remaining_clusters_in_et) {
     816             :                                 /* Number of clusters in this ET does not match number
     817             :                                  * from previously read EXTENT_TABLE. */
     818           0 :                                 return -EINVAL;
     819             :                         }
     820             : 
     821        1768 :                         if (desc_extent_table->length == 0 ||
     822        1768 :                             (extent_pages_length % sizeof(desc_extent_table->extent_page[0]) != 0)) {
     823           0 :                                 return -EINVAL;
     824             :                         }
     825             : 
     826        1768 :                         blob->extent_table_found = true;
     827             : 
                                               /* Total up the extent pages described by all entries, on top
                                                * of those already recorded in the blob.
                                                */
     828        3246 :                         for (i = 0; i < extent_pages_length / sizeof(desc_extent_table->extent_page[0]); i++) {
     829        1478 :                                 num_extent_pages += desc_extent_table->extent_page[i].num_pages;
     830             :                         }
     831             : 
     832        1768 :                         if (num_extent_pages > 0) {
     833        1462 :                                 tmp = realloc(blob->active.extent_pages, num_extent_pages * sizeof(uint32_t));
     834        1462 :                                 if (tmp == NULL) {
     835           0 :                                         return -ENOMEM;
     836             :                                 }
     837        1462 :                                 blob->active.extent_pages = tmp;
     838             :                         }
     839        1768 :                         blob->active.extent_pages_array_size = num_extent_pages;
     840             : 
     841        1768 :                         blob->remaining_clusters_in_et = desc_extent_table->num_clusters;
     842             : 
     843             :                         /* Extent table entries contain md page numbers for extent pages.
     844             :                          * Zeroes represent unallocated extent pages, those are run-length-encoded.
     845             :                          */
     846        3246 :                         for (i = 0; i < extent_pages_length / sizeof(desc_extent_table->extent_page[0]); i++) {
     847        1478 :                                 if (desc_extent_table->extent_page[i].page_idx != 0) {
                                                               /* NOTE(review): num_pages == 1 is only asserted for
                                                                * allocated entries, while the sizing loop above
                                                                * added num_pages for this entry - confirm other
                                                                * values cannot reach this point.
                                                                */
     848        1052 :                                         assert(desc_extent_table->extent_page[i].num_pages == 1);
     849        1052 :                                         blob->active.extent_pages[blob->active.num_extent_pages++] =
     850        1052 :                                                 desc_extent_table->extent_page[i].page_idx;
     851         426 :                                 } else if (spdk_blob_is_thin_provisioned(blob)) {
     852         852 :                                         for (j = 0; j < desc_extent_table->extent_page[i].num_pages; j++) {
     853         426 :                                                 blob->active.extent_pages[blob->active.num_extent_pages++] = 0;
     854             :                                         }
     855             :                                 } else {
     856           0 :                                         return -EINVAL;
     857             :                                 }
     858             :                         }
     859        2330 :                 } else if (desc->type == SPDK_MD_DESCRIPTOR_TYPE_EXTENT_PAGE) {
     860             :                         struct spdk_blob_md_descriptor_extent_page      *desc_extent;
     861             :                         unsigned int                                    i;
     862        1046 :                         unsigned int                                    cluster_count = 0;
     863             :                         size_t                                          cluster_idx_length;
     864             : 
     865        1046 :                         if (blob->extent_rle_found) {
     866             :                                 /* This means that Extent RLE is present in MD,
     867             :                                  * both should never be at the same time. */
     868           0 :                                 return -EINVAL;
     869             :                         }
     870             : 
     871        1046 :                         desc_extent = (struct spdk_blob_md_descriptor_extent_page *)desc;
     872        1046 :                         cluster_idx_length = desc_extent->length - sizeof(desc_extent->start_cluster_idx);
     873             : 
                                               /* The payload must hold start_cluster_idx plus a non-empty
                                                * whole number of cluster_idx entries.
                                                */
     874        1046 :                         if (desc_extent->length <= sizeof(desc_extent->start_cluster_idx) ||
     875        1046 :                             (cluster_idx_length % sizeof(desc_extent->cluster_idx[0]) != 0)) {
     876           0 :                                 return -EINVAL;
     877             :                         }
     878             : 
                                               /* First pass: count the entries and check that every non-zero
                                                * cluster index is marked allocated in bs->used_clusters.
                                                */
     879       16344 :                         for (i = 0; i < cluster_idx_length / sizeof(desc_extent->cluster_idx[0]); i++) {
     880       15298 :                                 if (desc_extent->cluster_idx[i] != 0) {
     881        6962 :                                         if (!spdk_bit_pool_is_allocated(blob->bs->used_clusters, desc_extent->cluster_idx[i])) {
     882           0 :                                                 return -EINVAL;
     883             :                                         }
     884             :                                 }
     885       15298 :                                 cluster_count++;
     886             :                         }
     887             : 
     888        1046 :                         if (cluster_count == 0) {
     889           0 :                                 return -EINVAL;
     890             :                         }
     891             : 
     892             :                         /* When reading extent pages sequentially starting cluster idx should match
     893             :                          * current size of a blob.
     894             :                          * If changed to batch reading, this check shall be removed. */
     895        1046 :                         if (desc_extent->start_cluster_idx != blob->active.num_clusters) {
     896           0 :                                 return -EINVAL;
     897             :                         }
     898             : 
     899        1046 :                         tmp = realloc(blob->active.clusters,
     900        1046 :                                       (cluster_count + blob->active.num_clusters) * sizeof(*blob->active.clusters));
     901        1046 :                         if (tmp == NULL) {
     902           0 :                                 return -ENOMEM;
     903             :                         }
     904        1046 :                         blob->active.clusters = tmp;
     905        1046 :                         blob->active.cluster_array_size = (cluster_count + blob->active.num_clusters);
     906             : 
                                               /* Second pass: append one entry per cluster index; zero
                                                * entries are only accepted for thin provisioned blobs.
                                                */
     907       16344 :                         for (i = 0; i < cluster_idx_length / sizeof(desc_extent->cluster_idx[0]); i++) {
     908       15298 :                                 if (desc_extent->cluster_idx[i] != 0) {
     909        6962 :                                         blob->active.clusters[blob->active.num_clusters++] = bs_cluster_to_lba(blob->bs,
     910             :                                                         desc_extent->cluster_idx[i]);
     911        6962 :                                         blob->active.num_allocated_clusters++;
     912        8336 :                                 } else if (spdk_blob_is_thin_provisioned(blob)) {
     913        8336 :                                         blob->active.clusters[blob->active.num_clusters++] = 0;
     914             :                                 } else {
     915           0 :                                         return -EINVAL;
     916             :                                 }
     917             :                         }
     918        1046 :                         assert(desc_extent->start_cluster_idx + cluster_count == blob->active.num_clusters);
                                               /* NOTE(review): only an assert protects the subtraction below
                                                * from going below zero - confirm the extent table count is
                                                * always at least cluster_count here.
                                                */
     919        1046 :                         assert(blob->remaining_clusters_in_et >= cluster_count);
     920        1046 :                         blob->remaining_clusters_in_et -= cluster_count;
     921        1284 :                 } else if (desc->type == SPDK_MD_DESCRIPTOR_TYPE_XATTR) {
     922             :                         int rc;
     923             : 
     924         394 :                         rc = blob_deserialize_xattr(blob,
     925             :                                                     (struct spdk_blob_md_descriptor_xattr *) desc, false);
     926         394 :                         if (rc != 0) {
     927           0 :                                 return rc;
     928             :                         }
     929         890 :                 } else if (desc->type == SPDK_MD_DESCRIPTOR_TYPE_XATTR_INTERNAL) {
     930             :                         int rc;
     931             : 
     932         890 :                         rc = blob_deserialize_xattr(blob,
     933             :                                                     (struct spdk_blob_md_descriptor_xattr *) desc, true);
     934         890 :                         if (rc != 0) {
     935           0 :                                 return rc;
     936             :                         }
     937             :                 } else {
     938             :                         /* Unrecognized descriptor type.  Do not fail - just continue to the
     939             :                          *  next descriptor.  If this descriptor is associated with some feature
     940             :                          *  defined in a newer version of blobstore, that version of blobstore
     941             :                          *  should create and set an associated feature flag to specify if this
     942             :                          *  blob can be loaded or not.
     943             :                          */
     944             :                 }
     945             : 
     946             :                 /* Advance to the next descriptor */
                                       /* NOTE(review): desc->length is read from the page and nothing in
                                        * this function checks that the current descriptor's payload lies
                                        * inside page->descriptors before the branches above read it; only
                                        * the position of the next header is checked - confirm the page is
                                        * validated before it is parsed.
                                        */
     947        8928 :                 cur_desc += sizeof(*desc) + desc->length;
     948        8928 :                 if (cur_desc + sizeof(*desc) > sizeof(page->descriptors)) {
     949          40 :                         break;
     950             :                 }
     951        8888 :                 desc = (struct spdk_blob_md_descriptor *)((uintptr_t)page->descriptors + cur_desc);
     952             :         }
     953             : 
     954        4580 :         return 0;
     955             : }
     956             : 
     957             : static bool bs_load_cur_extent_page_valid(struct spdk_blob_md_page *page);
     958             : 
     959             : static int
     960        1046 : blob_parse_extent_page(struct spdk_blob_md_page *extent_page, struct spdk_blob *blob)
     961             : {
                               /* Parse a single extent page into 'blob', which must be in the LOADING
                                * state.
                                *
                                * The page is first checked with bs_load_cur_extent_page_valid() (only
                                * declared above; defined elsewhere in the file). Returns -ENOENT if
                                * that check fails, otherwise the result of blob_parse_page().
                                */
     962        1046 :         assert(blob != NULL);
     963        1046 :         assert(blob->state == SPDK_BLOB_STATE_LOADING);
     964             : 
     965        1046 :         if (bs_load_cur_extent_page_valid(extent_page) == false) {
     966           0 :                 return -ENOENT;
     967             :         }
     968             : 
     969        1046 :         return blob_parse_page(extent_page, blob);
     970             : }
     971             : 
     972             : static int
     973        3446 : blob_parse(const struct spdk_blob_md_page *pages, uint32_t page_count,
     974             :            struct spdk_blob *blob)
     975             : {
                               /* Populate 'blob' from its metadata pages.
                                *
                                * pages      - array of page_count metadata pages; pages[0] must carry
                                *              sequence number 0.
                                * page_count - number of entries in 'pages'; must be greater than 0.
                                * blob       - blob in the LOADING state with no cluster array yet.
                                *
                                * blob->active.pages is resized to page_count entries and filled in,
                                * then every page is handed to blob_parse_page() in order.
                                *
                                * Returns 0 on success, -ENOENT if pages[0].id differs from blob->id,
                                * -ENOMEM if the page array cannot be resized, or the first non-zero
                                * result of blob_parse_page().
                                */
     976             :         const struct spdk_blob_md_page *page;
     977             :         uint32_t i;
     978             :         int rc;
     979             :         void *tmp;
     980             : 
     981        3446 :         assert(page_count > 0);
     982        3446 :         assert(pages[0].sequence_num == 0);
     983        3446 :         assert(blob != NULL);
     984        3446 :         assert(blob->state == SPDK_BLOB_STATE_LOADING);
     985        3446 :         assert(blob->active.clusters == NULL);
     986             : 
     987             :         /* The blobid provided doesn't match what's in the MD, this can
     988             :          * happen for example if a bogus blobid is passed in through open.
     989             :          */
     990        3446 :         if (blob->id != pages[0].id) {
     991           4 :                 SPDK_ERRLOG("Blobid (0x%" PRIx64 ") doesn't match what's in metadata "
     992             :                             "(0x%" PRIx64 ")\n", blob->id, pages[0].id);
     993           4 :                 return -ENOENT;
     994             :         }
     995             : 
     996        3442 :         tmp = realloc(blob->active.pages, page_count * sizeof(*blob->active.pages));
     997        3442 :         if (!tmp) {
     998           0 :                 return -ENOMEM;
     999             :         }
    1000        3442 :         blob->active.pages = tmp;
    1001             : 
                               /* Entry 0 is taken from the id of the first page.
                                * NOTE(review): presumably the id is narrowed to the element type of
                                * active.pages to give the page index - confirm.
                                */
    1002        3442 :         blob->active.pages[0] = pages[0].id;
    1003             : 
                               /* Every later entry is the 'next' link stored in the preceding page.
                                * NOTE(review): the link is only checked against used_md_pages by an
                                * assert.
                                */
    1004        3542 :         for (i = 1; i < page_count; i++) {
    1005         100 :                 assert(spdk_bit_array_get(blob->bs->used_md_pages, pages[i - 1].next));
    1006         100 :                 blob->active.pages[i] = pages[i - 1].next;
    1007             :         }
    1008        3442 :         blob->active.num_pages = page_count;
    1009             : 
    1010        6976 :         for (i = 0; i < page_count; i++) {
    1011        3542 :                 page = &pages[i];
    1012             : 
                                       /* Each page is expected to belong to this blob and to be in
                                        * sequence; both are checked by assert only.
                                        */
    1013        3542 :                 assert(page->id == blob->id);
    1014        3542 :                 assert(page->sequence_num == i);
    1015             : 
    1016        3542 :                 rc = blob_parse_page(page, blob);
    1017        3542 :                 if (rc != 0) {
    1018           8 :                         return rc;
    1019             :                 }
    1020             :         }
    1021             : 
    1022        3434 :         return 0;
    1023             : }
    1024             : 
    1025             : static int
    1026        4370 : blob_serialize_add_page(const struct spdk_blob *blob,
    1027             :                         struct spdk_blob_md_page **pages,
    1028             :                         uint32_t *page_count,
    1029             :                         struct spdk_blob_md_page **last_page)
    1030             : {
                               /* Append one zero-initialized metadata page to the array being built.
                                *
                                * blob       - supplies the id written into the new page.
                                * pages      - in/out: the page array; must be NULL when *page_count is
                                *              0. It is allocated with spdk_malloc() on the first call
                                *              and grown with spdk_realloc() on later calls.
                                * page_count - in/out: number of pages in *pages; incremented on success.
                                * last_page  - out: points at the newly added page on success and is set
                                *              to NULL on failure.
                                *
                                * Returns 0 on success or -ENOMEM if the array cannot be allocated or
                                * grown; on -ENOMEM *pages and *page_count are left unchanged.
                                */
    1031             :         struct spdk_blob_md_page *page, *tmp_pages;
    1032             : 
    1033        4370 :         assert(pages != NULL);
    1034        4370 :         assert(page_count != NULL);
    1035             : 
    1036        4370 :         *last_page = NULL;
    1037        4370 :         if (*page_count == 0) {
    1038        4282 :                 assert(*pages == NULL);
    1039        4282 :                 *pages = spdk_malloc(SPDK_BS_PAGE_SIZE, 0,
    1040             :                                      NULL, SPDK_ENV_SOCKET_ID_ANY, SPDK_MALLOC_DMA);
    1041        4282 :                 if (*pages == NULL) {
    1042           0 :                         return -ENOMEM;
    1043             :                 }
    1044        4282 :                 *page_count = 1;
    1045             :         } else {
    1046          88 :                 assert(*pages != NULL);
                                       /* Grow through a temporary so *pages stays valid if this fails. */
    1047          88 :                 tmp_pages = spdk_realloc(*pages, SPDK_BS_PAGE_SIZE * (*page_count + 1), 0);
    1048          88 :                 if (tmp_pages == NULL) {
    1049           0 :                         return -ENOMEM;
    1050             :                 }
    1051          88 :                 (*page_count)++;
    1052          88 :                 *pages = tmp_pages;
    1053             :         }
    1054             : 
                               /* Initialize the new last page: it belongs to this blob, its sequence
                                * number is its index in the array, and it has no successor yet.
                                */
    1055        4370 :         page = &(*pages)[*page_count - 1];
    1056        4370 :         memset(page, 0, sizeof(*page));
    1057        4370 :         page->id = blob->id;
    1058        4370 :         page->sequence_num = *page_count - 1;
    1059        4370 :         page->next = SPDK_INVALID_MD_PAGE;
    1060        4370 :         *last_page = page;
    1061             : 
    1062        4370 :         return 0;
    1063             : }
    1064             : 
    1065             : /* Transform the in-memory representation 'xattr' into an on-disk xattr descriptor.
    1066             :  * Update required_sz on both success and failure.
    1067             :  *
                        * The descriptor is written at 'buf': the name bytes (without a NUL
                        * terminator) are followed immediately by the value bytes.
                        *
                        * \param xattr        Attribute to serialize.
                        * \param buf          Destination buffer.
                        * \param buf_sz       Bytes available at 'buf'.
                        * \param required_sz  Out: sizeof(struct spdk_blob_md_descriptor_xattr) plus
                        *                     the name length plus the value length.
                        * \param internal     Selects the XATTR_INTERNAL descriptor type instead of XATTR.
                        *
                        * \return 0 on success, -1 if buf_sz is smaller than *required_sz (nothing
                        * is written in that case).
    1068             :  */
    1069             : static int
    1070        1795 : blob_serialize_xattr(const struct spdk_xattr *xattr,
    1071             :                      uint8_t *buf, size_t buf_sz,
    1072             :                      size_t *required_sz, bool internal)
    1073             : {
    1074             :         struct spdk_blob_md_descriptor_xattr    *desc;
    1075             : 
    1076        1795 :         *required_sz = sizeof(struct spdk_blob_md_descriptor_xattr) +
    1077        1795 :                        strlen(xattr->name) +
    1078        1795 :                        xattr->value_len;
    1079             : 
    1080        1795 :         if (buf_sz < *required_sz) {
    1081          48 :                 return -1;
    1082             :         }
    1083             : 
    1084        1747 :         desc = (struct spdk_blob_md_descriptor_xattr *)buf;
    1085             : 
    1086        1747 :         desc->type = internal ? SPDK_MD_DESCRIPTOR_TYPE_XATTR_INTERNAL : SPDK_MD_DESCRIPTOR_TYPE_XATTR;
                       /* 'length' counts the two length fields plus the name and value bytes. */
    1087        1747 :         desc->length = sizeof(desc->name_length) +
    1088             :                        sizeof(desc->value_length) +
    1089        1747 :                        strlen(xattr->name) +
    1090        1747 :                        xattr->value_len;
    1091        1747 :         desc->name_length = strlen(xattr->name);
    1092        1747 :         desc->value_length = xattr->value_len;
    1093             : 
    1094        1747 :         memcpy(desc->name, xattr->name, desc->name_length);
                       /* The value is stored directly after the name. */
    1095        1747 :         memcpy((void *)((uintptr_t)desc->name + desc->name_length),
    1096        1747 :                xattr->value,
    1097        1747 :                desc->value_length);
    1098             : 
    1099        1747 :         return 0;
    1100             : }
    1101             : 
                       /*
                        * Write one EXTENT_TABLE descriptor at *buf, covering the blob's extent
                        * pages starting at index start_ep.
                        *
                        * A run of consecutive extent page entries equal to 0 is collapsed into a
                        * single table entry whose num_pages is the length of the run; every other
                        * entry is written with num_pages == 1.
                        *
                        * \param blob          Blob being serialized.
                        * \param start_ep      Index of the first extent page to serialize.
                        * \param next_ep       Out: index of the first extent page NOT serialized.
                        *                      Equals start_ep if the descriptor did not fit at all.
                        * \param buf           In/out: write position, advanced past the descriptor.
                        * \param remaining_sz  In/out: bytes left at *buf, reduced by the bytes written.
                        */
    1102             : static void
    1103        1695 : blob_serialize_extent_table_entry(const struct spdk_blob *blob,
    1104             :                                   uint64_t start_ep, uint64_t *next_ep,
    1105             :                                   uint8_t **buf, size_t *remaining_sz)
    1106             : {
    1107             :         struct spdk_blob_md_descriptor_extent_table *desc;
    1108             :         size_t cur_sz;
    1109             :         uint64_t i, et_idx;
    1110             :         uint32_t extent_page, ep_len;
    1111             : 
    1112             :         /* The buffer must have room for at least num_clusters entry */
    1113        1695 :         cur_sz = sizeof(struct spdk_blob_md_descriptor) + sizeof(desc->num_clusters);
    1114        1695 :         if (*remaining_sz < cur_sz) {
    1115          20 :                 *next_ep = start_ep;
    1116          20 :                 return;
    1117             :         }
    1118             : 
    1119        1675 :         desc = (struct spdk_blob_md_descriptor_extent_table *)*buf;
    1120        1675 :         desc->type = SPDK_MD_DESCRIPTOR_TYPE_EXTENT_TABLE;
    1121             : 
    1122        1675 :         desc->num_clusters = blob->active.num_clusters;
    1123             : 
                       /* ep_len is the length of the run currently being accumulated;
                        * et_idx is the number of table entries written so far. */
    1124        1675 :         ep_len = 1;
    1125        1675 :         et_idx = 0;
    1126        4256 :         for (i = start_ep; i < blob->active.num_extent_pages; i++) {
    1127        2581 :                 if (*remaining_sz < cur_sz  + sizeof(desc->extent_page[0])) {
    1128             :                         /* If we ran out of buffer space, return */
    1129           0 :                         break;
    1130             :                 }
    1131             : 
    1132        2581 :                 extent_page = blob->active.extent_pages[i];
    1133             :                 /* Verify that next extent_page is unallocated */
    1134        2581 :                 if (extent_page == 0 &&
    1135        1528 :                     (i + 1 < blob->active.num_extent_pages && blob->active.extent_pages[i + 1] == 0)) {
    1136        1078 :                         ep_len++;
    1137        1078 :                         continue;
    1138             :                 }
    1139        1503 :                 desc->extent_page[et_idx].page_idx = extent_page;
    1140        1503 :                 desc->extent_page[et_idx].num_pages = ep_len;
    1141        1503 :                 et_idx++;
    1142             : 
    1143        1503 :                 ep_len = 1;
    1144        1503 :                 cur_sz += sizeof(desc->extent_page[et_idx]);
    1145             :         }
    1146        1675 :         *next_ep = i;
    1147             : 
                       /* 'length' covers num_clusters plus the entries actually written. */
    1148        1675 :         desc->length = sizeof(desc->num_clusters) + sizeof(desc->extent_page[0]) * et_idx;
    1149        1675 :         *remaining_sz -= sizeof(struct spdk_blob_md_descriptor) + desc->length;
    1150        1675 :         *buf += sizeof(struct spdk_blob_md_descriptor) + desc->length;
    1151             : }
    1152             : 
                       /*
                        * Serialize the blob's whole extent table, adding metadata pages as needed.
                        *
                        * Calls blob_serialize_extent_table_entry() repeatedly; whenever a call
                        * stops before the last extent page, a new page is appended and writing
                        * continues at the start of that page's descriptor area.
                        *
                        * \param blob          Blob being serialized.
                        * \param pages         In/out: serialized page array.
                        * \param cur_page      Current page; passed by value, so the caller's copy
                        *                      is not updated when pages are added here.
                        * \param page_count    In/out: number of pages in *pages.
                        * \param buf           In/out: write position.
                        * \param remaining_sz  In/out: bytes left in the current page.
                        *
                        * \return 0 on success, or the error from blob_serialize_add_page().
                        * NOTE(review): on that error *pages is not freed here, unlike
                        * blob_serialize_xattrs() - confirm the caller releases it.
                        */
    1153             : static int
    1154        1677 : blob_serialize_extent_table(const struct spdk_blob *blob,
    1155             :                             struct spdk_blob_md_page **pages,
    1156             :                             struct spdk_blob_md_page *cur_page,
    1157             :                             uint32_t *page_count, uint8_t **buf,
    1158             :                             size_t *remaining_sz)
    1159             : {
    1160        1677 :         uint64_t                                last_extent_page;
    1161             :         int                                     rc;
    1162             : 
    1163        1677 :         last_extent_page = 0;
    1164             :         /* At least single extent table entry has to be always persisted.
    1165             :          * Such case occurs with num_extent_pages == 0. */
    1166        1695 :         while (last_extent_page <= blob->active.num_extent_pages) {
    1167        1695 :                 blob_serialize_extent_table_entry(blob, last_extent_page, &last_extent_page, buf,
    1168             :                                                   remaining_sz);
    1169             : 
    1170        1695 :                 if (last_extent_page == blob->active.num_extent_pages) {
    1171        1677 :                         break;
    1172             :                 }
    1173             : 
                               /* Not everything fit: continue in a fresh page. */
    1174          18 :                 rc = blob_serialize_add_page(blob, pages, page_count, &cur_page);
    1175          18 :                 if (rc < 0) {
    1176           0 :                         return rc;
    1177             :                 }
    1178             : 
    1179          18 :                 *buf = (uint8_t *)cur_page->descriptors;
    1180          18 :                 *remaining_sz = sizeof(cur_page->descriptors);
    1181             :         }
    1182             : 
    1183        1677 :         return 0;
    1184             : }
    1185             : 
                       /*
                        * Write one EXTENT_RLE descriptor at *buf, run-length encoding the blob's
                        * clusters starting at start_cluster.
                        *
                        * Consecutive clusters are merged into one extent when their LBAs are
                        * contiguous and non-zero, or when they are all zero (unallocated).  Each
                        * extent stores its first cluster index (lba / lba_per_cluster) and its
                        * length in clusters.
                        *
                        * start_cluster is read unconditionally, so it must be a valid index; the
                        * caller in this file only calls while it is below num_clusters.
                        *
                        * \param blob           Blob being serialized.
                        * \param start_cluster  Index of the first cluster to encode.
                        * \param next_cluster   Out: first cluster NOT encoded.  Equals start_cluster
                        *                       if not even one extent fits, and num_clusters when
                        *                       everything was encoded.
                        * \param buf            In/out: write position, advanced past the descriptor.
                        * \param buf_sz         In/out: bytes left at *buf, reduced by the bytes written.
                        */
    1186             : static void
    1187        1747 : blob_serialize_extent_rle(const struct spdk_blob *blob,
    1188             :                           uint64_t start_cluster, uint64_t *next_cluster,
    1189             :                           uint8_t **buf, size_t *buf_sz)
    1190             : {
    1191             :         struct spdk_blob_md_descriptor_extent_rle *desc_extent_rle;
    1192             :         size_t cur_sz;
    1193             :         uint64_t i, extent_idx;
    1194             :         uint64_t lba, lba_per_cluster, lba_count;
    1195             : 
    1196             :         /* The buffer must have room for at least one extent */
    1197        1747 :         cur_sz = sizeof(struct spdk_blob_md_descriptor) + sizeof(desc_extent_rle->extents[0]);
    1198        1747 :         if (*buf_sz < cur_sz) {
    1199          18 :                 *next_cluster = start_cluster;
    1200          18 :                 return;
    1201             :         }
    1202             : 
    1203        1729 :         desc_extent_rle = (struct spdk_blob_md_descriptor_extent_rle *)*buf;
    1204        1729 :         desc_extent_rle->type = SPDK_MD_DESCRIPTOR_TYPE_EXTENT_RLE;
    1205             : 
    1206        1729 :         lba_per_cluster = bs_cluster_to_lba(blob->bs, 1);
    1207             :         /* Assert for scan-build false positive */
    1208        1729 :         assert(lba_per_cluster > 0);
    1209             : 
                       /* 'lba' is the start of the run being accumulated and 'lba_count' its
                        * length in LBAs. */
    1210        1729 :         lba = blob->active.clusters[start_cluster];
    1211        1729 :         lba_count = lba_per_cluster;
    1212        1729 :         extent_idx = 0;
    1213      810550 :         for (i = start_cluster + 1; i < blob->active.num_clusters; i++) {
    1214      808825 :                 if ((lba + lba_count) == blob->active.clusters[i] && lba != 0) {
    1215             :                         /* Run-length encode sequential non-zero LBA */
    1216        7276 :                         lba_count += lba_per_cluster;
    1217        7276 :                         continue;
    1218      801549 :                 } else if (lba == 0 && blob->active.clusters[i] == 0) {
    1219             :                         /* Run-length encode unallocated clusters */
    1220      800356 :                         lba_count += lba_per_cluster;
    1221      800356 :                         continue;
    1222             :                 }
                               /* Cluster i does not extend the current run: emit the run. */
    1223        1193 :                 desc_extent_rle->extents[extent_idx].cluster_idx = lba / lba_per_cluster;
    1224        1193 :                 desc_extent_rle->extents[extent_idx].length = lba_count / lba_per_cluster;
    1225        1193 :                 extent_idx++;
    1226             : 
                               /* Reserve room for the next extent before starting a new run. */
    1227        1193 :                 cur_sz += sizeof(desc_extent_rle->extents[extent_idx]);
    1228             : 
    1229        1193 :                 if (*buf_sz < cur_sz) {
    1230             :                         /* If we ran out of buffer space, return */
    1231           4 :                         *next_cluster = i;
    1232           4 :                         break;
    1233             :                 }
    1234             : 
    1235        1189 :                 lba = blob->active.clusters[i];
    1236        1189 :                 lba_count = lba_per_cluster;
    1237             :         }
    1238             : 
                       /* If space did not run out, the final run is still pending: emit it. */
    1239        1729 :         if (*buf_sz >= cur_sz) {
    1240        1725 :                 desc_extent_rle->extents[extent_idx].cluster_idx = lba / lba_per_cluster;
    1241        1725 :                 desc_extent_rle->extents[extent_idx].length = lba_count / lba_per_cluster;
    1242        1725 :                 extent_idx++;
    1243             : 
    1244        1725 :                 *next_cluster = blob->active.num_clusters;
    1245             :         }
    1246             : 
    1247        1729 :         desc_extent_rle->length = sizeof(desc_extent_rle->extents[0]) * extent_idx;
    1248        1729 :         *buf_sz -= sizeof(struct spdk_blob_md_descriptor) + desc_extent_rle->length;
    1249        1729 :         *buf += sizeof(struct spdk_blob_md_descriptor) + desc_extent_rle->length;
    1250             : }
    1251             : 
                       /*
                        * Serialize all of the blob's clusters as EXTENT_RLE descriptors, adding
                        * metadata pages as needed.
                        *
                        * Calls blob_serialize_extent_rle() until every cluster is encoded; each
                        * time a call stops early, a new page is appended and writing continues at
                        * the start of that page's descriptor area.  Nothing is written when the
                        * blob has no clusters.
                        *
                        * \param blob          Blob being serialized.
                        * \param pages         In/out: serialized page array.
                        * \param cur_page      Current page; passed by value, so the caller's copy
                        *                      is not updated when pages are added here.
                        * \param page_count    In/out: number of pages in *pages.
                        * \param buf           In/out: write position.
                        * \param remaining_sz  In/out: bytes left in the current page.
                        *
                        * \return 0 on success, or the error from blob_serialize_add_page().
                        * NOTE(review): on that error *pages is not freed here, unlike
                        * blob_serialize_xattrs() - confirm the caller releases it.
                        */
    1252             : static int
    1253        1939 : blob_serialize_extents_rle(const struct spdk_blob *blob,
    1254             :                            struct spdk_blob_md_page **pages,
    1255             :                            struct spdk_blob_md_page *cur_page,
    1256             :                            uint32_t *page_count, uint8_t **buf,
    1257             :                            size_t *remaining_sz)
    1258             : {
    1259        1939 :         uint64_t                                last_cluster;
    1260             :         int                                     rc;
    1261             : 
    1262        1939 :         last_cluster = 0;
    1263        1961 :         while (last_cluster < blob->active.num_clusters) {
    1264        1747 :                 blob_serialize_extent_rle(blob, last_cluster, &last_cluster, buf, remaining_sz);
    1265             : 
    1266        1747 :                 if (last_cluster == blob->active.num_clusters) {
    1267        1725 :                         break;
    1268             :                 }
    1269             : 
                               /* Not everything fit: continue in a fresh page. */
    1270          22 :                 rc = blob_serialize_add_page(blob, pages, page_count, &cur_page);
    1271          22 :                 if (rc < 0) {
    1272           0 :                         return rc;
    1273             :                 }
    1274             : 
    1275          22 :                 *buf = (uint8_t *)cur_page->descriptors;
    1276          22 :                 *remaining_sz = sizeof(cur_page->descriptors);
    1277             :         }
    1278             : 
    1279        1939 :         return 0;
    1280             : }
    1281             : 
                       /*
                        * Fill 'page' with a single EXTENT_PAGE descriptor for the group of
                        * clusters that contains 'cluster'.
                        *
                        * The group starts at 'cluster' rounded down to a multiple of
                        * SPDK_EXTENTS_PER_EP.  One cluster_idx entry (lba / lba_per_cluster) is
                        * written per cluster, stopping after SPDK_EXTENTS_PER_EP entries or at the
                        * blob's last cluster, whichever comes first.
                        *
                        * \param blob     Blob whose cluster map is read.
                        * \param cluster  Any cluster index inside the group to serialize.
                        * \param page     Page whose descriptor area receives the descriptor.
                        */
    1282             : static void
    1283        1100 : blob_serialize_extent_page(const struct spdk_blob *blob,
    1284             :                            uint64_t cluster, struct spdk_blob_md_page *page)
    1285             : {
    1286             :         struct spdk_blob_md_descriptor_extent_page *desc_extent;
    1287             :         uint64_t i, extent_idx;
    1288             :         uint64_t lba, lba_per_cluster;
                       /* Round down to the first cluster of the group. */
    1289        1100 :         uint64_t start_cluster_idx = (cluster / SPDK_EXTENTS_PER_EP) * SPDK_EXTENTS_PER_EP;
    1290             : 
    1291        1100 :         desc_extent = (struct spdk_blob_md_descriptor_extent_page *) page->descriptors;
    1292        1100 :         desc_extent->type = SPDK_MD_DESCRIPTOR_TYPE_EXTENT_PAGE;
    1293             : 
    1294        1100 :         lba_per_cluster = bs_cluster_to_lba(blob->bs, 1);
    1295             : 
    1296        1100 :         desc_extent->start_cluster_idx = start_cluster_idx;
    1297        1100 :         extent_idx = 0;
    1298       42406 :         for (i = start_cluster_idx; i < blob->active.num_clusters; i++) {
    1299       41372 :                 lba = blob->active.clusters[i];
    1300       41372 :                 desc_extent->cluster_idx[extent_idx++] = lba / lba_per_cluster;
    1301       41372 :                 if (extent_idx >= SPDK_EXTENTS_PER_EP) {
    1302          66 :                         break;
    1303             :                 }
    1304             :         }
                       /* 'length' covers start_cluster_idx plus the entries actually written. */
    1305        1100 :         desc_extent->length = sizeof(desc_extent->start_cluster_idx) +
    1306             :                               sizeof(desc_extent->cluster_idx[0]) * extent_idx;
    1307        1100 : }
    1308             : 
                       /*
                        * Write the FLAGS descriptor (invalid, data_ro and md_ro flags) at 'buf'.
                        *
                        * 'buf' is passed by value, so the caller must advance its own write
                        * pointer by sizeof(struct spdk_blob_md_descriptor_flags) afterwards.
                        *
                        * \param blob    Blob whose flags are serialized.
                        * \param buf     Destination for the descriptor.
                        * \param buf_sz  In/out: bytes left at 'buf', reduced by the descriptor size.
                        */
    1309             : static void
    1310        3616 : blob_serialize_flags(const struct spdk_blob *blob,
    1311             :                      uint8_t *buf, size_t *buf_sz)
    1312             : {
    1313             :         struct spdk_blob_md_descriptor_flags *desc;
    1314             : 
    1315             :         /*
    1316             :          * Flags get serialized first, so we should always have room for the flags
    1317             :          *  descriptor.
    1318             :          */
    1319        3616 :         assert(*buf_sz >= sizeof(*desc));
    1320             : 
    1321        3616 :         desc = (struct spdk_blob_md_descriptor_flags *)buf;
    1322        3616 :         desc->type = SPDK_MD_DESCRIPTOR_TYPE_FLAGS;
                       /* 'length' excludes the generic descriptor header. */
    1323        3616 :         desc->length = sizeof(*desc) - sizeof(struct spdk_blob_md_descriptor);
    1324        3616 :         desc->invalid_flags = blob->invalid_flags;
    1325        3616 :         desc->data_ro_flags = blob->data_ro_flags;
    1326        3616 :         desc->md_ro_flags = blob->md_ro_flags;
    1327             : 
    1328        3616 :         *buf_sz -= sizeof(*desc);
    1329        3616 : }
    1330             : 
                       /*
                        * Serialize every xattr in 'xattrs', adding metadata pages as needed.
                        *
                        * Each xattr is first tried at the current write position.  If it does not
                        * fit, a new page is appended and the xattr is tried once more at the start
                        * of that page.
                        *
                        * \param blob          Blob being serialized.
                        * \param xattrs        List of xattrs to serialize.
                        * \param internal      Passed through to blob_serialize_xattr() to select the
                        *                      descriptor type.
                        * \param pages         In/out: serialized page array.
                        * \param cur_page      Current page; passed by value, so the caller's copy
                        *                      is not updated when pages are added here.
                        * \param page_count    In/out: number of pages in *pages.
                        * \param buf           In/out: write position.
                        * \param remaining_sz  In/out: bytes left in the current page.
                        *
                        * \return 0 on success.  If a page cannot be added, or an xattr does not
                        * fit even in an empty page, all pages are freed, *pages is set to NULL,
                        * *page_count to 0, and the negative error is returned.
                        */
    1331             : static int
    1332        7232 : blob_serialize_xattrs(const struct spdk_blob *blob,
    1333             :                       const struct spdk_xattr_tailq *xattrs, bool internal,
    1334             :                       struct spdk_blob_md_page **pages,
    1335             :                       struct spdk_blob_md_page *cur_page,
    1336             :                       uint32_t *page_count, uint8_t **buf,
    1337             :                       size_t *remaining_sz)
    1338             : {
    1339             :         const struct spdk_xattr *xattr;
    1340             :         int     rc;
    1341             : 
    1342        8979 :         TAILQ_FOREACH(xattr, xattrs, link) {
    1343        1747 :                 size_t required_sz = 0;
    1344             : 
    1345        1747 :                 rc = blob_serialize_xattr(xattr,
    1346             :                                           *buf, *remaining_sz,
    1347             :                                           &required_sz, internal);
    1348        1747 :                 if (rc < 0) {
    1349             :                         /* Need to add a new page to the chain */
    1350          48 :                         rc = blob_serialize_add_page(blob, pages, page_count,
    1351             :                                                      &cur_page);
    1352          48 :                         if (rc < 0) {
    1353           0 :                                 spdk_free(*pages);
    1354           0 :                                 *pages = NULL;
    1355           0 :                                 *page_count = 0;
    1356           0 :                                 return rc;
    1357             :                         }
    1358             : 
    1359          48 :                         *buf = (uint8_t *)cur_page->descriptors;
    1360          48 :                         *remaining_sz = sizeof(cur_page->descriptors);
    1361             : 
    1362             :                         /* Try again */
    1363          48 :                         required_sz = 0;
    1364          48 :                         rc = blob_serialize_xattr(xattr,
    1365             :                                                   *buf, *remaining_sz,
    1366             :                                                   &required_sz, internal);
    1367             : 
                                       /* Failing here means the xattr does not fit in an empty page. */
    1368          48 :                         if (rc < 0) {
    1369           0 :                                 spdk_free(*pages);
    1370           0 :                                 *pages = NULL;
    1371           0 :                                 *page_count = 0;
    1372           0 :                                 return rc;
    1373             :                         }
    1374             :                 }
    1375             : 
                               /* Advance past the descriptor that was just written. */
    1376        1747 :                 *remaining_sz -= required_sz;
    1377        1747 :                 *buf += required_sz;
    1378             :         }
    1379             : 
    1380        7232 :         return 0;
    1381             : }
    1382             : 
                       /*
                        * Serialize a dirty blob's metadata into a newly allocated page array.
                        *
                        * Descriptors are written in this order: flags, xattrs, internal xattrs,
                        * then either the extent table (when blob->use_extent_table is set) or the
                        * run-length encoded extents.
                        *
                        * \param blob        Blob to serialize; its state must be SPDK_BLOB_STATE_DIRTY.
                        * \param pages       Out: the allocated page array.
                        * \param page_count  Out: number of pages in *pages.
                        *
                        * \return 0 on success, negative errno on failure.
                        * NOTE(review): when the extent serializers fail, *pages is returned still
                        * allocated - confirm the caller frees it on error.
                        */
    1383             : static int
    1384        3616 : blob_serialize(const struct spdk_blob *blob, struct spdk_blob_md_page **pages,
    1385             :                uint32_t *page_count)
    1386             : {
    1387        3616 :         struct spdk_blob_md_page                *cur_page;
    1388             :         int                                     rc;
    1389        3616 :         uint8_t                                 *buf;
    1390        3616 :         size_t                                  remaining_sz;
    1391             : 
    1392        3616 :         assert(pages != NULL);
    1393        3616 :         assert(page_count != NULL);
    1394        3616 :         assert(blob != NULL);
    1395        3616 :         assert(blob->state == SPDK_BLOB_STATE_DIRTY);
    1396             : 
    1397        3616 :         *pages = NULL;
    1398        3616 :         *page_count = 0;
    1399             : 
    1400             :         /* A blob always has at least 1 page, even if it has no descriptors */
    1401        3616 :         rc = blob_serialize_add_page(blob, pages, page_count, &cur_page);
    1402        3616 :         if (rc < 0) {
    1403           0 :                 return rc;
    1404             :         }
    1405             : 
    1406        3616 :         buf = (uint8_t *)cur_page->descriptors;
    1407        3616 :         remaining_sz = sizeof(cur_page->descriptors);
    1408             : 
    1409             :         /* Serialize flags */
    1410        3616 :         blob_serialize_flags(blob, buf, &remaining_sz);
                       /* blob_serialize_flags() takes buf by value, so advance it here. */
    1411        3616 :         buf += sizeof(struct spdk_blob_md_descriptor_flags);
    1412             : 
                       /* The helpers below receive cur_page by value; each one that adds a page
                        * obtains its own pointer to it, so cur_page is not refreshed here. */
    1413             :         /* Serialize xattrs */
    1414        3616 :         rc = blob_serialize_xattrs(blob, &blob->xattrs, false,
    1415             :                                    pages, cur_page, page_count, &buf, &remaining_sz);
    1416        3616 :         if (rc < 0) {
    1417           0 :                 return rc;
    1418             :         }
    1419             : 
    1420             :         /* Serialize internal xattrs */
    1421        3616 :         rc = blob_serialize_xattrs(blob, &blob->xattrs_internal, true,
    1422             :                                    pages, cur_page, page_count, &buf, &remaining_sz);
    1423        3616 :         if (rc < 0) {
    1424           0 :                 return rc;
    1425             :         }
    1426             : 
    1427        3616 :         if (blob->use_extent_table) {
    1428             :                 /* Serialize extent table */
    1429        1677 :                 rc = blob_serialize_extent_table(blob, pages, cur_page, page_count, &buf, &remaining_sz);
    1430             :         } else {
    1431             :                 /* Serialize extents */
    1432        1939 :                 rc = blob_serialize_extents_rle(blob, pages, cur_page, page_count, &buf, &remaining_sz);
    1433             :         }
    1434             : 
    1435        3616 :         return rc;
    1436             : }
    1437             : 
                       /* State carried through the asynchronous steps of loading a blob; released
                        * by blob_load_final(). */
    1438             : struct spdk_blob_load_ctx {
                               /* Blob being loaded. */
    1439             :         struct spdk_blob                *blob;
    1440             : 
                               /* Page buffer used while loading; released with spdk_free(). */
    1441             :         struct spdk_blob_md_page        *pages;
                               /* Number of pages in 'pages'. */
    1442             :         uint32_t                        num_pages;
                               /* NOTE(review): presumably the index of the next extent page to
                                * read - not used in the code visible here, confirm. */
    1443             :         uint32_t                        next_extent_page;
                               /* Sequence handed to cb_fn when the load completes. */
    1444             :         spdk_bs_sequence_t              *seq;
    1445             : 
                               /* Completion callback, invoked as cb_fn(seq, cb_arg, bserrno). */
    1446             :         spdk_bs_sequence_cpl            cb_fn;
    1447             :         void                            *cb_arg;
    1448             : };
    1449             : 
                       /*
                        * Compute the CRC32C of a metadata page.
                        *
                        * The checksum covers the first SPDK_BS_PAGE_SIZE - 4 bytes of 'page'; it is
                        * seeded with BLOB_CRC32C_INITIAL and XORed with the same value at the end.
                        * NOTE(review): the excluded last 4 bytes are presumably where the page
                        * stores its own crc - confirm against struct spdk_blob_md_page.
                        *
                        * \param page  Start of a buffer of at least SPDK_BS_PAGE_SIZE - 4 bytes.
                        *
                        * \return The computed checksum.
                        */
    1450             : static uint32_t
    1451       19958 : blob_md_page_calc_crc(void *page)
    1452             : {
    1453             :         uint32_t                crc;
    1454             : 
    1455       19958 :         crc = BLOB_CRC32C_INITIAL;
    1456       19958 :         crc = spdk_crc32c_update(page, SPDK_BS_PAGE_SIZE - 4, crc);
    1457       19958 :         crc ^= BLOB_CRC32C_INITIAL;
    1458             : 
    1459       19958 :         return crc;
    1460             : 
    1461             : }
    1462             : 
                       /*
                        * Finish a blob load: mark the blob clean on success, report the result to
                        * the caller's callback, then release the load context.
                        *
                        * 'ctx' and its page buffer are freed here, so 'ctx' must not be used after
                        * this call.
                        *
                        * \param ctx      Load context; ownership is taken by this function.
                        * \param bserrno  Result of the load, 0 on success.
                        */
    1463             : static void
    1464        3474 : blob_load_final(struct spdk_blob_load_ctx *ctx, int bserrno)
    1465             : {
    1466        3474 :         struct spdk_blob                *blob = ctx->blob;
    1467             : 
    1468        3474 :         if (bserrno == 0) {
    1469        3410 :                 blob_mark_clean(blob);
    1470             :         }
    1471             : 
    1472        3474 :         ctx->cb_fn(ctx->seq, ctx->cb_arg, bserrno);
    1473             : 
    1474             :         /* Free the memory */
    1475        3474 :         spdk_free(ctx->pages);
    1476        3474 :         free(ctx);
    1477        3474 : }
    1478             : 
                       /*
                        * Completion callback for opening the parent snapshot of a blob being
                        * loaded (passed to spdk_bs_open_blob() by blob_load_backing_dev()).
                        *
                        * On success the snapshot is wrapped in a blob-backed bs_dev and installed
                        * as the blob's back_bs_dev; if that wrapper cannot be created the load
                        * fails with -ENOMEM.  The load is always finished via blob_load_final().
                        *
                        * \param cb_arg    The struct spdk_blob_load_ctx of the load in progress.
                        * \param snapshot  The opened snapshot blob (used only when bserrno is 0).
                        * \param bserrno   Result of opening the snapshot.
                        */
    1479             : static void
    1480         454 : blob_load_snapshot_cpl(void *cb_arg, struct spdk_blob *snapshot, int bserrno)
    1481             : {
    1482         454 :         struct spdk_blob_load_ctx       *ctx = cb_arg;
    1483         454 :         struct spdk_blob                *blob = ctx->blob;
    1484             : 
    1485         454 :         if (bserrno == 0) {
    1486         448 :                 blob->back_bs_dev = bs_create_blob_bs_dev(snapshot);
    1487         448 :                 if (blob->back_bs_dev == NULL) {
    1488           0 :                         bserrno = -ENOMEM;
    1489             :                 }
    1490             :         }
                       /* Covers both a failed open and a failed bs_dev creation. */
    1491         454 :         if (bserrno != 0) {
    1492           6 :                 SPDK_ERRLOG("Snapshot fail\n");
    1493             :         }
    1494             : 
    1495         454 :         blob_load_final(ctx, bserrno);
    1496         454 : }
    1497             : 
    1498             : static void blob_update_clear_method(struct spdk_blob *blob);
    1499             : 
                       /*
                        * Create the backing device for an external-snapshot (esnap) clone.
                        *
                        * Looks up the esnap id stored in the BLOB_EXTERNAL_SNAPSHOT_ID xattr and
                        * hands it to the blobstore's esnap_bs_dev_create callback.  On success the
                        * returned device (which may be NULL) becomes blob->back_bs_dev and
                        * blob->parent_id is set to SPDK_BLOBID_EXTERNAL_SNAPSHOT.
                        *
                        * \param blob      Esnap clone being loaded; back_bs_dev must be NULL.
                        * \param blob_ctx  Per-blob context forwarded to esnap_bs_dev_create.
                        *
                        * \return 0 on success; -ENOTSUP if the blobstore has no
                        * esnap_bs_dev_create callback; -EINVAL if the esnap id xattr is missing or
                        * the device block size does not evenly divide the blobstore io_unit_size;
                        * otherwise the error returned by esnap_bs_dev_create.
                        */
    1500             : static int
    1501         124 : blob_load_esnap(struct spdk_blob *blob, void *blob_ctx)
    1502             : {
    1503         124 :         struct spdk_blob_store *bs = blob->bs;
    1504         124 :         struct spdk_bs_dev *bs_dev = NULL;
    1505         124 :         const void *esnap_id = NULL;
    1506         124 :         size_t id_len = 0;
    1507             :         int rc;
    1508             : 
    1509         124 :         if (bs->esnap_bs_dev_create == NULL) {
    1510           8 :                 SPDK_NOTICELOG("blob 0x%" PRIx64 " is an esnap clone but the blobstore was opened "
    1511             :                                "without support for esnap clones\n", blob->id);
    1512           8 :                 return -ENOTSUP;
    1513             :         }
    1514         116 :         assert(blob->back_bs_dev == NULL);
    1515             : 
    1516         116 :         rc = blob_get_xattr_value(blob, BLOB_EXTERNAL_SNAPSHOT_ID, &esnap_id, &id_len, true);
    1517         116 :         if (rc != 0) {
    1518           0 :                 SPDK_ERRLOG("blob 0x%" PRIx64 " is an esnap clone but has no esnap ID\n", blob->id);
    1519           0 :                 return -EINVAL;
    1520             :         }
                       /* id_len is narrowed to uint32_t when passed to the callback below. */
    1521         116 :         assert(id_len > 0 && id_len < UINT32_MAX);
    1522             : 
    1523         116 :         SPDK_INFOLOG(blob, "Creating external snapshot device\n");
    1524             : 
    1525         116 :         rc = bs->esnap_bs_dev_create(bs->esnap_ctx, blob_ctx, blob, esnap_id, (uint32_t)id_len,
    1526             :                                      &bs_dev);
    1527         116 :         if (rc != 0) {
    1528           0 :                 SPDK_DEBUGLOG(blob_esnap, "blob 0x%" PRIx64 ": failed to load back_bs_dev "
    1529             :                               "with error %d\n", blob->id, rc);
    1530           0 :                 return rc;
    1531             :         }
    1532             : 
    1533             :         /*
    1534             :          * Note: bs_dev might be NULL if the consumer chose to not open the external snapshot.
    1535             :          * This especially might happen during spdk_bs_load() iteration.
    1536             :          */
    1537         116 :         if (bs_dev != NULL) {
    1538         116 :                 SPDK_DEBUGLOG(blob_esnap, "blob 0x%" PRIx64 ": loaded back_bs_dev\n", blob->id);
    1539         116 :                 if ((bs->io_unit_size % bs_dev->blocklen) != 0) {
    1540           4 :                         SPDK_NOTICELOG("blob 0x%" PRIx64 " external snapshot device block size %u "
    1541             :                                        "is not compatible with blobstore block size %u\n",
    1542             :                                        blob->id, bs_dev->blocklen, bs->io_unit_size);
                                       /* The device was created for us, so destroy it before failing. */
    1543           4 :                         bs_dev->destroy(bs_dev);
    1544           4 :                         return -EINVAL;
    1545             :                 }
    1546             :         }
    1547             : 
    1548         112 :         blob->back_bs_dev = bs_dev;
    1549         112 :         blob->parent_id = SPDK_BLOBID_EXTERNAL_SNAPSHOT;
    1550             : 
    1551         112 :         return 0;
    1552             : }
    1553             : 
                       /*
                        * Select and attach the backing device of a blob being loaded, then finish
                        * the load.
                        *
                        *  - Esnap clone: the device comes from blob_load_esnap().
                        *  - Thin-provisioned blob with a BLOB_SNAPSHOT xattr: the parent snapshot
                        *    is opened asynchronously and the load continues in
                        *    blob_load_snapshot_cpl().
                        *  - Thin-provisioned blob without that xattr: a zeroes device is used.
                        *  - Any other blob: no backing device (NULL).
                        *
                        * Every path except the asynchronous snapshot open ends in
                        * blob_load_final(), which frees the context.
                        *
                        * \param seq     Sequence of the load; supplies the esnap context.
                        * \param cb_arg  The struct spdk_blob_load_ctx of the load in progress.
                        */
    1554             : static void
    1555        3428 : blob_load_backing_dev(spdk_bs_sequence_t *seq, void *cb_arg)
    1556             : {
    1557        3428 :         struct spdk_blob_load_ctx       *ctx = cb_arg;
    1558        3428 :         struct spdk_blob                *blob = ctx->blob;
    1559        3428 :         const void                      *value;
    1560        3428 :         size_t                          len;
    1561             :         int                             rc;
    1562             : 
    1563        3428 :         if (blob_is_esnap_clone(blob)) {
    1564         124 :                 rc = blob_load_esnap(blob, seq->cpl.u.blob_handle.esnap_ctx);
    1565         124 :                 blob_load_final(ctx, rc);
    1566         124 :                 return;
    1567             :         }
    1568             : 
    1569        3304 :         if (spdk_blob_is_thin_provisioned(blob)) {
    1570        1034 :                 rc = blob_get_xattr_value(blob, BLOB_SNAPSHOT, &value, &len, true);
    1571        1034 :                 if (rc == 0) {
                                       /* The xattr value must be exactly one blob id. */
    1572         454 :                         if (len != sizeof(spdk_blob_id)) {
    1573           0 :                                 blob_load_final(ctx, -EINVAL);
    1574           0 :                                 return;
    1575             :                         }
    1576             :                         /* open snapshot blob and continue in the callback function */
    1577         454 :                         blob->parent_id = *(spdk_blob_id *)value;
    1578         454 :                         spdk_bs_open_blob(blob->bs, blob->parent_id,
    1579             :                                           blob_load_snapshot_cpl, ctx);
    1580         454 :                         return;
    1581             :                 } else {
    1582             :                         /* add zeroes_dev for thin provisioned blob */
                                       /* NOTE(review): the result is not checked for NULL here -
                                        * confirm bs_create_zeroes_dev() cannot fail. */
    1583         580 :                         blob->back_bs_dev = bs_create_zeroes_dev();
    1584             :                 }
    1585             :         } else {
    1586             :                 /* standard blob */
    1587        2270 :                 blob->back_bs_dev = NULL;
    1588             :         }
    1589        2850 :         blob_load_final(ctx, 0);
    1590             : }
    1591             : 
    1592             : static void
    1593        2820 : blob_load_cpl_extents_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
    1594             : {
    1595        2820 :         struct spdk_blob_load_ctx       *ctx = cb_arg;
    1596        2820 :         struct spdk_blob                *blob = ctx->blob;
    1597             :         struct spdk_blob_md_page        *page;
    1598             :         uint64_t                        i;
    1599             :         uint32_t                        crc;
    1600             :         uint64_t                        lba;
    1601             :         void                            *tmp;
    1602             :         uint64_t                        sz;
    1603             : 
    1604        2820 :         if (bserrno) {
    1605           6 :                 SPDK_ERRLOG("Extent page read failed: %d\n", bserrno);
    1606           6 :                 blob_load_final(ctx, bserrno);
    1607           6 :                 return;
    1608             :         }
    1609             : 
    1610        2814 :         if (ctx->pages == NULL) {
    1611             :                 /* First iteration of this function, allocate buffer for single EXTENT_PAGE */
    1612        1768 :                 ctx->pages = spdk_zmalloc(SPDK_BS_PAGE_SIZE, 0,
    1613             :                                           NULL, SPDK_ENV_SOCKET_ID_ANY, SPDK_MALLOC_DMA);
    1614        1768 :                 if (!ctx->pages) {
    1615           0 :                         blob_load_final(ctx, -ENOMEM);
    1616           0 :                         return;
    1617             :                 }
    1618        1768 :                 ctx->num_pages = 1;
    1619        1768 :                 ctx->next_extent_page = 0;
    1620             :         } else {
    1621        1046 :                 page = &ctx->pages[0];
    1622        1046 :                 crc = blob_md_page_calc_crc(page);
    1623        1046 :                 if (crc != page->crc) {
    1624           0 :                         blob_load_final(ctx, -EINVAL);
    1625           0 :                         return;
    1626             :                 }
    1627             : 
    1628        1046 :                 if (page->next != SPDK_INVALID_MD_PAGE) {
    1629           0 :                         blob_load_final(ctx, -EINVAL);
    1630           0 :                         return;
    1631             :                 }
    1632             : 
    1633        1046 :                 bserrno = blob_parse_extent_page(page, blob);
    1634        1046 :                 if (bserrno) {
    1635           0 :                         blob_load_final(ctx, bserrno);
    1636           0 :                         return;
    1637             :                 }
    1638             :         }
    1639             : 
    1640        3240 :         for (i = ctx->next_extent_page; i < blob->active.num_extent_pages; i++) {
    1641        1478 :                 if (blob->active.extent_pages[i] != 0) {
    1642             :                         /* Extent page was allocated, read and parse it. */
    1643        1052 :                         lba = bs_md_page_to_lba(blob->bs, blob->active.extent_pages[i]);
    1644        1052 :                         ctx->next_extent_page = i + 1;
    1645             : 
    1646        1052 :                         bs_sequence_read_dev(seq, &ctx->pages[0], lba,
    1647        1052 :                                              bs_byte_to_lba(blob->bs, SPDK_BS_PAGE_SIZE),
    1648             :                                              blob_load_cpl_extents_cpl, ctx);
    1649        1052 :                         return;
    1650             :                 } else {
    1651             :                         /* Thin provisioned blobs can point to unallocated extent pages.
    1652             :                          * In this case blob size should be increased by up to the amount left in remaining_clusters_in_et. */
    1653             : 
    1654         426 :                         sz = spdk_min(blob->remaining_clusters_in_et, SPDK_EXTENTS_PER_EP);
    1655         426 :                         blob->active.num_clusters += sz;
    1656         426 :                         blob->remaining_clusters_in_et -= sz;
    1657             : 
    1658         426 :                         assert(spdk_blob_is_thin_provisioned(blob));
    1659         426 :                         assert(i + 1 < blob->active.num_extent_pages || blob->remaining_clusters_in_et == 0);
    1660             : 
    1661         426 :                         tmp = realloc(blob->active.clusters, blob->active.num_clusters * sizeof(*blob->active.clusters));
    1662         426 :                         if (tmp == NULL) {
    1663           0 :                                 blob_load_final(ctx, -ENOMEM);
    1664           0 :                                 return;
    1665             :                         }
    1666         426 :                         memset(tmp + sizeof(*blob->active.clusters) * blob->active.cluster_array_size, 0,
    1667         426 :                                sizeof(*blob->active.clusters) * (blob->active.num_clusters - blob->active.cluster_array_size));
    1668         426 :                         blob->active.clusters = tmp;
    1669         426 :                         blob->active.cluster_array_size = blob->active.num_clusters;
    1670             :                 }
    1671             :         }
    1672             : 
    1673        1762 :         blob_load_backing_dev(seq, ctx);
    1674             : }
    1675             : 
    1676             : static void
    1677        3574 : blob_load_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
    1678             : {
    1679        3574 :         struct spdk_blob_load_ctx       *ctx = cb_arg;
    1680        3574 :         struct spdk_blob                *blob = ctx->blob;
    1681             :         struct spdk_blob_md_page        *page;
    1682             :         int                             rc;
    1683             :         uint32_t                        crc;
    1684             :         uint32_t                        current_page;
    1685             : 
    1686        3574 :         if (ctx->num_pages == 1) {
    1687        3474 :                 current_page = bs_blobid_to_page(blob->id);
    1688             :         } else {
    1689         100 :                 assert(ctx->num_pages != 0);
    1690         100 :                 page = &ctx->pages[ctx->num_pages - 2];
    1691         100 :                 current_page = page->next;
    1692             :         }
    1693             : 
    1694        3574 :         if (bserrno) {
    1695          20 :                 SPDK_ERRLOG("Metadata page %d read failed for blobid 0x%" PRIx64 ": %d\n",
    1696             :                             current_page, blob->id, bserrno);
    1697          20 :                 blob_load_final(ctx, bserrno);
    1698          20 :                 return;
    1699             :         }
    1700             : 
    1701        3554 :         page = &ctx->pages[ctx->num_pages - 1];
    1702        3554 :         crc = blob_md_page_calc_crc(page);
    1703        3554 :         if (crc != page->crc) {
    1704           8 :                 SPDK_ERRLOG("Metadata page %d crc mismatch for blobid 0x%" PRIx64 "\n",
    1705             :                             current_page, blob->id);
    1706           8 :                 blob_load_final(ctx, -EINVAL);
    1707           8 :                 return;
    1708             :         }
    1709             : 
    1710        3546 :         if (page->next != SPDK_INVALID_MD_PAGE) {
    1711             :                 struct spdk_blob_md_page *tmp_pages;
    1712         100 :                 uint32_t next_page = page->next;
    1713         100 :                 uint64_t next_lba = bs_md_page_to_lba(blob->bs, next_page);
    1714             : 
    1715             :                 /* Read the next page */
    1716         100 :                 tmp_pages = spdk_realloc(ctx->pages, (sizeof(*page) * (ctx->num_pages + 1)), 0);
    1717         100 :                 if (tmp_pages == NULL) {
    1718           0 :                         blob_load_final(ctx, -ENOMEM);
    1719           0 :                         return;
    1720             :                 }
    1721         100 :                 ctx->num_pages++;
    1722         100 :                 ctx->pages = tmp_pages;
    1723             : 
    1724         100 :                 bs_sequence_read_dev(seq, &ctx->pages[ctx->num_pages - 1],
    1725             :                                      next_lba,
    1726         100 :                                      bs_byte_to_lba(blob->bs, sizeof(*page)),
    1727             :                                      blob_load_cpl, ctx);
    1728         100 :                 return;
    1729             :         }
    1730             : 
    1731             :         /* Parse the pages */
    1732        3446 :         rc = blob_parse(ctx->pages, ctx->num_pages, blob);
    1733        3446 :         if (rc) {
    1734          12 :                 blob_load_final(ctx, rc);
    1735          12 :                 return;
    1736             :         }
    1737             : 
    1738        3434 :         if (blob->extent_table_found == true) {
    1739             :                 /* If EXTENT_TABLE was found, that means support for it should be enabled. */
    1740        1768 :                 assert(blob->extent_rle_found == false);
    1741        1768 :                 blob->use_extent_table = true;
    1742             :         } else {
    1743             :                 /* If EXTENT_RLE or no extent_* descriptor was found disable support
    1744             :                  * for extent table. No extent_* descriptors means that blob has length of 0
    1745             :                  * and no extent_rle descriptors were persisted for it.
    1746             :                  * EXTENT_TABLE if used, is always present in metadata regardless of length. */
    1747        1666 :                 blob->use_extent_table = false;
    1748             :         }
    1749             : 
    1750             :         /* Check the clear_method stored in metadata vs what may have been passed
    1751             :          * via spdk_bs_open_blob_ext() and update accordingly.
    1752             :          */
    1753        3434 :         blob_update_clear_method(blob);
    1754             : 
    1755        3434 :         spdk_free(ctx->pages);
    1756        3434 :         ctx->pages = NULL;
    1757             : 
    1758        3434 :         if (blob->extent_table_found) {
    1759        1768 :                 blob_load_cpl_extents_cpl(seq, ctx, 0);
    1760             :         } else {
    1761        1666 :                 blob_load_backing_dev(seq, ctx);
    1762             :         }
    1763             : }
    1764             : 
    1765             : /* Load a blob from disk given a blobid */
    1766             : static void
    1767        3474 : blob_load(spdk_bs_sequence_t *seq, struct spdk_blob *blob,
    1768             :           spdk_bs_sequence_cpl cb_fn, void *cb_arg)
    1769             : {
    1770             :         struct spdk_blob_load_ctx *ctx;
    1771             :         struct spdk_blob_store *bs;
    1772             :         uint32_t page_num;
    1773             :         uint64_t lba;
    1774             : 
    1775        3474 :         blob_verify_md_op(blob);
    1776             : 
    1777        3474 :         bs = blob->bs;
    1778             : 
    1779        3474 :         ctx = calloc(1, sizeof(*ctx));
    1780        3474 :         if (!ctx) {
    1781           0 :                 cb_fn(seq, cb_arg, -ENOMEM);
    1782           0 :                 return;
    1783             :         }
    1784             : 
    1785        3474 :         ctx->blob = blob;
    1786        3474 :         ctx->pages = spdk_realloc(ctx->pages, SPDK_BS_PAGE_SIZE, 0);
    1787        3474 :         if (!ctx->pages) {
    1788           0 :                 free(ctx);
    1789           0 :                 cb_fn(seq, cb_arg, -ENOMEM);
    1790           0 :                 return;
    1791             :         }
    1792        3474 :         ctx->num_pages = 1;
    1793        3474 :         ctx->cb_fn = cb_fn;
    1794        3474 :         ctx->cb_arg = cb_arg;
    1795        3474 :         ctx->seq = seq;
    1796             : 
    1797        3474 :         page_num = bs_blobid_to_page(blob->id);
    1798        3474 :         lba = bs_md_page_to_lba(blob->bs, page_num);
    1799             : 
    1800        3474 :         blob->state = SPDK_BLOB_STATE_LOADING;
    1801             : 
    1802        3474 :         bs_sequence_read_dev(seq, &ctx->pages[0], lba,
    1803        3474 :                              bs_byte_to_lba(bs, SPDK_BS_PAGE_SIZE),
    1804             :                              blob_load_cpl, ctx);
    1805             : }
    1806             : 
    1807             : struct spdk_blob_persist_ctx {
    1808             :         struct spdk_blob                *blob;
    1809             : 
    1810             :         struct spdk_blob_md_page        *pages;
    1811             :         uint32_t                        next_extent_page;
    1812             :         struct spdk_blob_md_page        *extent_page;
    1813             : 
    1814             :         spdk_bs_sequence_t              *seq;
    1815             :         spdk_bs_sequence_cpl            cb_fn;
    1816             :         void                            *cb_arg;
    1817             :         TAILQ_ENTRY(spdk_blob_persist_ctx) link;
    1818             : };
    1819             : 
    1820             : static void
    1821        1262 : bs_batch_clear_dev(struct spdk_blob *blob, spdk_bs_batch_t *batch, uint64_t lba,
    1822             :                    uint64_t lba_count)
    1823             : {
    1824        1262 :         switch (blob->clear_method) {
    1825        1262 :         case BLOB_CLEAR_WITH_DEFAULT:
    1826             :         case BLOB_CLEAR_WITH_UNMAP:
    1827        1262 :                 bs_batch_unmap_dev(batch, lba, lba_count);
    1828        1262 :                 break;
    1829           0 :         case BLOB_CLEAR_WITH_WRITE_ZEROES:
    1830           0 :                 bs_batch_write_zeroes_dev(batch, lba, lba_count);
    1831           0 :                 break;
    1832           0 :         case BLOB_CLEAR_WITH_NONE:
    1833             :         default:
    1834           0 :                 break;
    1835             :         }
    1836        1262 : }
    1837             : 
    1838             : static int
    1839        1152 : bs_super_validate(struct spdk_bs_super_block *super, struct spdk_blob_store *bs)
    1840             : {
    1841             :         uint32_t        crc;
    1842             :         static const char zeros[SPDK_BLOBSTORE_TYPE_LENGTH];
    1843             : 
    1844        1152 :         if (super->version > SPDK_BS_VERSION ||
    1845        1148 :             super->version < SPDK_BS_INITIAL_VERSION) {
    1846           8 :                 return -EILSEQ;
    1847             :         }
    1848             : 
    1849        1144 :         if (memcmp(super->signature, SPDK_BS_SUPER_BLOCK_SIG,
    1850             :                    sizeof(super->signature)) != 0) {
    1851           0 :                 return -EILSEQ;
    1852             :         }
    1853             : 
    1854        1144 :         crc = blob_md_page_calc_crc(super);
    1855        1144 :         if (crc != super->crc) {
    1856           4 :                 return -EILSEQ;
    1857             :         }
    1858             : 
    1859        1140 :         if (memcmp(&bs->bstype, &super->bstype, SPDK_BLOBSTORE_TYPE_LENGTH) == 0) {
    1860        1126 :                 SPDK_DEBUGLOG(blob, "Bstype matched - loading blobstore\n");
    1861          14 :         } else if (memcmp(&bs->bstype, zeros, SPDK_BLOBSTORE_TYPE_LENGTH) == 0) {
    1862           6 :                 SPDK_DEBUGLOG(blob, "Bstype wildcard used - loading blobstore regardless bstype\n");
    1863             :         } else {
    1864           8 :                 SPDK_DEBUGLOG(blob, "Unexpected bstype\n");
    1865           8 :                 SPDK_LOGDUMP(blob, "Expected:", bs->bstype.bstype, SPDK_BLOBSTORE_TYPE_LENGTH);
    1866           8 :                 SPDK_LOGDUMP(blob, "Found:", super->bstype.bstype, SPDK_BLOBSTORE_TYPE_LENGTH);
    1867           8 :                 return -ENXIO;
    1868             :         }
    1869             : 
    1870        1132 :         if (super->size > bs->dev->blockcnt * bs->dev->blocklen) {
    1871           8 :                 SPDK_NOTICELOG("Size mismatch, dev size: %" PRIu64 ", blobstore size: %" PRIu64 "\n",
    1872             :                                bs->dev->blockcnt * bs->dev->blocklen, super->size);
    1873           8 :                 return -EILSEQ;
    1874             :         }
    1875             : 
    1876        1124 :         return 0;
    1877             : }
    1878             : 
    1879             : static void bs_mark_dirty(spdk_bs_sequence_t *seq, struct spdk_blob_store *bs,
    1880             :                           spdk_bs_sequence_cpl cb_fn, void *cb_arg);
    1881             : 
    1882             : static void
    1883        5116 : blob_persist_complete_cb(void *arg)
    1884             : {
    1885        5116 :         struct spdk_blob_persist_ctx *ctx = arg;
    1886             : 
    1887             :         /* Call user callback */
    1888        5116 :         ctx->cb_fn(ctx->seq, ctx->cb_arg, 0);
    1889             : 
    1890             :         /* Free the memory */
    1891        5116 :         spdk_free(ctx->pages);
    1892        5116 :         free(ctx);
    1893        5116 : }
    1894             : 
    1895             : static void blob_persist_start(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno);
    1896             : 
    1897             : static void
    1898        5116 : blob_persist_complete(spdk_bs_sequence_t *seq, struct spdk_blob_persist_ctx *ctx, int bserrno)
    1899             : {
    1900             :         struct spdk_blob_persist_ctx    *next_persist, *tmp;
    1901        5116 :         struct spdk_blob                *blob = ctx->blob;
    1902             : 
    1903        5116 :         if (bserrno == 0) {
    1904        5064 :                 blob_mark_clean(blob);
    1905             :         }
    1906             : 
    1907        5116 :         assert(ctx == TAILQ_FIRST(&blob->persists_to_complete));
    1908             : 
    1909             :         /* Complete all persists that were pending when the current persist started */
    1910       10232 :         TAILQ_FOREACH_SAFE(next_persist, &blob->persists_to_complete, link, tmp) {
    1911        5116 :                 TAILQ_REMOVE(&blob->persists_to_complete, next_persist, link);
    1912        5116 :                 spdk_thread_send_msg(spdk_get_thread(), blob_persist_complete_cb, next_persist);
    1913             :         }
    1914             : 
    1915        5116 :         if (TAILQ_EMPTY(&blob->pending_persists)) {
    1916        5093 :                 return;
    1917             :         }
    1918             : 
    1919             :         /* Queue up all pending persists for completion and start blob persist with first one */
    1920          23 :         TAILQ_SWAP(&blob->persists_to_complete, &blob->pending_persists, spdk_blob_persist_ctx, link);
    1921          23 :         next_persist = TAILQ_FIRST(&blob->persists_to_complete);
    1922             : 
    1923          23 :         blob->state = SPDK_BLOB_STATE_DIRTY;
    1924          23 :         bs_mark_dirty(seq, blob->bs, blob_persist_start, next_persist);
    1925             : }
    1926             : 
    1927             : static void
    1928        5064 : blob_persist_clear_extents_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
    1929             : {
    1930        5064 :         struct spdk_blob_persist_ctx    *ctx = cb_arg;
    1931        5064 :         struct spdk_blob                *blob = ctx->blob;
    1932        5064 :         struct spdk_blob_store          *bs = blob->bs;
    1933             :         size_t                          i;
    1934             : 
    1935        5064 :         if (bserrno != 0) {
    1936           0 :                 blob_persist_complete(seq, ctx, bserrno);
    1937           0 :                 return;
    1938             :         }
    1939             : 
    1940        5064 :         spdk_spin_lock(&bs->used_lock);
    1941             : 
    1942             :         /* Release all extent_pages that were truncated */
    1943        6800 :         for (i = blob->active.num_extent_pages; i < blob->active.extent_pages_array_size; i++) {
    1944             :                 /* Nothing to release if it was not allocated */
    1945        1736 :                 if (blob->active.extent_pages[i] != 0) {
    1946         626 :                         bs_release_md_page(bs, blob->active.extent_pages[i]);
    1947             :                 }
    1948             :         }
    1949             : 
    1950        5064 :         spdk_spin_unlock(&bs->used_lock);
    1951             : 
    1952        5064 :         if (blob->active.num_extent_pages == 0) {
    1953        3651 :                 free(blob->active.extent_pages);
    1954        3651 :                 blob->active.extent_pages = NULL;
    1955        3651 :                 blob->active.extent_pages_array_size = 0;
    1956        1413 :         } else if (blob->active.num_extent_pages != blob->active.extent_pages_array_size) {
    1957             : #ifndef __clang_analyzer__
    1958             :                 void *tmp;
    1959             : 
    1960             :                 /* scan-build really can't figure reallocs, workaround it */
    1961           2 :                 tmp = realloc(blob->active.extent_pages, sizeof(uint32_t) * blob->active.num_extent_pages);
    1962           2 :                 assert(tmp != NULL);
    1963           2 :                 blob->active.extent_pages = tmp;
    1964             : #endif
    1965           2 :                 blob->active.extent_pages_array_size = blob->active.num_extent_pages;
    1966             :         }
    1967             : 
    1968        5064 :         blob_persist_complete(seq, ctx, bserrno);
    1969             : }
    1970             : 
    1971             : static void
    1972        5064 : blob_persist_clear_extents(spdk_bs_sequence_t *seq, struct spdk_blob_persist_ctx *ctx)
    1973             : {
    1974        5064 :         struct spdk_blob                *blob = ctx->blob;
    1975        5064 :         struct spdk_blob_store          *bs = blob->bs;
    1976             :         size_t                          i;
    1977             :         uint64_t                        lba;
    1978             :         uint64_t                        lba_count;
    1979             :         spdk_bs_batch_t                 *batch;
    1980             : 
    1981        5064 :         batch = bs_sequence_to_batch(seq, blob_persist_clear_extents_cpl, ctx);
    1982        5064 :         lba_count = bs_byte_to_lba(bs, SPDK_BS_PAGE_SIZE);
    1983             : 
    1984             :         /* Clear all extent_pages that were truncated */
    1985        6800 :         for (i = blob->active.num_extent_pages; i < blob->active.extent_pages_array_size; i++) {
    1986             :                 /* Nothing to clear if it was not allocated */
    1987        1736 :                 if (blob->active.extent_pages[i] != 0) {
    1988         626 :                         lba = bs_md_page_to_lba(bs, blob->active.extent_pages[i]);
    1989         626 :                         bs_batch_write_zeroes_dev(batch, lba, lba_count);
    1990             :                 }
    1991             :         }
    1992             : 
    1993        5064 :         bs_batch_close(batch);
    1994        5064 : }
    1995             : 
    1996             : static void
    1997        5064 : blob_persist_clear_clusters_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
    1998             : {
    1999        5064 :         struct spdk_blob_persist_ctx    *ctx = cb_arg;
    2000        5064 :         struct spdk_blob                *blob = ctx->blob;
    2001        5064 :         struct spdk_blob_store          *bs = blob->bs;
    2002             :         size_t                          i;
    2003             : 
    2004        5064 :         if (bserrno != 0) {
    2005           0 :                 blob_persist_complete(seq, ctx, bserrno);
    2006           0 :                 return;
    2007             :         }
    2008             : 
    2009        5064 :         spdk_spin_lock(&bs->used_lock);
    2010             :         /* Release all clusters that were truncated */
    2011     1074111 :         for (i = blob->active.num_clusters; i < blob->active.cluster_array_size; i++) {
    2012     1069047 :                 uint32_t cluster_num = bs_lba_to_cluster(bs, blob->active.clusters[i]);
    2013             : 
    2014             :                 /* Nothing to release if it was not allocated */
    2015     1069047 :                 if (blob->active.clusters[i] != 0) {
    2016        2343 :                         bs_release_cluster(bs, cluster_num);
    2017             :                 }
    2018             :         }
    2019        5064 :         spdk_spin_unlock(&bs->used_lock);
    2020             : 
    2021        5064 :         if (blob->active.num_clusters == 0) {
    2022        1944 :                 free(blob->active.clusters);
    2023        1944 :                 blob->active.clusters = NULL;
    2024        1944 :                 blob->active.cluster_array_size = 0;
    2025        3120 :         } else if (blob->active.num_clusters != blob->active.cluster_array_size) {
    2026             : #ifndef __clang_analyzer__
    2027             :                 void *tmp;
    2028             : 
    2029             :                 /* scan-build really can't figure reallocs, workaround it */
    2030          14 :                 tmp = realloc(blob->active.clusters, sizeof(*blob->active.clusters) * blob->active.num_clusters);
    2031          14 :                 assert(tmp != NULL);
    2032          14 :                 blob->active.clusters = tmp;
    2033             : 
    2034             : #endif
    2035          14 :                 blob->active.cluster_array_size = blob->active.num_clusters;
    2036             :         }
    2037             : 
    2038             :         /* Move on to clearing extent pages */
    2039        5064 :         blob_persist_clear_extents(seq, ctx);
    2040             : }
    2041             : 
    2042             : static void
    2043        5064 : blob_persist_clear_clusters(spdk_bs_sequence_t *seq, struct spdk_blob_persist_ctx *ctx)
    2044             : {
    2045        5064 :         struct spdk_blob                *blob = ctx->blob;
    2046        5064 :         struct spdk_blob_store          *bs = blob->bs;
    2047             :         spdk_bs_batch_t                 *batch;
    2048             :         size_t                          i;
    2049             :         uint64_t                        lba;
    2050             :         uint64_t                        lba_count;
    2051             : 
    2052             :         /* Clusters don't move around in blobs. The list shrinks or grows
    2053             :          * at the end, but no changes ever occur in the middle of the list.
    2054             :          */
    2055             : 
    2056        5064 :         batch = bs_sequence_to_batch(seq, blob_persist_clear_clusters_cpl, ctx);
    2057             : 
    2058             :         /* Clear all clusters that were truncated */
    2059        5064 :         lba = 0;
    2060        5064 :         lba_count = 0;
    2061     1074111 :         for (i = blob->active.num_clusters; i < blob->active.cluster_array_size; i++) {
    2062     1069047 :                 uint64_t next_lba = blob->active.clusters[i];
    2063     1069047 :                 uint64_t next_lba_count = bs_cluster_to_lba(bs, 1);
    2064             : 
    2065     1069047 :                 if (next_lba > 0 && (lba + lba_count) == next_lba) {
    2066             :                         /* This cluster is contiguous with the previous one. */
    2067        1085 :                         lba_count += next_lba_count;
    2068        1085 :                         continue;
    2069     1067962 :                 } else if (next_lba == 0) {
    2070     1066704 :                         continue;
    2071             :                 }
    2072             : 
    2073             :                 /* This cluster is not contiguous with the previous one. */
    2074             : 
    2075             :                 /* If a run of LBAs previously existing, clear them now */
    2076        1258 :                 if (lba_count > 0) {
    2077          36 :                         bs_batch_clear_dev(ctx->blob, batch, lba, lba_count);
    2078             :                 }
    2079             : 
    2080             :                 /* Start building the next batch */
    2081        1258 :                 lba = next_lba;
    2082        1258 :                 if (next_lba > 0) {
    2083        1258 :                         lba_count = next_lba_count;
    2084             :                 } else {
    2085           0 :                         lba_count = 0;
    2086             :                 }
    2087             :         }
    2088             : 
    2089             :         /* If we ended with a contiguous set of LBAs, clear them now */
    2090        5064 :         if (lba_count > 0) {
    2091        1222 :                 bs_batch_clear_dev(ctx->blob, batch, lba, lba_count);
    2092             :         }
    2093             : 
    2094        5064 :         bs_batch_close(batch);
    2095        5064 : }
    2096             : /* Completion callback for the zero-write batch built by
                     :  * blob_persist_zero_pages().
                     :  *
                     :  * seq     - sequence the persist is running on.
                     :  * cb_arg  - the struct spdk_blob_persist_ctx of this persist.
                     :  * bserrno - status handed to the callback; non-zero ends the persist
                     :  *           through blob_persist_complete() without releasing any page.
                     :  *
                     :  * On success the md pages that blob_persist_zero_pages() zeroed are handed
                     :  * to bs_release_md_page() under bs->used_lock, and the persist moves on to
                     :  * blob_persist_clear_clusters().
                     :  */
    2097             : static void
    2098        5068 : blob_persist_zero_pages_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
    2099             : {
    2100        5068 :         struct spdk_blob_persist_ctx    *ctx = cb_arg;
    2101        5068 :         struct spdk_blob                *blob = ctx->blob;
    2102        5068 :         struct spdk_blob_store          *bs = blob->bs;
    2103             :         size_t                          i;
    2104             : 
    2105        5068 :         if (bserrno != 0) {
    2106           4 :                 blob_persist_complete(seq, ctx, bserrno);
    2107           4 :                 return;
    2108             :         }
    2109             : 
    2110        5064 :         spdk_spin_lock(&bs->used_lock);
    2111             : 
    2112             :         /* This loop starts at 1 because the first page is special and handled
    2113             :          * below. The pages (except the first) are never written in place,
    2114             :          * so any pages in the clean list were zeroed and can now be released.
    2115             :          */
    2116        5132 :         for (i = 1; i < blob->clean.num_pages; i++) {
    2117          68 :                 bs_release_md_page(bs, blob->clean.pages[i]);
    2118             :         }
    2119             :         /* No active pages means the blob is being deleted, so its first page is released too. */
    2120        5064 :         if (blob->active.num_pages == 0) {
    2121             :                 uint32_t page_num;
    2122             : 
    2123        1488 :                 page_num = bs_blobid_to_page(blob->id);
    2124        1488 :                 bs_release_md_page(bs, page_num);
    2125             :         }
    2126             : 
    2127        5064 :         spdk_spin_unlock(&bs->used_lock);
    2128             : 
    2129             :         /* Move on to clearing clusters */
    2130        5064 :         blob_persist_clear_clusters(seq, ctx);
    2131             : }
    2132             : /* Persist step: write zeroes over the on-disk metadata pages the blob no
                     :  * longer uses.
                     :  *
                     :  * It has the sequence-callback signature because
                     :  * blob_persist_write_page_root() passes it as the callback of the first
                     :  * page write; blob_persist_start() and blob_persist_write_page_root() also
                     :  * call it directly with bserrno 0.
                     :  *
                     :  * seq     - sequence the persist is running on.
                     :  * cb_arg  - the struct spdk_blob_persist_ctx of this persist.
                     :  * bserrno - non-zero ends the persist through blob_persist_complete().
                     :  *
                     :  * Otherwise one batch is built that zeroes every clean-list page except
                     :  * the first, plus the blob's first page when active.num_pages is 0.
                     :  * blob_persist_zero_pages_cpl() is registered as the batch callback.
                     :  */
    2133             : static void
    2134        5108 : blob_persist_zero_pages(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
    2135             : {
    2136        5108 :         struct spdk_blob_persist_ctx    *ctx = cb_arg;
    2137        5108 :         struct spdk_blob                *blob = ctx->blob;
    2138        5108 :         struct spdk_blob_store          *bs = blob->bs;
    2139             :         uint64_t                        lba;
    2140             :         uint64_t                        lba_count;
    2141             :         spdk_bs_batch_t                 *batch;
    2142             :         size_t                          i;
    2143             : 
    2144        5108 :         if (bserrno != 0) {
    2145          40 :                 blob_persist_complete(seq, ctx, bserrno);
    2146          40 :                 return;
    2147             :         }
    2148             : 
    2149        5068 :         batch = bs_sequence_to_batch(seq, blob_persist_zero_pages_cpl, ctx);
    2150             :         /* Every zero write below covers SPDK_BS_PAGE_SIZE bytes, expressed in LBAs. */
    2151        5068 :         lba_count = bs_byte_to_lba(bs, SPDK_BS_PAGE_SIZE);
    2152             : 
    2153             :         /* This loop starts at 1 because the first page is special and handled
    2154             :          * below. The pages (except the first) are never written in place,
    2155             :          * so any pages in the clean list must be zeroed.
    2156             :          */
    2157        5136 :         for (i = 1; i < blob->clean.num_pages; i++) {
    2158          68 :                 lba = bs_md_page_to_lba(bs, blob->clean.pages[i]);
    2159             : 
    2160          68 :                 bs_batch_write_zeroes_dev(batch, lba, lba_count);
    2161             :         }
    2162             : 
    2163             :         /* The first page will only be zeroed if this is a delete. */
    2164        5068 :         if (blob->active.num_pages == 0) {
    2165             :                 uint32_t page_num;
    2166             : 
    2167             :                 /* The first page in the metadata goes where the blobid indicates */
    2168        1492 :                 page_num = bs_blobid_to_page(blob->id);
    2169        1492 :                 lba = bs_md_page_to_lba(bs, page_num);
    2170             : 
    2171        1492 :                 bs_batch_write_zeroes_dev(batch, lba, lba_count);
    2172             :         }
    2173             : 
    2174        5068 :         bs_batch_close(batch);
    2175             : }
    2176             : /* Persist step: write the first (root) metadata page of the blob.
                     :  *
                     :  * blob_persist_write_page_chain() registers this function as the callback
                     :  * of the batch that writes all other pages, so the root page is written
                     :  * only after that batch.
                     :  *
                     :  * seq     - sequence the persist is running on.
                     :  * cb_arg  - the struct spdk_blob_persist_ctx of this persist.
                     :  * bserrno - non-zero ends the persist through blob_persist_complete().
                     :  *
                     :  * With no active pages nothing is written and blob_persist_zero_pages() is
                     :  * called directly. Otherwise ctx->pages[0] is written to the md page
                     :  * derived from the blob id, with blob_persist_zero_pages() as the write
                     :  * callback.
                     :  */
    2177             : static void
    2178        3616 : blob_persist_write_page_root(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
    2179             : {
    2180        3616 :         struct spdk_blob_persist_ctx    *ctx = cb_arg;
    2181        3616 :         struct spdk_blob                *blob = ctx->blob;
    2182        3616 :         struct spdk_blob_store          *bs = blob->bs;
    2183             :         uint64_t                        lba;
    2184             :         uint32_t                        lba_count;
    2185             :         struct spdk_blob_md_page        *page;
    2186             : 
    2187        3616 :         if (bserrno != 0) {
    2188           0 :                 blob_persist_complete(seq, ctx, bserrno);
    2189           0 :                 return;
    2190             :         }
    2191             : 
    2192        3616 :         if (blob->active.num_pages == 0) {
    2193             :                 /* Move on to the next step */
    2194           0 :                 blob_persist_zero_pages(seq, ctx, 0);
    2195           0 :                 return;
    2196             :         }
    2197             :         /* The write covers one struct spdk_blob_md_page, expressed in LBAs. */
    2198        3616 :         lba_count = bs_byte_to_lba(bs, sizeof(*page));
    2199             : 
    2200        3616 :         page = &ctx->pages[0];
    2201             :         /* The first page in the metadata goes where the blobid indicates */
    2202        3616 :         lba = bs_md_page_to_lba(bs, bs_blobid_to_page(blob->id));
    2203             : 
    2204        3616 :         bs_sequence_write_dev(seq, page, lba, lba_count,
    2205             :                               blob_persist_zero_pages, ctx);
    2206             : }
    2207             : /* Persist step: write every serialized metadata page except the first.
                     :  *
                     :  * seq - sequence the persist is running on.
                     :  * ctx - persist context; ctx->pages[] holds the serialized pages.
                     :  *
                     :  * Pages ctx->pages[1..active.num_pages-1] are queued in one batch, each to
                     :  * the md page recorded in blob->active.pages[i]. The batch is created with
                     :  * blob_persist_write_page_root() as its callback, which then writes page 0.
                     :  */
    2208             : static void
    2209        3616 : blob_persist_write_page_chain(spdk_bs_sequence_t *seq, struct spdk_blob_persist_ctx *ctx)
    2210             : {
    2211        3616 :         struct spdk_blob                *blob = ctx->blob;
    2212        3616 :         struct spdk_blob_store          *bs = blob->bs;
    2213             :         uint64_t                        lba;
    2214             :         uint32_t                        lba_count;
    2215             :         struct spdk_blob_md_page        *page;
    2216             :         spdk_bs_batch_t                 *batch;
    2217             :         size_t                          i;
    2218             : 
    2219             :         /* Clusters don't move around in blobs. The list shrinks or grows
    2220             :          * at the end, but no changes ever occur in the middle of the list.
    2221             :          */
    2222             : 
    2223        3616 :         lba_count = bs_byte_to_lba(bs, sizeof(*page));
    2224             : 
    2225        3616 :         batch = bs_sequence_to_batch(seq, blob_persist_write_page_root, ctx);
    2226             : 
    2227             :         /* This starts at 1. The root page is not written until
    2228             :          * all of the others are finished
    2229             :          */
    2230        3704 :         for (i = 1; i < blob->active.num_pages; i++) {
    2231          88 :                 page = &ctx->pages[i];
    2232          88 :                 assert(page->sequence_num == i);
    2233             :                 /* active.pages[i] is the md page index assigned to serialized page i. */
    2234          88 :                 lba = bs_md_page_to_lba(bs, blob->active.pages[i]);
    2235             : 
    2236          88 :                 bs_batch_write_dev(batch, page, lba, lba_count);
    2237             :         }
    2238             : 
    2239        3616 :         bs_batch_close(batch);
    2240        3616 : }
    2241             : /* Change the in-memory size of a blob to sz clusters.
                     :  *
                     :  * blob - blob to resize; blob_verify_md_op() is called on it first.
                     :  * sz   - requested number of clusters.
                     :  *
                     :  * Returns 0 on success (also when the size is already sz), -ENOSPC when a
                     :  * blob that is not thin provisioned needs more clusters than
                     :  * bs->num_free_clusters or no clear md page bit is found, and -ENOMEM when
                     :  * realloc() of the cluster or extent page array fails.
                     :  *
                     :  * Growing enlarges active.clusters (and active.extent_pages when the extent
                     :  * table is used) and zero-fills the new tail; for blobs that are not thin
                     :  * provisioned bs_allocate_cluster() is then called for every new cluster
                     :  * index. Shrinking only lowers the counters here. Once the arrays are in
                     :  * place the blob state is set to SPDK_BLOB_STATE_DIRTY.
                     :  */
    2242             : static int
    2243        3576 : blob_resize(struct spdk_blob *blob, uint64_t sz)
    2244             : {
    2245             :         uint64_t        i;
    2246             :         uint64_t        *tmp;
    2247        3576 :         uint64_t        cluster;
    2248        3576 :         uint32_t        lfmd; /*  lowest free md page */
    2249             :         uint64_t        num_clusters;
    2250             :         uint32_t        *ep_tmp;
    2251        3576 :         uint64_t        new_num_ep = 0, current_num_ep = 0;
    2252             :         struct spdk_blob_store *bs;
    2253             :         int             rc;
    2254             : 
    2255        3576 :         bs = blob->bs;
    2256             : 
    2257        3576 :         blob_verify_md_op(blob);
    2258             : 
    2259        3576 :         if (blob->active.num_clusters == sz) {
    2260         456 :                 return 0;
    2261             :         }
    2262             : 
    2263        3120 :         if (blob->active.num_clusters < blob->active.cluster_array_size) {
    2264             :                 /* If this blob was resized to be larger, then smaller, then
    2265             :                  * larger without syncing, then the cluster array already
    2266             :                  * contains spare assigned clusters we can use.
    2267             :                  */
    2268           0 :                 num_clusters = spdk_min(blob->active.cluster_array_size,
    2269             :                                         sz);
    2270             :         } else {
    2271        3120 :                 num_clusters = blob->active.num_clusters;
    2272             :         }
    2273             : 
    2274        3120 :         if (blob->use_extent_table) {
    2275             :                 /* Round up since every cluster beyond current Extent Table size,
    2276             :                  * requires new extent page. */
    2277        1582 :                 new_num_ep = spdk_divide_round_up(sz, SPDK_EXTENTS_PER_EP);
    2278        1582 :                 current_num_ep = spdk_divide_round_up(num_clusters, SPDK_EXTENTS_PER_EP);
    2279             :         }
    2280             : 
    2281        3120 :         assert(!spdk_spin_held(&bs->used_lock));
    2282             : 
    2283             :         /* Check first that we have enough clusters and md pages before we start claiming them.
    2284             :          * bs->used_lock is held to ensure that clusters we think are free are still free when we go
    2285             :          * to claim them later in this function.
                     :          *
                     :          * NOTE(review): the md page loop below passes lfmd back into the search
                     :          * unchanged, so each iteration appears to find the same clear bit. It
                     :          * looks like this only proves that one free md page exists, not
                     :          * (new_num_ep - current_num_ep) of them. blob_persist_generate_new_md()
                     :          * increments its index between searches - confirm whether that is
                     :          * wanted here as well.
    2286             :          */
    2287        3120 :         if (sz > num_clusters && spdk_blob_is_thin_provisioned(blob) == false) {
    2288        1302 :                 spdk_spin_lock(&bs->used_lock);
    2289        1302 :                 if ((sz - num_clusters) > bs->num_free_clusters) {
    2290           8 :                         rc = -ENOSPC;
    2291           8 :                         goto out;
    2292             :                 }
    2293        1294 :                 lfmd = 0;
    2294        1938 :                 for (i = current_num_ep; i < new_num_ep ; i++) {
    2295         644 :                         lfmd = spdk_bit_array_find_first_clear(blob->bs->used_md_pages, lfmd);
    2296         644 :                         if (lfmd == UINT32_MAX) {
    2297             :                                 /* No more free md pages. Cannot satisfy the request */
    2298           0 :                                 rc = -ENOSPC;
    2299           0 :                                 goto out;
    2300             :                         }
    2301             :                 }
    2302             :         }
    2303             : 
    2304        3112 :         if (sz > num_clusters) {
    2305             :                 /* Expand the cluster array if necessary.
    2306             :                  * We only shrink the array when persisting.
    2307             :                  */
    2308        1706 :                 tmp = realloc(blob->active.clusters, sizeof(*blob->active.clusters) * sz);
    2309        1706 :                 if (sz > 0 && tmp == NULL) {
    2310           0 :                         rc = -ENOMEM;
    2311           0 :                         goto out;
    2312             :                 }
    2313        1706 :                 memset(tmp + blob->active.cluster_array_size, 0,
    2314        1706 :                        sizeof(*blob->active.clusters) * (sz - blob->active.cluster_array_size));
    2315        1706 :                 blob->active.clusters = tmp;
    2316        1706 :                 blob->active.cluster_array_size = sz;
    2317             : 
    2318             :                 /* Expand the extents table, only if enough clusters were added */
    2319        1706 :                 if (new_num_ep > current_num_ep && blob->use_extent_table) {
    2320         842 :                         ep_tmp = realloc(blob->active.extent_pages, sizeof(*blob->active.extent_pages) * new_num_ep);
    2321         842 :                         if (new_num_ep > 0 && ep_tmp == NULL) {
    2322           0 :                                 rc = -ENOMEM;
    2323           0 :                                 goto out;
    2324             :                         }
    2325         842 :                         memset(ep_tmp + blob->active.extent_pages_array_size, 0,
    2326         842 :                                sizeof(*blob->active.extent_pages) * (new_num_ep - blob->active.extent_pages_array_size));
    2327         842 :                         blob->active.extent_pages = ep_tmp;
    2328         842 :                         blob->active.extent_pages_array_size = new_num_ep;
    2329             :                 }
    2330             :         }
    2331             : 
    2332        3112 :         blob->state = SPDK_BLOB_STATE_DIRTY;
    2333             : 
    2334        3112 :         if (spdk_blob_is_thin_provisioned(blob) == false) {
    2335        2428 :                 cluster = 0;
    2336        2428 :                 lfmd = 0;
    2337        9832 :                 for (i = num_clusters; i < sz; i++) {
    2338        7404 :                         bs_allocate_cluster(blob, i, &cluster, &lfmd, true);
    2339             :                         /* Do not increment lfmd here.  lfmd will get updated
    2340             :                          * to the md_page allocated (if any) when a new extent
    2341             :                          * page is needed.  Just pass that value again,
    2342             :                          * bs_allocate_cluster will just start at that index
    2343             :                          * to find the next free md_page when needed.
    2344             :                          */
    2345             :                 }
    2346             :         }
    2347             : 
    2348             :         /* If we are shrinking the blob, we must adjust num_allocated_clusters */
    2349     1072199 :         for (i = sz; i < num_clusters; i++) {
    2350     1069087 :                 if (blob->active.clusters[i] != 0) {
    2351        2343 :                         blob->active.num_allocated_clusters--;
    2352             :                 }
    2353             :         }
    2354             : 
    2355        3112 :         blob->active.num_clusters = sz;
    2356        3112 :         blob->active.num_extent_pages = new_num_ep;
    2357             :         /* used_lock is only taken on the non-thin grow path, hence the spdk_spin_held() check at out. */
    2358        3112 :         rc = 0;
    2359        3120 : out:
    2360        3120 :         if (spdk_spin_held(&bs->used_lock)) {
    2361        1302 :                 spdk_spin_unlock(&bs->used_lock);
    2362             :         }
    2363             : 
    2364        3120 :         return rc;
    2365             : }
    2366             : /* Persist step: serialize the blob and assign md pages to the new chain.
                     :  *
                     :  * ctx - persist context; ctx->seq and ctx->blob are read, ctx->pages is
                     :  *       filled by blob_serialize().
                     :  *
                     :  * blob_serialize() also sets blob->active.num_pages, and active.pages is
                     :  * reallocated to that length. The first page always goes to
                     :  * bs_blobid_to_page(blob->id). Every further page gets the next clear bit
                     :  * of bs->used_md_pages, which is claimed with bs_claim_md_page() and stored
                     :  * in the previous page's next field. A page's crc is computed once its
                     :  * next field is final.
                     :  *
                     :  * Failures end the persist through blob_persist_complete(): with the
                     :  * blob_serialize() error, or with -ENOMEM when realloc() fails or no clear
                     :  * md page bit is found. On success the blob state becomes
                     :  * SPDK_BLOB_STATE_CLEAN and blob_persist_write_page_chain() is called.
                     :  */
    2367             : static void
    2368        3616 : blob_persist_generate_new_md(struct spdk_blob_persist_ctx *ctx)
    2369             : {
    2370        3616 :         spdk_bs_sequence_t *seq = ctx->seq;
    2371        3616 :         struct spdk_blob *blob = ctx->blob;
    2372        3616 :         struct spdk_blob_store *bs = blob->bs;
    2373             :         uint64_t i;
    2374             :         uint32_t page_num;
    2375             :         void *tmp;
    2376             :         int rc;
    2377             : 
    2378             :         /* Generate the new metadata */
    2379        3616 :         rc = blob_serialize(blob, &ctx->pages, &blob->active.num_pages);
    2380        3616 :         if (rc < 0) {
    2381           0 :                 blob_persist_complete(seq, ctx, rc);
    2382           0 :                 return;
    2383             :         }
    2384             : 
    2385        3616 :         assert(blob->active.num_pages >= 1);
    2386             : 
    2387             :         /* Resize the cache of page indices */
    2388        3616 :         tmp = realloc(blob->active.pages, blob->active.num_pages * sizeof(*blob->active.pages));
    2389        3616 :         if (!tmp) {
    2390           0 :                 blob_persist_complete(seq, ctx, -ENOMEM);
    2391           0 :                 return;
    2392             :         }
    2393        3616 :         blob->active.pages = tmp;
    2394             : 
    2395             :         /* Assign this metadata to pages. This requires two passes - one to verify that there are
    2396             :          * enough pages and a second to actually claim them. The used_lock is held across
    2397             :          * both passes to ensure things don't change in the middle.
    2398             :          */
    2399        3616 :         spdk_spin_lock(&bs->used_lock);
    2400        3616 :         page_num = 0;
    2401             :         /* Note that this loop starts at one. The first page location is fixed by the blobid. */
    2402        3704 :         for (i = 1; i < blob->active.num_pages; i++) {
    2403          88 :                 page_num = spdk_bit_array_find_first_clear(bs->used_md_pages, page_num);
    2404          88 :                 if (page_num == UINT32_MAX) {
    2405           0 :                         spdk_spin_unlock(&bs->used_lock);
    2406           0 :                         blob_persist_complete(seq, ctx, -ENOMEM);
    2407           0 :                         return;
    2408             :                 }
    2409          88 :                 page_num++; /* continue the next search after the bit just found */
    2410             :         }
    2411             : 
    2412        3616 :         page_num = 0;
    2413        3616 :         blob->active.pages[0] = bs_blobid_to_page(blob->id);
    2414        3704 :         for (i = 1; i < blob->active.num_pages; i++) {
    2415          88 :                 page_num = spdk_bit_array_find_first_clear(bs->used_md_pages, page_num);
    2416          88 :                 ctx->pages[i - 1].next = page_num;
    2417             :                 /* Now that previous metadata page is complete, calculate the crc for it. */
    2418          88 :                 ctx->pages[i - 1].crc = blob_md_page_calc_crc(&ctx->pages[i - 1]);
    2419          88 :                 blob->active.pages[i] = page_num;
    2420          88 :                 bs_claim_md_page(bs, page_num);
    2421          88 :                 SPDK_DEBUGLOG(blob, "Claiming page %u for blob 0x%" PRIx64 "\n", page_num,
    2422             :                               blob->id);
    2423          88 :                 page_num++;
    2424             :         }
    2425        3616 :         spdk_spin_unlock(&bs->used_lock);
    2426        3616 :         ctx->pages[i - 1].crc = blob_md_page_calc_crc(&ctx->pages[i - 1]); /* i == num_pages here: last page of the chain */
    2427             :         /* Start writing the metadata from last page to first */
    2428        3616 :         blob->state = SPDK_BLOB_STATE_CLEAN;
    2429        3616 :         blob_persist_write_page_chain(seq, ctx);
    2430             : }
    2431             : /* Persist step: write extent pages to disk, one write per invocation.
                     :  *
                     :  * blob_persist_start() calls this directly with bserrno 0; afterwards it
                     :  * is re-entered as the callback of each extent page write it issues.
                     :  *
                     :  * seq     - sequence the persist is running on.
                     :  * cb_arg  - the struct spdk_blob_persist_ctx of this persist.
                     :  * bserrno - non-zero ends the persist through blob_persist_complete().
                     :  *
                     :  * Every call first frees ctx->extent_page left over from the previous
                     :  * write. It then scans blob->active.extent_pages from
                     :  * ctx->next_extent_page: entries equal to 0 are skipped, and the first
                     :  * other entry is serialized into ctx->extent_page (obtained through
                     :  * blob_serialize_add_page(), presumably a fresh allocation - confirm),
                     :  * given a crc and written. When the scan reaches the end of the array,
                     :  * blob_persist_generate_new_md() is called.
                     :  */
    2432             : static void
    2433        2358 : blob_persist_write_extent_pages(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
    2434             : {
    2435        2358 :         struct spdk_blob_persist_ctx    *ctx = cb_arg;
    2436        2358 :         struct spdk_blob                *blob = ctx->blob;
    2437             :         size_t                          i;
    2438             :         uint32_t                        extent_page_id;
    2439        2358 :         uint32_t                        page_count = 0;
    2440             :         int                             rc;
    2441             : 
    2442        2358 :         if (ctx->extent_page != NULL) {
    2443         666 :                 spdk_free(ctx->extent_page);
    2444         666 :                 ctx->extent_page = NULL;
    2445             :         }
    2446             : 
    2447        2358 :         if (bserrno != 0) {
    2448           0 :                 blob_persist_complete(seq, ctx, bserrno);
    2449           0 :                 return;
    2450             :         }
    2451             : 
    2452             :         /* Only write out Extent Pages when blob was resized. */
    2453        4614 :         for (i = ctx->next_extent_page; i < blob->active.extent_pages_array_size; i++) {
    2454        2922 :                 extent_page_id = blob->active.extent_pages[i];
    2455        2922 :                 if (extent_page_id == 0) {
    2456             :                         /* No Extent Page to persist */
    2457        2256 :                         assert(spdk_blob_is_thin_provisioned(blob));
    2458        2256 :                         continue;
    2459             :                 }
    2460         666 :                 assert(spdk_bit_array_get(blob->bs->used_md_pages, extent_page_id));
    2461         666 :                 ctx->next_extent_page = i + 1; /* the next invocation resumes after this entry */
    2462         666 :                 rc = blob_serialize_add_page(ctx->blob, &ctx->extent_page, &page_count, &ctx->extent_page);
    2463         666 :                 if (rc < 0) {
    2464           0 :                         blob_persist_complete(seq, ctx, rc);
    2465           0 :                         return;
    2466             :                 }
    2467             : 
    2468         666 :                 blob->state = SPDK_BLOB_STATE_DIRTY;
    2469         666 :                 blob_serialize_extent_page(blob, i * SPDK_EXTENTS_PER_EP, ctx->extent_page);
    2470             : 
    2471         666 :                 ctx->extent_page->crc = blob_md_page_calc_crc(ctx->extent_page);
    2472             : 
    2473         666 :                 bs_sequence_write_dev(seq, ctx->extent_page, bs_md_page_to_lba(blob->bs, extent_page_id),
    2474         666 :                                       bs_byte_to_lba(blob->bs, SPDK_BS_PAGE_SIZE),
    2475             :                                       blob_persist_write_extent_pages, ctx);
    2476         666 :                 return;
    2477             :         }
    2478             : 
    2479        1692 :         blob_persist_generate_new_md(ctx);
    2480             : }
    2481             : /* First step of a persist; blob_persist() passes it to bs_mark_dirty() as
                     :  * the callback.
                     :  *
                     :  * seq     - sequence the persist is running on.
                     :  * cb_arg  - the struct spdk_blob_persist_ctx of this persist.
                     :  * bserrno - non-zero ends the persist through blob_persist_complete().
                     :  *
                     :  * Dispatch:
                     :  *  - active.num_pages == 0: delete; the state becomes CLEAN and the chain
                     :  *    continues at blob_persist_zero_pages().
                     :  *  - clean.num_clusters < active.num_clusters: grown; extent page writing
                     :  *    starts at spdk_max(1, clean.num_extent_pages) - 1.
                     :  *  - active.num_clusters < active.cluster_array_size: shrunk; extent page
                     :  *    writing starts at spdk_max(1, active.num_extent_pages) - 1.
                     :  *  - otherwise: blob_persist_generate_new_md() is called directly.
                     :  * The spdk_max(1, n) - 1 form yields n - 1 for n >= 1 and 0 for n == 0.
                     :  */
    2482             : static void
    2483        5116 : blob_persist_start(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
    2484             : {
    2485        5116 :         struct spdk_blob_persist_ctx *ctx = cb_arg;
    2486        5116 :         struct spdk_blob *blob = ctx->blob;
    2487             : 
    2488        5116 :         if (bserrno != 0) {
    2489           8 :                 blob_persist_complete(seq, ctx, bserrno);
    2490           8 :                 return;
    2491             :         }
    2492             : 
    2493        5108 :         if (blob->active.num_pages == 0) {
    2494             :                 /* This is the signal that the blob should be deleted.
    2495             :                  * Immediately jump to the clean up routine. */
    2496        1492 :                 assert(blob->clean.num_pages > 0);
    2497        1492 :                 blob->state = SPDK_BLOB_STATE_CLEAN;
    2498        1492 :                 blob_persist_zero_pages(seq, ctx, 0);
    2499        1492 :                 return;
    2500             : 
    2501             :         }
    2502             : 
    2503        3616 :         if (blob->clean.num_clusters < blob->active.num_clusters) {
    2504             :                 /* Blob was resized up */
    2505        1678 :                 assert(blob->clean.num_extent_pages <= blob->active.num_extent_pages);
    2506        1678 :                 ctx->next_extent_page = spdk_max(1, blob->clean.num_extent_pages) - 1;
    2507        1938 :         } else if (blob->active.num_clusters < blob->active.cluster_array_size) {
    2508             :                 /* Blob was resized down */
    2509          14 :                 assert(blob->clean.num_extent_pages >= blob->active.num_extent_pages);
    2510          14 :                 ctx->next_extent_page = spdk_max(1, blob->active.num_extent_pages) - 1;
    2511             :         } else {
    2512             :                 /* No change in size occurred */
    2513        1924 :                 blob_persist_generate_new_md(ctx);
    2514        1924 :                 return;
    2515             :         }
    2516             : 
    2517        1692 :         blob_persist_write_extent_pages(seq, ctx, 0);
    2518             : }
    2519             : /* Context carried through the bs_mark_dirty() read/modify/write of the
                     :  * super block.
                     :  *
                     :  * bs     - blobstore being marked dirty.
                     :  * super  - buffer obtained with spdk_zmalloc() that the super block is
                     :  *          read into and written back from.
                     :  * cb_fn  - caller's callback, invoked by bs_mark_dirty_write_cpl().
                     :  * cb_arg - argument passed to cb_fn.
                     :  */
    2520             : struct spdk_bs_mark_dirty {
    2521             :         struct spdk_blob_store          *bs;
    2522             :         struct spdk_bs_super_block      *super;
    2523             :         spdk_bs_sequence_cpl            cb_fn;
    2524             :         void                            *cb_arg;
    2525             : };
    2526             : /* Final step of bs_mark_dirty(): callback of the super block write, also
                     :  * called directly by bs_mark_dirty_write() on its error paths.
                     :  *
                     :  * seq     - sequence the operation is running on.
                     :  * cb_arg  - the struct spdk_bs_mark_dirty context.
                     :  * bserrno - status; only on 0 is the in-memory bs->clean flag cleared.
                     :  *
                     :  * The caller's cb_fn is invoked with bserrno in every case, after which
                     :  * ctx->super and the context itself are freed.
                     :  */
    2527             : static void
    2528         158 : bs_mark_dirty_write_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
    2529             : {
    2530         158 :         struct spdk_bs_mark_dirty *ctx = cb_arg;
    2531             : 
    2532         158 :         if (bserrno == 0) {
    2533         150 :                 ctx->bs->clean = 0;
    2534             :         }
    2535             : 
    2536         158 :         ctx->cb_fn(seq, ctx->cb_arg, bserrno);
    2537             : 
    2538         158 :         spdk_free(ctx->super);
    2539         158 :         free(ctx);
    2540         158 : }
    2541             : /* Forward declaration: bs_mark_dirty_write() below calls bs_write_super(),
                     :  * whose definition is not in this part of the file.
                     :  */
    2542             : static void bs_write_super(spdk_bs_sequence_t *seq, struct spdk_blob_store *bs,
    2543             :                            struct spdk_bs_super_block *super, spdk_bs_sequence_cpl cb_fn, void *cb_arg);
    2544             : 
    2545             : /* Second step of bs_mark_dirty(): callback of the super block read.
                     :  *
                     :  * seq     - sequence the operation is running on.
                     :  * cb_arg  - the struct spdk_bs_mark_dirty context; ctx->super holds the
                     :  *           super block that was read.
                     :  * bserrno - read status; non-zero goes straight to
                     :  *           bs_mark_dirty_write_cpl().
                     :  *
                     :  * The super block is checked with bs_super_validate(); a non-zero result
                     :  * is forwarded to bs_mark_dirty_write_cpl() as the status. Otherwise the
                     :  * block is modified and written back with bs_write_super(), with
                     :  * bs_mark_dirty_write_cpl() as the callback.
                     :  */
    2546             : static void
    2547         158 : bs_mark_dirty_write(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
    2548             : {
    2549         158 :         struct spdk_bs_mark_dirty *ctx = cb_arg;
    2550             :         int rc;
    2551             : 
    2552         158 :         if (bserrno != 0) {
    2553           4 :                 bs_mark_dirty_write_cpl(seq, ctx, bserrno);
    2554           4 :                 return;
    2555             :         }
    2556             : 
    2557         154 :         rc = bs_super_validate(ctx->super, ctx->bs);
    2558         154 :         if (rc != 0) {
    2559           0 :                 bs_mark_dirty_write_cpl(seq, ctx, rc);
    2560           0 :                 return;
    2561             :         }
    2562             :         /* Clear the clean flag; a stored size of 0 is replaced by the device's blockcnt * blocklen. */
    2563         154 :         ctx->super->clean = 0;
    2564         154 :         if (ctx->super->size == 0) {
    2565           4 :                 ctx->super->size = ctx->bs->dev->blockcnt * ctx->bs->dev->blocklen;
    2566             :         }
    2567             : 
    2568         154 :         bs_write_super(seq, ctx->bs, ctx->super, bs_mark_dirty_write_cpl, ctx);
    2569             : }
    2570             : /* Make sure the on-disk super block no longer says "clean", then call cb_fn.
                     :  *
                     :  * seq    - sequence to issue the I/O on.
                     :  * bs     - blobstore to mark dirty.
                     :  * cb_fn  - invoked as cb_fn(seq, cb_arg, status) when done.
                     :  * cb_arg - argument passed to cb_fn.
                     :  *
                     :  * When bs->clean is already 0, cb_fn is called immediately with 0 and no
                     :  * I/O is issued. Otherwise a context and a super block buffer are
                     :  * allocated (cb_fn gets -ENOMEM if either allocation fails) and the super
                     :  * block is read from page 0; bs_mark_dirty_write() continues from the
                     :  * read callback.
                     :  */
    2571             : static void
    2572        5550 : bs_mark_dirty(spdk_bs_sequence_t *seq, struct spdk_blob_store *bs,
    2573             :               spdk_bs_sequence_cpl cb_fn, void *cb_arg)
    2574             : {
    2575             :         struct spdk_bs_mark_dirty *ctx;
    2576             : 
    2577             :         /* Blobstore is already marked dirty */
    2578        5550 :         if (bs->clean == 0) {
    2579        5392 :                 cb_fn(seq, cb_arg, 0);
    2580        5392 :                 return;
    2581             :         }
    2582             : 
    2583         158 :         ctx = calloc(1, sizeof(*ctx));
    2584         158 :         if (!ctx) {
    2585           0 :                 cb_fn(seq, cb_arg, -ENOMEM);
    2586           0 :                 return;
    2587             :         }
    2588         158 :         ctx->bs = bs;
    2589         158 :         ctx->cb_fn = cb_fn;
    2590         158 :         ctx->cb_arg = cb_arg;
    2591             :         /* Super block buffer, requested with SPDK_MALLOC_DMA; freed in bs_mark_dirty_write_cpl(). */
    2592         158 :         ctx->super = spdk_zmalloc(sizeof(*ctx->super), 0x1000, NULL,
    2593             :                                   SPDK_ENV_SOCKET_ID_ANY, SPDK_MALLOC_DMA);
    2594         158 :         if (!ctx->super) {
    2595           0 :                 free(ctx);
    2596           0 :                 cb_fn(seq, cb_arg, -ENOMEM);
    2597           0 :                 return;
    2598             :         }
    2599             : 
    2600         158 :         bs_sequence_read_dev(seq, ctx->super, bs_page_to_lba(bs, 0),
    2601         158 :                              bs_byte_to_lba(bs, sizeof(*ctx->super)),
    2602             :                              bs_mark_dirty_write, ctx);
    2603             : }
    2604             : 
    2605             : /* Write a blob to disk
                     :  *
                     :  * seq    - sequence the persist runs on.
                     :  * blob   - blob to persist; blob_verify_md_op() is called on it first.
                     :  * cb_fn  - invoked as cb_fn(seq, cb_arg, status).
                     :  * cb_arg - argument passed to cb_fn.
                     :  *
                     :  * When the blob is SPDK_BLOB_STATE_CLEAN and persists_to_complete is empty,
                     :  * cb_fn is called immediately with 0. Otherwise a persist context is
                     :  * allocated (cb_fn gets -ENOMEM on failure). If persists_to_complete is
                     :  * not empty the context is appended to pending_persists and the function
                     :  * returns; the code that later starts queued contexts is not in this part
                     :  * of the file. Otherwise the context is put on persists_to_complete and
                     :  * bs_mark_dirty() is called with blob_persist_start() as the callback.
                     :  */
    2606             : static void
    2607        9144 : blob_persist(spdk_bs_sequence_t *seq, struct spdk_blob *blob,
    2608             :              spdk_bs_sequence_cpl cb_fn, void *cb_arg)
    2609             : {
    2610             :         struct spdk_blob_persist_ctx *ctx;
    2611             : 
    2612        9144 :         blob_verify_md_op(blob);
    2613             : 
    2614        9144 :         if (blob->state == SPDK_BLOB_STATE_CLEAN && TAILQ_EMPTY(&blob->persists_to_complete)) {
    2615        4028 :                 cb_fn(seq, cb_arg, 0);
    2616        4028 :                 return;
    2617             :         }
    2618             : 
    2619        5116 :         ctx = calloc(1, sizeof(*ctx));
    2620        5116 :         if (!ctx) {
    2621           0 :                 cb_fn(seq, cb_arg, -ENOMEM);
    2622           0 :                 return;
    2623             :         }
    2624        5116 :         ctx->blob = blob;
    2625        5116 :         ctx->seq = seq;
    2626        5116 :         ctx->cb_fn = cb_fn;
    2627        5116 :         ctx->cb_arg = cb_arg;
    2628             : 
    2629             :         /* Multiple blob persists can affect one another, via blob->state or
    2630             :          * blob mutable data changes. To prevent it, queue up the persists. */
    2631        5116 :         if (!TAILQ_EMPTY(&blob->persists_to_complete)) {
    2632          23 :                 TAILQ_INSERT_TAIL(&blob->pending_persists, ctx, link);
    2633          23 :                 return;
    2634             :         }
    2635        5093 :         TAILQ_INSERT_HEAD(&blob->persists_to_complete, ctx, link);
    2636             : 
    2637        5093 :         bs_mark_dirty(seq, blob->bs, blob_persist_start, ctx);
    2638             : }
    2639             : /* Context for allocating a cluster and copying data into it.
                     :  * blob_allocate_and_copy_cluster_cpl() reads seq to reach the channel and
                     :  * frees buf with spdk_free() before freeing the context. The code that
                     :  * fills in the other fields is not in this part of the file.
                     :  */
    2640             : struct spdk_blob_copy_cluster_ctx {
    2641             :         struct spdk_blob *blob;
    2642             :         uint8_t *buf;
    2643             :         uint64_t page;
    2644             :         uint64_t new_cluster;
    2645             :         uint32_t new_extent_page;
    2646             :         spdk_bs_sequence_t *seq;
    2647             :         struct spdk_blob_md_page *new_cluster_page;
    2648             : };
    2649             : /* Context handed to blob_free_cluster_cpl() as cb_arg; that callback reads
                     :  * seq. The code that uses the other fields is not in this part of the
                     :  * file.
                     :  */
    2650             : struct spdk_blob_free_cluster_ctx {
    2651             :         struct spdk_blob *blob;
    2652             :         uint64_t page;
    2653             :         struct spdk_blob_md_page *md_page;
    2654             :         uint64_t cluster_num;
    2655             :         uint32_t extent_page;
    2656             :         spdk_bs_sequence_t *seq;
    2657             : };
    2658             : 
                       /*
                        * Final completion of the sequence started in bs_allocate_and_copy_cluster().
                        *
                        * Every user op queued on the channel need_cluster_alloc list is taken off the
                        * list and either executed (bserrno == 0) or aborted with bserrno. The copy
                        * buffer and the context are then released.
                        *
                        * cb_arg  - the struct spdk_blob_copy_cluster_ctx of this allocation.
                        * bserrno - result of the allocation sequence.
                        */
    2659             : static void
    2660         812 : blob_allocate_and_copy_cluster_cpl(void *cb_arg, int bserrno)
    2661             : {
    2662         812 :         struct spdk_blob_copy_cluster_ctx *ctx = cb_arg;
    2663         812 :         struct spdk_bs_request_set *set = (struct spdk_bs_request_set *)ctx->seq;
    2664         812 :         TAILQ_HEAD(, spdk_bs_request_set) requests;
    2665             :         spdk_bs_user_op_t *op;
    2666             : 
                               /* Move the queued ops to a local list, leaving the channel list empty.
                                * NOTE(review): presumably this lets an op that needs another allocation
                                * queue itself on the channel again without being picked up by the loop
                                * below - confirm. */
    2667         812 :         TAILQ_INIT(&requests);
    2668         812 :         TAILQ_SWAP(&set->channel->need_cluster_alloc, &requests, spdk_bs_request_set, link);
    2669             : 
    2670        1624 :         while (!TAILQ_EMPTY(&requests)) {
    2671         812 :                 op = TAILQ_FIRST(&requests);
    2672         812 :                 TAILQ_REMOVE(&requests, op, link);
    2673         812 :                 if (bserrno == 0) {
    2674         812 :                         bs_user_op_execute(op);
    2675             :                 } else {
    2676           0 :                         bs_user_op_abort(op, bserrno);
    2677             :                 }
    2678             :         }
    2679             : 
                               /* ctx->buf is NULL when no copy buffer was allocated. */
    2680         812 :         spdk_free(ctx->buf);
    2681         812 :         free(ctx);
    2682         812 : }
    2683             : 
                       /*
                        * Completion passed to blob_free_cluster_on_md_thread() by
                        * spdk_free_cluster_unmap_complete(): finishes the sequence with bserrno and
                        * frees the struct spdk_blob_free_cluster_ctx given in cb_arg.
                        */
    2684             : static void
    2685          60 : blob_free_cluster_cpl(void *cb_arg, int bserrno)
    2686             : {
    2687          60 :         struct spdk_blob_free_cluster_ctx *ctx = cb_arg;
    2688          60 :         spdk_bs_sequence_t *seq = ctx->seq;
    2689             : 
    2690          60 :         bs_sequence_finish(seq, bserrno);
    2691             : 
    2692          60 :         free(ctx);
    2693          60 : }
    2694             : 
                       /*
                        * Undo the allocation recorded in ctx: while holding the blobstore used_lock,
                        * release ctx->new_cluster and, when ctx->new_extent_page is non-zero, release
                        * that metadata page as well.
                        */
    2695             : static void
    2696           4 : blob_insert_cluster_revert(struct spdk_blob_copy_cluster_ctx *ctx)
    2697             : {
    2698           4 :         spdk_spin_lock(&ctx->blob->bs->used_lock);
    2699           4 :         bs_release_cluster(ctx->blob->bs, ctx->new_cluster);
    2700           4 :         if (ctx->new_extent_page != 0) {
    2701           2 :                 bs_release_md_page(ctx->blob->bs, ctx->new_extent_page);
    2702             :         }
    2703           4 :         spdk_spin_unlock(&ctx->blob->bs->used_lock);
    2704           4 : }
    2705             : 
                       /*
                        * Completion of the clear issued by blob_insert_cluster_clear() (also called
                        * directly with -ENOMEM when the batch could not be opened).
                        *
                        * A failed clear is only logged; the cluster (and extent page) are released in
                        * either case and the sequence is finished with the status of the clear.
                        *
                        * NOTE(review): when the clear fails the cluster is still returned to the pool,
                        * so it may keep the copied data - confirm that this is acceptable.
                        */
    2706             : static void
    2707           4 : blob_insert_cluster_clear_cpl(void *cb_arg, int bserrno)
    2708             : {
    2709           4 :         struct spdk_blob_copy_cluster_ctx *ctx = cb_arg;
    2710             : 
    2711           4 :         if (bserrno) {
    2712           0 :                 SPDK_WARNLOG("Failed to clear cluster: %d\n", bserrno);
    2713             :         }
    2714             : 
    2715           4 :         blob_insert_cluster_revert(ctx);
    2716           4 :         bs_sequence_finish(ctx->seq, bserrno);
    2717           4 : }
    2718             : 
                       /*
                        * Clear the data of ctx->new_cluster before the cluster is released.
                        *
                        * A batch is opened on the channel of ctx->seq with
                        * blob_insert_cluster_clear_cpl() as its completion, one clear covering the
                        * whole cluster is added, and the batch is closed. If the batch cannot be
                        * opened the completion is invoked directly with -ENOMEM.
                        */
    2719             : static void
    2720           4 : blob_insert_cluster_clear(struct spdk_blob_copy_cluster_ctx *ctx)
    2721             : {
    2722           4 :         struct spdk_bs_cpl cpl;
    2723             :         spdk_bs_batch_t *batch;
    2724           4 :         struct spdk_io_channel *ch = spdk_io_channel_from_ctx(ctx->seq->channel);
    2725             : 
    2726             :         /*
    2727             :          * We allocated a cluster and we copied data to it. But now, we realized that we don't need
    2728             :          * this cluster and we want to release it. We must ensure that we clear the data on this
    2729             :          * cluster.
    2730             :          * The cluster may later be re-allocated by a thick-provisioned blob for example. When
    2731             :          * reading from this thick-provisioned blob before writing data, we should read zeroes.
    2732             :          */
    2733             : 
    2734           4 :         cpl.type = SPDK_BS_CPL_TYPE_BLOB_BASIC;
    2735           4 :         cpl.u.blob_basic.cb_fn = blob_insert_cluster_clear_cpl;
    2736           4 :         cpl.u.blob_basic.cb_arg = ctx;
    2737             : 
    2738           4 :         batch = bs_batch_open(ch, &cpl, ctx->blob);
    2739           4 :         if (!batch) {
    2740           0 :                 blob_insert_cluster_clear_cpl(ctx, -ENOMEM);
    2741           0 :                 return;
    2742             :         }
    2743             : 
                               /* Start LBA of the new cluster; the count is the LBA offset of cluster 1,
                                * which is presumably the number of LBAs in one cluster. */
    2744           4 :         bs_batch_clear_dev(ctx->blob, batch, bs_cluster_to_lba(ctx->blob->bs, ctx->new_cluster),
    2745           4 :                            bs_cluster_to_lba(ctx->blob->bs, 1));
    2746           4 :         bs_batch_close(batch);
    2747             : }
    2748             : 
                       /*
                        * Completion passed to blob_insert_cluster_on_md_thread().
                        *
                        * bserrno == 0       - finish the sequence successfully.
                        * bserrno == -EEXIST - hand over to blob_insert_cluster_clear(); the sequence
                        *                      is finished later by blob_insert_cluster_clear_cpl()
                        *                      with the result of the clear.
                        * any other error    - release the allocation and finish the sequence with
                        *                      that error.
                        */
    2749             : static void
    2750         812 : blob_insert_cluster_cpl(void *cb_arg, int bserrno)
    2751             : {
    2752         812 :         struct spdk_blob_copy_cluster_ctx *ctx = cb_arg;
    2753             : 
    2754         812 :         if (bserrno) {
    2755           4 :                 if (bserrno == -EEXIST) {
    2756             :                         /* The metadata insert failed because another thread
    2757             :                          * allocated the cluster first. Clear and free our cluster
    2758             :                          * but continue without error. */
    2759           4 :                         blob_insert_cluster_clear(ctx);
    2760           4 :                         return;
    2761             :                 }
    2762             : 
    2763           0 :                 blob_insert_cluster_revert(ctx);
    2764             :         }
    2765             : 
    2766         808 :         bs_sequence_finish(ctx->seq, bserrno);
    2767             : }
    2768             : 
                       /*
                        * Completion of the data transfer into the new cluster. It is used both for
                        * the write issued by blob_write_copy() and for the device copy issued by
                        * blob_copy().
                        *
                        * On success the cluster index is derived from ctx->page and the new cluster
                        * is inserted into the blob metadata on the metadata thread. On failure the
                        * sequence is finished with the error.
                        *
                        * NOTE(review): the failure path does not call blob_insert_cluster_revert(),
                        * so ctx->new_cluster and ctx->new_extent_page appear to stay claimed - confirm
                        * whether they are released elsewhere.
                        */
    2769             : static void
    2770         408 : blob_write_copy_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
    2771             : {
    2772         408 :         struct spdk_blob_copy_cluster_ctx *ctx = cb_arg;
    2773             :         uint32_t cluster_number;
    2774             : 
    2775         408 :         if (bserrno) {
    2776             :                 /* The write failed, so jump to the final completion handler */
    2777           0 :                 bs_sequence_finish(seq, bserrno);
    2778           0 :                 return;
    2779             :         }
    2780             : 
    2781         408 :         cluster_number = bs_page_to_cluster(ctx->blob->bs, ctx->page);
    2782             : 
    2783         408 :         blob_insert_cluster_on_md_thread(ctx->blob, cluster_number, ctx->new_cluster,
    2784             :                                          ctx->new_extent_page, ctx->new_cluster_page, blob_insert_cluster_cpl, ctx);
    2785             : }
    2786             : 
                       /*
                        * Completion of the read from the backing device issued by
                        * bs_allocate_and_copy_cluster(). On success the data in ctx->buf is written
                        * to the new cluster, continuing in blob_write_copy_cpl(). On failure the
                        * sequence is finished with the error.
                        *
                        * NOTE(review): as in blob_write_copy_cpl(), the failure path does not release
                        * ctx->new_cluster / ctx->new_extent_page - confirm.
                        */
    2787             : static void
    2788         280 : blob_write_copy(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
    2789             : {
    2790         280 :         struct spdk_blob_copy_cluster_ctx *ctx = cb_arg;
    2791             : 
    2792         280 :         if (bserrno != 0) {
    2793             :                 /* The read failed, so jump to the final completion handler */
    2794           0 :                 bs_sequence_finish(seq, bserrno);
    2795           0 :                 return;
    2796             :         }
    2797             : 
    2798             :         /* Write whole cluster */
    2799         280 :         bs_sequence_write_dev(seq, ctx->buf,
    2800         280 :                               bs_cluster_to_lba(ctx->blob->bs, ctx->new_cluster),
    2801         280 :                               bs_cluster_to_lba(ctx->blob->bs, 1),
    2802             :                               blob_write_copy_cpl, ctx);
    2803             : }
    2804             : 
                       /*
                        * Decide whether the cluster starting at cluster_start_page can be filled with
                        * a device copy instead of a read followed by a write.
                        *
                        * Returns true only when the blob is not an esnap clone, the blobstore device
                        * provides a copy callback, and the backing device translate_lba() callback
                        * succeeds for the cluster start LBA. translate_lba() is handed base_lba as
                        * its output argument and is only called when the first two conditions hold,
                        * so *base_lba must not be used when false is returned.
                        */
    2805             : static bool
    2806         796 : blob_can_copy(struct spdk_blob *blob, uint32_t cluster_start_page, uint64_t *base_lba)
    2807             : {
    2808         796 :         uint64_t lba = bs_dev_page_to_lba(blob->back_bs_dev, cluster_start_page);
    2809             : 
    2810        1146 :         return (!blob_is_esnap_clone(blob) && blob->bs->dev->copy != NULL) &&
    2811         350 :                blob->back_bs_dev->translate_lba(blob->back_bs_dev, lba, base_lba);
    2812             : }
    2813             : 
                       /*
                        * Fill the new cluster with a device copy: copy one cluster worth of LBAs from
                        * src_lba to the start LBA of ctx->new_cluster, continuing in
                        * blob_write_copy_cpl().
                        *
                        * The op parameter is not used in this function.
                        *
                        * NOTE(review): lba_count is computed with the backing device conversion while
                        * the destination LBA uses the blobstore conversion; this presumably relies on
                        * both devices sharing a block size - confirm.
                        */
    2814             : static void
    2815         128 : blob_copy(struct spdk_blob_copy_cluster_ctx *ctx, spdk_bs_user_op_t *op, uint64_t src_lba)
    2816             : {
    2817         128 :         struct spdk_blob *blob = ctx->blob;
    2818         128 :         uint64_t lba_count = bs_dev_byte_to_lba(blob->back_bs_dev, blob->bs->cluster_sz);
    2819             : 
    2820         128 :         bs_sequence_copy_dev(ctx->seq,
    2821         128 :                              bs_cluster_to_lba(blob->bs, ctx->new_cluster),
    2822             :                              src_lba,
    2823             :                              lba_count,
    2824             :                              blob_write_copy_cpl, ctx);
    2825         128 : }
    2826             : 
                       /*
                        * Allocate a cluster for the cluster that contains io_unit and, when required,
                        * populate it from the backing device before inserting it into the blob
                        * metadata.
                        *
                        * blob    - blob that needs the cluster.
                        * _ch     - I/O channel the user op was submitted on.
                        * io_unit - io unit offset inside the blob that triggered the allocation.
                        * op      - user op waiting for the cluster. It is queued on the channel
                        *           need_cluster_alloc list and executed or aborted by
                        *           blob_allocate_and_copy_cluster_cpl(); on the early error paths of
                        *           this function it is aborted directly.
                        *
                        * If the channel already has ops waiting for an allocation, op is only queued.
                        *
                        * The new cluster is populated in one of three ways:
                        *  - device copy (blob_copy) when blob_can_copy() allows it,
                        *  - read into ctx->buf and write back (blob_write_copy) otherwise,
                        *  - not at all when the blob has no parent or the backing range is zeroes
                        *    (or is not valid for the backing device); the cluster is then inserted
                        *    into the metadata right away.
                        */
    2827             : static void
    2828         812 : bs_allocate_and_copy_cluster(struct spdk_blob *blob,
    2829             :                              struct spdk_io_channel *_ch,
    2830             :                              uint64_t io_unit, spdk_bs_user_op_t *op)
    2831             : {
    2832         812 :         struct spdk_bs_cpl cpl;
    2833             :         struct spdk_bs_channel *ch;
    2834             :         struct spdk_blob_copy_cluster_ctx *ctx;
    2835             :         uint32_t cluster_start_page;
    2836             :         uint32_t cluster_number;
    2837             :         bool is_zeroes;
    2838             :         bool can_copy;
    2839             :         bool is_valid_range;
    2840         812 :         uint64_t copy_src_lba;
    2841             :         int rc;
    2842             : 
    2843         812 :         ch = spdk_io_channel_get_ctx(_ch);
    2844             : 
    2845         812 :         if (!TAILQ_EMPTY(&ch->need_cluster_alloc)) {
    2846             :                 /* There are already operations pending. Queue this user op
    2847             :                  * and return because it will be re-executed when the outstanding
    2848             :                  * cluster allocation completes. */
    2849           0 :                 TAILQ_INSERT_TAIL(&ch->need_cluster_alloc, op, link);
    2850           0 :                 return;
    2851             :         }
    2852             : 
    2853             :         /* Round the io_unit offset down to the first page in the cluster */
    2854         812 :         cluster_start_page = bs_io_unit_to_cluster_start(blob, io_unit);
    2855             : 
    2856             :         /* Calculate which index in the metadata cluster array the corresponding
    2857             :          * cluster is supposed to be at. */
    2858         812 :         cluster_number = bs_io_unit_to_cluster_number(blob, io_unit);
    2859             : 
    2860         812 :         ctx = calloc(1, sizeof(*ctx));
    2861         812 :         if (!ctx) {
    2862           0 :                 bs_user_op_abort(op, -ENOMEM);
    2863           0 :                 return;
    2864             :         }
    2865             : 
    2866         812 :         assert(blob->bs->cluster_sz % blob->back_bs_dev->blocklen == 0);
    2867             : 
                               /* The metadata page is the one owned by the channel, not a new allocation;
                                * it is zeroed here before being handed to the metadata insert. */
    2868         812 :         ctx->blob = blob;
    2869         812 :         ctx->page = cluster_start_page;
    2870         812 :         ctx->new_cluster_page = ch->new_cluster_page;
    2871         812 :         memset(ctx->new_cluster_page, 0, SPDK_BS_PAGE_SIZE);
    2872             : 
    2873             :         /* Check if the cluster that we intend to do CoW for is valid for
    2874             :          * the backing dev. For zeroes backing dev, it'll be always valid.
    2875             :          * For other backing dev e.g. a snapshot, it could be invalid if
    2876             :          * the blob has been resized after snapshot was taken. */
    2877         812 :         is_valid_range = blob->back_bs_dev->is_range_valid(blob->back_bs_dev,
    2878             :                          bs_dev_page_to_lba(blob->back_bs_dev, cluster_start_page),
    2879         812 :                          bs_dev_byte_to_lba(blob->back_bs_dev, blob->bs->cluster_sz));
    2880             : 
                               /* copy_src_lba is only passed to blob_copy() below when can_copy is true. */
    2881         812 :         can_copy = is_valid_range && blob_can_copy(blob, cluster_start_page, &copy_src_lba);
    2882             : 
    2883        1608 :         is_zeroes = is_valid_range && blob->back_bs_dev->is_zeroes(blob->back_bs_dev,
    2884             :                         bs_dev_page_to_lba(blob->back_bs_dev, cluster_start_page),
    2885         796 :                         bs_dev_byte_to_lba(blob->back_bs_dev, blob->bs->cluster_sz));
                               /* A bounce buffer is needed only for the read-then-write path. */
    2886         812 :         if (blob->parent_id != SPDK_BLOBID_INVALID && !is_zeroes && !can_copy) {
    2887         280 :                 ctx->buf = spdk_malloc(blob->bs->cluster_sz, blob->back_bs_dev->blocklen,
    2888             :                                        NULL, SPDK_ENV_SOCKET_ID_ANY, SPDK_MALLOC_DMA);
    2889         280 :                 if (!ctx->buf) {
    2890           0 :                         SPDK_ERRLOG("DMA allocation for cluster of size = %" PRIu32 " failed.\n",
    2891             :                                     blob->bs->cluster_sz);
    2892           0 :                         free(ctx);
    2893           0 :                         bs_user_op_abort(op, -ENOMEM);
    2894           0 :                         return;
    2895             :                 }
    2896             :         }
    2897             : 
    2898         812 :         spdk_spin_lock(&blob->bs->used_lock);
    2899         812 :         rc = bs_allocate_cluster(blob, cluster_number, &ctx->new_cluster, &ctx->new_extent_page,
    2900             :                                  false);
    2901         812 :         spdk_spin_unlock(&blob->bs->used_lock);
    2902         812 :         if (rc != 0) {
    2903           0 :                 spdk_free(ctx->buf);
    2904           0 :                 free(ctx);
    2905           0 :                 bs_user_op_abort(op, rc);
    2906           0 :                 return;
    2907             :         }
    2908             : 
    2909         812 :         cpl.type = SPDK_BS_CPL_TYPE_BLOB_BASIC;
    2910         812 :         cpl.u.blob_basic.cb_fn = blob_allocate_and_copy_cluster_cpl;
    2911         812 :         cpl.u.blob_basic.cb_arg = ctx;
    2912             : 
    2913         812 :         ctx->seq = bs_sequence_start_blob(_ch, &cpl, blob);
    2914         812 :         if (!ctx->seq) {
                                       /* NOTE(review): only new_cluster is released here, whereas
                                        * blob_insert_cluster_revert() also releases a non-zero
                                        * ctx->new_extent_page. Confirm whether an extent page handed
                                        * out by bs_allocate_cluster() can leak on this path. */
    2915           0 :                 spdk_spin_lock(&blob->bs->used_lock);
    2916           0 :                 bs_release_cluster(blob->bs, ctx->new_cluster);
    2917           0 :                 spdk_spin_unlock(&blob->bs->used_lock);
    2918           0 :                 spdk_free(ctx->buf);
    2919           0 :                 free(ctx);
    2920           0 :                 bs_user_op_abort(op, -ENOMEM);
    2921           0 :                 return;
    2922             :         }
    2923             : 
    2924             :         /* Queue the user op to block other incoming operations */
    2925         812 :         TAILQ_INSERT_TAIL(&ch->need_cluster_alloc, op, link);
    2926             : 
    2927         812 :         if (blob->parent_id != SPDK_BLOBID_INVALID && !is_zeroes) {
    2928         408 :                 if (can_copy) {
    2929         128 :                         blob_copy(ctx, op, copy_src_lba);
    2930             :                 } else {
    2931             :                         /* Read cluster from backing device */
    2932         280 :                         bs_sequence_read_bs_dev(ctx->seq, blob->back_bs_dev, ctx->buf,
    2933             :                                                 bs_dev_page_to_lba(blob->back_bs_dev, cluster_start_page),
    2934         280 :                                                 bs_dev_byte_to_lba(blob->back_bs_dev, blob->bs->cluster_sz),
    2935             :                                                 blob_write_copy, ctx);
    2936             :                 }
    2937             : 
    2938             :         } else {
                                       /* Nothing to copy: insert the new cluster into the metadata directly. */
    2939         404 :                 blob_insert_cluster_on_md_thread(ctx->blob, cluster_number, ctx->new_cluster,
    2940             :                                                  ctx->new_extent_page, ctx->new_cluster_page, blob_insert_cluster_cpl, ctx);
    2941             :         }
    2942             : }
    2943             : 
                       /*
                        * Translate an io unit range of a blob into a device LBA range.
                        *
                        * blob      - blob the range belongs to.
                        * io_unit   - first io unit of the range.
                        * length    - length of the range in io units.
                        * lba       - out: start LBA.
                        * lba_count - out: number of LBAs.
                        *
                        * Returns true when io_unit is allocated in the blob; *lba is then an LBA on
                        * the blobstore device and *lba_count is left equal to length. Returns false
                        * otherwise; both outputs are then converted with
                        * bs_io_unit_to_back_dev_lba() and refer to the backing device, which must be
                        * present (asserted).
                        */
    2944             : static inline bool
    2945       40206 : blob_calculate_lba_and_lba_count(struct spdk_blob *blob, uint64_t io_unit, uint64_t length,
    2946             :                                  uint64_t *lba, uint64_t *lba_count)
    2947             : {
    2948       40206 :         *lba_count = length;
    2949             : 
    2950       40206 :         if (!bs_io_unit_is_allocated(blob, io_unit)) {
    2951        2992 :                 assert(blob->back_bs_dev != NULL);
    2952        2992 :                 *lba = bs_io_unit_to_back_dev_lba(blob, io_unit);
    2953        2992 :                 *lba_count = bs_io_unit_to_back_dev_lba(blob, *lba_count);
    2954        2992 :                 return false;
    2955             :         } else {
    2956       37214 :                 *lba = bs_blob_io_unit_to_lba(blob, io_unit);
    2957       37214 :                 return true;
    2958             :         }
    2959             : }
    2960             : 
                       /*
                        * State of an I/O that is split into pieces by
                        * blob_request_submit_op_split() / blob_request_submit_op_split_next().
                        *
                        * blob, channel           - target of the I/O.
                        * io_unit_offset          - io unit offset of the next piece to submit.
                        * io_units_remaining      - io units that have not been submitted yet.
                        * curr_payload            - payload position of the next piece; advanced only
                        *                           for read and write operations.
                        * op_type                 - kind of operation being split.
                        * seq                     - sequence finished when the whole I/O ends.
                        * in_submit_ctx           - true while a piece is being submitted.
                        * completed_in_submit_ctx - set when a piece completed during its own
                        *                           submission and more pieces remain.
                        * done                    - set when the whole I/O finished during a
                        *                           submission; the submitting frame then frees ctx.
                        */
    2961             : struct op_split_ctx {
    2962             :         struct spdk_blob *blob;
    2963             :         struct spdk_io_channel *channel;
    2964             :         uint64_t io_unit_offset;
    2965             :         uint64_t io_units_remaining;
    2966             :         void *curr_payload;
    2967             :         enum spdk_blob_op_type op_type;
    2968             :         spdk_bs_sequence_t *seq;
    2969             :         bool in_submit_ctx;
    2970             :         bool completed_in_submit_ctx;
    2971             :         bool done;
    2972             : };
    2973             : 
                       /*
                        * Driver and completion callback of a split I/O.
                        *
                        * It is called once by blob_request_submit_op_split() with bserrno 0 and then
                        * as the completion of every piece handed to the spdk_blob_io_* functions.
                        * Each call either finishes the whole sequence (on error, or when no io units
                        * remain) or submits the next piece, which is limited to the io units left
                        * before the next cluster boundary.
                        *
                        * A piece may complete while it is still being submitted, which re-enters
                        * this function. The in_submit_ctx flag detects that case: the nested call
                        * only records the outcome (completed_in_submit_ctx or done) and returns, and
                        * the outer call loops or frees ctx, so the stack does not grow with the
                        * number of pieces.
                        *
                        * cb_arg  - the struct op_split_ctx of the I/O.
                        * bserrno - status of the piece that just completed.
                        */
    2974             : static void
    2975         774 : blob_request_submit_op_split_next(void *cb_arg, int bserrno)
    2976             : {
    2977         774 :         struct op_split_ctx     *ctx = cb_arg;
    2978         774 :         struct spdk_blob        *blob = ctx->blob;
    2979         774 :         struct spdk_io_channel  *ch = ctx->channel;
    2980         774 :         enum spdk_blob_op_type  op_type = ctx->op_type;
    2981             :         uint8_t                 *buf;
    2982             :         uint64_t                offset;
    2983             :         uint64_t                length;
    2984             :         uint64_t                op_length;
    2985             : 
    2986         774 :         if (bserrno != 0 || ctx->io_units_remaining == 0) {
    2987         178 :                 bs_sequence_finish(ctx->seq, bserrno);
    2988         178 :                 if (ctx->in_submit_ctx) {
    2989             :                         /* Defer freeing of the ctx object, since it will be
    2990             :                          * accessed when this unwinds back to the submission
    2991             :                          * context.
    2992             :                          */
    2993          40 :                         ctx->done = true;
    2994             :                 } else {
    2995         138 :                         free(ctx);
    2996             :                 }
    2997         178 :                 return;
    2998             :         }
    2999             : 
    3000         596 :         if (ctx->in_submit_ctx) {
    3001             :                 /* If this split operation completed in the context
    3002             :                  * of its submission, mark the flag and return immediately
    3003             :                  * to avoid recursion.
    3004             :                  */
    3005          68 :                 ctx->completed_in_submit_ctx = true;
    3006          68 :                 return;
    3007             :         }
    3008             : 
    3009             :         while (true) {
    3010         596 :                 ctx->completed_in_submit_ctx = false;
    3011             : 
                                       /* Snapshot the current piece before the context is advanced. */
    3012         596 :                 offset = ctx->io_unit_offset;
    3013         596 :                 length = ctx->io_units_remaining;
    3014         596 :                 buf = ctx->curr_payload;
    3015         596 :                 op_length = spdk_min(length, bs_num_io_units_to_cluster_boundary(blob,
    3016             :                                      offset));
    3017             : 
    3018             :                 /* Update length and payload for next operation */
    3019         596 :                 ctx->io_units_remaining -= op_length;
    3020         596 :                 ctx->io_unit_offset += op_length;
    3021         596 :                 if (op_type == SPDK_BLOB_WRITE || op_type == SPDK_BLOB_READ) {
                                               /* curr_payload is a void pointer; this byte-wise arithmetic
                                                * relies on the compiler extension that allows it. */
    3022         528 :                         ctx->curr_payload += op_length * blob->bs->io_unit_size;
    3023             :                 }
    3024             : 
    3025         596 :                 assert(!ctx->in_submit_ctx);
    3026         596 :                 ctx->in_submit_ctx = true;
    3027             : 
    3028         596 :                 switch (op_type) {
    3029         418 :                 case SPDK_BLOB_READ:
    3030         418 :                         spdk_blob_io_read(blob, ch, buf, offset, op_length,
    3031             :                                           blob_request_submit_op_split_next, ctx);
    3032         418 :                         break;
    3033         110 :                 case SPDK_BLOB_WRITE:
    3034         110 :                         spdk_blob_io_write(blob, ch, buf, offset, op_length,
    3035             :                                            blob_request_submit_op_split_next, ctx);
    3036         110 :                         break;
    3037          36 :                 case SPDK_BLOB_UNMAP:
    3038          36 :                         spdk_blob_io_unmap(blob, ch, offset, op_length,
    3039             :                                            blob_request_submit_op_split_next, ctx);
    3040          36 :                         break;
    3041          32 :                 case SPDK_BLOB_WRITE_ZEROES:
    3042          32 :                         spdk_blob_io_write_zeroes(blob, ch, offset, op_length,
    3043             :                                                   blob_request_submit_op_split_next, ctx);
    3044          32 :                         break;
    3045           0 :                 case SPDK_BLOB_READV:
    3046             :                 case SPDK_BLOB_WRITEV:
                                               /* Vectored operations are rejected here: the sequence is
                                                * failed with -EINVAL and the context is freed at once.
                                                * NOTE(review): the log text says "readv/write" although the
                                                * case covers readv and writev. */
    3047           0 :                         SPDK_ERRLOG("readv/write not valid\n");
    3048           0 :                         bs_sequence_finish(ctx->seq, -EINVAL);
    3049           0 :                         free(ctx);
    3050           0 :                         return;
    3051             :                 }
    3052             : 
    3053             : #ifndef __clang_analyzer__
    3054             :                 /* scan-build reports a false positive around accessing the ctx here. It
    3055             :                  * forms a path that recursively calls this function, but then says
    3056             :                  * "assuming ctx->in_submit_ctx is false", when that isn't possible.
    3057             :                  * This path does free(ctx), returns to here, and reports a use-after-free
    3058             :                  * bug.  Wrapping this bit of code so that scan-build doesn't see it
    3059             :                  * works around the scan-build bug.
    3060             :                  */
    3061         596 :                 assert(ctx->in_submit_ctx);
    3062         596 :                 ctx->in_submit_ctx = false;
    3063             : 
    3064             :                 /* If the operation completed immediately, loop back and submit the
    3065             :                  * next operation.  Otherwise we can return and the next split
    3066             :                  * operation will get submitted when this current operation is
    3067             :                  * later completed asynchronously.
    3068             :                  */
    3069         596 :                 if (ctx->completed_in_submit_ctx) {
    3070          68 :                         continue;
    3071         528 :                 } else if (ctx->done) {
    3072          40 :                         free(ctx);
    3073             :                 }
    3074             : #endif
    3075         528 :                 break;
    3076             :         }
    3077             : }
    3078             : 
                       /*
                        * Submit an I/O as a series of pieces driven by
                        * blob_request_submit_op_split_next().
                        *
                        * ch      - I/O channel to submit on.
                        * blob    - target blob (must not be NULL).
                        * payload - data buffer; stored as the payload of the first piece.
                        * offset  - first io unit of the I/O.
                        * length  - length of the I/O in io units.
                        * cb_fn, cb_arg - user completion; installed as the completion of the sequence
                        *                 that spans all pieces.
                        * op_type - kind of operation.
                        *
                        * When the context or the sequence cannot be allocated, cb_fn is called with
                        * -ENOMEM and nothing is submitted.
                        */
    3079             : static void
    3080         178 : blob_request_submit_op_split(struct spdk_io_channel *ch, struct spdk_blob *blob,
    3081             :                              void *payload, uint64_t offset, uint64_t length,
    3082             :                              spdk_blob_op_complete cb_fn, void *cb_arg, enum spdk_blob_op_type op_type)
    3083             : {
    3084             :         struct op_split_ctx *ctx;
    3085             :         spdk_bs_sequence_t *seq;
    3086         178 :         struct spdk_bs_cpl cpl;
    3087             : 
    3088         178 :         assert(blob != NULL);
    3089             : 
                               /* calloc leaves in_submit_ctx, completed_in_submit_ctx and done false. */
    3090         178 :         ctx = calloc(1, sizeof(struct op_split_ctx));
    3091         178 :         if (ctx == NULL) {
    3092           0 :                 cb_fn(cb_arg, -ENOMEM);
    3093           0 :                 return;
    3094             :         }
    3095             : 
    3096         178 :         cpl.type = SPDK_BS_CPL_TYPE_BLOB_BASIC;
    3097         178 :         cpl.u.blob_basic.cb_fn = cb_fn;
    3098         178 :         cpl.u.blob_basic.cb_arg = cb_arg;
    3099             : 
    3100         178 :         seq = bs_sequence_start_blob(ch, &cpl, blob);
    3101         178 :         if (!seq) {
    3102           0 :                 free(ctx);
    3103           0 :                 cb_fn(cb_arg, -ENOMEM);
    3104           0 :                 return;
    3105             :         }
    3106             : 
    3107         178 :         ctx->blob = blob;
    3108         178 :         ctx->channel = ch;
    3109         178 :         ctx->curr_payload = payload;
    3110         178 :         ctx->io_unit_offset = offset;
    3111         178 :         ctx->io_units_remaining = length;
    3112         178 :         ctx->op_type = op_type;
    3113         178 :         ctx->seq = seq;
    3114             : 
                               /* Kick off the first piece; bserrno 0 means "no error so far". */
    3115         178 :         blob_request_submit_op_split_next(ctx, 0);
    3116             : }
    3117             : 
                       /*
                        * Completion callback taking a struct spdk_blob_free_cluster_ctx in cb_arg.
                        * NOTE(review): judging by the name it runs after the unmap of the cluster;
                        * the call site is not visible here - confirm.
                        *
                        * On error the sequence is finished with bserrno and the context is freed.
                        * On success the cluster is removed from the blob on the metadata thread;
                        * blob_free_cluster_cpl() then finishes the sequence and frees the context.
                        */
    3118             : static void
    3119          60 : spdk_free_cluster_unmap_complete(void *cb_arg, int bserrno)
    3120             : {
    3121          60 :         struct spdk_blob_free_cluster_ctx *ctx = cb_arg;
    3122             : 
    3123          60 :         if (bserrno) {
    3124           0 :                 bs_sequence_finish(ctx->seq, bserrno);
    3125           0 :                 free(ctx);
    3126           0 :                 return;
    3127             :         }
    3128             : 
    3129          60 :         blob_free_cluster_on_md_thread(ctx->blob, ctx->cluster_num,
    3130             :                                        ctx->extent_page, ctx->md_page, blob_free_cluster_cpl, ctx);
    3131             : }
    3132             : 
    3133             : static void
    3134       37834 : blob_request_submit_op_single(struct spdk_io_channel *_ch, struct spdk_blob *blob,
    3135             :                               void *payload, uint64_t offset, uint64_t length,
    3136             :                               spdk_blob_op_complete cb_fn, void *cb_arg, enum spdk_blob_op_type op_type)
    3137             : {
    3138       37834 :         struct spdk_bs_cpl cpl;
    3139       37834 :         uint64_t lba;
    3140       37834 :         uint64_t lba_count;
    3141             :         bool is_allocated;
    3142             : 
    3143       37834 :         assert(blob != NULL);
    3144             : 
    3145       37834 :         cpl.type = SPDK_BS_CPL_TYPE_BLOB_BASIC;
    3146       37834 :         cpl.u.blob_basic.cb_fn = cb_fn;
    3147       37834 :         cpl.u.blob_basic.cb_arg = cb_arg;
    3148             : 
    3149       37834 :         if (blob->frozen_refcnt) {
    3150             :                 /* This blob I/O is frozen */
    3151             :                 spdk_bs_user_op_t *op;
    3152           4 :                 struct spdk_bs_channel *bs_channel = spdk_io_channel_get_ctx(_ch);
    3153             : 
    3154           4 :                 op = bs_user_op_alloc(_ch, &cpl, op_type, blob, payload, 0, offset, length);
    3155           4 :                 if (!op) {
    3156           0 :                         cb_fn(cb_arg, -ENOMEM);
    3157           0 :                         return;
    3158             :                 }
    3159             : 
    3160           4 :                 TAILQ_INSERT_TAIL(&bs_channel->queued_io, op, link);
    3161             : 
    3162           4 :                 return;
    3163             :         }
    3164             : 
    3165       37830 :         is_allocated = blob_calculate_lba_and_lba_count(blob, offset, length, &lba, &lba_count);
    3166             : 
    3167       37830 :         switch (op_type) {
    3168       16887 :         case SPDK_BLOB_READ: {
    3169             :                 spdk_bs_batch_t *batch;
    3170             : 
    3171       16887 :                 batch = bs_batch_open(_ch, &cpl, blob);
    3172       16887 :                 if (!batch) {
    3173           0 :                         cb_fn(cb_arg, -ENOMEM);
    3174           0 :                         return;
    3175             :                 }
    3176             : 
    3177       16887 :                 if (is_allocated) {
    3178             :                         /* Read from the blob */
    3179       15799 :                         bs_batch_read_dev(batch, payload, lba, lba_count);
    3180             :                 } else {
    3181             :                         /* Read from the backing block device */
    3182        1088 :                         bs_batch_read_bs_dev(batch, blob->back_bs_dev, payload, lba, lba_count);
    3183             :                 }
    3184             : 
    3185       16887 :                 bs_batch_close(batch);
    3186       16887 :                 break;
    3187             :         }
    3188       20851 :         case SPDK_BLOB_WRITE:
    3189             :         case SPDK_BLOB_WRITE_ZEROES: {
    3190       20851 :                 if (is_allocated) {
    3191             :                         /* Write to the blob */
    3192             :                         spdk_bs_batch_t *batch;
    3193             : 
    3194       20507 :                         if (lba_count == 0) {
    3195           0 :                                 cb_fn(cb_arg, 0);
    3196           0 :                                 return;
    3197             :                         }
    3198             : 
    3199       20507 :                         batch = bs_batch_open(_ch, &cpl, blob);
    3200       20507 :                         if (!batch) {
    3201           0 :                                 cb_fn(cb_arg, -ENOMEM);
    3202           0 :                                 return;
    3203             :                         }
    3204             : 
    3205       20507 :                         if (op_type == SPDK_BLOB_WRITE) {
    3206       20475 :                                 bs_batch_write_dev(batch, payload, lba, lba_count);
    3207             :                         } else {
    3208          32 :                                 bs_batch_write_zeroes_dev(batch, lba, lba_count);
    3209             :                         }
    3210             : 
    3211       20507 :                         bs_batch_close(batch);
    3212             :                 } else {
    3213             :                         /* Queue this operation and allocate the cluster */
    3214             :                         spdk_bs_user_op_t *op;
    3215             : 
    3216         344 :                         op = bs_user_op_alloc(_ch, &cpl, op_type, blob, payload, 0, offset, length);
    3217         344 :                         if (!op) {
    3218           0 :                                 cb_fn(cb_arg, -ENOMEM);
    3219           0 :                                 return;
    3220             :                         }
    3221             : 
    3222         344 :                         bs_allocate_and_copy_cluster(blob, _ch, offset, op);
    3223             :                 }
    3224       20851 :                 break;
    3225             :         }
    3226          92 :         case SPDK_BLOB_UNMAP: {
    3227          92 :                 struct spdk_blob_free_cluster_ctx *ctx = NULL;
    3228             :                 spdk_bs_batch_t *batch;
    3229             : 
    3230             :                 /* if aligned with cluster release cluster */
    3231         160 :                 if (spdk_blob_is_thin_provisioned(blob) && is_allocated &&
    3232          68 :                     bs_io_units_per_cluster(blob) == length) {
    3233          60 :                         struct spdk_bs_channel *bs_channel = spdk_io_channel_get_ctx(_ch);
    3234             :                         uint32_t cluster_start_page;
    3235             :                         uint32_t cluster_number;
    3236             : 
    3237          60 :                         assert(offset % bs_io_units_per_cluster(blob) == 0);
    3238             : 
    3239             :                         /* Round the io_unit offset down to the first page in the cluster */
    3240          60 :                         cluster_start_page = bs_io_unit_to_cluster_start(blob, offset);
    3241             : 
    3242             :                         /* Calculate which index in the metadata cluster array the corresponding
    3243             :                          * cluster is supposed to be at. */
    3244          60 :                         cluster_number = bs_io_unit_to_cluster_number(blob, offset);
    3245             : 
    3246          60 :                         ctx = calloc(1, sizeof(*ctx));
    3247          60 :                         if (!ctx) {
    3248           0 :                                 cb_fn(cb_arg, -ENOMEM);
    3249           0 :                                 return;
    3250             :                         }
    3251             :                         /* When freeing a cluster the flow should be (in order):
    3252             :                          * 1. Unmap the underlying area (so if the cluster is reclaimed in the future, it won't leak
    3253             :                          * old data)
    3254             :                          * 2. Once the unmap completes (to avoid any races with incoming writes that may claim the
    3255             :                          * cluster), update and sync metadata freeing the cluster
    3256             :                          * 3. Once metadata update is done, complete the user unmap request
    3257             :                          */
    3258          60 :                         ctx->blob = blob;
    3259          60 :                         ctx->page = cluster_start_page;
    3260          60 :                         ctx->cluster_num = cluster_number;
    3261          60 :                         ctx->md_page = bs_channel->new_cluster_page;
    3262          60 :                         ctx->seq = bs_sequence_start_bs(_ch, &cpl);
    3263          60 :                         if (!ctx->seq) {
    3264           0 :                                 free(ctx);
    3265           0 :                                 cb_fn(cb_arg, -ENOMEM);
    3266           0 :                                 return;
    3267             :                         }
    3268             : 
    3269          60 :                         if (blob->use_extent_table) {
    3270          30 :                                 ctx->extent_page = *bs_cluster_to_extent_page(blob, cluster_number);
    3271             :                         }
    3272             : 
    3273          60 :                         cpl.u.blob_basic.cb_fn = spdk_free_cluster_unmap_complete;
    3274          60 :                         cpl.u.blob_basic.cb_arg = ctx;
    3275             :                 }
    3276             : 
    3277          92 :                 batch = bs_batch_open(_ch, &cpl, blob);
    3278          92 :                 if (!batch) {
    3279           0 :                         free(ctx);
    3280           0 :                         cb_fn(cb_arg, -ENOMEM);
    3281           0 :                         return;
    3282             :                 }
    3283             : 
    3284          92 :                 if (is_allocated) {
    3285          92 :                         bs_batch_unmap_dev(batch, lba, lba_count);
    3286             :                 }
    3287             : 
    3288          92 :                 bs_batch_close(batch);
    3289          92 :                 break;
    3290             :         }
    3291           0 :         case SPDK_BLOB_READV:
    3292             :         case SPDK_BLOB_WRITEV:
    3293           0 :                 SPDK_ERRLOG("readv/write not valid\n");
    3294           0 :                 cb_fn(cb_arg, -EINVAL);
    3295           0 :                 break;
    3296             :         }
    3297             : }
    3298             : 
    3299             : static void
    3300       38524 : blob_request_submit_op(struct spdk_blob *blob, struct spdk_io_channel *_channel,
    3301             :                        void *payload, uint64_t offset, uint64_t length,
    3302             :                        spdk_blob_op_complete cb_fn, void *cb_arg, enum spdk_blob_op_type op_type)
    3303             : {
    3304       38524 :         assert(blob != NULL);
    3305             : 
    3306       38524 :         if (blob->data_ro && op_type != SPDK_BLOB_READ) {
    3307           4 :                 cb_fn(cb_arg, -EPERM);
    3308           4 :                 return;
    3309             :         }
    3310             : 
    3311       38520 :         if (length == 0) {
    3312         492 :                 cb_fn(cb_arg, 0);
    3313         492 :                 return;
    3314             :         }
    3315             : 
    3316       38028 :         if (offset + length > bs_cluster_to_lba(blob->bs, blob->active.num_clusters)) {
    3317          24 :                 cb_fn(cb_arg, -EINVAL);
    3318          24 :                 return;
    3319             :         }
    3320       38004 :         if (length <= bs_num_io_units_to_cluster_boundary(blob, offset)) {
    3321       37826 :                 blob_request_submit_op_single(_channel, blob, payload, offset, length,
    3322             :                                               cb_fn, cb_arg, op_type);
    3323             :         } else {
    3324         178 :                 blob_request_submit_op_split(_channel, blob, payload, offset, length,
    3325             :                                              cb_fn, cb_arg, op_type);
    3326             :         }
    3327             : }
    3328             : 
    3329             : struct rw_iov_ctx {
    3330             :         struct spdk_blob *blob;
    3331             :         struct spdk_io_channel *channel;
    3332             :         spdk_blob_op_complete cb_fn;
    3333             :         void *cb_arg;
    3334             :         bool read;
    3335             :         int iovcnt;
    3336             :         struct iovec *orig_iov;
    3337             :         uint64_t io_unit_offset;
    3338             :         uint64_t io_units_remaining;
    3339             :         uint64_t io_units_done;
    3340             :         struct spdk_blob_ext_io_opts *ext_io_opts;
    3341             :         struct iovec iov[0];
    3342             : };
    3343             : 
    3344             : static void
    3345        2360 : rw_iov_done(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
    3346             : {
    3347        2360 :         assert(cb_arg == NULL);
    3348        2360 :         bs_sequence_finish(seq, bserrno);
    3349        2360 : }
    3350             : 
    3351             : static void
    3352         744 : rw_iov_split_next(void *cb_arg, int bserrno)
    3353             : {
    3354         744 :         struct rw_iov_ctx *ctx = cb_arg;
    3355         744 :         struct spdk_blob *blob = ctx->blob;
    3356             :         struct iovec *iov, *orig_iov;
    3357             :         int iovcnt;
    3358             :         size_t orig_iovoff;
    3359             :         uint64_t io_units_count, io_units_to_boundary, io_unit_offset;
    3360             :         uint64_t byte_count;
    3361             : 
    3362         744 :         if (bserrno != 0 || ctx->io_units_remaining == 0) {
    3363         204 :                 ctx->cb_fn(ctx->cb_arg, bserrno);
    3364         204 :                 free(ctx);
    3365         204 :                 return;
    3366             :         }
    3367             : 
    3368         540 :         io_unit_offset = ctx->io_unit_offset;
    3369         540 :         io_units_to_boundary = bs_num_io_units_to_cluster_boundary(blob, io_unit_offset);
    3370         540 :         io_units_count = spdk_min(ctx->io_units_remaining, io_units_to_boundary);
    3371             :         /*
    3372             :          * Get index and offset into the original iov array for our current position in the I/O sequence.
    3373             :          *  byte_count will keep track of how many bytes remaining until orig_iov and orig_iovoff will
    3374             :          *  point to the current position in the I/O sequence.
    3375             :          */
    3376         540 :         byte_count = ctx->io_units_done * blob->bs->io_unit_size;
    3377         540 :         orig_iov = &ctx->orig_iov[0];
    3378         540 :         orig_iovoff = 0;
    3379        1148 :         while (byte_count > 0) {
    3380         608 :                 if (byte_count >= orig_iov->iov_len) {
    3381         352 :                         byte_count -= orig_iov->iov_len;
    3382         352 :                         orig_iov++;
    3383             :                 } else {
    3384         256 :                         orig_iovoff = byte_count;
    3385         256 :                         byte_count = 0;
    3386             :                 }
    3387             :         }
    3388             : 
    3389             :         /*
    3390             :          * Build an iov array for the next I/O in the sequence.  byte_count will keep track of how many
    3391             :          *  bytes of this next I/O remain to be accounted for in the new iov array.
    3392             :          */
    3393         540 :         byte_count = io_units_count * blob->bs->io_unit_size;
    3394         540 :         iov = &ctx->iov[0];
    3395         540 :         iovcnt = 0;
    3396        1380 :         while (byte_count > 0) {
    3397         840 :                 assert(iovcnt < ctx->iovcnt);
    3398         840 :                 iov->iov_len = spdk_min(byte_count, orig_iov->iov_len - orig_iovoff);
    3399         840 :                 iov->iov_base = orig_iov->iov_base + orig_iovoff;
    3400         840 :                 byte_count -= iov->iov_len;
    3401         840 :                 orig_iovoff = 0;
    3402         840 :                 orig_iov++;
    3403         840 :                 iov++;
    3404         840 :                 iovcnt++;
    3405             :         }
    3406             : 
    3407         540 :         ctx->io_unit_offset += io_units_count;
    3408         540 :         ctx->io_units_remaining -= io_units_count;
    3409         540 :         ctx->io_units_done += io_units_count;
    3410         540 :         iov = &ctx->iov[0];
    3411             : 
    3412         540 :         if (ctx->read) {
    3413         408 :                 spdk_blob_io_readv_ext(ctx->blob, ctx->channel, iov, iovcnt, io_unit_offset,
    3414             :                                        io_units_count, rw_iov_split_next, ctx, ctx->ext_io_opts);
    3415             :         } else {
    3416         132 :                 spdk_blob_io_writev_ext(ctx->blob, ctx->channel, iov, iovcnt, io_unit_offset,
    3417             :                                         io_units_count, rw_iov_split_next, ctx, ctx->ext_io_opts);
    3418             :         }
    3419             : }
    3420             : 
    3421             : static void
    3422        2588 : blob_request_submit_rw_iov(struct spdk_blob *blob, struct spdk_io_channel *_channel,
    3423             :                            struct iovec *iov, int iovcnt,
    3424             :                            uint64_t offset, uint64_t length, spdk_blob_op_complete cb_fn, void *cb_arg, bool read,
    3425             :                            struct spdk_blob_ext_io_opts *ext_io_opts)
    3426             : {
    3427        2588 :         struct spdk_bs_cpl      cpl;
    3428             : 
    3429        2588 :         assert(blob != NULL);
    3430             : 
    3431        2588 :         if (!read && blob->data_ro) {
    3432           4 :                 cb_fn(cb_arg, -EPERM);
    3433           4 :                 return;
    3434             :         }
    3435             : 
    3436        2584 :         if (length == 0) {
    3437           0 :                 cb_fn(cb_arg, 0);
    3438           0 :                 return;
    3439             :         }
    3440             : 
    3441        2584 :         if (offset + length > bs_cluster_to_lba(blob->bs, blob->active.num_clusters)) {
    3442           0 :                 cb_fn(cb_arg, -EINVAL);
    3443           0 :                 return;
    3444             :         }
    3445             : 
    3446             :         /*
    3447             :          * For now, we implement readv/writev using a sequence (instead of a batch) to account for having
    3448             :          *  to split a request that spans a cluster boundary.  For I/O that do not span a cluster boundary,
    3449             :          *  there will be no noticeable difference compared to using a batch.  For I/O that do span a cluster
    3450             :          *  boundary, the target LBAs (after blob offset to LBA translation) may not be contiguous, so we need
    3451             :          *  to allocate a separate iov array and split the I/O such that none of the resulting
    3452             :          *  smaller I/O cross a cluster boundary.  These smaller I/O will be issued in sequence (not in parallel)
    3453             :          *  but since this case happens very infrequently, any performance impact will be negligible.
    3454             :          *
    3455             :          * This could be optimized in the future to allocate a big enough iov array to account for all of the iovs
    3456             :          *  for all of the smaller I/Os, pre-build all of the iov arrays for the smaller I/Os, then issue them
    3457             :          *  in a batch.  That would also require creating an intermediate spdk_bs_cpl that would get called
    3458             :          *  when the batch was completed, to allow for freeing the memory for the iov arrays.
    3459             :          */
    3460        2584 :         if (spdk_likely(length <= bs_num_io_units_to_cluster_boundary(blob, offset))) {
    3461        2376 :                 uint64_t lba_count;
    3462        2376 :                 uint64_t lba;
    3463             :                 bool is_allocated;
    3464             : 
    3465        2376 :                 cpl.type = SPDK_BS_CPL_TYPE_BLOB_BASIC;
    3466        2376 :                 cpl.u.blob_basic.cb_fn = cb_fn;
    3467        2376 :                 cpl.u.blob_basic.cb_arg = cb_arg;
    3468             : 
    3469        2376 :                 if (blob->frozen_refcnt) {
    3470             :                         /* This blob I/O is frozen */
    3471             :                         enum spdk_blob_op_type op_type;
    3472             :                         spdk_bs_user_op_t *op;
    3473           0 :                         struct spdk_bs_channel *bs_channel = spdk_io_channel_get_ctx(_channel);
    3474             : 
    3475           0 :                         op_type = read ? SPDK_BLOB_READV : SPDK_BLOB_WRITEV;
    3476           0 :                         op = bs_user_op_alloc(_channel, &cpl, op_type, blob, iov, iovcnt, offset, length);
    3477           0 :                         if (!op) {
    3478           0 :                                 cb_fn(cb_arg, -ENOMEM);
    3479           0 :                                 return;
    3480             :                         }
    3481             : 
    3482           0 :                         TAILQ_INSERT_TAIL(&bs_channel->queued_io, op, link);
    3483             : 
    3484           0 :                         return;
    3485             :                 }
    3486             : 
    3487        2376 :                 is_allocated = blob_calculate_lba_and_lba_count(blob, offset, length, &lba, &lba_count);
    3488             : 
    3489        2376 :                 if (read) {
    3490             :                         spdk_bs_sequence_t *seq;
    3491             : 
    3492        2084 :                         seq = bs_sequence_start_blob(_channel, &cpl, blob);
    3493        2084 :                         if (!seq) {
    3494           0 :                                 cb_fn(cb_arg, -ENOMEM);
    3495           0 :                                 return;
    3496             :                         }
    3497             : 
    3498        2084 :                         seq->ext_io_opts = ext_io_opts;
    3499             : 
    3500        2084 :                         if (is_allocated) {
    3501         540 :                                 bs_sequence_readv_dev(seq, iov, iovcnt, lba, lba_count, rw_iov_done, NULL);
    3502             :                         } else {
    3503        1544 :                                 bs_sequence_readv_bs_dev(seq, blob->back_bs_dev, iov, iovcnt, lba, lba_count,
    3504             :                                                          rw_iov_done, NULL);
    3505             :                         }
    3506             :                 } else {
    3507         292 :                         if (is_allocated) {
    3508             :                                 spdk_bs_sequence_t *seq;
    3509             : 
    3510         276 :                                 seq = bs_sequence_start_blob(_channel, &cpl, blob);
    3511         276 :                                 if (!seq) {
    3512           0 :                                         cb_fn(cb_arg, -ENOMEM);
    3513           0 :                                         return;
    3514             :                                 }
    3515             : 
    3516         276 :                                 seq->ext_io_opts = ext_io_opts;
    3517             : 
    3518         276 :                                 bs_sequence_writev_dev(seq, iov, iovcnt, lba, lba_count, rw_iov_done, NULL);
    3519             :                         } else {
    3520             :                                 /* Queue this operation and allocate the cluster */
    3521             :                                 spdk_bs_user_op_t *op;
    3522             : 
    3523          16 :                                 op = bs_user_op_alloc(_channel, &cpl, SPDK_BLOB_WRITEV, blob, iov, iovcnt, offset,
    3524             :                                                       length);
    3525          16 :                                 if (!op) {
    3526           0 :                                         cb_fn(cb_arg, -ENOMEM);
    3527           0 :                                         return;
    3528             :                                 }
    3529             : 
    3530          16 :                                 op->ext_io_opts = ext_io_opts;
    3531             : 
    3532          16 :                                 bs_allocate_and_copy_cluster(blob, _channel, offset, op);
    3533             :                         }
    3534             :                 }
    3535             :         } else {
    3536             :                 struct rw_iov_ctx *ctx;
    3537             : 
    3538         208 :                 ctx = calloc(1, sizeof(struct rw_iov_ctx) + iovcnt * sizeof(struct iovec));
    3539         208 :                 if (ctx == NULL) {
    3540           4 :                         cb_fn(cb_arg, -ENOMEM);
    3541           4 :                         return;
    3542             :                 }
    3543             : 
    3544         204 :                 ctx->blob = blob;
    3545         204 :                 ctx->channel = _channel;
    3546         204 :                 ctx->cb_fn = cb_fn;
    3547         204 :                 ctx->cb_arg = cb_arg;
    3548         204 :                 ctx->read = read;
    3549         204 :                 ctx->orig_iov = iov;
    3550         204 :                 ctx->iovcnt = iovcnt;
    3551         204 :                 ctx->io_unit_offset = offset;
    3552         204 :                 ctx->io_units_remaining = length;
    3553         204 :                 ctx->io_units_done = 0;
    3554         204 :                 ctx->ext_io_opts = ext_io_opts;
    3555             : 
    3556         204 :                 rw_iov_split_next(ctx, 0);
    3557             :         }
    3558             : }
    3559             : 
    3560             : static struct spdk_blob *
    3561        7733 : blob_lookup(struct spdk_blob_store *bs, spdk_blob_id blobid)
    3562             : {
    3563        7733 :         struct spdk_blob find;
    3564             : 
    3565        7733 :         if (spdk_bit_array_get(bs->open_blobids, blobid) == 0) {
    3566        6948 :                 return NULL;
    3567             :         }
    3568             : 
    3569         785 :         find.id = blobid;
    3570         785 :         return RB_FIND(spdk_blob_tree, &bs->open_blobs, &find);
    3571             : }
    3572             : 
    3573             : static void
    3574        1810 : blob_get_snapshot_and_clone_entries(struct spdk_blob *blob,
    3575             :                                     struct spdk_blob_list **snapshot_entry, struct spdk_blob_list **clone_entry)
    3576             : {
    3577        1810 :         assert(blob != NULL);
    3578        1810 :         *snapshot_entry = NULL;
    3579        1810 :         *clone_entry = NULL;
    3580             : 
    3581        1810 :         if (blob->parent_id == SPDK_BLOBID_INVALID) {
    3582        1518 :                 return;
    3583             :         }
    3584             : 
    3585         440 :         TAILQ_FOREACH(*snapshot_entry, &blob->bs->snapshots, link) {
    3586         380 :                 if ((*snapshot_entry)->id == blob->parent_id) {
    3587         232 :                         break;
    3588             :                 }
    3589             :         }
    3590             : 
    3591         292 :         if (*snapshot_entry != NULL) {
    3592         276 :                 TAILQ_FOREACH(*clone_entry, &(*snapshot_entry)->clones, link) {
    3593         276 :                         if ((*clone_entry)->id == blob->id) {
    3594         232 :                                 break;
    3595             :                         }
    3596             :                 }
    3597             : 
    3598         232 :                 assert(*clone_entry != NULL);
    3599             :         }
    3600             : }
    3601             : 
    3602             : static int
    3603         796 : bs_channel_create(void *io_device, void *ctx_buf)
    3604             : {
    3605         796 :         struct spdk_blob_store          *bs = io_device;
    3606         796 :         struct spdk_bs_channel          *channel = ctx_buf;
    3607             :         struct spdk_bs_dev              *dev;
    3608         796 :         uint32_t                        max_ops = bs->max_channel_ops;
    3609             :         uint32_t                        i;
    3610             : 
    3611         796 :         dev = bs->dev;
    3612             : 
    3613         796 :         channel->req_mem = calloc(max_ops, sizeof(struct spdk_bs_request_set));
    3614         796 :         if (!channel->req_mem) {
    3615           0 :                 return -1;
    3616             :         }
    3617             : 
    3618         796 :         TAILQ_INIT(&channel->reqs);
    3619             : 
    3620      408348 :         for (i = 0; i < max_ops; i++) {
    3621      407552 :                 TAILQ_INSERT_TAIL(&channel->reqs, &channel->req_mem[i], link);
    3622             :         }
    3623             : 
    3624         796 :         channel->bs = bs;
    3625         796 :         channel->dev = dev;
    3626         796 :         channel->dev_channel = dev->create_channel(dev);
    3627             : 
    3628         796 :         if (!channel->dev_channel) {
    3629           0 :                 SPDK_ERRLOG("Failed to create device channel.\n");
    3630           0 :                 free(channel->req_mem);
    3631           0 :                 return -1;
    3632             :         }
    3633             : 
    3634         796 :         channel->new_cluster_page = spdk_zmalloc(SPDK_BS_PAGE_SIZE, 0, NULL, SPDK_ENV_SOCKET_ID_ANY,
    3635             :                                     SPDK_MALLOC_DMA);
    3636         796 :         if (!channel->new_cluster_page) {
    3637           0 :                 SPDK_ERRLOG("Failed to allocate new cluster page\n");
    3638           0 :                 free(channel->req_mem);
    3639           0 :                 channel->dev->destroy_channel(channel->dev, channel->dev_channel);
    3640           0 :                 return -1;
    3641             :         }
    3642             : 
    3643         796 :         TAILQ_INIT(&channel->need_cluster_alloc);
    3644         796 :         TAILQ_INIT(&channel->queued_io);
    3645         796 :         RB_INIT(&channel->esnap_channels);
    3646             : 
    3647         796 :         return 0;
    3648             : }
    3649             : 
    3650             : static void
    3651         796 : bs_channel_destroy(void *io_device, void *ctx_buf)
    3652             : {
    3653         796 :         struct spdk_bs_channel *channel = ctx_buf;
    3654             :         spdk_bs_user_op_t *op;
    3655             : 
    3656         796 :         while (!TAILQ_EMPTY(&channel->need_cluster_alloc)) {
    3657           0 :                 op = TAILQ_FIRST(&channel->need_cluster_alloc);
    3658           0 :                 TAILQ_REMOVE(&channel->need_cluster_alloc, op, link);
    3659           0 :                 bs_user_op_abort(op, -EIO);
    3660             :         }
    3661             : 
    3662         796 :         while (!TAILQ_EMPTY(&channel->queued_io)) {
    3663           0 :                 op = TAILQ_FIRST(&channel->queued_io);
    3664           0 :                 TAILQ_REMOVE(&channel->queued_io, op, link);
    3665           0 :                 bs_user_op_abort(op, -EIO);
    3666             :         }
    3667             : 
    3668         796 :         blob_esnap_destroy_bs_channel(channel);
    3669             : 
    3670         796 :         free(channel->req_mem);
    3671         796 :         spdk_free(channel->new_cluster_page);
    3672         796 :         channel->dev->destroy_channel(channel->dev, channel->dev_channel);
    3673         796 : }
    3674             : 
    3675             : static void
    3676         780 : bs_dev_destroy(void *io_device)
    3677             : {
    3678         780 :         struct spdk_blob_store *bs = io_device;
    3679             :         struct spdk_blob        *blob, *blob_tmp;
    3680             : 
    3681         780 :         bs->dev->destroy(bs->dev);
    3682             : 
    3683         780 :         RB_FOREACH_SAFE(blob, spdk_blob_tree, &bs->open_blobs, blob_tmp) {
    3684           0 :                 RB_REMOVE(spdk_blob_tree, &bs->open_blobs, blob);
    3685           0 :                 spdk_bit_array_clear(bs->open_blobids, blob->id);
    3686           0 :                 blob_free(blob);
    3687             :         }
    3688             : 
    3689         780 :         spdk_spin_destroy(&bs->used_lock);
    3690             : 
    3691         780 :         spdk_bit_array_free(&bs->open_blobids);
    3692         780 :         spdk_bit_array_free(&bs->used_blobids);
    3693         780 :         spdk_bit_array_free(&bs->used_md_pages);
    3694         780 :         spdk_bit_pool_free(&bs->used_clusters);
    3695             :         /*
    3696             :          * If this function is called for any reason except a successful unload,
    3697             :          * the unload_cpl type will be NONE and this will be a nop.
    3698             :          */
    3699         780 :         bs_call_cpl(&bs->unload_cpl, bs->unload_err);
    3700             : 
    3701         780 :         free(bs);
    3702         780 : }
    3703             : 
    3704             : static int
    3705         908 : bs_blob_list_add(struct spdk_blob *blob)
    3706             : {
    3707             :         spdk_blob_id snapshot_id;
    3708         908 :         struct spdk_blob_list *snapshot_entry = NULL;
    3709         908 :         struct spdk_blob_list *clone_entry = NULL;
    3710             : 
    3711         908 :         assert(blob != NULL);
    3712             : 
    3713         908 :         snapshot_id = blob->parent_id;
    3714         908 :         if (snapshot_id == SPDK_BLOBID_INVALID ||
    3715             :             snapshot_id == SPDK_BLOBID_EXTERNAL_SNAPSHOT) {
    3716         492 :                 return 0;
    3717             :         }
    3718             : 
    3719         416 :         snapshot_entry = bs_get_snapshot_entry(blob->bs, snapshot_id);
    3720         416 :         if (snapshot_entry == NULL) {
    3721             :                 /* Snapshot not found */
    3722         288 :                 snapshot_entry = calloc(1, sizeof(struct spdk_blob_list));
    3723         288 :                 if (snapshot_entry == NULL) {
    3724           0 :                         return -ENOMEM;
    3725             :                 }
    3726         288 :                 snapshot_entry->id = snapshot_id;
    3727         288 :                 TAILQ_INIT(&snapshot_entry->clones);
    3728         288 :                 TAILQ_INSERT_TAIL(&blob->bs->snapshots, snapshot_entry, link);
    3729             :         } else {
    3730         204 :                 TAILQ_FOREACH(clone_entry, &snapshot_entry->clones, link) {
    3731          76 :                         if (clone_entry->id == blob->id) {
    3732           0 :                                 break;
    3733             :                         }
    3734             :                 }
    3735             :         }
    3736             : 
    3737         416 :         if (clone_entry == NULL) {
    3738             :                 /* Clone not found */
    3739         416 :                 clone_entry = calloc(1, sizeof(struct spdk_blob_list));
    3740         416 :                 if (clone_entry == NULL) {
    3741           0 :                         return -ENOMEM;
    3742             :                 }
    3743         416 :                 clone_entry->id = blob->id;
    3744         416 :                 TAILQ_INIT(&clone_entry->clones);
    3745         416 :                 TAILQ_INSERT_TAIL(&snapshot_entry->clones, clone_entry, link);
    3746         416 :                 snapshot_entry->clone_count++;
    3747             :         }
    3748             : 
    3749         416 :         return 0;
    3750             : }
    3751             : 
    3752             : static void
    3753        1732 : bs_blob_list_remove(struct spdk_blob *blob)
    3754             : {
    3755        1732 :         struct spdk_blob_list *snapshot_entry = NULL;
    3756        1732 :         struct spdk_blob_list *clone_entry = NULL;
    3757             : 
    3758        1732 :         blob_get_snapshot_and_clone_entries(blob, &snapshot_entry, &clone_entry);
    3759             : 
    3760        1732 :         if (snapshot_entry == NULL) {
    3761        1516 :                 return;
    3762             :         }
    3763             : 
    3764         216 :         blob->parent_id = SPDK_BLOBID_INVALID;
    3765         216 :         TAILQ_REMOVE(&snapshot_entry->clones, clone_entry, link);
    3766         216 :         free(clone_entry);
    3767             : 
    3768         216 :         snapshot_entry->clone_count--;
    3769             : }
    3770             : 
    3771             : static int
    3772         780 : bs_blob_list_free(struct spdk_blob_store *bs)
    3773             : {
    3774             :         struct spdk_blob_list *snapshot_entry;
    3775             :         struct spdk_blob_list *snapshot_entry_tmp;
    3776             :         struct spdk_blob_list *clone_entry;
    3777             :         struct spdk_blob_list *clone_entry_tmp;
    3778             : 
    3779         924 :         TAILQ_FOREACH_SAFE(snapshot_entry, &bs->snapshots, link, snapshot_entry_tmp) {
    3780         296 :                 TAILQ_FOREACH_SAFE(clone_entry, &snapshot_entry->clones, link, clone_entry_tmp) {
    3781         152 :                         TAILQ_REMOVE(&snapshot_entry->clones, clone_entry, link);
    3782         152 :                         free(clone_entry);
    3783             :                 }
    3784         144 :                 TAILQ_REMOVE(&bs->snapshots, snapshot_entry, link);
    3785         144 :                 free(snapshot_entry);
    3786             :         }
    3787             : 
    3788         780 :         return 0;
    3789             : }
    3790             : 
    3791             : static void
    3792         780 : bs_free(struct spdk_blob_store *bs)
    3793             : {
    3794         780 :         bs_blob_list_free(bs);
    3795             : 
    3796         780 :         bs_unregister_md_thread(bs);
    3797         780 :         spdk_io_device_unregister(bs, bs_dev_destroy);
    3798         780 : }
    3799             : 
    3800             : void
    3801        1048 : spdk_bs_opts_init(struct spdk_bs_opts *opts, size_t opts_size)
    3802             : {
    3803             : 
    3804        1048 :         if (!opts) {
    3805           0 :                 SPDK_ERRLOG("opts should not be NULL\n");
    3806           0 :                 return;
    3807             :         }
    3808             : 
    3809        1048 :         if (!opts_size) {
    3810           0 :                 SPDK_ERRLOG("opts_size should not be zero value\n");
    3811           0 :                 return;
    3812             :         }
    3813             : 
    3814        1048 :         memset(opts, 0, opts_size);
    3815        1048 :         opts->opts_size = opts_size;
    3816             : 
    3817             : #define FIELD_OK(field) \
    3818             :         offsetof(struct spdk_bs_opts, field) + sizeof(opts->field) <= opts_size
    3819             : 
    3820             : #define SET_FIELD(field, value) \
    3821             :         if (FIELD_OK(field)) { \
    3822             :                 opts->field = value; \
    3823             :         } \
    3824             : 
    3825        1048 :         SET_FIELD(cluster_sz, SPDK_BLOB_OPTS_CLUSTER_SZ);
    3826        1048 :         SET_FIELD(num_md_pages, SPDK_BLOB_OPTS_NUM_MD_PAGES);
    3827        1048 :         SET_FIELD(max_md_ops, SPDK_BLOB_OPTS_NUM_MD_PAGES);
    3828        1048 :         SET_FIELD(max_channel_ops, SPDK_BLOB_OPTS_DEFAULT_CHANNEL_OPS);
    3829        1048 :         SET_FIELD(clear_method,  BS_CLEAR_WITH_UNMAP);
    3830             : 
    3831        1048 :         if (FIELD_OK(bstype)) {
    3832        1048 :                 memset(&opts->bstype, 0, sizeof(opts->bstype));
    3833             :         }
    3834             : 
    3835        1048 :         SET_FIELD(iter_cb_fn, NULL);
    3836        1048 :         SET_FIELD(iter_cb_arg, NULL);
    3837        1048 :         SET_FIELD(force_recover, false);
    3838        1048 :         SET_FIELD(esnap_bs_dev_create, NULL);
    3839        1048 :         SET_FIELD(esnap_ctx, NULL);
    3840             : 
    3841             : #undef FIELD_OK
    3842             : #undef SET_FIELD
    3843             : }
    3844             : 
    3845             : static int
    3846         484 : bs_opts_verify(struct spdk_bs_opts *opts)
    3847             : {
    3848         484 :         if (opts->cluster_sz == 0 || opts->num_md_pages == 0 || opts->max_md_ops == 0 ||
    3849         480 :             opts->max_channel_ops == 0) {
    3850           4 :                 SPDK_ERRLOG("Blobstore options cannot be set to 0\n");
    3851           4 :                 return -1;
    3852             :         }
    3853             : 
    3854         480 :         return 0;
    3855             : }
    3856             : 
    3857             : /* START spdk_bs_load */
    3858             : 
    3859             : /* spdk_bs_load_ctx is used for init, load, unload and dump code paths. */
    3860             : 
    3861             : struct spdk_bs_load_ctx { /* Context allocated (zeroed) by bs_alloc() and released by bs_load_ctx_fail() on error. */
    3862             :         struct spdk_blob_store          *bs; /* Blobstore being operated on; set in bs_alloc(). */
    3863             :         struct spdk_bs_super_block      *super; /* DMA buffer for the super block; allocated in bs_alloc(). */
    3864             : 
    3865             :         struct spdk_bs_md_mask          *mask; /* DMA buffer for the mask currently being written by bs_write_used_*(). */
    3866             :         bool                            in_page_chain;
    3867             :         uint32_t                        page_index;
    3868             :         uint32_t                        cur_page;
    3869             :         struct spdk_blob_md_page        *page;
    3870             : 
    3871             :         uint64_t                        num_extent_pages;
    3872             :         uint32_t                        *extent_page_num;
    3873             :         struct spdk_blob_md_page        *extent_pages;
    3874             :         struct spdk_bit_array           *used_clusters; /* Cluster bit array created in bs_alloc(); used when bs->used_clusters (bit pool) is not set. */
    3875             : 
    3876             :         spdk_bs_sequence_t                      *seq; /* Sequence finished by bs_load_ctx_fail() on failure. */
    3877             :         spdk_blob_op_with_handle_complete       iter_cb_fn; /* Optional per-blob callback copied from opts; called from the load iteration. */
    3878             :         void                                    *iter_cb_arg; /* Argument handed to iter_cb_fn. */
    3879             :         struct spdk_blob                        *blob; /* Blob currently being examined for corruption (set in bs_load_iter()). */
    3880             :         spdk_blob_id                            blobid; /* Id of the corrupted blob, saved before it is closed and deleted. */
    3881             : 
    3882             :         bool                                    force_recover; /* Copied from opts->force_recover in bs_alloc(). */
    3883             : 
    3884             :         /* These fields are used in the spdk_bs_dump path. */
    3885             :         bool                                    dumping;
    3886             :         FILE                                    *fp;
    3887             :         spdk_bs_dump_print_xattr                print_xattr_fn;
    3888             :         char                                    xattr_name[4096];
    3889             : };
    3890             : 
    3891             : static int
    3892         784 : bs_alloc(struct spdk_bs_dev *dev, struct spdk_bs_opts *opts, struct spdk_blob_store **_bs,
    3893             :          struct spdk_bs_load_ctx **_ctx)
    3894             : {
    3895             :         struct spdk_blob_store  *bs;
    3896             :         struct spdk_bs_load_ctx *ctx;
    3897             :         uint64_t dev_size;
    3898             :         int rc;
    3899             : 
    3900         784 :         dev_size = dev->blocklen * dev->blockcnt;
    3901         784 :         if (dev_size < opts->cluster_sz) {
    3902             :                 /* Device size cannot be smaller than cluster size of blobstore */
    3903           0 :                 SPDK_INFOLOG(blob, "Device size %" PRIu64 " is smaller than cluster size %" PRIu32 "\n",
    3904             :                              dev_size, opts->cluster_sz);
    3905           0 :                 return -ENOSPC;
    3906             :         }
    3907         784 :         if (opts->cluster_sz < SPDK_BS_PAGE_SIZE) {
    3908             :                 /* Cluster size cannot be smaller than page size */
    3909           4 :                 SPDK_ERRLOG("Cluster size %" PRIu32 " is smaller than page size %d\n",
    3910             :                             opts->cluster_sz, SPDK_BS_PAGE_SIZE);
    3911           4 :                 return -EINVAL;
    3912             :         }
    3913         780 :         bs = calloc(1, sizeof(struct spdk_blob_store));
    3914         780 :         if (!bs) {
    3915           0 :                 return -ENOMEM;
    3916             :         }
    3917             : 
    3918         780 :         ctx = calloc(1, sizeof(struct spdk_bs_load_ctx));
    3919         780 :         if (!ctx) {
    3920           0 :                 free(bs);
    3921           0 :                 return -ENOMEM;
    3922             :         }
    3923             : 
    3924         780 :         ctx->bs = bs;
    3925         780 :         ctx->iter_cb_fn = opts->iter_cb_fn;
    3926         780 :         ctx->iter_cb_arg = opts->iter_cb_arg;
    3927         780 :         ctx->force_recover = opts->force_recover;
    3928             : 
    3929         780 :         ctx->super = spdk_zmalloc(sizeof(*ctx->super), 0x1000, NULL,
    3930             :                                   SPDK_ENV_SOCKET_ID_ANY, SPDK_MALLOC_DMA);
    3931         780 :         if (!ctx->super) {
    3932           0 :                 free(ctx);
    3933           0 :                 free(bs);
    3934           0 :                 return -ENOMEM;
    3935             :         }
    3936             : 
    3937         780 :         RB_INIT(&bs->open_blobs);
    3938         780 :         TAILQ_INIT(&bs->snapshots);
    3939         780 :         bs->dev = dev;
    3940         780 :         bs->md_thread = spdk_get_thread();
    3941         780 :         assert(bs->md_thread != NULL);
    3942             : 
    3943             :         /*
    3944             :          * Do not use bs_lba_to_cluster() here since blockcnt may not be an
    3945             :          *  even multiple of the cluster size.
    3946             :          */
    3947         780 :         bs->cluster_sz = opts->cluster_sz;
    3948         780 :         bs->total_clusters = dev->blockcnt / (bs->cluster_sz / dev->blocklen);
    3949         780 :         ctx->used_clusters = spdk_bit_array_create(bs->total_clusters);
    3950         780 :         if (!ctx->used_clusters) {
    3951           0 :                 spdk_free(ctx->super);
    3952           0 :                 free(ctx);
    3953           0 :                 free(bs);
    3954           0 :                 return -ENOMEM;
    3955             :         }
    3956             : 
    3957         780 :         bs->pages_per_cluster = bs->cluster_sz / SPDK_BS_PAGE_SIZE;
    3958         780 :         if (spdk_u32_is_pow2(bs->pages_per_cluster)) {
    3959         780 :                 bs->pages_per_cluster_shift = spdk_u32log2(bs->pages_per_cluster);
    3960             :         }
    3961         780 :         bs->num_free_clusters = bs->total_clusters;
    3962         780 :         bs->io_unit_size = dev->blocklen;
    3963             : 
    3964         780 :         bs->max_channel_ops = opts->max_channel_ops;
    3965         780 :         bs->super_blob = SPDK_BLOBID_INVALID;
    3966         780 :         memcpy(&bs->bstype, &opts->bstype, sizeof(opts->bstype));
    3967         780 :         bs->esnap_bs_dev_create = opts->esnap_bs_dev_create;
    3968         780 :         bs->esnap_ctx = opts->esnap_ctx;
    3969             : 
    3970             :         /* The metadata is assumed to be at least 1 page */
    3971         780 :         bs->used_md_pages = spdk_bit_array_create(1);
    3972         780 :         bs->used_blobids = spdk_bit_array_create(0);
    3973         780 :         bs->open_blobids = spdk_bit_array_create(0);
    3974             : 
    3975         780 :         spdk_spin_init(&bs->used_lock);
    3976             : 
    3977         780 :         spdk_io_device_register(bs, bs_channel_create, bs_channel_destroy,
    3978             :                                 sizeof(struct spdk_bs_channel), "blobstore");
    3979         780 :         rc = bs_register_md_thread(bs);
    3980         780 :         if (rc == -1) {
    3981           0 :                 spdk_io_device_unregister(bs, NULL);
    3982           0 :                 spdk_spin_destroy(&bs->used_lock);
    3983           0 :                 spdk_bit_array_free(&bs->open_blobids);
    3984           0 :                 spdk_bit_array_free(&bs->used_blobids);
    3985           0 :                 spdk_bit_array_free(&bs->used_md_pages);
    3986           0 :                 spdk_bit_array_free(&ctx->used_clusters);
    3987           0 :                 spdk_free(ctx->super);
    3988           0 :                 free(ctx);
    3989           0 :                 free(bs);
    3990             :                 /* FIXME: this is a lie but don't know how to get a proper error code here */
    3991           0 :                 return -ENOMEM;
    3992             :         }
    3993             : 
    3994         780 :         *_ctx = ctx;
    3995         780 :         *_bs = bs;
    3996         780 :         return 0;
    3997             : }
    3998             : 
    3999             : static void
    4000          24 : bs_load_ctx_fail(struct spdk_bs_load_ctx *ctx, int bserrno)
    4001             : {
    4002          24 :         assert(bserrno != 0);
    4003             : 
    4004          24 :         spdk_free(ctx->super);
    4005          24 :         bs_sequence_finish(ctx->seq, bserrno);
    4006          24 :         bs_free(ctx->bs);
    4007          24 :         spdk_bit_array_free(&ctx->used_clusters);
    4008          24 :         free(ctx);
    4009          24 : }
    4010             : 
    4011             : static void
    4012         824 : bs_write_super(spdk_bs_sequence_t *seq, struct spdk_blob_store *bs,
    4013             :                struct spdk_bs_super_block *super, spdk_bs_sequence_cpl cb_fn, void *cb_arg)
    4014             : {
    4015             :         /* Update the values in the super block */
    4016         824 :         super->super_blob = bs->super_blob;
    4017         824 :         memcpy(&super->bstype, &bs->bstype, sizeof(bs->bstype));
    4018         824 :         super->crc = blob_md_page_calc_crc(super);
    4019         824 :         bs_sequence_write_dev(seq, super, bs_page_to_lba(bs, 0),
    4020         824 :                               bs_byte_to_lba(bs, sizeof(*super)),
    4021             :                               cb_fn, cb_arg);
    4022         824 : }
    4023             : 
    4024             : static void
    4025         760 : bs_write_used_clusters(spdk_bs_sequence_t *seq, void *arg, spdk_bs_sequence_cpl cb_fn)
    4026             : {
    4027         760 :         struct spdk_bs_load_ctx *ctx = arg;
    4028             :         uint64_t        mask_size, lba, lba_count;
    4029             : 
    4030             :         /* Write out the used clusters mask */
    4031         760 :         mask_size = ctx->super->used_cluster_mask_len * SPDK_BS_PAGE_SIZE;
    4032         760 :         ctx->mask = spdk_zmalloc(mask_size, 0x1000, NULL,
    4033             :                                  SPDK_ENV_SOCKET_ID_ANY, SPDK_MALLOC_DMA);
    4034         760 :         if (!ctx->mask) {
    4035           0 :                 bs_load_ctx_fail(ctx, -ENOMEM);
    4036           0 :                 return;
    4037             :         }
    4038             : 
    4039         760 :         ctx->mask->type = SPDK_MD_MASK_TYPE_USED_CLUSTERS;
    4040         760 :         ctx->mask->length = ctx->bs->total_clusters;
    4041             :         /* We could get here through the normal unload path, or through dirty
    4042             :          * shutdown recovery.  For the normal unload path, we use the mask from
    4043             :          * the bit pool.  For dirty shutdown recovery, we don't have a bit pool yet -
    4044             :          * only the bit array from the load ctx.
    4045             :          */
    4046         760 :         if (ctx->bs->used_clusters) {
    4047         654 :                 assert(ctx->mask->length == spdk_bit_pool_capacity(ctx->bs->used_clusters));
    4048         654 :                 spdk_bit_pool_store_mask(ctx->bs->used_clusters, ctx->mask->mask);
    4049             :         } else {
    4050         106 :                 assert(ctx->mask->length == spdk_bit_array_capacity(ctx->used_clusters));
    4051         106 :                 spdk_bit_array_store_mask(ctx->used_clusters, ctx->mask->mask);
    4052             :         }
    4053         760 :         lba = bs_page_to_lba(ctx->bs, ctx->super->used_cluster_mask_start);
    4054         760 :         lba_count = bs_page_to_lba(ctx->bs, ctx->super->used_cluster_mask_len);
    4055         760 :         bs_sequence_write_dev(seq, ctx->mask, lba, lba_count, cb_fn, arg);
    4056             : }
    4057             : 
    4058             : static void
    4059         760 : bs_write_used_md(spdk_bs_sequence_t *seq, void *arg, spdk_bs_sequence_cpl cb_fn)
    4060             : {
    4061         760 :         struct spdk_bs_load_ctx *ctx = arg;
    4062             :         uint64_t        mask_size, lba, lba_count;
    4063             : 
    4064         760 :         mask_size = ctx->super->used_page_mask_len * SPDK_BS_PAGE_SIZE;
    4065         760 :         ctx->mask = spdk_zmalloc(mask_size, 0x1000, NULL,
    4066             :                                  SPDK_ENV_SOCKET_ID_ANY, SPDK_MALLOC_DMA);
    4067         760 :         if (!ctx->mask) {
    4068           0 :                 bs_load_ctx_fail(ctx, -ENOMEM);
    4069           0 :                 return;
    4070             :         }
    4071             : 
    4072         760 :         ctx->mask->type = SPDK_MD_MASK_TYPE_USED_PAGES;
    4073         760 :         ctx->mask->length = ctx->super->md_len;
    4074         760 :         assert(ctx->mask->length == spdk_bit_array_capacity(ctx->bs->used_md_pages));
    4075             : 
    4076         760 :         spdk_bit_array_store_mask(ctx->bs->used_md_pages, ctx->mask->mask);
    4077         760 :         lba = bs_page_to_lba(ctx->bs, ctx->super->used_page_mask_start);
    4078         760 :         lba_count = bs_page_to_lba(ctx->bs, ctx->super->used_page_mask_len);
    4079         760 :         bs_sequence_write_dev(seq, ctx->mask, lba, lba_count, cb_fn, arg);
    4080             : }
    4081             : 
    4082             : static void
    4083         760 : bs_write_used_blobids(spdk_bs_sequence_t *seq, void *arg, spdk_bs_sequence_cpl cb_fn)
    4084             : {
    4085         760 :         struct spdk_bs_load_ctx *ctx = arg;
    4086             :         uint64_t        mask_size, lba, lba_count;
    4087             : 
    4088         760 :         if (ctx->super->used_blobid_mask_len == 0) {
    4089             :                 /*
    4090             :                  * This is a pre-v3 on-disk format where the blobid mask does not get
    4091             :                  *  written to disk.
    4092             :                  */
    4093          24 :                 cb_fn(seq, arg, 0);
    4094          24 :                 return;
    4095             :         }
    4096             : 
    4097         736 :         mask_size = ctx->super->used_blobid_mask_len * SPDK_BS_PAGE_SIZE;
    4098         736 :         ctx->mask = spdk_zmalloc(mask_size, 0x1000, NULL, SPDK_ENV_SOCKET_ID_ANY,
    4099             :                                  SPDK_MALLOC_DMA);
    4100         736 :         if (!ctx->mask) {
    4101           0 :                 bs_load_ctx_fail(ctx, -ENOMEM);
    4102           0 :                 return;
    4103             :         }
    4104             : 
    4105         736 :         ctx->mask->type = SPDK_MD_MASK_TYPE_USED_BLOBIDS;
    4106         736 :         ctx->mask->length = ctx->super->md_len;
    4107         736 :         assert(ctx->mask->length == spdk_bit_array_capacity(ctx->bs->used_blobids));
    4108             : 
    4109         736 :         spdk_bit_array_store_mask(ctx->bs->used_blobids, ctx->mask->mask);
    4110         736 :         lba = bs_page_to_lba(ctx->bs, ctx->super->used_blobid_mask_start);
    4111         736 :         lba_count = bs_page_to_lba(ctx->bs, ctx->super->used_blobid_mask_len);
    4112         736 :         bs_sequence_write_dev(seq, ctx->mask, lba, lba_count, cb_fn, arg);
    4113             : }
    4114             : 
    4115             : static void
    4116         704 : blob_set_thin_provision(struct spdk_blob *blob)
    4117             : {
    4118         704 :         blob_verify_md_op(blob);
    4119         704 :         blob->invalid_flags |= SPDK_BLOB_THIN_PROV;
    4120         704 :         blob->state = SPDK_BLOB_STATE_DIRTY;
    4121         704 : }
    4122             : 
    4123             : static void
    4124        2094 : blob_set_clear_method(struct spdk_blob *blob, enum blob_clear_method clear_method)
    4125             : {
    4126        2094 :         blob_verify_md_op(blob);
    4127        2094 :         blob->clear_method = clear_method;
    4128        2094 :         blob->md_ro_flags |= (clear_method << SPDK_BLOB_CLEAR_METHOD_SHIFT);
    4129        2094 :         blob->state = SPDK_BLOB_STATE_DIRTY;
    4130        2094 : }
    4131             : 
    4132             : static void bs_load_iter(void *arg, struct spdk_blob *blob, int bserrno);
    4133             : 
    4134             : static void
    4135          24 : bs_delete_corrupted_blob_cpl(void *cb_arg, int bserrno)
    4136             : {
    4137          24 :         struct spdk_bs_load_ctx *ctx = cb_arg;
    4138             :         spdk_blob_id id;
    4139             :         int64_t page_num;
    4140             : 
    4141             :         /* Iterate to next blob (we can't use spdk_bs_iter_next function as our
    4142             :          * last blob has been removed */
    4143          24 :         page_num = bs_blobid_to_page(ctx->blobid);
    4144          24 :         page_num++;
    4145          24 :         page_num = spdk_bit_array_find_first_set(ctx->bs->used_blobids, page_num);
    4146          24 :         if (page_num >= spdk_bit_array_capacity(ctx->bs->used_blobids)) {
    4147          24 :                 bs_load_iter(ctx, NULL, -ENOENT);
    4148          24 :                 return;
    4149             :         }
    4150             : 
    4151           0 :         id = bs_page_to_blobid(page_num);
    4152             : 
    4153           0 :         spdk_bs_open_blob(ctx->bs, id, bs_load_iter, ctx);
    4154             : }
    4155             : 
    4156             : static void
    4157          24 : bs_delete_corrupted_close_cb(void *cb_arg, int bserrno)
    4158             : {
    4159          24 :         struct spdk_bs_load_ctx *ctx = cb_arg;
    4160             : 
    4161          24 :         if (bserrno != 0) {
    4162           0 :                 SPDK_ERRLOG("Failed to close corrupted blob\n");
    4163           0 :                 spdk_bs_iter_next(ctx->bs, ctx->blob, bs_load_iter, ctx);
    4164           0 :                 return;
    4165             :         }
    4166             : 
    4167          24 :         spdk_bs_delete_blob(ctx->bs, ctx->blobid, bs_delete_corrupted_blob_cpl, ctx);
    4168             : }
    4169             : 
    4170             : static void
    4171          24 : bs_delete_corrupted_blob(void *cb_arg, int bserrno)
    4172             : {
    4173          24 :         struct spdk_bs_load_ctx *ctx = cb_arg;
    4174             :         uint64_t i;
    4175             : 
    4176          24 :         if (bserrno != 0) {
    4177           0 :                 SPDK_ERRLOG("Failed to close clone of a corrupted blob\n");
    4178           0 :                 spdk_bs_iter_next(ctx->bs, ctx->blob, bs_load_iter, ctx);
    4179           0 :                 return;
    4180             :         }
    4181             : 
    4182             :         /* Snapshot and clone have the same copy of cluster map and extent pages
    4183             :          * at this point. Let's clear both for snapshot now,
    4184             :          * so that it won't be cleared for clone later when we remove snapshot.
    4185             :          * Also set thin provision to pass data corruption check */
    4186         264 :         for (i = 0; i < ctx->blob->active.num_clusters; i++) {
    4187         240 :                 ctx->blob->active.clusters[i] = 0;
    4188             :         }
    4189          36 :         for (i = 0; i < ctx->blob->active.num_extent_pages; i++) {
    4190          12 :                 ctx->blob->active.extent_pages[i] = 0;
    4191             :         }
    4192             : 
    4193          24 :         ctx->blob->active.num_allocated_clusters = 0;
    4194             : 
    4195          24 :         ctx->blob->md_ro = false;
    4196             : 
    4197          24 :         blob_set_thin_provision(ctx->blob);
    4198             : 
    4199          24 :         ctx->blobid = ctx->blob->id;
    4200             : 
    4201          24 :         spdk_blob_close(ctx->blob, bs_delete_corrupted_close_cb, ctx);
    4202             : }
    4203             : 
    4204             : static void
    4205          12 : bs_update_corrupted_blob(void *cb_arg, int bserrno)
    4206             : {
    4207          12 :         struct spdk_bs_load_ctx *ctx = cb_arg;
    4208             : 
    4209          12 :         if (bserrno != 0) {
    4210           0 :                 SPDK_ERRLOG("Failed to close clone of a corrupted blob\n");
    4211           0 :                 spdk_bs_iter_next(ctx->bs, ctx->blob, bs_load_iter, ctx);
    4212           0 :                 return;
    4213             :         }
    4214             : 
    4215          12 :         ctx->blob->md_ro = false;
    4216          12 :         blob_remove_xattr(ctx->blob, SNAPSHOT_PENDING_REMOVAL, true);
    4217          12 :         blob_remove_xattr(ctx->blob, SNAPSHOT_IN_PROGRESS, true);
    4218          12 :         spdk_blob_set_read_only(ctx->blob);
    4219             : 
    4220          12 :         if (ctx->iter_cb_fn) {
    4221           0 :                 ctx->iter_cb_fn(ctx->iter_cb_arg, ctx->blob, 0);
    4222             :         }
    4223          12 :         bs_blob_list_add(ctx->blob);
    4224             : 
    4225          12 :         spdk_bs_iter_next(ctx->bs, ctx->blob, bs_load_iter, ctx);
    4226             : }
    4227             : 
    4228             : static void
    4229          36 : bs_examine_clone(void *cb_arg, struct spdk_blob *blob, int bserrno)
    4230             : {
    4231          36 :         struct spdk_bs_load_ctx *ctx = cb_arg;
    4232             : 
    4233          36 :         if (bserrno != 0) {
    4234           0 :                 SPDK_ERRLOG("Failed to open clone of a corrupted blob\n");
    4235           0 :                 spdk_bs_iter_next(ctx->bs, ctx->blob, bs_load_iter, ctx);
    4236           0 :                 return;
    4237             :         }
    4238             : 
    4239          36 :         if (blob->parent_id == ctx->blob->id) {
    4240             :                 /* Power failure occurred before updating clone (snapshot delete case)
    4241             :                  * or after updating clone (creating snapshot case) - keep snapshot */
    4242          12 :                 spdk_blob_close(blob, bs_update_corrupted_blob, ctx);
    4243             :         } else {
    4244             :                 /* Power failure occurred after updating clone (snapshot delete case)
    4245             :                  * or before updating clone (creating snapshot case) - remove snapshot */
    4246          24 :                 spdk_blob_close(blob, bs_delete_corrupted_blob, ctx);
    4247             :         }
    4248             : }
    4249             : 
    4250             : static void
    4251         720 : bs_load_iter(void *arg, struct spdk_blob *blob, int bserrno)
    4252             : {
    4253         720 :         struct spdk_bs_load_ctx *ctx = arg;
    4254         720 :         const void *value;
    4255         720 :         size_t len;
    4256         720 :         int rc = 0;
    4257             : 
    4258         720 :         if (bserrno == 0) {
    4259             :                 /* Examine blob if it is corrupted after power failure. Fix
    4260             :                  * the ones that can be fixed and remove any other corrupted
    4261             :                  * ones. If it is not corrupted just process it */
    4262         440 :                 rc = blob_get_xattr_value(blob, SNAPSHOT_PENDING_REMOVAL, &value, &len, true);
    4263         440 :                 if (rc != 0) {
    4264         420 :                         rc = blob_get_xattr_value(blob, SNAPSHOT_IN_PROGRESS, &value, &len, true);
    4265         420 :                         if (rc != 0) {
    4266             :                                 /* Not corrupted - process it and continue with iterating through blobs */
    4267         404 :                                 if (ctx->iter_cb_fn) {
    4268          34 :                                         ctx->iter_cb_fn(ctx->iter_cb_arg, blob, 0);
    4269             :                                 }
    4270         404 :                                 bs_blob_list_add(blob);
    4271         404 :                                 spdk_bs_iter_next(ctx->bs, blob, bs_load_iter, ctx);
    4272         404 :                                 return;
    4273             :                         }
    4274             : 
    4275             :                 }
    4276             : 
    4277          36 :                 assert(len == sizeof(spdk_blob_id));
    4278             : 
    4279          36 :                 ctx->blob = blob;
    4280             : 
    4281             :                 /* Open clone to check if we are able to fix this blob or should we remove it */
    4282          36 :                 spdk_bs_open_blob(ctx->bs, *(spdk_blob_id *)value, bs_examine_clone, ctx);
    4283          36 :                 return;
    4284         280 :         } else if (bserrno == -ENOENT) {
    4285         280 :                 bserrno = 0;
    4286             :         } else {
    4287             :                 /*
    4288             :                  * This case needs to be looked at further.  Same problem
    4289             :                  *  exists with applications that rely on explicit blob
    4290             :                  *  iteration.  We should just skip the blob that failed
    4291             :                  *  to load and continue on to the next one.
    4292             :                  */
    4293           0 :                 SPDK_ERRLOG("Error in iterating blobs\n");
    4294             :         }
    4295             : 
    4296         280 :         ctx->iter_cb_fn = NULL;
    4297             : 
    4298         280 :         spdk_free(ctx->super);
    4299         280 :         spdk_free(ctx->mask);
    4300         280 :         bs_sequence_finish(ctx->seq, bserrno);
    4301         280 :         free(ctx);
    4302             : }
    4303             : 
    4304             : static void bs_dump_read_md_page(spdk_bs_sequence_t *seq, void *cb_arg);
    4305             : 
    4306             : static void
    4307         280 : bs_load_complete(struct spdk_bs_load_ctx *ctx)
    4308             : {
    4309         280 :         ctx->bs->used_clusters = spdk_bit_pool_create_from_array(ctx->used_clusters);
    4310         280 :         if (ctx->dumping) {
    4311           0 :                 bs_dump_read_md_page(ctx->seq, ctx);
    4312           0 :                 return;
    4313             :         }
    4314         280 :         spdk_bs_iter_first(ctx->bs, bs_load_iter, ctx);
    4315             : }
    4316             : 
    4317             : static void
    4318         174 : bs_load_used_blobids_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
    4319             : {
    4320         174 :         struct spdk_bs_load_ctx *ctx = cb_arg;
    4321             :         int rc;
    4322             : 
    4323             :         /* The type must be correct */
    4324         174 :         assert(ctx->mask->type == SPDK_MD_MASK_TYPE_USED_BLOBIDS);
    4325             : 
    4326             :         /* The length of the mask (in bits) must not be greater than
    4327             :          * the length of the buffer (converted to bits) */
    4328         174 :         assert(ctx->mask->length <= (ctx->super->used_blobid_mask_len * SPDK_BS_PAGE_SIZE * 8));
    4329             : 
    4330             :         /* The length of the mask must be exactly equal to the size
    4331             :          * (in pages) of the metadata region */
    4332         174 :         assert(ctx->mask->length == ctx->super->md_len);
    4333             : 
    4334         174 :         rc = spdk_bit_array_resize(&ctx->bs->used_blobids, ctx->mask->length);
    4335         174 :         if (rc < 0) {
    4336           0 :                 spdk_free(ctx->mask);
    4337           0 :                 bs_load_ctx_fail(ctx, rc);
    4338           0 :                 return;
    4339             :         }
    4340             : 
    4341         174 :         spdk_bit_array_load_mask(ctx->bs->used_blobids, ctx->mask->mask);
    4342         174 :         bs_load_complete(ctx);
    4343             : }
    4344             : 
    4345             : static void
    4346         174 : bs_load_used_clusters_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
    4347             : {
    4348         174 :         struct spdk_bs_load_ctx *ctx = cb_arg;
    4349             :         uint64_t                lba, lba_count, mask_size;
    4350             :         int                     rc;
    4351             : 
    4352         174 :         if (bserrno != 0) {
    4353           0 :                 bs_load_ctx_fail(ctx, bserrno);
    4354           0 :                 return;
    4355             :         }
    4356             : 
    4357             :         /* The type must be correct */
    4358         174 :         assert(ctx->mask->type == SPDK_MD_MASK_TYPE_USED_CLUSTERS);
    4359             :         /* The length of the mask (in bits) must not be greater than the length of the buffer (converted to bits) */
    4360         174 :         assert(ctx->mask->length <= (ctx->super->used_cluster_mask_len * sizeof(
    4361             :                                              struct spdk_blob_md_page) * 8));
    4362             :         /*
    4363             :          * The length of the mask must be equal to or larger than the total number of clusters. It may be
    4364             :          * larger than the total number of clusters due to a failure spdk_bs_grow.
    4365             :          */
    4366         174 :         assert(ctx->mask->length >= ctx->bs->total_clusters);
    4367         174 :         if (ctx->mask->length > ctx->bs->total_clusters) {
    4368           4 :                 SPDK_WARNLOG("Shrink the used_custers mask length to total_clusters");
    4369           4 :                 ctx->mask->length = ctx->bs->total_clusters;
    4370             :         }
    4371             : 
    4372         174 :         rc = spdk_bit_array_resize(&ctx->used_clusters, ctx->mask->length);
    4373         174 :         if (rc < 0) {
    4374           0 :                 spdk_free(ctx->mask);
    4375           0 :                 bs_load_ctx_fail(ctx, rc);
    4376           0 :                 return;
    4377             :         }
    4378             : 
    4379         174 :         spdk_bit_array_load_mask(ctx->used_clusters, ctx->mask->mask);
    4380         174 :         ctx->bs->num_free_clusters = spdk_bit_array_count_clear(ctx->used_clusters);
    4381         174 :         assert(ctx->bs->num_free_clusters <= ctx->bs->total_clusters);
    4382             : 
    4383         174 :         spdk_free(ctx->mask);
    4384             : 
    4385             :         /* Read the used blobids mask */
    4386         174 :         mask_size = ctx->super->used_blobid_mask_len * SPDK_BS_PAGE_SIZE;
    4387         174 :         ctx->mask = spdk_zmalloc(mask_size, 0x1000, NULL, SPDK_ENV_SOCKET_ID_ANY,
    4388             :                                  SPDK_MALLOC_DMA);
    4389         174 :         if (!ctx->mask) {
    4390           0 :                 bs_load_ctx_fail(ctx, -ENOMEM);
    4391           0 :                 return;
    4392             :         }
    4393         174 :         lba = bs_page_to_lba(ctx->bs, ctx->super->used_blobid_mask_start);
    4394         174 :         lba_count = bs_page_to_lba(ctx->bs, ctx->super->used_blobid_mask_len);
    4395         174 :         bs_sequence_read_dev(seq, ctx->mask, lba, lba_count,
    4396             :                              bs_load_used_blobids_cpl, ctx);
    4397             : }
    4398             : 
    4399             : static void
    4400         174 : bs_load_used_pages_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
    4401             : {
    4402         174 :         struct spdk_bs_load_ctx *ctx = cb_arg;
    4403             :         uint64_t                lba, lba_count, mask_size;
    4404             :         int                     rc;
    4405             : 
    4406         174 :         if (bserrno != 0) {
    4407           0 :                 bs_load_ctx_fail(ctx, bserrno);
    4408           0 :                 return;
    4409             :         }
    4410             : 
    4411             :         /* The type must be correct */
    4412         174 :         assert(ctx->mask->type == SPDK_MD_MASK_TYPE_USED_PAGES);
    4413             :         /* The length of the mask (in bits) must not be greater than the length of the buffer (converted to bits) */
    4414         174 :         assert(ctx->mask->length <= (ctx->super->used_page_mask_len * SPDK_BS_PAGE_SIZE *
    4415             :                                      8));
    4416             :         /* The length of the mask must be exactly equal to the size (in pages) of the metadata region */
    4417         174 :         if (ctx->mask->length != ctx->super->md_len) {
    4418           0 :                 SPDK_ERRLOG("mismatched md_len in used_pages mask: "
    4419             :                             "mask->length=%" PRIu32 " super->md_len=%" PRIu32 "\n",
    4420             :                             ctx->mask->length, ctx->super->md_len);
    4421           0 :                 assert(false);
    4422             :         }
    4423             : 
    4424         174 :         rc = spdk_bit_array_resize(&ctx->bs->used_md_pages, ctx->mask->length);
    4425         174 :         if (rc < 0) {
    4426           0 :                 spdk_free(ctx->mask);
    4427           0 :                 bs_load_ctx_fail(ctx, rc);
    4428           0 :                 return;
    4429             :         }
    4430             : 
    4431         174 :         spdk_bit_array_load_mask(ctx->bs->used_md_pages, ctx->mask->mask);
    4432         174 :         spdk_free(ctx->mask);
    4433             : 
    4434             :         /* Read the used clusters mask */
    4435         174 :         mask_size = ctx->super->used_cluster_mask_len * SPDK_BS_PAGE_SIZE;
    4436         174 :         ctx->mask = spdk_zmalloc(mask_size, 0x1000, NULL, SPDK_ENV_SOCKET_ID_ANY,
    4437             :                                  SPDK_MALLOC_DMA);
    4438         174 :         if (!ctx->mask) {
    4439           0 :                 bs_load_ctx_fail(ctx, -ENOMEM);
    4440           0 :                 return;
    4441             :         }
    4442         174 :         lba = bs_page_to_lba(ctx->bs, ctx->super->used_cluster_mask_start);
    4443         174 :         lba_count = bs_page_to_lba(ctx->bs, ctx->super->used_cluster_mask_len);
    4444         174 :         bs_sequence_read_dev(seq, ctx->mask, lba, lba_count,
    4445             :                              bs_load_used_clusters_cpl, ctx);
    4446             : }
    4447             : 
    4448             : static void
    4449         174 : bs_load_read_used_pages(struct spdk_bs_load_ctx *ctx)
    4450             : {
    4451             :         uint64_t lba, lba_count, mask_size;
    4452             : 
    4453             :         /* Read the used pages mask */
    4454         174 :         mask_size = ctx->super->used_page_mask_len * SPDK_BS_PAGE_SIZE;
    4455         174 :         ctx->mask = spdk_zmalloc(mask_size, 0x1000, NULL,
    4456             :                                  SPDK_ENV_SOCKET_ID_ANY, SPDK_MALLOC_DMA);
    4457         174 :         if (!ctx->mask) {
    4458           0 :                 bs_load_ctx_fail(ctx, -ENOMEM);
    4459           0 :                 return;
    4460             :         }
    4461             : 
    4462         174 :         lba = bs_page_to_lba(ctx->bs, ctx->super->used_page_mask_start);
    4463         174 :         lba_count = bs_page_to_lba(ctx->bs, ctx->super->used_page_mask_len);
    4464         174 :         bs_sequence_read_dev(ctx->seq, ctx->mask, lba, lba_count,
    4465             :                              bs_load_used_pages_cpl, ctx);
    4466             : }
    4467             : 
    4468             : static int
    4469         246 : bs_load_replay_md_parse_page(struct spdk_bs_load_ctx *ctx, struct spdk_blob_md_page *page)
    4470             : {
    4471         246 :         struct spdk_blob_store *bs = ctx->bs;
    4472             :         struct spdk_blob_md_descriptor *desc;
    4473         246 :         size_t  cur_desc = 0;
    4474             : 
    4475         246 :         desc = (struct spdk_blob_md_descriptor *)page->descriptors;
    4476         718 :         while (cur_desc < sizeof(page->descriptors)) {
    4477         718 :                 if (desc->type == SPDK_MD_DESCRIPTOR_TYPE_PADDING) {
    4478         226 :                         if (desc->length == 0) {
    4479             :                                 /* If padding and length are 0, this terminates the page */
    4480         226 :                                 break;
    4481             :                         }
    4482         492 :                 } else if (desc->type == SPDK_MD_DESCRIPTOR_TYPE_EXTENT_RLE) {
    4483             :                         struct spdk_blob_md_descriptor_extent_rle       *desc_extent_rle;
    4484             :                         unsigned int                            i, j;
    4485          68 :                         unsigned int                            cluster_count = 0;
    4486             :                         uint32_t                                cluster_idx;
    4487             : 
    4488          68 :                         desc_extent_rle = (struct spdk_blob_md_descriptor_extent_rle *)desc;
    4489             : 
    4490         136 :                         for (i = 0; i < desc_extent_rle->length / sizeof(desc_extent_rle->extents[0]); i++) {
    4491         828 :                                 for (j = 0; j < desc_extent_rle->extents[i].length; j++) {
    4492         760 :                                         cluster_idx = desc_extent_rle->extents[i].cluster_idx;
    4493             :                                         /*
    4494             :                                          * cluster_idx = 0 means an unallocated cluster - don't mark that
    4495             :                                          * in the used cluster map.
    4496             :                                          */
    4497         760 :                                         if (cluster_idx != 0) {
    4498         540 :                                                 SPDK_NOTICELOG("Recover: cluster %" PRIu32 "\n", cluster_idx + j);
    4499         540 :                                                 spdk_bit_array_set(ctx->used_clusters, cluster_idx + j);
    4500         540 :                                                 if (bs->num_free_clusters == 0) {
    4501           0 :                                                         return -ENOSPC;
    4502             :                                                 }
    4503         540 :                                                 bs->num_free_clusters--;
    4504             :                                         }
    4505         760 :                                         cluster_count++;
    4506             :                                 }
    4507             :                         }
    4508          68 :                         if (cluster_count == 0) {
    4509           0 :                                 return -EINVAL;
    4510             :                         }
    4511         424 :                 } else if (desc->type == SPDK_MD_DESCRIPTOR_TYPE_EXTENT_PAGE) {
    4512             :                         struct spdk_blob_md_descriptor_extent_page      *desc_extent;
    4513             :                         uint32_t                                        i;
    4514          52 :                         uint32_t                                        cluster_count = 0;
    4515             :                         uint32_t                                        cluster_idx;
    4516             :                         size_t                                          cluster_idx_length;
    4517             : 
    4518          52 :                         desc_extent = (struct spdk_blob_md_descriptor_extent_page *)desc;
    4519          52 :                         cluster_idx_length = desc_extent->length - sizeof(desc_extent->start_cluster_idx);
    4520             : 
    4521          52 :                         if (desc_extent->length <= sizeof(desc_extent->start_cluster_idx) ||
    4522          52 :                             (cluster_idx_length % sizeof(desc_extent->cluster_idx[0]) != 0)) {
    4523           0 :                                 return -EINVAL;
    4524             :                         }
    4525             : 
    4526         652 :                         for (i = 0; i < cluster_idx_length / sizeof(desc_extent->cluster_idx[0]); i++) {
    4527         600 :                                 cluster_idx = desc_extent->cluster_idx[i];
    4528             :                                 /*
    4529             :                                  * cluster_idx = 0 means an unallocated cluster - don't mark that
    4530             :                                  * in the used cluster map.
    4531             :                                  */
    4532         600 :                                 if (cluster_idx != 0) {
    4533         600 :                                         if (cluster_idx < desc_extent->start_cluster_idx &&
    4534           0 :                                             cluster_idx >= desc_extent->start_cluster_idx + cluster_count) {
    4535           0 :                                                 return -EINVAL;
    4536             :                                         }
    4537         600 :                                         spdk_bit_array_set(ctx->used_clusters, cluster_idx);
    4538         600 :                                         if (bs->num_free_clusters == 0) {
    4539           0 :                                                 return -ENOSPC;
    4540             :                                         }
    4541         600 :                                         bs->num_free_clusters--;
    4542             :                                 }
    4543         600 :                                 cluster_count++;
    4544             :                         }
    4545             : 
    4546          52 :                         if (cluster_count == 0) {
    4547           0 :                                 return -EINVAL;
    4548             :                         }
    4549         372 :                 } else if (desc->type == SPDK_MD_DESCRIPTOR_TYPE_XATTR) {
    4550             :                         /* Skip this item */
    4551         296 :                 } else if (desc->type == SPDK_MD_DESCRIPTOR_TYPE_XATTR_INTERNAL) {
    4552             :                         /* Skip this item */
    4553         236 :                 } else if (desc->type == SPDK_MD_DESCRIPTOR_TYPE_FLAGS) {
    4554             :                         /* Skip this item */
    4555          82 :                 } else if (desc->type == SPDK_MD_DESCRIPTOR_TYPE_EXTENT_TABLE) {
    4556             :                         struct spdk_blob_md_descriptor_extent_table *desc_extent_table;
    4557          82 :                         uint32_t num_extent_pages = ctx->num_extent_pages;
    4558             :                         uint32_t i;
    4559             :                         size_t extent_pages_length;
    4560             :                         void *tmp;
    4561             : 
    4562          82 :                         desc_extent_table = (struct spdk_blob_md_descriptor_extent_table *)desc;
    4563          82 :                         extent_pages_length = desc_extent_table->length - sizeof(desc_extent_table->num_clusters);
    4564             : 
    4565          82 :                         if (desc_extent_table->length == 0 ||
    4566          82 :                             (extent_pages_length % sizeof(desc_extent_table->extent_page[0]) != 0)) {
    4567           0 :                                 return -EINVAL;
    4568             :                         }
    4569             : 
    4570         160 :                         for (i = 0; i < extent_pages_length / sizeof(desc_extent_table->extent_page[0]); i++) {
    4571          78 :                                 if (desc_extent_table->extent_page[i].page_idx != 0) {
    4572          52 :                                         if (desc_extent_table->extent_page[i].num_pages != 1) {
    4573           0 :                                                 return -EINVAL;
    4574             :                                         }
    4575          52 :                                         num_extent_pages += 1;
    4576             :                                 }
    4577             :                         }
    4578             : 
    4579          82 :                         if (num_extent_pages > 0) {
    4580          52 :                                 tmp = realloc(ctx->extent_page_num, num_extent_pages * sizeof(uint32_t));
    4581          52 :                                 if (tmp == NULL) {
    4582           0 :                                         return -ENOMEM;
    4583             :                                 }
    4584          52 :                                 ctx->extent_page_num = tmp;
    4585             : 
    4586             :                                 /* Extent table entries contain md page numbers for extent pages.
    4587             :                                  * Zeroes represent unallocated extent pages, those are run-length-encoded.
    4588             :                                  */
    4589         104 :                                 for (i = 0; i < extent_pages_length / sizeof(desc_extent_table->extent_page[0]); i++) {
    4590          52 :                                         if (desc_extent_table->extent_page[i].page_idx != 0) {
    4591          52 :                                                 ctx->extent_page_num[ctx->num_extent_pages] = desc_extent_table->extent_page[i].page_idx;
    4592          52 :                                                 ctx->num_extent_pages += 1;
    4593             :                                         }
    4594             :                                 }
    4595             :                         }
    4596             :                 } else {
    4597             :                         /* Error */
    4598           0 :                         return -EINVAL;
    4599             :                 }
    4600             :                 /* Advance to the next descriptor */
    4601         492 :                 cur_desc += sizeof(*desc) + desc->length;
    4602         492 :                 if (cur_desc + sizeof(*desc) > sizeof(page->descriptors)) {
    4603          20 :                         break;
    4604             :                 }
    4605         472 :                 desc = (struct spdk_blob_md_descriptor *)((uintptr_t)page->descriptors + cur_desc);
    4606             :         }
    4607         246 :         return 0;
    4608             : }
    4609             : 
    4610             : static bool
    4611        1296 : bs_load_cur_extent_page_valid(struct spdk_blob_md_page *page)
    4612             : {
    4613             :         uint32_t crc;
    4614        1296 :         struct spdk_blob_md_descriptor *desc = (struct spdk_blob_md_descriptor *)page->descriptors;
    4615             :         size_t desc_len;
    4616             : 
    4617        1296 :         crc = blob_md_page_calc_crc(page);
    4618        1296 :         if (crc != page->crc) {
    4619           0 :                 return false;
    4620             :         }
    4621             : 
    4622             :         /* Extent page should always be of sequence num 0. */
    4623        1296 :         if (page->sequence_num != 0) {
    4624          44 :                 return false;
    4625             :         }
    4626             : 
    4627             :         /* Descriptor type must be EXTENT_PAGE. */
    4628        1252 :         if (desc->type != SPDK_MD_DESCRIPTOR_TYPE_EXTENT_PAGE) {
    4629         154 :                 return false;
    4630             :         }
    4631             : 
    4632             :         /* Descriptor length cannot exceed the page. */
    4633        1098 :         desc_len = sizeof(*desc) + desc->length;
    4634        1098 :         if (desc_len > sizeof(page->descriptors)) {
    4635           0 :                 return false;
    4636             :         }
    4637             : 
    4638             :         /* It has to be the only descriptor in the page. */
    4639        1098 :         if (desc_len + sizeof(*desc) <= sizeof(page->descriptors)) {
    4640        1098 :                 desc = (struct spdk_blob_md_descriptor *)((uintptr_t)page->descriptors + desc_len);
    4641        1098 :                 if (desc->length != 0) {
    4642           0 :                         return false;
    4643             :                 }
    4644             :         }
    4645             : 
    4646        1098 :         return true;
    4647             : }
    4648             : 
    4649             : static bool
    4650        6754 : bs_load_cur_md_page_valid(struct spdk_bs_load_ctx *ctx)
    4651             : {
    4652             :         uint32_t crc;
    4653        6754 :         struct spdk_blob_md_page *page = ctx->page;
    4654             : 
    4655        6754 :         crc = blob_md_page_calc_crc(page);
    4656        6754 :         if (crc != page->crc) {
    4657        6538 :                 return false;
    4658             :         }
    4659             : 
    4660             :         /* First page of a sequence should match the blobid. */
    4661         216 :         if (page->sequence_num == 0 &&
    4662         172 :             bs_page_to_blobid(ctx->cur_page) != page->id) {
    4663          18 :                 return false;
    4664             :         }
    4665         198 :         assert(bs_load_cur_extent_page_valid(page) == false);
    4666             : 
    4667         198 :         return true;
    4668             : }
    4669             : 
    4670             : static void bs_load_replay_cur_md_page(struct spdk_bs_load_ctx *ctx);
    4671             : 
    4672             : static void
    4673         106 : bs_load_write_used_clusters_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
    4674             : {
    4675         106 :         struct spdk_bs_load_ctx *ctx = cb_arg;
    4676             : 
    4677         106 :         if (bserrno != 0) {
    4678           0 :                 bs_load_ctx_fail(ctx, bserrno);
    4679           0 :                 return;
    4680             :         }
    4681             : 
    4682         106 :         bs_load_complete(ctx);
    4683             : }
    4684             : 
    4685             : static void
    4686         106 : bs_load_write_used_blobids_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
    4687             : {
    4688         106 :         struct spdk_bs_load_ctx *ctx = cb_arg;
    4689             : 
    4690         106 :         spdk_free(ctx->mask);
    4691         106 :         ctx->mask = NULL;
    4692             : 
    4693         106 :         if (bserrno != 0) {
    4694           0 :                 bs_load_ctx_fail(ctx, bserrno);
    4695           0 :                 return;
    4696             :         }
    4697             : 
    4698         106 :         bs_write_used_clusters(seq, ctx, bs_load_write_used_clusters_cpl);
    4699             : }
    4700             : 
    4701             : static void
    4702         106 : bs_load_write_used_pages_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
    4703             : {
    4704         106 :         struct spdk_bs_load_ctx *ctx = cb_arg;
    4705             : 
    4706         106 :         spdk_free(ctx->mask);
    4707         106 :         ctx->mask = NULL;
    4708             : 
    4709         106 :         if (bserrno != 0) {
    4710           0 :                 bs_load_ctx_fail(ctx, bserrno);
    4711           0 :                 return;
    4712             :         }
    4713             : 
    4714         106 :         bs_write_used_blobids(seq, ctx, bs_load_write_used_blobids_cpl);
    4715             : }
    4716             : 
    4717             : static void
    4718         106 : bs_load_write_used_md(struct spdk_bs_load_ctx *ctx)
    4719             : {
    4720         106 :         bs_write_used_md(ctx->seq, ctx, bs_load_write_used_pages_cpl);
    4721         106 : }
    4722             : 
    4723             : static void
    4724        6714 : bs_load_replay_md_chain_cpl(struct spdk_bs_load_ctx *ctx)
    4725             : {
    4726             :         uint64_t num_md_clusters;
    4727             :         uint64_t i;
    4728             : 
    4729        6714 :         ctx->in_page_chain = false;
    4730             : 
    4731             :         do {
    4732        6784 :                 ctx->page_index++;
    4733        6784 :         } while (spdk_bit_array_get(ctx->bs->used_md_pages, ctx->page_index) == true);
    4734             : 
    4735        6714 :         if (ctx->page_index < ctx->super->md_len) {
    4736        6608 :                 ctx->cur_page = ctx->page_index;
    4737        6608 :                 bs_load_replay_cur_md_page(ctx);
    4738             :         } else {
    4739             :                 /* Claim all of the clusters used by the metadata */
    4740         106 :                 num_md_clusters = spdk_divide_round_up(
    4741         106 :                                           ctx->super->md_start + ctx->super->md_len, ctx->bs->pages_per_cluster);
    4742         480 :                 for (i = 0; i < num_md_clusters; i++) {
    4743         374 :                         spdk_bit_array_set(ctx->used_clusters, i);
    4744             :                 }
    4745         106 :                 ctx->bs->num_free_clusters -= num_md_clusters;
    4746         106 :                 spdk_free(ctx->page);
    4747         106 :                 bs_load_write_used_md(ctx);
    4748             :         }
    4749        6714 : }
    4750             : 
    4751             : static void
    4752          52 : bs_load_replay_extent_page_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
    4753             : {
    4754          52 :         struct spdk_bs_load_ctx *ctx = cb_arg;
    4755             :         uint32_t page_num;
    4756             :         uint64_t i;
    4757             : 
    4758          52 :         if (bserrno != 0) {
    4759           0 :                 spdk_free(ctx->extent_pages);
    4760           0 :                 bs_load_ctx_fail(ctx, bserrno);
    4761           0 :                 return;
    4762             :         }
    4763             : 
    4764         104 :         for (i = 0; i < ctx->num_extent_pages; i++) {
    4765             :                 /* Extent pages are only read when present within in chain md.
    4766             :                  * Integrity of md is not right if that page was not a valid extent page. */
    4767          52 :                 if (bs_load_cur_extent_page_valid(&ctx->extent_pages[i]) != true) {
    4768           0 :                         spdk_free(ctx->extent_pages);
    4769           0 :                         bs_load_ctx_fail(ctx, -EILSEQ);
    4770           0 :                         return;
    4771             :                 }
    4772             : 
    4773          52 :                 page_num = ctx->extent_page_num[i];
    4774          52 :                 spdk_bit_array_set(ctx->bs->used_md_pages, page_num);
    4775          52 :                 if (bs_load_replay_md_parse_page(ctx, &ctx->extent_pages[i])) {
    4776           0 :                         spdk_free(ctx->extent_pages);
    4777           0 :                         bs_load_ctx_fail(ctx, -EILSEQ);
    4778           0 :                         return;
    4779             :                 }
    4780             :         }
    4781             : 
    4782          52 :         spdk_free(ctx->extent_pages);
    4783          52 :         free(ctx->extent_page_num);
    4784          52 :         ctx->extent_page_num = NULL;
    4785          52 :         ctx->num_extent_pages = 0;
    4786             : 
    4787          52 :         bs_load_replay_md_chain_cpl(ctx);
    4788             : }
    4789             : 
    4790             : static void
    4791          52 : bs_load_replay_extent_pages(struct spdk_bs_load_ctx *ctx)
    4792             : {
    4793             :         spdk_bs_batch_t *batch;
    4794             :         uint32_t page;
    4795             :         uint64_t lba;
    4796             :         uint64_t i;
    4797             : 
    4798          52 :         ctx->extent_pages = spdk_zmalloc(SPDK_BS_PAGE_SIZE * ctx->num_extent_pages, 0,
    4799             :                                          NULL, SPDK_ENV_SOCKET_ID_ANY, SPDK_MALLOC_DMA);
    4800          52 :         if (!ctx->extent_pages) {
    4801           0 :                 bs_load_ctx_fail(ctx, -ENOMEM);
    4802           0 :                 return;
    4803             :         }
    4804             : 
    4805          52 :         batch = bs_sequence_to_batch(ctx->seq, bs_load_replay_extent_page_cpl, ctx);
    4806             : 
    4807         104 :         for (i = 0; i < ctx->num_extent_pages; i++) {
    4808          52 :                 page = ctx->extent_page_num[i];
    4809          52 :                 assert(page < ctx->super->md_len);
    4810          52 :                 lba = bs_md_page_to_lba(ctx->bs, page);
    4811          52 :                 bs_batch_read_dev(batch, &ctx->extent_pages[i], lba,
    4812          52 :                                   bs_byte_to_lba(ctx->bs, SPDK_BS_PAGE_SIZE));
    4813             :         }
    4814             : 
    4815          52 :         bs_batch_close(batch);
    4816             : }
    4817             : 
    4818             : static void
    4819        6754 : bs_load_replay_md_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
    4820             : {
    4821        6754 :         struct spdk_bs_load_ctx *ctx = cb_arg;
    4822             :         uint32_t page_num;
    4823             :         struct spdk_blob_md_page *page;
    4824             : 
    4825        6754 :         if (bserrno != 0) {
    4826           0 :                 bs_load_ctx_fail(ctx, bserrno);
    4827           0 :                 return;
    4828             :         }
    4829             : 
    4830        6754 :         page_num = ctx->cur_page;
    4831        6754 :         page = ctx->page;
    4832        6754 :         if (bs_load_cur_md_page_valid(ctx) == true) {
    4833         198 :                 if (page->sequence_num == 0 || ctx->in_page_chain == true) {
    4834         194 :                         spdk_spin_lock(&ctx->bs->used_lock);
    4835         194 :                         bs_claim_md_page(ctx->bs, page_num);
    4836         194 :                         spdk_spin_unlock(&ctx->bs->used_lock);
    4837         194 :                         if (page->sequence_num == 0) {
    4838         154 :                                 SPDK_NOTICELOG("Recover: blob 0x%" PRIx32 "\n", page_num);
    4839         154 :                                 spdk_bit_array_set(ctx->bs->used_blobids, page_num);
    4840             :                         }
    4841         194 :                         if (bs_load_replay_md_parse_page(ctx, page)) {
    4842           0 :                                 bs_load_ctx_fail(ctx, -EILSEQ);
    4843           0 :                                 return;
    4844             :                         }
    4845         194 :                         if (page->next != SPDK_INVALID_MD_PAGE) {
    4846          40 :                                 ctx->in_page_chain = true;
    4847          40 :                                 ctx->cur_page = page->next;
    4848          40 :                                 bs_load_replay_cur_md_page(ctx);
    4849          40 :                                 return;
    4850             :                         }
    4851         154 :                         if (ctx->num_extent_pages != 0) {
    4852          52 :                                 bs_load_replay_extent_pages(ctx);
    4853          52 :                                 return;
    4854             :                         }
    4855             :                 }
    4856             :         }
    4857        6662 :         bs_load_replay_md_chain_cpl(ctx);
    4858             : }
    4859             : 
    4860             : static void
    4861        6754 : bs_load_replay_cur_md_page(struct spdk_bs_load_ctx *ctx)
    4862             : {
    4863             :         uint64_t lba;
    4864             : 
    4865        6754 :         assert(ctx->cur_page < ctx->super->md_len);
    4866        6754 :         lba = bs_md_page_to_lba(ctx->bs, ctx->cur_page);
    4867        6754 :         bs_sequence_read_dev(ctx->seq, ctx->page, lba,
    4868        6754 :                              bs_byte_to_lba(ctx->bs, SPDK_BS_PAGE_SIZE),
    4869             :                              bs_load_replay_md_cpl, ctx);
    4870        6754 : }
    4871             : 
    4872             : static void
    4873         106 : bs_load_replay_md(struct spdk_bs_load_ctx *ctx)
    4874             : {
    4875         106 :         ctx->page_index = 0;
    4876         106 :         ctx->cur_page = 0;
    4877         106 :         ctx->page = spdk_zmalloc(SPDK_BS_PAGE_SIZE, 0,
    4878             :                                  NULL, SPDK_ENV_SOCKET_ID_ANY, SPDK_MALLOC_DMA);
    4879         106 :         if (!ctx->page) {
    4880           0 :                 bs_load_ctx_fail(ctx, -ENOMEM);
    4881           0 :                 return;
    4882             :         }
    4883         106 :         bs_load_replay_cur_md_page(ctx);
    4884             : }
    4885             : 
    4886             : static void
    4887         106 : bs_recover(struct spdk_bs_load_ctx *ctx)
    4888             : {
    4889             :         int             rc;
    4890             : 
    4891         106 :         SPDK_NOTICELOG("Performing recovery on blobstore\n");
    4892         106 :         rc = spdk_bit_array_resize(&ctx->bs->used_md_pages, ctx->super->md_len);
    4893         106 :         if (rc < 0) {
    4894           0 :                 bs_load_ctx_fail(ctx, -ENOMEM);
    4895           0 :                 return;
    4896             :         }
    4897             : 
    4898         106 :         rc = spdk_bit_array_resize(&ctx->bs->used_blobids, ctx->super->md_len);
    4899         106 :         if (rc < 0) {
    4900           0 :                 bs_load_ctx_fail(ctx, -ENOMEM);
    4901           0 :                 return;
    4902             :         }
    4903             : 
    4904         106 :         rc = spdk_bit_array_resize(&ctx->used_clusters, ctx->bs->total_clusters);
    4905         106 :         if (rc < 0) {
    4906           0 :                 bs_load_ctx_fail(ctx, -ENOMEM);
    4907           0 :                 return;
    4908             :         }
    4909             : 
    4910         106 :         rc = spdk_bit_array_resize(&ctx->bs->open_blobids, ctx->super->md_len);
    4911         106 :         if (rc < 0) {
    4912           0 :                 bs_load_ctx_fail(ctx, -ENOMEM);
    4913           0 :                 return;
    4914             :         }
    4915             : 
    4916         106 :         ctx->bs->num_free_clusters = ctx->bs->total_clusters;
    4917         106 :         bs_load_replay_md(ctx);
    4918             : }
    4919             : 
    4920             : static int
    4921         276 : bs_parse_super(struct spdk_bs_load_ctx *ctx)
    4922             : {
    4923             :         int rc;
    4924             : 
    4925         276 :         if (ctx->super->size == 0) {
    4926           8 :                 ctx->super->size = ctx->bs->dev->blockcnt * ctx->bs->dev->blocklen;
    4927             :         }
    4928             : 
    4929         276 :         if (ctx->super->io_unit_size == 0) {
    4930           8 :                 ctx->super->io_unit_size = SPDK_BS_PAGE_SIZE;
    4931             :         }
    4932             : 
    4933         276 :         ctx->bs->clean = 1;
    4934         276 :         ctx->bs->cluster_sz = ctx->super->cluster_size;
    4935         276 :         ctx->bs->total_clusters = ctx->super->size / ctx->super->cluster_size;
    4936         276 :         ctx->bs->pages_per_cluster = ctx->bs->cluster_sz / SPDK_BS_PAGE_SIZE;
    4937         276 :         if (spdk_u32_is_pow2(ctx->bs->pages_per_cluster)) {
    4938         276 :                 ctx->bs->pages_per_cluster_shift = spdk_u32log2(ctx->bs->pages_per_cluster);
    4939             :         }
    4940         276 :         ctx->bs->io_unit_size = ctx->super->io_unit_size;
    4941         276 :         rc = spdk_bit_array_resize(&ctx->used_clusters, ctx->bs->total_clusters);
    4942         276 :         if (rc < 0) {
    4943           0 :                 return -ENOMEM;
    4944             :         }
    4945         276 :         ctx->bs->md_start = ctx->super->md_start;
    4946         276 :         ctx->bs->md_len = ctx->super->md_len;
    4947         276 :         rc = spdk_bit_array_resize(&ctx->bs->open_blobids, ctx->bs->md_len);
    4948         276 :         if (rc < 0) {
    4949           0 :                 return -ENOMEM;
    4950             :         }
    4951             : 
    4952         552 :         ctx->bs->total_data_clusters = ctx->bs->total_clusters - spdk_divide_round_up(
    4953         276 :                                                ctx->bs->md_start + ctx->bs->md_len, ctx->bs->pages_per_cluster);
    4954         276 :         ctx->bs->super_blob = ctx->super->super_blob;
    4955         276 :         memcpy(&ctx->bs->bstype, &ctx->super->bstype, sizeof(ctx->super->bstype));
    4956             : 
    4957         276 :         return 0;
    4958             : }
    4959             : 
    4960             : static void
    4961         300 : bs_load_super_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
    4962             : {
    4963         300 :         struct spdk_bs_load_ctx *ctx = cb_arg;
    4964             :         int rc;
    4965             : 
    4966         300 :         rc = bs_super_validate(ctx->super, ctx->bs);
    4967         300 :         if (rc != 0) {
    4968          24 :                 bs_load_ctx_fail(ctx, rc);
    4969          24 :                 return;
    4970             :         }
    4971             : 
    4972         276 :         rc = bs_parse_super(ctx);
    4973         276 :         if (rc < 0) {
    4974           0 :                 bs_load_ctx_fail(ctx, rc);
    4975           0 :                 return;
    4976             :         }
    4977             : 
    4978         276 :         if (ctx->super->used_blobid_mask_len == 0 || ctx->super->clean == 0 || ctx->force_recover) {
    4979         106 :                 bs_recover(ctx);
    4980             :         } else {
    4981         170 :                 bs_load_read_used_pages(ctx);
    4982             :         }
    4983             : }
    4984             : 
    4985             : static inline int
    4986         308 : bs_opts_copy(struct spdk_bs_opts *src, struct spdk_bs_opts *dst)
    4987             : {
    4988             : 
    4989         308 :         if (!src->opts_size) {
    4990           0 :                 SPDK_ERRLOG("opts_size should not be zero value\n");
    4991           0 :                 return -1;
    4992             :         }
    4993             : 
    4994             : #define FIELD_OK(field) \
    4995             :         offsetof(struct spdk_bs_opts, field) + sizeof(src->field) <= src->opts_size
    4996             : 
    4997             : #define SET_FIELD(field) \
    4998             :         if (FIELD_OK(field)) { \
    4999             :                 dst->field = src->field; \
    5000             :         } \
    5001             : 
    5002         308 :         SET_FIELD(cluster_sz);
    5003         308 :         SET_FIELD(num_md_pages);
    5004         308 :         SET_FIELD(max_md_ops);
    5005         308 :         SET_FIELD(max_channel_ops);
    5006         308 :         SET_FIELD(clear_method);
    5007             : 
    5008         308 :         if (FIELD_OK(bstype)) {
    5009         308 :                 memcpy(&dst->bstype, &src->bstype, sizeof(dst->bstype));
    5010             :         }
    5011         308 :         SET_FIELD(iter_cb_fn);
    5012         308 :         SET_FIELD(iter_cb_arg);
    5013         308 :         SET_FIELD(force_recover);
    5014         308 :         SET_FIELD(esnap_bs_dev_create);
    5015         308 :         SET_FIELD(esnap_ctx);
    5016             : 
    5017         308 :         dst->opts_size = src->opts_size;
    5018             : 
    5019             :         /* You should not remove this statement, but need to update the assert statement
    5020             :          * if you add a new field, and also add a corresponding SET_FIELD statement */
    5021             :         SPDK_STATIC_ASSERT(sizeof(struct spdk_bs_opts) == 88, "Incorrect size");
    5022             : 
    5023             : #undef FIELD_OK
    5024             : #undef SET_FIELD
    5025             : 
    5026         308 :         return 0;
    5027             : }
    5028             : 
    5029             : void
    5030         312 : spdk_bs_load(struct spdk_bs_dev *dev, struct spdk_bs_opts *o,
    5031             :              spdk_bs_op_with_handle_complete cb_fn, void *cb_arg)
    5032             : {
    5033         312 :         struct spdk_blob_store  *bs;
    5034         312 :         struct spdk_bs_cpl      cpl;
    5035         312 :         struct spdk_bs_load_ctx *ctx;
    5036         312 :         struct spdk_bs_opts     opts = {};
    5037             :         int err;
    5038             : 
    5039         312 :         SPDK_DEBUGLOG(blob, "Loading blobstore from dev %p\n", dev);
    5040             : 
    5041         312 :         if ((SPDK_BS_PAGE_SIZE % dev->blocklen) != 0) {
    5042           4 :                 SPDK_DEBUGLOG(blob, "unsupported dev block length of %d\n", dev->blocklen);
    5043           4 :                 dev->destroy(dev);
    5044           4 :                 cb_fn(cb_arg, NULL, -EINVAL);
    5045           4 :                 return;
    5046             :         }
    5047             : 
    5048         308 :         spdk_bs_opts_init(&opts, sizeof(opts));
    5049         308 :         if (o) {
    5050         122 :                 if (bs_opts_copy(o, &opts)) {
    5051           0 :                         return;
    5052             :                 }
    5053             :         }
    5054             : 
    5055         308 :         if (opts.max_md_ops == 0 || opts.max_channel_ops == 0) {
    5056           8 :                 dev->destroy(dev);
    5057           8 :                 cb_fn(cb_arg, NULL, -EINVAL);
    5058           8 :                 return;
    5059             :         }
    5060             : 
    5061         300 :         err = bs_alloc(dev, &opts, &bs, &ctx);
    5062         300 :         if (err) {
    5063           0 :                 dev->destroy(dev);
    5064           0 :                 cb_fn(cb_arg, NULL, err);
    5065           0 :                 return;
    5066             :         }
    5067             : 
    5068         300 :         cpl.type = SPDK_BS_CPL_TYPE_BS_HANDLE;
    5069         300 :         cpl.u.bs_handle.cb_fn = cb_fn;
    5070         300 :         cpl.u.bs_handle.cb_arg = cb_arg;
    5071         300 :         cpl.u.bs_handle.bs = bs;
    5072             : 
    5073         300 :         ctx->seq = bs_sequence_start_bs(bs->md_channel, &cpl);
    5074         300 :         if (!ctx->seq) {
    5075           0 :                 spdk_free(ctx->super);
    5076           0 :                 free(ctx);
    5077           0 :                 bs_free(bs);
    5078           0 :                 cb_fn(cb_arg, NULL, -ENOMEM);
    5079           0 :                 return;
    5080             :         }
    5081             : 
    5082             :         /* Read the super block */
    5083         300 :         bs_sequence_read_dev(ctx->seq, ctx->super, bs_page_to_lba(bs, 0),
    5084         300 :                              bs_byte_to_lba(bs, sizeof(*ctx->super)),
    5085             :                              bs_load_super_cpl, ctx);
    5086             : }
    5087             : 
    5088             : /* END spdk_bs_load */
    5089             : 
    5090             : /* START spdk_bs_dump */
    5091             : 
    5092             : static void
    5093           0 : bs_dump_finish(spdk_bs_sequence_t *seq, struct spdk_bs_load_ctx *ctx, int bserrno)
    5094             : {
    5095           0 :         spdk_free(ctx->super);
    5096             : 
    5097             :         /*
    5098             :          * We need to defer calling bs_call_cpl() until after
    5099             :          * dev destruction, so tuck these away for later use.
    5100             :          */
    5101           0 :         ctx->bs->unload_err = bserrno;
    5102           0 :         memcpy(&ctx->bs->unload_cpl, &seq->cpl, sizeof(struct spdk_bs_cpl));
    5103           0 :         seq->cpl.type = SPDK_BS_CPL_TYPE_NONE;
    5104             : 
    5105           0 :         bs_sequence_finish(seq, 0);
    5106           0 :         bs_free(ctx->bs);
    5107           0 :         free(ctx);
    5108           0 : }
    5109             : 
    5110             : static void
    5111           0 : bs_dump_print_xattr(struct spdk_bs_load_ctx *ctx, struct spdk_blob_md_descriptor *desc)
    5112             : {
    5113             :         struct spdk_blob_md_descriptor_xattr *desc_xattr;
    5114             :         uint32_t i;
    5115             :         const char *type;
    5116             : 
    5117           0 :         desc_xattr = (struct spdk_blob_md_descriptor_xattr *)desc;
    5118             : 
    5119           0 :         if (desc_xattr->length !=
    5120             :             sizeof(desc_xattr->name_length) + sizeof(desc_xattr->value_length) +
    5121           0 :             desc_xattr->name_length + desc_xattr->value_length) {
    5122             :         }
    5123             : 
    5124           0 :         memcpy(ctx->xattr_name, desc_xattr->name, desc_xattr->name_length);
    5125           0 :         ctx->xattr_name[desc_xattr->name_length] = '\0';
    5126           0 :         if (desc->type == SPDK_MD_DESCRIPTOR_TYPE_XATTR) {
    5127           0 :                 type = "XATTR";
    5128           0 :         } else if (desc->type == SPDK_MD_DESCRIPTOR_TYPE_XATTR_INTERNAL) {
    5129           0 :                 type = "XATTR_INTERNAL";
    5130             :         } else {
    5131           0 :                 assert(false);
    5132             :                 type = "XATTR_?";
    5133             :         }
    5134           0 :         fprintf(ctx->fp, "%s: name = \"%s\"\n", type, ctx->xattr_name);
    5135           0 :         fprintf(ctx->fp, "       value = \"");
    5136           0 :         ctx->print_xattr_fn(ctx->fp, ctx->super->bstype.bstype, ctx->xattr_name,
    5137           0 :                             (void *)((uintptr_t)desc_xattr->name + desc_xattr->name_length),
    5138           0 :                             desc_xattr->value_length);
    5139           0 :         fprintf(ctx->fp, "\"\n");
    5140           0 :         for (i = 0; i < desc_xattr->value_length; i++) {
    5141           0 :                 if (i % 16 == 0) {
    5142           0 :                         fprintf(ctx->fp, "               ");
    5143             :                 }
    5144           0 :                 fprintf(ctx->fp, "%02" PRIx8 " ", *((uint8_t *)desc_xattr->name + desc_xattr->name_length + i));
    5145           0 :                 if ((i + 1) % 16 == 0) {
    5146           0 :                         fprintf(ctx->fp, "\n");
    5147             :                 }
    5148             :         }
    5149           0 :         if (i % 16 != 0) {
    5150           0 :                 fprintf(ctx->fp, "\n");
    5151             :         }
    5152           0 : }
    5153             : 
    5154             : struct type_flag_desc {
    5155             :         uint64_t mask;
    5156             :         uint64_t val;
    5157             :         const char *name;
    5158             : };
    5159             : 
    5160             : static void
    5161           0 : bs_dump_print_type_bits(struct spdk_bs_load_ctx *ctx, uint64_t flags,
    5162             :                         struct type_flag_desc *desc, size_t numflags)
    5163             : {
    5164           0 :         uint64_t covered = 0;
    5165             :         size_t i;
    5166             : 
    5167           0 :         for (i = 0; i < numflags; i++) {
    5168           0 :                 if ((desc[i].mask & flags) != desc[i].val) {
    5169           0 :                         continue;
    5170             :                 }
    5171           0 :                 fprintf(ctx->fp, "\t\t 0x%016" PRIx64 " %s", desc[i].val, desc[i].name);
    5172           0 :                 if (desc[i].mask != desc[i].val) {
    5173           0 :                         fprintf(ctx->fp, " (mask 0x%" PRIx64 " value 0x%" PRIx64 ")",
    5174           0 :                                 desc[i].mask, desc[i].val);
    5175             :                 }
    5176           0 :                 fprintf(ctx->fp, "\n");
    5177           0 :                 covered |= desc[i].mask;
    5178             :         }
    5179           0 :         if ((flags & ~covered) != 0) {
    5180           0 :                 fprintf(ctx->fp, "\t\t 0x%016" PRIx64 " Unknown\n", flags & ~covered);
    5181             :         }
    5182           0 : }
    5183             : 
    5184             : static void
    5185           0 : bs_dump_print_type_flags(struct spdk_bs_load_ctx *ctx, struct spdk_blob_md_descriptor *desc)
    5186             : {
    5187             :         struct spdk_blob_md_descriptor_flags *type_desc;
    5188             : #define ADD_FLAG(f) { f, f, #f }
    5189             : #define ADD_MASK_VAL(m, v) { m, v, #v }
    5190             :         static struct type_flag_desc invalid[] = {
    5191             :                 ADD_FLAG(SPDK_BLOB_THIN_PROV),
    5192             :                 ADD_FLAG(SPDK_BLOB_INTERNAL_XATTR),
    5193             :                 ADD_FLAG(SPDK_BLOB_EXTENT_TABLE),
    5194             :         };
    5195             :         static struct type_flag_desc data_ro[] = {
    5196             :                 ADD_FLAG(SPDK_BLOB_READ_ONLY),
    5197             :         };
    5198             :         static struct type_flag_desc md_ro[] = {
    5199             :                 ADD_MASK_VAL(SPDK_BLOB_MD_RO_FLAGS_MASK, BLOB_CLEAR_WITH_DEFAULT),
    5200             :                 ADD_MASK_VAL(SPDK_BLOB_MD_RO_FLAGS_MASK, BLOB_CLEAR_WITH_NONE),
    5201             :                 ADD_MASK_VAL(SPDK_BLOB_MD_RO_FLAGS_MASK, BLOB_CLEAR_WITH_UNMAP),
    5202             :                 ADD_MASK_VAL(SPDK_BLOB_MD_RO_FLAGS_MASK, BLOB_CLEAR_WITH_WRITE_ZEROES),
    5203             :         };
    5204             : #undef ADD_FLAG
    5205             : #undef ADD_MASK_VAL
    5206             : 
    5207           0 :         type_desc = (struct spdk_blob_md_descriptor_flags *)desc;
    5208           0 :         fprintf(ctx->fp, "Flags:\n");
    5209           0 :         fprintf(ctx->fp, "\tinvalid: 0x%016" PRIx64 "\n", type_desc->invalid_flags);
    5210           0 :         bs_dump_print_type_bits(ctx, type_desc->invalid_flags, invalid,
    5211             :                                 SPDK_COUNTOF(invalid));
    5212           0 :         fprintf(ctx->fp, "\tdata_ro: 0x%016" PRIx64 "\n", type_desc->data_ro_flags);
    5213           0 :         bs_dump_print_type_bits(ctx, type_desc->data_ro_flags, data_ro,
    5214             :                                 SPDK_COUNTOF(data_ro));
    5215           0 :         fprintf(ctx->fp, "\t  md_ro: 0x%016" PRIx64 "\n", type_desc->md_ro_flags);
    5216           0 :         bs_dump_print_type_bits(ctx, type_desc->md_ro_flags, md_ro,
    5217             :                                 SPDK_COUNTOF(md_ro));
    5218           0 : }
    5219             : 
    5220             : static void
    5221           0 : bs_dump_print_extent_table(struct spdk_bs_load_ctx *ctx, struct spdk_blob_md_descriptor *desc)
    5222             : {
    5223             :         struct spdk_blob_md_descriptor_extent_table *et_desc;
    5224             :         uint64_t num_extent_pages;
    5225             :         uint32_t et_idx;
    5226             : 
    5227           0 :         et_desc = (struct spdk_blob_md_descriptor_extent_table *)desc;
    5228           0 :         num_extent_pages = (et_desc->length - sizeof(et_desc->num_clusters)) /
    5229             :                            sizeof(et_desc->extent_page[0]);
    5230             : 
    5231           0 :         fprintf(ctx->fp, "Extent table:\n");
    5232           0 :         for (et_idx = 0; et_idx < num_extent_pages; et_idx++) {
    5233           0 :                 if (et_desc->extent_page[et_idx].page_idx == 0) {
    5234             :                         /* Zeroes represent unallocated extent pages. */
    5235           0 :                         continue;
    5236             :                 }
    5237           0 :                 fprintf(ctx->fp, "\tExtent page: %5" PRIu32 " length %3" PRIu32
    5238             :                         " at LBA %" PRIu64 "\n", et_desc->extent_page[et_idx].page_idx,
    5239             :                         et_desc->extent_page[et_idx].num_pages,
    5240             :                         bs_md_page_to_lba(ctx->bs, et_desc->extent_page[et_idx].page_idx));
    5241             :         }
    5242           0 : }
    5243             : 
    5244             : static void
    5245           0 : bs_dump_print_md_page(struct spdk_bs_load_ctx *ctx)
    5246             : {
    5247           0 :         uint32_t page_idx = ctx->cur_page;
    5248           0 :         struct spdk_blob_md_page *page = ctx->page;
    5249             :         struct spdk_blob_md_descriptor *desc;
    5250           0 :         size_t cur_desc = 0;
    5251             :         uint32_t crc;
    5252             : 
    5253           0 :         fprintf(ctx->fp, "=========\n");
    5254           0 :         fprintf(ctx->fp, "Metadata Page Index: %" PRIu32 " (0x%" PRIx32 ")\n", page_idx, page_idx);
    5255           0 :         fprintf(ctx->fp, "Start LBA: %" PRIu64 "\n", bs_md_page_to_lba(ctx->bs, page_idx));
    5256           0 :         fprintf(ctx->fp, "Blob ID: 0x%" PRIx64 "\n", page->id);
    5257           0 :         fprintf(ctx->fp, "Sequence: %" PRIu32 "\n", page->sequence_num);
    5258           0 :         if (page->next == SPDK_INVALID_MD_PAGE) {
    5259           0 :                 fprintf(ctx->fp, "Next: None\n");
    5260             :         } else {
    5261           0 :                 fprintf(ctx->fp, "Next: %" PRIu32 "\n", page->next);
    5262             :         }
    5263           0 :         fprintf(ctx->fp, "In used bit array%s:", ctx->super->clean ? "" : " (not clean: dubious)");
    5264           0 :         if (spdk_bit_array_get(ctx->bs->used_md_pages, page_idx)) {
    5265           0 :                 fprintf(ctx->fp, " md");
    5266             :         }
    5267           0 :         if (spdk_bit_array_get(ctx->bs->used_blobids, page_idx)) {
    5268           0 :                 fprintf(ctx->fp, " blob");
    5269             :         }
    5270           0 :         fprintf(ctx->fp, "\n");
    5271             : 
    5272           0 :         crc = blob_md_page_calc_crc(page);
    5273           0 :         fprintf(ctx->fp, "CRC: 0x%" PRIx32 " (%s)\n", page->crc, crc == page->crc ? "OK" : "Mismatch");
    5274             : 
    5275           0 :         desc = (struct spdk_blob_md_descriptor *)page->descriptors;
    5276           0 :         while (cur_desc < sizeof(page->descriptors)) {
    5277           0 :                 if (desc->type == SPDK_MD_DESCRIPTOR_TYPE_PADDING) {
    5278           0 :                         if (desc->length == 0) {
    5279             :                                 /* If padding and length are 0, this terminates the page */
    5280           0 :                                 break;
    5281             :                         }
    5282           0 :                 } else if (desc->type == SPDK_MD_DESCRIPTOR_TYPE_EXTENT_RLE) {
    5283             :                         struct spdk_blob_md_descriptor_extent_rle       *desc_extent_rle;
    5284             :                         unsigned int                            i;
    5285             : 
    5286           0 :                         desc_extent_rle = (struct spdk_blob_md_descriptor_extent_rle *)desc;
    5287             : 
    5288           0 :                         for (i = 0; i < desc_extent_rle->length / sizeof(desc_extent_rle->extents[0]); i++) {
    5289           0 :                                 if (desc_extent_rle->extents[i].cluster_idx != 0) {
    5290           0 :                                         fprintf(ctx->fp, "Allocated Extent - Start: %" PRIu32,
    5291             :                                                 desc_extent_rle->extents[i].cluster_idx);
    5292             :                                 } else {
    5293           0 :                                         fprintf(ctx->fp, "Unallocated Extent - ");
    5294             :                                 }
    5295           0 :                                 fprintf(ctx->fp, " Length: %" PRIu32, desc_extent_rle->extents[i].length);
    5296           0 :                                 fprintf(ctx->fp, "\n");
    5297             :                         }
    5298           0 :                 } else if (desc->type == SPDK_MD_DESCRIPTOR_TYPE_EXTENT_PAGE) {
    5299             :                         struct spdk_blob_md_descriptor_extent_page      *desc_extent;
    5300             :                         unsigned int                                    i;
    5301             : 
    5302           0 :                         desc_extent = (struct spdk_blob_md_descriptor_extent_page *)desc;
    5303             : 
    5304           0 :                         for (i = 0; i < desc_extent->length / sizeof(desc_extent->cluster_idx[0]); i++) {
    5305           0 :                                 if (desc_extent->cluster_idx[i] != 0) {
    5306           0 :                                         fprintf(ctx->fp, "Allocated Extent - Start: %" PRIu32,
    5307             :                                                 desc_extent->cluster_idx[i]);
    5308             :                                 } else {
    5309           0 :                                         fprintf(ctx->fp, "Unallocated Extent");
    5310             :                                 }
    5311           0 :                                 fprintf(ctx->fp, "\n");
    5312             :                         }
    5313           0 :                 } else if (desc->type == SPDK_MD_DESCRIPTOR_TYPE_XATTR) {
    5314           0 :                         bs_dump_print_xattr(ctx, desc);
    5315           0 :                 } else if (desc->type == SPDK_MD_DESCRIPTOR_TYPE_XATTR_INTERNAL) {
    5316           0 :                         bs_dump_print_xattr(ctx, desc);
    5317           0 :                 } else if (desc->type == SPDK_MD_DESCRIPTOR_TYPE_FLAGS) {
    5318           0 :                         bs_dump_print_type_flags(ctx, desc);
    5319           0 :                 } else if (desc->type == SPDK_MD_DESCRIPTOR_TYPE_EXTENT_TABLE) {
    5320           0 :                         bs_dump_print_extent_table(ctx, desc);
    5321             :                 } else {
    5322             :                         /* Error */
    5323           0 :                         fprintf(ctx->fp, "Unknown descriptor type %" PRIu8 "\n", desc->type);
    5324             :                 }
    5325             :                 /* Advance to the next descriptor */
    5326           0 :                 cur_desc += sizeof(*desc) + desc->length;
    5327           0 :                 if (cur_desc + sizeof(*desc) > sizeof(page->descriptors)) {
    5328           0 :                         break;
    5329             :                 }
    5330           0 :                 desc = (struct spdk_blob_md_descriptor *)((uintptr_t)page->descriptors + cur_desc);
    5331             :         }
    5332           0 : }
    5333             : 
    5334             : static void
    5335           0 : bs_dump_read_md_page_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
    5336             : {
    5337           0 :         struct spdk_bs_load_ctx *ctx = cb_arg;
    5338             : 
    5339           0 :         if (bserrno != 0) {
    5340           0 :                 bs_dump_finish(seq, ctx, bserrno);
    5341           0 :                 return;
    5342             :         }
    5343             : 
    5344           0 :         if (ctx->page->id != 0) {
    5345           0 :                 bs_dump_print_md_page(ctx);
    5346             :         }
    5347             : 
    5348           0 :         ctx->cur_page++;
    5349             : 
    5350           0 :         if (ctx->cur_page < ctx->super->md_len) {
    5351           0 :                 bs_dump_read_md_page(seq, ctx);
    5352             :         } else {
    5353           0 :                 spdk_free(ctx->page);
    5354           0 :                 bs_dump_finish(seq, ctx, 0);
    5355             :         }
    5356             : }
    5357             : 
    5358             : static void
    5359           0 : bs_dump_read_md_page(spdk_bs_sequence_t *seq, void *cb_arg)
    5360             : {
    5361           0 :         struct spdk_bs_load_ctx *ctx = cb_arg;
    5362             :         uint64_t lba;
    5363             : 
    5364           0 :         assert(ctx->cur_page < ctx->super->md_len);
    5365           0 :         lba = bs_page_to_lba(ctx->bs, ctx->super->md_start + ctx->cur_page);
    5366           0 :         bs_sequence_read_dev(seq, ctx->page, lba,
    5367           0 :                              bs_byte_to_lba(ctx->bs, SPDK_BS_PAGE_SIZE),
    5368             :                              bs_dump_read_md_page_cpl, ctx);
    5369           0 : }
    5370             : 
    5371             : static void
    5372           0 : bs_dump_super_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
    5373             : {
    5374           0 :         struct spdk_bs_load_ctx *ctx = cb_arg;
    5375             :         int rc;
    5376             : 
    5377           0 :         fprintf(ctx->fp, "Signature: \"%.8s\" ", ctx->super->signature);
    5378           0 :         if (memcmp(ctx->super->signature, SPDK_BS_SUPER_BLOCK_SIG,
    5379             :                    sizeof(ctx->super->signature)) != 0) {
    5380           0 :                 fprintf(ctx->fp, "(Mismatch)\n");
    5381           0 :                 bs_dump_finish(seq, ctx, bserrno);
    5382           0 :                 return;
    5383             :         } else {
    5384           0 :                 fprintf(ctx->fp, "(OK)\n");
    5385             :         }
    5386           0 :         fprintf(ctx->fp, "Version: %" PRIu32 "\n", ctx->super->version);
    5387           0 :         fprintf(ctx->fp, "CRC: 0x%x (%s)\n", ctx->super->crc,
    5388           0 :                 (ctx->super->crc == blob_md_page_calc_crc(ctx->super)) ? "OK" : "Mismatch");
    5389           0 :         fprintf(ctx->fp, "Blobstore Type: %.*s\n", SPDK_BLOBSTORE_TYPE_LENGTH, ctx->super->bstype.bstype);
    5390           0 :         fprintf(ctx->fp, "Cluster Size: %" PRIu32 "\n", ctx->super->cluster_size);
    5391           0 :         fprintf(ctx->fp, "Super Blob ID: ");
    5392           0 :         if (ctx->super->super_blob == SPDK_BLOBID_INVALID) {
    5393           0 :                 fprintf(ctx->fp, "(None)\n");
    5394             :         } else {
    5395           0 :                 fprintf(ctx->fp, "0x%" PRIx64 "\n", ctx->super->super_blob);
    5396             :         }
    5397           0 :         fprintf(ctx->fp, "Clean: %" PRIu32 "\n", ctx->super->clean);
    5398           0 :         fprintf(ctx->fp, "Used Metadata Page Mask Start: %" PRIu32 "\n", ctx->super->used_page_mask_start);
    5399           0 :         fprintf(ctx->fp, "Used Metadata Page Mask Length: %" PRIu32 "\n", ctx->super->used_page_mask_len);
    5400           0 :         fprintf(ctx->fp, "Used Cluster Mask Start: %" PRIu32 "\n", ctx->super->used_cluster_mask_start);
    5401           0 :         fprintf(ctx->fp, "Used Cluster Mask Length: %" PRIu32 "\n", ctx->super->used_cluster_mask_len);
    5402           0 :         fprintf(ctx->fp, "Used Blob ID Mask Start: %" PRIu32 "\n", ctx->super->used_blobid_mask_start);
    5403           0 :         fprintf(ctx->fp, "Used Blob ID Mask Length: %" PRIu32 "\n", ctx->super->used_blobid_mask_len);
    5404           0 :         fprintf(ctx->fp, "Metadata Start: %" PRIu32 "\n", ctx->super->md_start);
    5405           0 :         fprintf(ctx->fp, "Metadata Length: %" PRIu32 "\n", ctx->super->md_len);
    5406             : 
    5407           0 :         ctx->cur_page = 0;
    5408           0 :         ctx->page = spdk_zmalloc(SPDK_BS_PAGE_SIZE, 0,
    5409             :                                  NULL, SPDK_ENV_SOCKET_ID_ANY, SPDK_MALLOC_DMA);
    5410           0 :         if (!ctx->page) {
    5411           0 :                 bs_dump_finish(seq, ctx, -ENOMEM);
    5412           0 :                 return;
    5413             :         }
    5414             : 
    5415           0 :         rc = bs_parse_super(ctx);
    5416           0 :         if (rc < 0) {
    5417           0 :                 bs_load_ctx_fail(ctx, rc);
    5418           0 :                 return;
    5419             :         }
    5420             : 
    5421           0 :         bs_load_read_used_pages(ctx);
    5422             : }
    5423             : 
    5424             : void
    5425           0 : spdk_bs_dump(struct spdk_bs_dev *dev, FILE *fp, spdk_bs_dump_print_xattr print_xattr_fn,
    5426             :              spdk_bs_op_complete cb_fn, void *cb_arg)
    5427             : {
    5428           0 :         struct spdk_blob_store  *bs;
    5429           0 :         struct spdk_bs_cpl      cpl;
    5430           0 :         struct spdk_bs_load_ctx *ctx;
    5431           0 :         struct spdk_bs_opts     opts = {};
    5432             :         int err;
    5433             : 
    5434           0 :         SPDK_DEBUGLOG(blob, "Dumping blobstore from dev %p\n", dev);
    5435             : 
    5436           0 :         spdk_bs_opts_init(&opts, sizeof(opts));
    5437             : 
    5438           0 :         err = bs_alloc(dev, &opts, &bs, &ctx);
    5439           0 :         if (err) {
    5440           0 :                 dev->destroy(dev);
    5441           0 :                 cb_fn(cb_arg, err);
    5442           0 :                 return;
    5443             :         }
    5444             : 
    5445           0 :         ctx->dumping = true;
    5446           0 :         ctx->fp = fp;
    5447           0 :         ctx->print_xattr_fn = print_xattr_fn;
    5448             : 
    5449           0 :         cpl.type = SPDK_BS_CPL_TYPE_BS_BASIC;
    5450           0 :         cpl.u.bs_basic.cb_fn = cb_fn;
    5451           0 :         cpl.u.bs_basic.cb_arg = cb_arg;
    5452             : 
    5453           0 :         ctx->seq = bs_sequence_start_bs(bs->md_channel, &cpl);
    5454           0 :         if (!ctx->seq) {
    5455           0 :                 spdk_free(ctx->super);
    5456           0 :                 free(ctx);
    5457           0 :                 bs_free(bs);
    5458           0 :                 cb_fn(cb_arg, -ENOMEM);
    5459           0 :                 return;
    5460             :         }
    5461             : 
    5462             :         /* Read the super block */
    5463           0 :         bs_sequence_read_dev(ctx->seq, ctx->super, bs_page_to_lba(bs, 0),
    5464           0 :                              bs_byte_to_lba(bs, sizeof(*ctx->super)),
    5465             :                              bs_dump_super_cpl, ctx);
    5466             : }
    5467             : 
    5468             : /* END spdk_bs_dump */
    5469             : 
    5470             : /* START spdk_bs_init */
    5471             : 
    5472             : static void
    5473         472 : bs_init_persist_super_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
    5474             : {
    5475         472 :         struct spdk_bs_load_ctx *ctx = cb_arg;
    5476             : 
    5477         472 :         ctx->bs->used_clusters = spdk_bit_pool_create_from_array(ctx->used_clusters);
    5478         472 :         spdk_free(ctx->super);
    5479         472 :         free(ctx);
    5480             : 
    5481         472 :         bs_sequence_finish(seq, bserrno);
    5482         472 : }
    5483             : 
    5484             : static void
    5485         472 : bs_init_trim_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
    5486             : {
    5487         472 :         struct spdk_bs_load_ctx *ctx = cb_arg;
    5488             : 
    5489             :         /* Write super block */
    5490         472 :         bs_sequence_write_dev(seq, ctx->super, bs_page_to_lba(ctx->bs, 0),
    5491         472 :                               bs_byte_to_lba(ctx->bs, sizeof(*ctx->super)),
    5492             :                               bs_init_persist_super_cpl, ctx);
    5493         472 : }
    5494             : 
    5495             : void
    5496         488 : spdk_bs_init(struct spdk_bs_dev *dev, struct spdk_bs_opts *o,
    5497             :              spdk_bs_op_with_handle_complete cb_fn, void *cb_arg)
    5498             : {
    5499         488 :         struct spdk_bs_load_ctx *ctx;
    5500         488 :         struct spdk_blob_store  *bs;
    5501         488 :         struct spdk_bs_cpl      cpl;
    5502             :         spdk_bs_sequence_t      *seq;
    5503             :         spdk_bs_batch_t         *batch;
    5504             :         uint64_t                num_md_lba;
    5505             :         uint64_t                num_md_pages;
    5506             :         uint64_t                num_md_clusters;
    5507             :         uint64_t                max_used_cluster_mask_len;
    5508             :         uint32_t                i;
    5509         488 :         struct spdk_bs_opts     opts = {};
    5510             :         int                     rc;
    5511             :         uint64_t                lba, lba_count;
    5512             : 
    5513         488 :         SPDK_DEBUGLOG(blob, "Initializing blobstore on dev %p\n", dev);
    5514             : 
    5515         488 :         if ((SPDK_BS_PAGE_SIZE % dev->blocklen) != 0) {
    5516           4 :                 SPDK_ERRLOG("unsupported dev block length of %d\n",
    5517             :                             dev->blocklen);
    5518           4 :                 dev->destroy(dev);
    5519           4 :                 cb_fn(cb_arg, NULL, -EINVAL);
    5520           4 :                 return;
    5521             :         }
    5522             : 
    5523         484 :         spdk_bs_opts_init(&opts, sizeof(opts));
    5524         484 :         if (o) {
    5525         182 :                 if (bs_opts_copy(o, &opts)) {
    5526           0 :                         return;
    5527             :                 }
    5528             :         }
    5529             : 
    5530         484 :         if (bs_opts_verify(&opts) != 0) {
    5531           4 :                 dev->destroy(dev);
    5532           4 :                 cb_fn(cb_arg, NULL, -EINVAL);
    5533           4 :                 return;
    5534             :         }
    5535             : 
    5536         480 :         rc = bs_alloc(dev, &opts, &bs, &ctx);
    5537         480 :         if (rc) {
    5538           4 :                 dev->destroy(dev);
    5539           4 :                 cb_fn(cb_arg, NULL, rc);
    5540           4 :                 return;
    5541             :         }
    5542             : 
    5543         476 :         if (opts.num_md_pages == SPDK_BLOB_OPTS_NUM_MD_PAGES) {
    5544             :                 /* By default, allocate 1 page per cluster.
    5545             :                  * Technically, this over-allocates metadata
    5546             :                  * because more metadata will reduce the number
    5547             :                  * of usable clusters. This can be addressed with
    5548             :                  * more complex math in the future.
    5549             :                  */
    5550         468 :                 bs->md_len = bs->total_clusters;
    5551             :         } else {
    5552           8 :                 bs->md_len = opts.num_md_pages;
    5553             :         }
    5554         476 :         rc = spdk_bit_array_resize(&bs->used_md_pages, bs->md_len);
    5555         476 :         if (rc < 0) {
    5556           0 :                 spdk_free(ctx->super);
    5557           0 :                 free(ctx);
    5558           0 :                 bs_free(bs);
    5559           0 :                 cb_fn(cb_arg, NULL, -ENOMEM);
    5560           0 :                 return;
    5561             :         }
    5562             : 
    5563         476 :         rc = spdk_bit_array_resize(&bs->used_blobids, bs->md_len);
    5564         476 :         if (rc < 0) {
    5565           0 :                 spdk_free(ctx->super);
    5566           0 :                 free(ctx);
    5567           0 :                 bs_free(bs);
    5568           0 :                 cb_fn(cb_arg, NULL, -ENOMEM);
    5569           0 :                 return;
    5570             :         }
    5571             : 
    5572         476 :         rc = spdk_bit_array_resize(&bs->open_blobids, bs->md_len);
    5573         476 :         if (rc < 0) {
    5574           0 :                 spdk_free(ctx->super);
    5575           0 :                 free(ctx);
    5576           0 :                 bs_free(bs);
    5577           0 :                 cb_fn(cb_arg, NULL, -ENOMEM);
    5578           0 :                 return;
    5579             :         }
    5580             : 
    5581         476 :         memcpy(ctx->super->signature, SPDK_BS_SUPER_BLOCK_SIG,
    5582             :                sizeof(ctx->super->signature));
    5583         476 :         ctx->super->version = SPDK_BS_VERSION;
    5584         476 :         ctx->super->length = sizeof(*ctx->super);
    5585         476 :         ctx->super->super_blob = bs->super_blob;
    5586         476 :         ctx->super->clean = 0;
    5587         476 :         ctx->super->cluster_size = bs->cluster_sz;
    5588         476 :         ctx->super->io_unit_size = bs->io_unit_size;
    5589         476 :         memcpy(&ctx->super->bstype, &bs->bstype, sizeof(bs->bstype));
    5590             : 
    5591             :         /* Calculate how many pages the metadata consumes at the front
    5592             :          * of the disk.
    5593             :          */
    5594             : 
    5595             :         /* The super block uses 1 page */
    5596         476 :         num_md_pages = 1;
    5597             : 
    5598             :         /* The used_md_pages mask requires 1 bit per metadata page, rounded
    5599             :          * up to the nearest page, plus a header.
    5600             :          */
    5601         476 :         ctx->super->used_page_mask_start = num_md_pages;
    5602         476 :         ctx->super->used_page_mask_len = spdk_divide_round_up(sizeof(struct spdk_bs_md_mask) +
    5603         476 :                                          spdk_divide_round_up(bs->md_len, 8),
    5604             :                                          SPDK_BS_PAGE_SIZE);
    5605         476 :         num_md_pages += ctx->super->used_page_mask_len;
    5606             : 
    5607             :         /* The used_clusters mask requires 1 bit per cluster, rounded
    5608             :          * up to the nearest page, plus a header.
    5609             :          */
    5610         476 :         ctx->super->used_cluster_mask_start = num_md_pages;
    5611         476 :         ctx->super->used_cluster_mask_len = spdk_divide_round_up(sizeof(struct spdk_bs_md_mask) +
    5612         476 :                                             spdk_divide_round_up(bs->total_clusters, 8),
    5613             :                                             SPDK_BS_PAGE_SIZE);
    5614             :         /* The blobstore might be extended, then the used_cluster bitmap will need more space.
    5615             :          * Here we calculate the max clusters we can support according to the
    5616             :          * num_md_pages (bs->md_len).
    5617             :          */
    5618         476 :         max_used_cluster_mask_len = spdk_divide_round_up(sizeof(struct spdk_bs_md_mask) +
    5619         476 :                                     spdk_divide_round_up(bs->md_len, 8),
    5620             :                                     SPDK_BS_PAGE_SIZE);
    5621         476 :         max_used_cluster_mask_len = spdk_max(max_used_cluster_mask_len,
    5622             :                                              ctx->super->used_cluster_mask_len);
    5623         476 :         num_md_pages += max_used_cluster_mask_len;
    5624             : 
    5625             :         /* The used_blobids mask requires 1 bit per metadata page, rounded
    5626             :          * up to the nearest page, plus a header.
    5627             :          */
    5628         476 :         ctx->super->used_blobid_mask_start = num_md_pages;
    5629         476 :         ctx->super->used_blobid_mask_len = spdk_divide_round_up(sizeof(struct spdk_bs_md_mask) +
    5630         476 :                                            spdk_divide_round_up(bs->md_len, 8),
    5631             :                                            SPDK_BS_PAGE_SIZE);
    5632         476 :         num_md_pages += ctx->super->used_blobid_mask_len;
    5633             : 
    5634             :         /* The metadata region size was chosen above */
    5635         476 :         ctx->super->md_start = bs->md_start = num_md_pages;
    5636         476 :         ctx->super->md_len = bs->md_len;
    5637         476 :         num_md_pages += bs->md_len;
    5638             : 
    5639         476 :         num_md_lba = bs_page_to_lba(bs, num_md_pages);
    5640             : 
    5641         476 :         ctx->super->size = dev->blockcnt * dev->blocklen;
    5642             : 
    5643         476 :         ctx->super->crc = blob_md_page_calc_crc(ctx->super);
    5644             : 
    5645         476 :         num_md_clusters = spdk_divide_round_up(num_md_pages, bs->pages_per_cluster);
    5646         476 :         if (num_md_clusters > bs->total_clusters) {
    5647           4 :                 SPDK_ERRLOG("Blobstore metadata cannot use more clusters than is available, "
    5648             :                             "please decrease number of pages reserved for metadata "
    5649             :                             "or increase cluster size.\n");
    5650           4 :                 spdk_free(ctx->super);
    5651           4 :                 spdk_bit_array_free(&ctx->used_clusters);
    5652           4 :                 free(ctx);
    5653           4 :                 bs_free(bs);
    5654           4 :                 cb_fn(cb_arg, NULL, -ENOMEM);
    5655           4 :                 return;
    5656             :         }
    5657             :         /* Claim all of the clusters used by the metadata */
    5658       75700 :         for (i = 0; i < num_md_clusters; i++) {
    5659       75228 :                 spdk_bit_array_set(ctx->used_clusters, i);
    5660             :         }
    5661             : 
    5662         472 :         bs->num_free_clusters -= num_md_clusters;
    5663         472 :         bs->total_data_clusters = bs->num_free_clusters;
    5664             : 
    5665         472 :         cpl.type = SPDK_BS_CPL_TYPE_BS_HANDLE;
    5666         472 :         cpl.u.bs_handle.cb_fn = cb_fn;
    5667         472 :         cpl.u.bs_handle.cb_arg = cb_arg;
    5668         472 :         cpl.u.bs_handle.bs = bs;
    5669             : 
    5670         472 :         seq = bs_sequence_start_bs(bs->md_channel, &cpl);
    5671         472 :         if (!seq) {
    5672           0 :                 spdk_free(ctx->super);
    5673           0 :                 free(ctx);
    5674           0 :                 bs_free(bs);
    5675           0 :                 cb_fn(cb_arg, NULL, -ENOMEM);
    5676           0 :                 return;
    5677             :         }
    5678             : 
    5679         472 :         batch = bs_sequence_to_batch(seq, bs_init_trim_cpl, ctx);
    5680             : 
    5681             :         /* Clear metadata space */
    5682         472 :         bs_batch_write_zeroes_dev(batch, 0, num_md_lba);
    5683             : 
    5684         472 :         lba = num_md_lba;
    5685         472 :         lba_count = ctx->bs->dev->blockcnt - lba;
    5686         472 :         switch (opts.clear_method) {
    5687         456 :         case BS_CLEAR_WITH_UNMAP:
    5688             :                 /* Trim data clusters */
    5689         456 :                 bs_batch_unmap_dev(batch, lba, lba_count);
    5690         456 :                 break;
    5691           0 :         case BS_CLEAR_WITH_WRITE_ZEROES:
    5692             :                 /* Write_zeroes to data clusters */
    5693           0 :                 bs_batch_write_zeroes_dev(batch, lba, lba_count);
    5694           0 :                 break;
    5695          16 :         case BS_CLEAR_WITH_NONE:
    5696             :         default:
    5697          16 :                 break;
    5698             :         }
    5699             : 
    5700         472 :         bs_batch_close(batch);
    5701             : }
    5702             : 
    5703             : /* END spdk_bs_init */
    5704             : 
    5705             : /* START spdk_bs_destroy */
    5706             : 
    5707             : static void
    5708           4 : bs_destroy_trim_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
    5709             : {
    5710           4 :         struct spdk_bs_load_ctx *ctx = cb_arg;
    5711           4 :         struct spdk_blob_store *bs = ctx->bs;
    5712             : 
    5713             :         /*
    5714             :          * We need to defer calling bs_call_cpl() until after
    5715             :          * dev destruction, so tuck these away for later use.
    5716             :          */
    5717           4 :         bs->unload_err = bserrno;
    5718           4 :         memcpy(&bs->unload_cpl, &seq->cpl, sizeof(struct spdk_bs_cpl));
    5719           4 :         seq->cpl.type = SPDK_BS_CPL_TYPE_NONE;
    5720             : 
    5721           4 :         bs_sequence_finish(seq, bserrno);
    5722             : 
    5723           4 :         bs_free(bs);
    5724           4 :         free(ctx);
    5725           4 : }
    5726             : 
    5727             : void
    5728           4 : spdk_bs_destroy(struct spdk_blob_store *bs, spdk_bs_op_complete cb_fn,
    5729             :                 void *cb_arg)
    5730             : {
    5731           4 :         struct spdk_bs_cpl      cpl;
    5732             :         spdk_bs_sequence_t      *seq;
    5733             :         struct spdk_bs_load_ctx *ctx;
    5734             : 
    5735           4 :         SPDK_DEBUGLOG(blob, "Destroying blobstore\n");
    5736             : 
    5737           4 :         if (!RB_EMPTY(&bs->open_blobs)) {
    5738           0 :                 SPDK_ERRLOG("Blobstore still has open blobs\n");
    5739           0 :                 cb_fn(cb_arg, -EBUSY);
    5740           0 :                 return;
    5741             :         }
    5742             : 
    5743           4 :         cpl.type = SPDK_BS_CPL_TYPE_BS_BASIC;
    5744           4 :         cpl.u.bs_basic.cb_fn = cb_fn;
    5745           4 :         cpl.u.bs_basic.cb_arg = cb_arg;
    5746             : 
    5747           4 :         ctx = calloc(1, sizeof(*ctx));
    5748           4 :         if (!ctx) {
    5749           0 :                 cb_fn(cb_arg, -ENOMEM);
    5750           0 :                 return;
    5751             :         }
    5752             : 
    5753           4 :         ctx->bs = bs;
    5754             : 
    5755           4 :         seq = bs_sequence_start_bs(bs->md_channel, &cpl);
    5756           4 :         if (!seq) {
    5757           0 :                 free(ctx);
    5758           0 :                 cb_fn(cb_arg, -ENOMEM);
    5759           0 :                 return;
    5760             :         }
    5761             : 
    5762             :         /* Write zeroes to the super block */
    5763           4 :         bs_sequence_write_zeroes_dev(seq,
    5764             :                                      bs_page_to_lba(bs, 0),
    5765             :                                      bs_byte_to_lba(bs, sizeof(struct spdk_bs_super_block)),
    5766             :                                      bs_destroy_trim_cpl, ctx);
    5767             : }
    5768             : 
    5769             : /* END spdk_bs_destroy */
    5770             : 
    5771             : /* START spdk_bs_unload */
    5772             : 
    5773             : static void
    5774         654 : bs_unload_finish(struct spdk_bs_load_ctx *ctx, int bserrno)
    5775             : {
    5776         654 :         spdk_bs_sequence_t *seq = ctx->seq;
    5777             : 
    5778         654 :         spdk_free(ctx->super);
    5779             : 
    5780             :         /*
    5781             :          * We need to defer calling bs_call_cpl() until after
    5782             :          * dev destruction, so tuck these away for later use.
    5783             :          */
    5784         654 :         ctx->bs->unload_err = bserrno;
    5785         654 :         memcpy(&ctx->bs->unload_cpl, &seq->cpl, sizeof(struct spdk_bs_cpl));
    5786         654 :         seq->cpl.type = SPDK_BS_CPL_TYPE_NONE;
    5787             : 
    5788         654 :         bs_sequence_finish(seq, bserrno);
    5789             : 
    5790         654 :         bs_free(ctx->bs);
    5791         654 :         free(ctx);
    5792         654 : }
    5793             : 
    5794             : static void
    5795         654 : bs_unload_write_super_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
    5796             : {
    5797         654 :         struct spdk_bs_load_ctx *ctx = cb_arg;
    5798             : 
    5799         654 :         bs_unload_finish(ctx, bserrno);
    5800         654 : }
    5801             : 
    5802             : static void
    5803         654 : bs_unload_write_used_clusters_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
    5804             : {
    5805         654 :         struct spdk_bs_load_ctx *ctx = cb_arg;
    5806             : 
    5807         654 :         spdk_free(ctx->mask);
    5808             : 
    5809         654 :         if (bserrno != 0) {
    5810           0 :                 bs_unload_finish(ctx, bserrno);
    5811           0 :                 return;
    5812             :         }
    5813             : 
    5814         654 :         ctx->super->clean = 1;
    5815             : 
    5816         654 :         bs_write_super(seq, ctx->bs, ctx->super, bs_unload_write_super_cpl, ctx);
    5817             : }
    5818             : 
    5819             : static void
    5820         654 : bs_unload_write_used_blobids_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
    5821             : {
    5822         654 :         struct spdk_bs_load_ctx *ctx = cb_arg;
    5823             : 
    5824         654 :         spdk_free(ctx->mask);
    5825         654 :         ctx->mask = NULL;
    5826             : 
    5827         654 :         if (bserrno != 0) {
    5828           0 :                 bs_unload_finish(ctx, bserrno);
    5829           0 :                 return;
    5830             :         }
    5831             : 
    5832         654 :         bs_write_used_clusters(seq, ctx, bs_unload_write_used_clusters_cpl);
    5833             : }
    5834             : 
    5835             : static void
    5836         654 : bs_unload_write_used_pages_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
    5837             : {
                               /*
                                * Completion callback for bs_write_used_md() during unload.
                                * Releases ctx->mask and, on success, continues the chain with
                                * bs_write_used_blobids(); on error the unload is finished with
                                * that error.
                                */
    5838         654 :         struct spdk_bs_load_ctx *ctx = cb_arg;
    5839             : 
    5840         654 :         spdk_free(ctx->mask);
    5841         654 :         ctx->mask = NULL;
    5842             : 
    5843         654 :         if (bserrno != 0) {
    5844           0 :                 bs_unload_finish(ctx, bserrno);
    5845           0 :                 return;
    5846             :         }
    5847             : 
    5848         654 :         bs_write_used_blobids(seq, ctx, bs_unload_write_used_blobids_cpl);
    5849             : }
    5850             : 
    5851             : static void
    5852         654 : bs_unload_read_super_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
    5853             : {
                               /*
                                * Completion callback for the super block read issued by
                                * spdk_bs_unload(). A read error or a super block rejected by
                                * bs_super_validate() ends the unload with that error; otherwise
                                * the write chain is started with bs_write_used_md().
                                */
    5854         654 :         struct spdk_bs_load_ctx *ctx = cb_arg;
    5855             :         int rc;
    5856             : 
    5857         654 :         if (bserrno != 0) {
    5858           0 :                 bs_unload_finish(ctx, bserrno);
    5859           0 :                 return;
    5860             :         }
    5861             : 
    5862         654 :         rc = bs_super_validate(ctx->super, ctx->bs);
    5863         654 :         if (rc != 0) {
    5864           0 :                 bs_unload_finish(ctx, rc);
    5865           0 :                 return;
    5866             :         }
    5867             : 
                               /* cb_arg and ctx are the same pointer here. */
    5868         654 :         bs_write_used_md(seq, cb_arg, bs_unload_write_used_pages_cpl);
    5869             : }
    5870             : 
    5871             : void
    5872         662 : spdk_bs_unload(struct spdk_blob_store *bs, spdk_bs_op_complete cb_fn, void *cb_arg)
    5873             : {
                               /*
                                * Start unloading a blobstore.
                                *
                                * On the normal path this allocates an unload context, re-reads the
                                * super block from page 0 and continues asynchronously in
                                * bs_unload_read_super_cpl(), which drives bs_write_used_md(),
                                * bs_write_used_blobids(), bs_write_used_clusters() and finally
                                * bs_write_super() with the clean flag set.
                                *
                                * bs     - blobstore to unload.
                                * cb_fn  - completion callback. It is called directly from this
                                *          function with -EBUSY (an unload is already deferred, or
                                *          blobs are still open) or -ENOMEM (allocation or sequence
                                *          start failed).
                                * cb_arg - opaque argument passed to cb_fn.
                                */
    5874         662 :         struct spdk_bs_cpl      cpl;
    5875             :         struct spdk_bs_load_ctx *ctx;
    5876             : 
    5877         662 :         SPDK_DEBUGLOG(blob, "Syncing blobstore\n");
    5878             : 
    5879             :         /*
    5880             :          * If external snapshot channels are being destroyed while the blobstore is unloaded, the
    5881             :          * unload is deferred until after the channel destruction completes.
    5882             :          */
    5883         662 :         if (bs->esnap_channels_unloading != 0) {
                                       /* A second unload request while one is already parked is rejected. */
    5884           4 :                 if (bs->esnap_unload_cb_fn != NULL) {
    5885           0 :                         SPDK_ERRLOG("Blobstore unload in progress\n");
    5886           0 :                         cb_fn(cb_arg, -EBUSY);
    5887           0 :                         return;
    5888             :                 }
    5889           4 :                 SPDK_DEBUGLOG(blob_esnap, "Blobstore unload deferred: %" PRIu32
    5890             :                               " esnap clones are unloading\n", bs->esnap_channels_unloading);
                                       /*
                                        * The callback is parked on the blobstore and NOT invoked here.
                                        * Presumably the esnap channel teardown path calls
                                        * spdk_bs_unload() again with the saved cb_fn/cb_arg once the
                                        * counter drops to zero (the asserts below expect exactly
                                        * that) - the re-invocation is not visible in this section.
                                        */
    5891           4 :                 bs->esnap_unload_cb_fn = cb_fn;
    5892           4 :                 bs->esnap_unload_cb_arg = cb_arg;
    5893           4 :                 return;
    5894             :         }
    5895         658 :         if (bs->esnap_unload_cb_fn != NULL) {
                                       /* Resuming a previously deferred unload: clear the parked callback. */
    5896           4 :                 SPDK_DEBUGLOG(blob_esnap, "Blobstore deferred unload progressing\n");
    5897           4 :                 assert(bs->esnap_unload_cb_fn == cb_fn);
    5898           4 :                 assert(bs->esnap_unload_cb_arg == cb_arg);
    5899           4 :                 bs->esnap_unload_cb_fn = NULL;
    5900           4 :                 bs->esnap_unload_cb_arg = NULL;
    5901             :         }
    5902             : 
    5903         658 :         if (!RB_EMPTY(&bs->open_blobs)) {
    5904           4 :                 SPDK_ERRLOG("Blobstore still has open blobs\n");
    5905           4 :                 cb_fn(cb_arg, -EBUSY);
    5906           4 :                 return;
    5907             :         }
    5908             : 
    5909         654 :         ctx = calloc(1, sizeof(*ctx));
    5910         654 :         if (!ctx) {
    5911           0 :                 cb_fn(cb_arg, -ENOMEM);
    5912           0 :                 return;
    5913             :         }
    5914             : 
    5915         654 :         ctx->bs = bs;
    5916             : 
                               /* Zeroed, 0x1000-aligned DMA buffer that receives the on-disk super block. */
    5917         654 :         ctx->super = spdk_zmalloc(sizeof(*ctx->super), 0x1000, NULL,
    5918             :                                   SPDK_ENV_SOCKET_ID_ANY, SPDK_MALLOC_DMA);
    5919         654 :         if (!ctx->super) {
    5920           0 :                 free(ctx);
    5921           0 :                 cb_fn(cb_arg, -ENOMEM);
    5922           0 :                 return;
    5923             :         }
    5924             : 
    5925         654 :         cpl.type = SPDK_BS_CPL_TYPE_BS_BASIC;
    5926         654 :         cpl.u.bs_basic.cb_fn = cb_fn;
    5927         654 :         cpl.u.bs_basic.cb_arg = cb_arg;
    5928             : 
    5929         654 :         ctx->seq = bs_sequence_start_bs(bs->md_channel, &cpl);
    5930         654 :         if (!ctx->seq) {
    5931           0 :                 spdk_free(ctx->super);
    5932           0 :                 free(ctx);
    5933           0 :                 cb_fn(cb_arg, -ENOMEM);
    5934           0 :                 return;
    5935             :         }
    5936             : 
    5937             :         /* Read super block */
    5938         654 :         bs_sequence_read_dev(ctx->seq, ctx->super, bs_page_to_lba(bs, 0),
    5939         654 :                              bs_byte_to_lba(bs, sizeof(*ctx->super)),
    5940             :                              bs_unload_read_super_cpl, ctx);
    5941             : }
    5942             : 
    5943             : /* END spdk_bs_unload */
    5944             : 
    5945             : /* START spdk_bs_set_super */
    5946             : 
    5947             : struct spdk_bs_set_super_ctx {
                               /*
                                * State carried across the read/validate/write steps of
                                * spdk_bs_set_super(). Allocated in spdk_bs_set_super() and freed by
                                * whichever callback ends the operation.
                                */
                               /* Blobstore whose super block is being rewritten. */
    5948             :         struct spdk_blob_store          *bs;
                               /* DMA buffer holding the super block; released with spdk_free(). */
    5949             :         struct spdk_bs_super_block      *super;
    5950             : };
    5951             : 
    5952             : static void
    5953           8 : bs_set_super_write_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
    5954             : {
                               /*
                                * Completion callback for the super block write started by
                                * bs_set_super_read_cpl(). Logs a write failure, releases the super
                                * block buffer, finishes the sequence with the write status and
                                * frees the context.
                                */
    5955           8 :         struct spdk_bs_set_super_ctx    *ctx = cb_arg;
    5956             : 
    5957           8 :         if (bserrno != 0) {
    5958           0 :                 SPDK_ERRLOG("Unable to write to super block of blobstore\n");
    5959             :         }
    5960             : 
    5961           8 :         spdk_free(ctx->super);
    5962             : 
    5963           8 :         bs_sequence_finish(seq, bserrno);
    5964             : 
    5965           8 :         free(ctx);
    5966           8 : }
    5967             : 
    5968             : static void
    5969           8 : bs_set_super_read_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
    5970             : {
                               /*
                                * Completion callback for the super block read issued by
                                * spdk_bs_set_super(). On a read error or a super block rejected by
                                * bs_super_validate() the buffer and context are released and the
                                * sequence is finished with that error. Otherwise the super block
                                * is written back through bs_write_super().
                                */
    5971           8 :         struct spdk_bs_set_super_ctx    *ctx = cb_arg;
    5972             :         int rc;
    5973             : 
    5974           8 :         if (bserrno != 0) {
    5975           0 :                 SPDK_ERRLOG("Unable to read super block of blobstore\n");
    5976           0 :                 spdk_free(ctx->super);
    5977           0 :                 bs_sequence_finish(seq, bserrno);
    5978           0 :                 free(ctx);
    5979           0 :                 return;
    5980             :         }
    5981             : 
    5982           8 :         rc = bs_super_validate(ctx->super, ctx->bs);
    5983           8 :         if (rc != 0) {
    5984           0 :                 SPDK_ERRLOG("Not a valid super block\n");
    5985           0 :                 spdk_free(ctx->super);
    5986           0 :                 bs_sequence_finish(seq, rc);
    5987           0 :                 free(ctx);
    5988           0 :                 return;
    5989             :         }
    5990             : 
                               /*
                                * Presumably bs_write_super() folds the current blobstore state
                                * (including bs->super_blob) into the buffer before writing -
                                * that helper is not visible in this section.
                                */
    5991           8 :         bs_write_super(seq, ctx->bs, ctx->super, bs_set_super_write_cpl, ctx);
    5992             : }
    5993             : 
    5994             : void
    5995           8 : spdk_bs_set_super(struct spdk_blob_store *bs, spdk_blob_id blobid,
    5996             :                   spdk_bs_op_complete cb_fn, void *cb_arg)
    5997             : {
                               /*
                                * Record blobid as the blobstore's super blob.
                                *
                                * Sets bs->super_blob, then reads the super block from page 0;
                                * bs_set_super_read_cpl() validates it and writes it back.
                                *
                                * bs     - blobstore to update.
                                * blobid - id stored in bs->super_blob.
                                * cb_fn  - completion callback; called directly with -ENOMEM when
                                *          an allocation or the sequence start fails.
                                * cb_arg - opaque argument passed to cb_fn.
                                */
    5998           8 :         struct spdk_bs_cpl              cpl;
    5999             :         spdk_bs_sequence_t              *seq;
    6000             :         struct spdk_bs_set_super_ctx    *ctx;
    6001             : 
    6002           8 :         SPDK_DEBUGLOG(blob, "Setting super blob id on blobstore\n");
    6003             : 
    6004           8 :         ctx = calloc(1, sizeof(*ctx));
    6005           8 :         if (!ctx) {
    6006           0 :                 cb_fn(cb_arg, -ENOMEM);
    6007           0 :                 return;
    6008             :         }
    6009             : 
    6010           8 :         ctx->bs = bs;
    6011             : 
    6012           8 :         ctx->super = spdk_zmalloc(sizeof(*ctx->super), 0x1000, NULL,
    6013             :                                   SPDK_ENV_SOCKET_ID_ANY, SPDK_MALLOC_DMA);
    6014           8 :         if (!ctx->super) {
    6015           0 :                 free(ctx);
    6016           0 :                 cb_fn(cb_arg, -ENOMEM);
    6017           0 :                 return;
    6018             :         }
    6019             : 
    6020           8 :         cpl.type = SPDK_BS_CPL_TYPE_BS_BASIC;
    6021           8 :         cpl.u.bs_basic.cb_fn = cb_fn;
    6022           8 :         cpl.u.bs_basic.cb_arg = cb_arg;
    6023             : 
    6024           8 :         seq = bs_sequence_start_bs(bs->md_channel, &cpl);
    6025           8 :         if (!seq) {
    6026           0 :                 spdk_free(ctx->super);
    6027           0 :                 free(ctx);
    6028           0 :                 cb_fn(cb_arg, -ENOMEM);
    6029           0 :                 return;
    6030             :         }
    6031             : 
                               /*
                                * NOTE(review): the in-memory super_blob is changed before the
                                * read/validate/write has succeeded, and neither this function nor
                                * its two callbacks restore the previous value on failure.
                                */
    6032           8 :         bs->super_blob = blobid;
    6033             : 
    6034             :         /* Read super block */
    6035           8 :         bs_sequence_read_dev(seq, ctx->super, bs_page_to_lba(bs, 0),
    6036           8 :                              bs_byte_to_lba(bs, sizeof(*ctx->super)),
    6037             :                              bs_set_super_read_cpl, ctx);
    6038             : }
    6039             : 
    6040             : /* END spdk_bs_set_super */
    6041             : 
    6042             : void
    6043          12 : spdk_bs_get_super(struct spdk_blob_store *bs,
    6044             :                   spdk_blob_op_with_id_complete cb_fn, void *cb_arg)
    6045             : {
                               /*
                                * Report the super blob id from bs->super_blob. cb_fn is called
                                * before this function returns: with (SPDK_BLOBID_INVALID, -ENOENT)
                                * when no super blob is set, otherwise with (id, 0).
                                */
    6046          12 :         if (bs->super_blob == SPDK_BLOBID_INVALID) {
    6047           4 :                 cb_fn(cb_arg, SPDK_BLOBID_INVALID, -ENOENT);
    6048             :         } else {
    6049           8 :                 cb_fn(cb_arg, bs->super_blob, 0);
    6050             :         }
    6051          12 : }
    6052             : 
    6053             : uint64_t
    6054         132 : spdk_bs_get_cluster_size(struct spdk_blob_store *bs)
    6055             : {
                               /* Returns the blobstore's cluster_sz field. */
    6056         132 :         return bs->cluster_sz;
    6057             : }
    6058             : 
    6059             : uint64_t
    6060          68 : spdk_bs_get_page_size(struct spdk_blob_store *bs)
    6061             : {
                               /* Returns the constant SPDK_BS_PAGE_SIZE; bs is not consulted. */
    6062          68 :         return SPDK_BS_PAGE_SIZE;
    6063             : }
    6064             : 
    6065             : uint64_t
    6066         738 : spdk_bs_get_io_unit_size(struct spdk_blob_store *bs)
    6067             : {
                               /* Returns the blobstore's io_unit_size field. */
    6068         738 :         return bs->io_unit_size;
    6069             : }
    6070             : 
    6071             : uint64_t
    6072         540 : spdk_bs_free_cluster_count(struct spdk_blob_store *bs)
    6073             : {
                               /* Returns the blobstore's num_free_clusters counter as currently held in memory. */
    6074         540 :         return bs->num_free_clusters;
    6075             : }
    6076             : 
    6077             : uint64_t
    6078          92 : spdk_bs_total_data_cluster_count(struct spdk_blob_store *bs)
    6079             : {
                               /* Returns the blobstore's total_data_clusters field. */
    6080          92 :         return bs->total_data_clusters;
    6081             : }
    6082             : 
    6083             : static int
    6084         780 : bs_register_md_thread(struct spdk_blob_store *bs)
    6085             : {
                               /*
                                * Obtain an I/O channel for bs via spdk_get_io_channel() and store
                                * it in bs->md_channel. Returns 0 on success, -1 (after logging)
                                * when no channel could be obtained.
                                */
    6086         780 :         bs->md_channel = spdk_get_io_channel(bs);
    6087         780 :         if (!bs->md_channel) {
    6088           0 :                 SPDK_ERRLOG("Failed to get IO channel.\n");
    6089           0 :                 return -1;
    6090             :         }
    6091             : 
    6092         780 :         return 0;
    6093             : }
    6094             : 
    6095             : static int
    6096         780 : bs_unregister_md_thread(struct spdk_blob_store *bs)
    6097             : {
                               /*
                                * Release the channel stored in bs->md_channel. Always returns 0;
                                * bs->md_channel itself is left unchanged by this function.
                                */
    6098         780 :         spdk_put_io_channel(bs->md_channel);
    6099             : 
    6100         780 :         return 0;
    6101             : }
    6102             : 
    6103             : spdk_blob_id
    6104         562 : spdk_blob_get_id(struct spdk_blob *blob)
    6105             : {
                               /* Returns blob->id. blob must not be NULL (checked by assert only). */
    6106         562 :         assert(blob != NULL);
    6107             : 
    6108         562 :         return blob->id;
    6109             : }
    6110             : 
    6111             : uint64_t
    6112          24 : spdk_blob_get_num_pages(struct spdk_blob *blob)
    6113             : {
                               /*
                                * Returns the blob's size in pages, obtained by converting
                                * blob->active.num_clusters with bs_cluster_to_page(). blob must
                                * not be NULL (checked by assert only).
                                */
    6114          24 :         assert(blob != NULL);
    6115             : 
    6116          24 :         return bs_cluster_to_page(blob->bs, blob->active.num_clusters);
    6117             : }
    6118             : 
    6119             : uint64_t
    6120          24 : spdk_blob_get_num_io_units(struct spdk_blob *blob)
    6121             : {
                               /*
                                * Returns the blob's size in io units: the page count from
                                * spdk_blob_get_num_pages() multiplied by bs_io_unit_per_page().
                                * blob must not be NULL (checked by assert only).
                                */
    6122          24 :         assert(blob != NULL);
    6123             : 
    6124          24 :         return spdk_blob_get_num_pages(blob) * bs_io_unit_per_page(blob->bs);
    6125             : }
    6126             : 
    6127             : uint64_t
    6128         569 : spdk_blob_get_num_clusters(struct spdk_blob *blob)
    6129             : {
                               /* Returns blob->active.num_clusters. blob must not be NULL (checked by assert only). */
    6130         569 :         assert(blob != NULL);
    6131             : 
    6132         569 :         return blob->active.num_clusters;
    6133             : }
    6134             : 
    6135             : uint64_t
    6136         330 : spdk_blob_get_num_allocated_clusters(struct spdk_blob *blob)
    6137             : {
                               /*
                                * Returns blob->active.num_allocated_clusters. blob must not be
                                * NULL (checked by assert only).
                                */
    6138         330 :         assert(blob != NULL);
    6139             : 
    6140         330 :         return blob->active.num_allocated_clusters;
    6141             : }
    6142             : 
    6143             : static uint64_t
    6144          24 : blob_find_io_unit(struct spdk_blob *blob, uint64_t offset, bool is_allocated)
    6145             : {
                               /*
                                * Search forward from io unit `offset` for the first probed io unit
                                * whose bs_io_unit_is_allocated() result equals `is_allocated`.
                                *
                                * Returns that io unit index, or UINT64_MAX when the end of the
                                * blob (spdk_blob_get_num_io_units()) is reached without a match.
                                */
    6146          24 :         uint64_t blob_io_unit_num = spdk_blob_get_num_io_units(blob);
    6147             : 
    6148          44 :         while (offset < blob_io_unit_num) {
    6149          40 :                 if (bs_io_unit_is_allocated(blob, offset) == is_allocated) {
    6150          20 :                         return offset;
    6151             :                 }
    6152             : 
                                       /*
                                        * Skip ahead by the helper's result rather than one io unit
                                        * at a time. Presumably this lands on the next cluster
                                        * boundary, since allocation is tracked per cluster - the
                                        * helper is not visible in this section.
                                        */
    6153          20 :                 offset += bs_num_io_units_to_cluster_boundary(blob, offset);
    6154             :         }
    6155             : 
    6156           4 :         return UINT64_MAX;
    6157             : }
    6158             : 
    6159             : uint64_t
    6160          12 : spdk_blob_get_next_allocated_io_unit(struct spdk_blob *blob, uint64_t offset)
    6161             : {
                               /*
                                * Returns the first allocated io unit found at or after offset, or
                                * UINT64_MAX if blob_find_io_unit() finds none.
                                */
    6162          12 :         return blob_find_io_unit(blob, offset, true);
    6163             : }
    6164             : 
    6165             : uint64_t
    6166          12 : spdk_blob_get_next_unallocated_io_unit(struct spdk_blob *blob, uint64_t offset)
    6167             : {
                               /*
                                * Returns the first unallocated io unit found at or after offset,
                                * or UINT64_MAX if blob_find_io_unit() finds none.
                                */
    6168          12 :         return blob_find_io_unit(blob, offset, false);
    6169             : }
    6170             : 
    6171             : /* START spdk_bs_create_blob */
    6172             : 
    6173             : static void
    6174        1878 : bs_create_blob_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
    6175             : {
                               /*
                                * Completion callback for the blob_persist() call issued by
                                * bs_create_blob(). cb_arg is the newly created blob.
                                *
                                * If persisting failed, the blob id bit and metadata page claimed
                                * in bs_create_blob() are given back under used_lock. The in-memory
                                * blob is freed in both cases, then the sequence is finished with
                                * bserrno.
                                */
    6176        1878 :         struct spdk_blob *blob = cb_arg;
    6177        1878 :         uint32_t page_idx = bs_blobid_to_page(blob->id);
    6178             : 
    6179        1878 :         if (bserrno != 0) {
    6180           0 :                 spdk_spin_lock(&blob->bs->used_lock);
    6181           0 :                 spdk_bit_array_clear(blob->bs->used_blobids, page_idx);
    6182           0 :                 bs_release_md_page(blob->bs, page_idx);
    6183           0 :                 spdk_spin_unlock(&blob->bs->used_lock);
    6184             :         }
    6185             : 
    6186        1878 :         blob_free(blob);
    6187             : 
    6188        1878 :         bs_sequence_finish(seq, bserrno);
    6189        1878 : }
    6190             : 
    6191             : static int
    6192        3776 : blob_set_xattrs(struct spdk_blob *blob, const struct spdk_blob_xattr_opts *xattrs,
    6193             :                 bool internal)
    6194             : {
                               /*
                                * Apply every xattr described by `xattrs` to `blob`.
                                *
                                * For each of the xattrs->count names, the value is fetched through
                                * xattrs->get_value() and stored with blob_set_xattr(); `internal`
                                * is passed through to blob_set_xattr() unchanged.
                                *
                                * Returns 0 on success, -EINVAL when count > 0 but get_value is
                                * NULL or when a fetched value is NULL/empty, or the negative
                                * error from blob_set_xattr(). Xattrs set before a failure are not
                                * rolled back here.
                                */
    6195             :         uint64_t i;
    6196        3776 :         size_t value_len = 0;
    6197             :         int rc;
    6198        3776 :         const void *value = NULL;
    6199        3776 :         if (xattrs->count > 0 && xattrs->get_value == NULL) {
    6200           8 :                 return -EINVAL;
    6201             :         }
    6202        4084 :         for (i = 0; i < xattrs->count; i++) {
                                       /*
                                        * NOTE(review): value/value_len are not reset between
                                        * iterations, so a get_value() that leaves them untouched
                                        * would reuse the previous xattr's value - confirm callbacks
                                        * always write both outputs.
                                        */
    6203         320 :                 xattrs->get_value(xattrs->ctx, xattrs->names[i], &value, &value_len);
    6204         320 :                 if (value == NULL || value_len == 0) {
    6205           4 :                         return -EINVAL;
    6206             :                 }
    6207         316 :                 rc = blob_set_xattr(blob, xattrs->names[i], value, value_len, internal);
    6208         316 :                 if (rc < 0) {
    6209           0 :                         return rc;
    6210             :                 }
    6211             :         }
    6212        3764 :         return 0;
    6213             : }
    6214             : 
    6215             : static void
    6216        1862 : blob_opts_copy(const struct spdk_blob_opts *src, struct spdk_blob_opts *dst)
    6217             : {
                               /*
                                * Copy caller-supplied options into dst, field by field.
                                *
                                * A field is copied only if it lies entirely within the first
                                * src->opts_size bytes of the structure (FIELD_OK); fields beyond
                                * that keep whatever value dst already holds. dst->opts_size is
                                * always overwritten with src->opts_size.
                                *
                                * FIELD_OK expands to an unparenthesized comparison, so it is only
                                * safe as the entire condition of an `if`, which is how it is used
                                * below.
                                */
    6218             : #define FIELD_OK(field) \
    6219             :         offsetof(struct spdk_blob_opts, field) + sizeof(src->field) <= src->opts_size
    6220             : 
    6221             : #define SET_FIELD(field) \
    6222             :         if (FIELD_OK(field)) { \
    6223             :                 dst->field = src->field; \
    6224             :         } \
    6225             : 
    6226        1862 :         SET_FIELD(num_clusters);
    6227        1862 :         SET_FIELD(thin_provision);
    6228        1862 :         SET_FIELD(clear_method);
    6229             : 
                               /* xattrs is copied as a whole with memcpy instead of through SET_FIELD. */
    6230        1862 :         if (FIELD_OK(xattrs)) {
    6231        1862 :                 memcpy(&dst->xattrs, &src->xattrs, sizeof(src->xattrs));
    6232             :         }
    6233             : 
    6234        1862 :         SET_FIELD(use_extent_table);
    6235        1862 :         SET_FIELD(esnap_id);
    6236        1862 :         SET_FIELD(esnap_id_len);
    6237             : 
    6238        1862 :         dst->opts_size = src->opts_size;
    6239             : 
    6240             :         /* You should not remove this statement, but need to update the assert statement
    6241             :          * if you add a new field, and also add a corresponding SET_FIELD statement */
    6242             :         SPDK_STATIC_ASSERT(sizeof(struct spdk_blob_opts) == 80, "Incorrect size");
    6243             : 
    6244             : #undef FIELD_OK
    6245             : #undef SET_FIELD
    6246        1862 : }
    6247             : 
    6248             : static void
    6249        1894 : bs_create_blob(struct spdk_blob_store *bs,
    6250             :                const struct spdk_blob_opts *opts,
    6251             :                const struct spdk_blob_xattr_opts *internal_xattrs,
    6252             :                spdk_blob_op_with_id_complete cb_fn, void *cb_arg)
    6253             : {
                               /*
                                * Create a new blob and persist its metadata.
                                *
                                * Must run on bs->md_thread (asserted). Steps: claim the first free
                                * metadata page and the matching blob id bit, build the in-memory
                                * blob from the options, resize it to opts num_clusters, then
                                * persist it through a new sequence; bs_create_blob_cpl() completes
                                * the operation.
                                *
                                * bs              - blobstore to create the blob in.
                                * opts            - creation options, or NULL for the defaults from
                                *                   spdk_blob_opts_init().
                                * internal_xattrs - xattrs set with internal = true, or NULL for
                                *                   the defaults from blob_xattrs_init().
                                * cb_fn/cb_arg    - completion callback. On every failure in this
                                *                   function it is called directly with blob id 0
                                *                   and a negative errno.
                                */
    6254             :         struct spdk_blob        *blob;
    6255             :         uint32_t                page_idx;
    6256        1894 :         struct spdk_bs_cpl      cpl;
    6257        1894 :         struct spdk_blob_opts   opts_local;
    6258        1894 :         struct spdk_blob_xattr_opts internal_xattrs_default;
    6259             :         spdk_bs_sequence_t      *seq;
    6260             :         spdk_blob_id            id;
    6261             :         int rc;
    6262             : 
    6263        1894 :         assert(spdk_get_thread() == bs->md_thread);
    6264             : 
                               /* Find and claim the metadata page under used_lock so the pair stays consistent. */
    6265        1894 :         spdk_spin_lock(&bs->used_lock);
    6266        1894 :         page_idx = spdk_bit_array_find_first_clear(bs->used_md_pages, 0);
    6267        1894 :         if (page_idx == UINT32_MAX) {
    6268           0 :                 spdk_spin_unlock(&bs->used_lock);
    6269           0 :                 cb_fn(cb_arg, 0, -ENOMEM);
    6270           0 :                 return;
    6271             :         }
    6272        1894 :         spdk_bit_array_set(bs->used_blobids, page_idx);
    6273        1894 :         bs_claim_md_page(bs, page_idx);
    6274        1894 :         spdk_spin_unlock(&bs->used_lock);
    6275             : 
    6276        1894 :         id = bs_page_to_blobid(page_idx);
    6277             : 
    6278        1894 :         SPDK_DEBUGLOG(blob, "Creating blob with id 0x%" PRIx64 " at page %u\n", id, page_idx);
    6279             : 
                               /* Start from defaults so fields the caller's opts_size does not cover are defined. */
    6280        1894 :         spdk_blob_opts_init(&opts_local, sizeof(opts_local));
    6281        1894 :         if (opts) {
    6282        1862 :                 blob_opts_copy(opts, &opts_local);
    6283             :         }
    6284             : 
    6285        1894 :         blob = blob_alloc(bs, id);
    6286        1894 :         if (!blob) {
    6287           0 :                 rc = -ENOMEM;
    6288           0 :                 goto error;
    6289             :         }
    6290             : 
    6291        1894 :         blob->use_extent_table = opts_local.use_extent_table;
    6292        1894 :         if (blob->use_extent_table) {
    6293         968 :                 blob->invalid_flags |= SPDK_BLOB_EXTENT_TABLE;
    6294             :         }
    6295             : 
    6296        1894 :         if (!internal_xattrs) {
    6297        1622 :                 blob_xattrs_init(&internal_xattrs_default);
    6298        1622 :                 internal_xattrs = &internal_xattrs_default;
    6299             :         }
    6300             : 
                               /* User-visible xattrs first, then the internal ones. */
    6301        1894 :         rc = blob_set_xattrs(blob, &opts_local.xattrs, false);
    6302        1894 :         if (rc < 0) {
    6303          12 :                 goto error;
    6304             :         }
    6305             : 
    6306        1882 :         rc = blob_set_xattrs(blob, internal_xattrs, true);
    6307        1882 :         if (rc < 0) {
    6308           0 :                 goto error;
    6309             :         }
    6310             : 
    6311        1882 :         if (opts_local.thin_provision) {
    6312         356 :                 blob_set_thin_provision(blob);
    6313             :         }
    6314             : 
    6315        1882 :         blob_set_clear_method(blob, opts_local.clear_method);
    6316             : 
                               /*
                                * An external snapshot id makes the blob thin provisioned, flags it
                                * with SPDK_BLOB_EXTERNAL_SNAPSHOT and stores the id as an internal
                                * xattr. The id length must fit in 16 bits.
                                */
    6317        1882 :         if (opts_local.esnap_id != NULL) {
    6318          60 :                 if (opts_local.esnap_id_len > UINT16_MAX) {
    6319           0 :                         SPDK_ERRLOG("esnap id length %" PRIu64 " is too long\n",
    6320             :                                     opts_local.esnap_id_len);
    6321           0 :                         rc = -EINVAL;
    6322           0 :                         goto error;
    6323             : 
    6324             :                 }
    6325          60 :                 blob_set_thin_provision(blob);
    6326          60 :                 blob->invalid_flags |= SPDK_BLOB_EXTERNAL_SNAPSHOT;
    6327          60 :                 rc = blob_set_xattr(blob, BLOB_EXTERNAL_SNAPSHOT_ID,
    6328          60 :                                     opts_local.esnap_id, opts_local.esnap_id_len, true);
    6329          60 :                 if (rc != 0) {
    6330           0 :                         goto error;
    6331             :                 }
    6332             :         }
    6333             : 
    6334        1882 :         rc = blob_resize(blob, opts_local.num_clusters);
    6335        1882 :         if (rc < 0) {
    6336           4 :                 goto error;
    6337             :         }
    6338        1878 :         cpl.type = SPDK_BS_CPL_TYPE_BLOBID;
    6339        1878 :         cpl.u.blobid.cb_fn = cb_fn;
    6340        1878 :         cpl.u.blobid.cb_arg = cb_arg;
    6341        1878 :         cpl.u.blobid.blobid = blob->id;
    6342             : 
    6343        1878 :         seq = bs_sequence_start_bs(bs->md_channel, &cpl);
    6344        1878 :         if (!seq) {
    6345           0 :                 rc = -ENOMEM;
    6346           0 :                 goto error;
    6347             :         }
    6348             : 
                               /* From here on bs_create_blob_cpl() owns the blob and the claimed page. */
    6349        1878 :         blob_persist(seq, blob, bs_create_blob_cpl, blob);
    6350        1878 :         return;
    6351             : 
    6352          16 : error:
                               /* num_clusters is 64-bit; PRIu64 keeps the format portable where long is 32-bit. */
    6353          16 :         SPDK_ERRLOG("Failed to create blob: %s, size in clusters/size: %" PRIu64 " (clusters)\n",
    6354             :                     spdk_strerror(rc), opts_local.num_clusters);
                               /* blob is NULL only when blob_alloc() itself failed. */
    6355          16 :         if (blob != NULL) {
    6356          16 :                 blob_free(blob);
    6357             :         }
                               /* Give back the blob id bit and metadata page claimed at the top. */
    6358          16 :         spdk_spin_lock(&bs->used_lock);
    6359          16 :         spdk_bit_array_clear(bs->used_blobids, page_idx);
    6360          16 :         bs_release_md_page(bs, page_idx);
    6361          16 :         spdk_spin_unlock(&bs->used_lock);
    6362          16 :         cb_fn(cb_arg, 0, rc);
    6363             : }
    6364             : 
    6365             : void
    6366          16 : spdk_bs_create_blob(struct spdk_blob_store *bs,
    6367             :                     spdk_blob_op_with_id_complete cb_fn, void *cb_arg)
    6368             : {
                               /* Create a blob with default options and no internal xattrs; see bs_create_blob(). */
    6369          16 :         bs_create_blob(bs, NULL, NULL, cb_fn, cb_arg);
    6370          16 : }
    6371             : 
    6372             : void
    6373        1598 : spdk_bs_create_blob_ext(struct spdk_blob_store *bs, const struct spdk_blob_opts *opts,
    6374             :                         spdk_blob_op_with_id_complete cb_fn, void *cb_arg)
    6375             : {
                               /*
                                * Create a blob using caller-supplied options (opts may be NULL)
                                * and no internal xattrs; see bs_create_blob().
                                */
    6376        1598 :         bs_create_blob(bs, opts, NULL, cb_fn, cb_arg);
    6377        1598 : }
    6378             : 
    6379             : /* END spdk_bs_create_blob */
    6380             : 
    6381             : /* START blob_cleanup */
    6382             : 
    6383             : struct spdk_clone_snapshot_ctx {
                               /*
                                * Shared context for the snapshot/clone/inflate style operations in
                                * this section. It is freed in bs_clone_snapshot_cleanup_finish().
                                */
                               /* User completion; dispatched by type in bs_clone_snapshot_cleanup_finish(). */
    6384             :         struct spdk_bs_cpl      cpl;
                               /* First error recorded for the operation; later errors are only logged. */
    6385             :         int bserrno;
                               /* When true, cleanup unfreezes I/O on the original blob before closing it. */
    6386             :         bool frozen;
    6387             : 
    6388             :         struct spdk_io_channel *channel;
    6389             : 
    6390             :         /* Current cluster for inflate operation */
    6391             :         uint64_t cluster;
    6392             : 
    6393             :         /* For inflation force allocation of all unallocated clusters and remove
    6394             :          * thin-provisioning. Otherwise only decouple parent and keep clone thin. */
    6395             :         bool allocate_all;
    6396             : 
    6397             :         struct {
    6398             :                 spdk_blob_id id;
    6399             :                 struct spdk_blob *blob;
                                       /* Saved md_ro of the original blob; restored in bs_snapshot_unfreeze_cpl(). */
    6400             :                 bool md_ro;
    6401             :         } original;
    6402             :         struct {
    6403             :                 spdk_blob_id id;
    6404             :                 struct spdk_blob *blob;
    6405             :         } new;
    6406             : 
    6407             :         /* xattrs specified for snapshot/clones only. They have no impact on
    6408             :          * the original blobs xattrs. */
    6409             :         const struct spdk_blob_xattr_opts *xattrs;
    6410             : };
    6411             : 
    6412             : static void
    6413         346 : bs_clone_snapshot_cleanup_finish(void *cb_arg, int bserrno)
    6414             : {
                               /*
                                * Last step of a clone/snapshot operation: merge the final status
                                * into the context, invoke the user's completion and free the
                                * context.
                                *
                                * cb_arg is the struct spdk_clone_snapshot_ctx. The user callback
                                * always receives ctx->bserrno, i.e. the first error seen.
                                */
    6415         346 :         struct spdk_clone_snapshot_ctx *ctx = cb_arg;
    6416         346 :         struct spdk_bs_cpl *cpl = &ctx->cpl;
    6417             : 
                               /* Keep the earliest error; an error that arrives later is only logged. */
    6418         346 :         if (bserrno != 0) {
    6419           6 :                 if (ctx->bserrno != 0) {
    6420           0 :                         SPDK_ERRLOG("Cleanup error %d\n", bserrno);
    6421             :                 } else {
    6422           6 :                         ctx->bserrno = bserrno;
    6423             :                 }
    6424             :         }
    6425             : 
                               /* Only the two completion types below are expected for this context. */
    6426         346 :         switch (cpl->type) {
    6427         282 :         case SPDK_BS_CPL_TYPE_BLOBID:
    6428         282 :                 cpl->u.blobid.cb_fn(cpl->u.blobid.cb_arg, cpl->u.blobid.blobid, ctx->bserrno);
    6429         282 :                 break;
    6430          64 :         case SPDK_BS_CPL_TYPE_BLOB_BASIC:
    6431          64 :                 cpl->u.blob_basic.cb_fn(cpl->u.blob_basic.cb_arg, ctx->bserrno);
    6432          64 :                 break;
    6433           0 :         default:
    6434           0 :                 SPDK_UNREACHABLE();
    6435             :                 break;
    6436             :         }
    6437             : 
    6438         346 :         free(ctx);
    6439         346 : }
    6440             : 
    6441             : static void
    6442         332 : bs_snapshot_unfreeze_cpl(void *cb_arg, int bserrno)
    6443             : {
                               /*
                                * Runs after I/O on the original blob has been unfrozen, or is
                                * called directly by bs_clone_snapshot_origblob_cleanup() when the
                                * blob was never frozen. Records any error, clears the blob's
                                * locked-operation flag, restores its md_ro state and closes it;
                                * bs_clone_snapshot_cleanup_finish() then completes the operation.
                                */
    6444         332 :         struct spdk_clone_snapshot_ctx *ctx = (struct spdk_clone_snapshot_ctx *)cb_arg;
    6445         332 :         struct spdk_blob *origblob = ctx->original.blob;
    6446             : 
                               /* Keep the earliest error; an error that arrives later is only logged. */
    6447         332 :         if (bserrno != 0) {
    6448           0 :                 if (ctx->bserrno != 0) {
    6449           0 :                         SPDK_ERRLOG("Unfreeze error %d\n", bserrno);
    6450             :                 } else {
    6451           0 :                         ctx->bserrno = bserrno;
    6452             :                 }
    6453             :         }
    6454             : 
    6455         332 :         ctx->original.id = origblob->id;
    6456         332 :         origblob->locked_operation_in_progress = false;
    6457             : 
    6458             :         /* Revert md_ro to original state */
    6459         332 :         origblob->md_ro = ctx->original.md_ro;
    6460             : 
    6461         332 :         spdk_blob_close(origblob, bs_clone_snapshot_cleanup_finish, ctx);
    6462         332 : }
    6463             : 
    6464             : static void
    6465         332 : bs_clone_snapshot_origblob_cleanup(void *cb_arg, int bserrno)
    6466             : {
    6467         332 :         struct spdk_clone_snapshot_ctx *ctx = (struct spdk_clone_snapshot_ctx *)cb_arg;
    6468         332 :         struct spdk_blob *origblob = ctx->original.blob;
    6469             : 
    6470         332 :         if (bserrno != 0) {
    6471          24 :                 if (ctx->bserrno != 0) {
    6472           4 :                         SPDK_ERRLOG("Cleanup error %d\n", bserrno);
    6473             :                 } else {
    6474          20 :                         ctx->bserrno = bserrno;
    6475             :                 }
    6476             :         }
    6477             : 
    6478         332 :         if (ctx->frozen) {
    6479             :                 /* Unfreeze any outstanding I/O */
    6480         212 :                 blob_unfreeze_io(origblob, bs_snapshot_unfreeze_cpl, ctx);
    6481             :         } else {
    6482         120 :                 bs_snapshot_unfreeze_cpl(ctx, 0);
    6483             :         }
    6484             : 
    6485         332 : }
    6486             : 
    6487             : static void
    6488           4 : bs_clone_snapshot_newblob_cleanup(struct spdk_clone_snapshot_ctx *ctx, int bserrno)
    6489             : {
    6490           4 :         struct spdk_blob *newblob = ctx->new.blob;
    6491             : 
    6492           4 :         if (bserrno != 0) {
    6493           4 :                 if (ctx->bserrno != 0) {
    6494           0 :                         SPDK_ERRLOG("Cleanup error %d\n", bserrno);
    6495             :                 } else {
    6496           4 :                         ctx->bserrno = bserrno;
    6497             :                 }
    6498             :         }
    6499             : 
    6500           4 :         ctx->new.id = newblob->id;
    6501           4 :         spdk_blob_close(newblob, bs_clone_snapshot_origblob_cleanup, ctx);
    6502           4 : }
    6503             : 
    6504             : /* END blob_cleanup */
    6505             : 
    6506             : /* START spdk_bs_create_snapshot */
    6507             : 
    6508             : static void
    6509         220 : bs_snapshot_swap_cluster_maps(struct spdk_blob *blob1, struct spdk_blob *blob2)
    6510             : {
    6511             :         uint64_t *cluster_temp;
    6512             :         uint64_t num_allocated_clusters_temp;
    6513             :         uint32_t *extent_page_temp;
    6514             : 
    6515         220 :         cluster_temp = blob1->active.clusters;
    6516         220 :         blob1->active.clusters = blob2->active.clusters;
    6517         220 :         blob2->active.clusters = cluster_temp;
    6518             : 
    6519         220 :         num_allocated_clusters_temp = blob1->active.num_allocated_clusters;
    6520         220 :         blob1->active.num_allocated_clusters = blob2->active.num_allocated_clusters;
    6521         220 :         blob2->active.num_allocated_clusters = num_allocated_clusters_temp;
    6522             : 
    6523         220 :         extent_page_temp = blob1->active.extent_pages;
    6524         220 :         blob1->active.extent_pages = blob2->active.extent_pages;
    6525         220 :         blob2->active.extent_pages = extent_page_temp;
    6526         220 : }
    6527             : 
    6528             : /* Copies an internal xattr */
    6529             : static int
    6530          28 : bs_snapshot_copy_xattr(struct spdk_blob *toblob, struct spdk_blob *fromblob, const char *name)
    6531             : {
    6532          28 :         const void      *val = NULL;
    6533          28 :         size_t          len;
    6534             :         int             bserrno;
    6535             : 
    6536          28 :         bserrno = blob_get_xattr_value(fromblob, name, &val, &len, true);
    6537          28 :         if (bserrno != 0) {
    6538           0 :                 SPDK_ERRLOG("blob 0x%" PRIx64 " missing %s XATTR\n", fromblob->id, name);
    6539           0 :                 return bserrno;
    6540             :         }
    6541             : 
    6542          28 :         bserrno = blob_set_xattr(toblob, name, val, len, true);
    6543          28 :         if (bserrno != 0) {
    6544           0 :                 SPDK_ERRLOG("could not set %s XATTR on blob 0x%" PRIx64 "\n",
    6545             :                             name, toblob->id);
    6546           0 :                 return bserrno;
    6547             :         }
    6548          28 :         return 0;
    6549             : }
    6550             : 
    6551             : static void
    6552         208 : bs_snapshot_origblob_sync_cpl(void *cb_arg, int bserrno)
    6553             : {
    6554         208 :         struct spdk_clone_snapshot_ctx *ctx = (struct spdk_clone_snapshot_ctx *)cb_arg;
    6555         208 :         struct spdk_blob *origblob = ctx->original.blob;
    6556         208 :         struct spdk_blob *newblob = ctx->new.blob;
    6557             : 
    6558         208 :         if (bserrno != 0) {
    6559           4 :                 bs_snapshot_swap_cluster_maps(newblob, origblob);
    6560           4 :                 if (blob_is_esnap_clone(newblob)) {
    6561           0 :                         bs_snapshot_copy_xattr(origblob, newblob, BLOB_EXTERNAL_SNAPSHOT_ID);
    6562           0 :                         origblob->invalid_flags |= SPDK_BLOB_EXTERNAL_SNAPSHOT;
    6563             :                 }
    6564           4 :                 bs_clone_snapshot_origblob_cleanup(ctx, bserrno);
    6565           4 :                 return;
    6566             :         }
    6567             : 
    6568             :         /* Remove metadata descriptor SNAPSHOT_IN_PROGRESS */
    6569         204 :         bserrno = blob_remove_xattr(newblob, SNAPSHOT_IN_PROGRESS, true);
    6570         204 :         if (bserrno != 0) {
    6571           0 :                 bs_clone_snapshot_origblob_cleanup(ctx, bserrno);
    6572           0 :                 return;
    6573             :         }
    6574             : 
    6575         204 :         bs_blob_list_add(ctx->original.blob);
    6576             : 
    6577         204 :         spdk_blob_set_read_only(newblob);
    6578             : 
    6579             :         /* sync snapshot metadata */
    6580         204 :         spdk_blob_sync_md(newblob, bs_clone_snapshot_origblob_cleanup, ctx);
    6581             : }
    6582             : 
    6583             : static void
    6584         212 : bs_snapshot_newblob_sync_cpl(void *cb_arg, int bserrno)
    6585             : {
    6586         212 :         struct spdk_clone_snapshot_ctx *ctx = (struct spdk_clone_snapshot_ctx *)cb_arg;
    6587         212 :         struct spdk_blob *origblob = ctx->original.blob;
    6588         212 :         struct spdk_blob *newblob = ctx->new.blob;
    6589             : 
    6590         212 :         if (bserrno != 0) {
    6591             :                 /* return cluster map back to original */
    6592           4 :                 bs_snapshot_swap_cluster_maps(newblob, origblob);
    6593             : 
    6594             :                 /* Newblob md sync failed. Valid clusters are only present in origblob.
    6595             :                  * Since I/O is frozen on origblob, not changes to zeroed out cluster map should have occurred.
    6596             :                  * Newblob needs to be reverted to thin_provisioned state at creation to properly close. */
    6597           4 :                 blob_set_thin_provision(newblob);
    6598           4 :                 assert(spdk_mem_all_zero(newblob->active.clusters,
    6599             :                                          newblob->active.num_clusters * sizeof(*newblob->active.clusters)));
    6600           4 :                 assert(spdk_mem_all_zero(newblob->active.extent_pages,
    6601             :                                          newblob->active.num_extent_pages * sizeof(*newblob->active.extent_pages)));
    6602             : 
    6603           4 :                 bs_clone_snapshot_newblob_cleanup(ctx, bserrno);
    6604           4 :                 return;
    6605             :         }
    6606             : 
    6607             :         /* Set internal xattr for snapshot id */
    6608         208 :         bserrno = blob_set_xattr(origblob, BLOB_SNAPSHOT, &newblob->id, sizeof(spdk_blob_id), true);
    6609         208 :         if (bserrno != 0) {
    6610             :                 /* return cluster map back to original */
    6611           0 :                 bs_snapshot_swap_cluster_maps(newblob, origblob);
    6612           0 :                 blob_set_thin_provision(newblob);
    6613           0 :                 bs_clone_snapshot_newblob_cleanup(ctx, bserrno);
    6614           0 :                 return;
    6615             :         }
    6616             : 
    6617             :         /* Create new back_bs_dev for snapshot */
    6618         208 :         origblob->back_bs_dev = bs_create_blob_bs_dev(newblob);
    6619         208 :         if (origblob->back_bs_dev == NULL) {
    6620             :                 /* return cluster map back to original */
    6621           0 :                 bs_snapshot_swap_cluster_maps(newblob, origblob);
    6622           0 :                 blob_set_thin_provision(newblob);
    6623           0 :                 bs_clone_snapshot_newblob_cleanup(ctx, -EINVAL);
    6624           0 :                 return;
    6625             :         }
    6626             : 
    6627             :         /* Remove the xattr that references an external snapshot */
    6628         208 :         if (blob_is_esnap_clone(origblob)) {
    6629          16 :                 origblob->invalid_flags &= ~SPDK_BLOB_EXTERNAL_SNAPSHOT;
    6630          16 :                 bserrno = blob_remove_xattr(origblob, BLOB_EXTERNAL_SNAPSHOT_ID, true);
    6631          16 :                 if (bserrno != 0) {
    6632           0 :                         if (bserrno == -ENOENT) {
    6633           0 :                                 SPDK_ERRLOG("blob 0x%" PRIx64 " has no " BLOB_EXTERNAL_SNAPSHOT_ID
    6634             :                                             " xattr to remove\n", origblob->id);
    6635           0 :                                 assert(false);
    6636             :                         } else {
    6637             :                                 /* return cluster map back to original */
    6638           0 :                                 bs_snapshot_swap_cluster_maps(newblob, origblob);
    6639           0 :                                 blob_set_thin_provision(newblob);
    6640           0 :                                 bs_clone_snapshot_newblob_cleanup(ctx, bserrno);
    6641           0 :                                 return;
    6642             :                         }
    6643             :                 }
    6644             :         }
    6645             : 
    6646         208 :         bs_blob_list_remove(origblob);
    6647         208 :         origblob->parent_id = newblob->id;
    6648             :         /* set clone blob as thin provisioned */
    6649         208 :         blob_set_thin_provision(origblob);
    6650             : 
    6651         208 :         bs_blob_list_add(newblob);
    6652             : 
    6653             :         /* sync clone metadata */
    6654         208 :         spdk_blob_sync_md(origblob, bs_snapshot_origblob_sync_cpl, ctx);
    6655             : }
    6656             : 
    6657             : static void
    6658         212 : bs_snapshot_freeze_cpl(void *cb_arg, int rc)
    6659             : {
    6660         212 :         struct spdk_clone_snapshot_ctx *ctx = (struct spdk_clone_snapshot_ctx *)cb_arg;
    6661         212 :         struct spdk_blob *origblob = ctx->original.blob;
    6662         212 :         struct spdk_blob *newblob = ctx->new.blob;
    6663             :         int bserrno;
    6664             : 
    6665         212 :         if (rc != 0) {
    6666           0 :                 bs_clone_snapshot_newblob_cleanup(ctx, rc);
    6667           0 :                 return;
    6668             :         }
    6669             : 
    6670         212 :         ctx->frozen = true;
    6671             : 
    6672         212 :         if (blob_is_esnap_clone(origblob)) {
    6673             :                 /* Clean up any channels associated with the original blob id because future IO will
    6674             :                  * perform IO using the snapshot blob_id.
    6675             :                  */
    6676          16 :                 blob_esnap_destroy_bs_dev_channels(origblob, false, NULL, NULL);
    6677             :         }
    6678         212 :         if (newblob->back_bs_dev) {
    6679         212 :                 blob_back_bs_destroy(newblob);
    6680             :         }
    6681             :         /* set new back_bs_dev for snapshot */
    6682         212 :         newblob->back_bs_dev = origblob->back_bs_dev;
    6683             :         /* Set invalid flags from origblob */
    6684         212 :         newblob->invalid_flags = origblob->invalid_flags;
    6685             : 
    6686             :         /* inherit parent from original blob if set */
    6687         212 :         newblob->parent_id = origblob->parent_id;
    6688         212 :         switch (origblob->parent_id) {
    6689          16 :         case SPDK_BLOBID_EXTERNAL_SNAPSHOT:
    6690          16 :                 bserrno = bs_snapshot_copy_xattr(newblob, origblob, BLOB_EXTERNAL_SNAPSHOT_ID);
    6691          16 :                 if (bserrno != 0) {
    6692           0 :                         bs_clone_snapshot_newblob_cleanup(ctx, bserrno);
    6693           0 :                         return;
    6694             :                 }
    6695          16 :                 break;
    6696         144 :         case SPDK_BLOBID_INVALID:
    6697         144 :                 break;
    6698          52 :         default:
    6699             :                 /* Set internal xattr for snapshot id */
    6700          52 :                 bserrno = blob_set_xattr(newblob, BLOB_SNAPSHOT,
    6701          52 :                                          &origblob->parent_id, sizeof(spdk_blob_id), true);
    6702          52 :                 if (bserrno != 0) {
    6703           0 :                         bs_clone_snapshot_newblob_cleanup(ctx, bserrno);
    6704           0 :                         return;
    6705             :                 }
    6706             :         }
    6707             : 
    6708             :         /* swap cluster maps */
    6709         212 :         bs_snapshot_swap_cluster_maps(newblob, origblob);
    6710             : 
    6711             :         /* Set the clear method on the new blob to match the original. */
    6712         212 :         blob_set_clear_method(newblob, origblob->clear_method);
    6713             : 
    6714             :         /* sync snapshot metadata */
    6715         212 :         spdk_blob_sync_md(newblob, bs_snapshot_newblob_sync_cpl, ctx);
    6716             : }
    6717             : 
    6718             : static void
    6719         216 : bs_snapshot_newblob_open_cpl(void *cb_arg, struct spdk_blob *_blob, int bserrno)
    6720             : {
    6721         216 :         struct spdk_clone_snapshot_ctx *ctx = (struct spdk_clone_snapshot_ctx *)cb_arg;
    6722         216 :         struct spdk_blob *origblob = ctx->original.blob;
    6723         216 :         struct spdk_blob *newblob = _blob;
    6724             : 
    6725         216 :         if (bserrno != 0) {
    6726           4 :                 bs_clone_snapshot_origblob_cleanup(ctx, bserrno);
    6727           4 :                 return;
    6728             :         }
    6729             : 
    6730         212 :         ctx->new.blob = newblob;
    6731         212 :         assert(spdk_blob_is_thin_provisioned(newblob));
    6732         212 :         assert(spdk_mem_all_zero(newblob->active.clusters,
    6733             :                                  newblob->active.num_clusters * sizeof(*newblob->active.clusters)));
    6734         212 :         assert(spdk_mem_all_zero(newblob->active.extent_pages,
    6735             :                                  newblob->active.num_extent_pages * sizeof(*newblob->active.extent_pages)));
    6736             : 
    6737         212 :         blob_freeze_io(origblob, bs_snapshot_freeze_cpl, ctx);
    6738             : }
    6739             : 
    6740             : static void
    6741         220 : bs_snapshot_newblob_create_cpl(void *cb_arg, spdk_blob_id blobid, int bserrno)
    6742             : {
    6743         220 :         struct spdk_clone_snapshot_ctx *ctx = (struct spdk_clone_snapshot_ctx *)cb_arg;
    6744         220 :         struct spdk_blob *origblob = ctx->original.blob;
    6745             : 
    6746         220 :         if (bserrno != 0) {
    6747           4 :                 bs_clone_snapshot_origblob_cleanup(ctx, bserrno);
    6748           4 :                 return;
    6749             :         }
    6750             : 
    6751         216 :         ctx->new.id = blobid;
    6752         216 :         ctx->cpl.u.blobid.blobid = blobid;
    6753             : 
    6754         216 :         spdk_bs_open_blob(origblob->bs, ctx->new.id, bs_snapshot_newblob_open_cpl, ctx);
    6755             : }
    6756             : 
    6757             : 
    6758             : static void
    6759         220 : bs_xattr_snapshot(void *arg, const char *name,
    6760             :                   const void **value, size_t *value_len)
    6761             : {
    6762         220 :         assert(strncmp(name, SNAPSHOT_IN_PROGRESS, sizeof(SNAPSHOT_IN_PROGRESS)) == 0);
    6763             : 
    6764         220 :         struct spdk_blob *blob = (struct spdk_blob *)arg;
    6765         220 :         *value = &blob->id;
    6766         220 :         *value_len = sizeof(blob->id);
    6767         220 : }
    6768             : 
    6769             : static void
    6770         230 : bs_snapshot_origblob_open_cpl(void *cb_arg, struct spdk_blob *_blob, int bserrno)
    6771             : {
    6772         230 :         struct spdk_clone_snapshot_ctx *ctx = (struct spdk_clone_snapshot_ctx *)cb_arg;
    6773         230 :         struct spdk_blob_opts opts;
    6774         230 :         struct spdk_blob_xattr_opts internal_xattrs;
    6775         230 :         char *xattrs_names[] = { SNAPSHOT_IN_PROGRESS };
    6776             : 
    6777         230 :         if (bserrno != 0) {
    6778           6 :                 bs_clone_snapshot_cleanup_finish(ctx, bserrno);
    6779           6 :                 return;
    6780             :         }
    6781             : 
    6782         224 :         ctx->original.blob = _blob;
    6783             : 
    6784         224 :         if (_blob->data_ro || _blob->md_ro) {
    6785           4 :                 SPDK_DEBUGLOG(blob, "Cannot create snapshot from read only blob with id 0x%"
    6786             :                               PRIx64 "\n", _blob->id);
    6787           4 :                 ctx->bserrno = -EINVAL;
    6788           4 :                 spdk_blob_close(_blob, bs_clone_snapshot_cleanup_finish, ctx);
    6789           4 :                 return;
    6790             :         }
    6791             : 
    6792         220 :         if (_blob->locked_operation_in_progress) {
    6793           0 :                 SPDK_DEBUGLOG(blob, "Cannot create snapshot - another operation in progress\n");
    6794           0 :                 ctx->bserrno = -EBUSY;
    6795           0 :                 spdk_blob_close(_blob, bs_clone_snapshot_cleanup_finish, ctx);
    6796           0 :                 return;
    6797             :         }
    6798             : 
    6799         220 :         _blob->locked_operation_in_progress = true;
    6800             : 
    6801         220 :         spdk_blob_opts_init(&opts, sizeof(opts));
    6802         220 :         blob_xattrs_init(&internal_xattrs);
    6803             : 
    6804             :         /* Change the size of new blob to the same as in original blob,
    6805             :          * but do not allocate clusters */
    6806         220 :         opts.thin_provision = true;
    6807         220 :         opts.num_clusters = spdk_blob_get_num_clusters(_blob);
    6808         220 :         opts.use_extent_table = _blob->use_extent_table;
    6809             : 
    6810             :         /* If there are any xattrs specified for snapshot, set them now */
    6811         220 :         if (ctx->xattrs) {
    6812           4 :                 memcpy(&opts.xattrs, ctx->xattrs, sizeof(*ctx->xattrs));
    6813             :         }
    6814             :         /* Set internal xattr SNAPSHOT_IN_PROGRESS */
    6815         220 :         internal_xattrs.count = 1;
    6816         220 :         internal_xattrs.ctx = _blob;
    6817         220 :         internal_xattrs.names = xattrs_names;
    6818         220 :         internal_xattrs.get_value = bs_xattr_snapshot;
    6819             : 
    6820         220 :         bs_create_blob(_blob->bs, &opts, &internal_xattrs,
    6821             :                        bs_snapshot_newblob_create_cpl, ctx);
    6822             : }
    6823             : 
    6824             : void
    6825         230 : spdk_bs_create_snapshot(struct spdk_blob_store *bs, spdk_blob_id blobid,
    6826             :                         const struct spdk_blob_xattr_opts *snapshot_xattrs,
    6827             :                         spdk_blob_op_with_id_complete cb_fn, void *cb_arg)
    6828             : {
    6829         230 :         struct spdk_clone_snapshot_ctx *ctx = calloc(1, sizeof(*ctx));
    6830             : 
    6831         230 :         if (!ctx) {
    6832           0 :                 cb_fn(cb_arg, SPDK_BLOBID_INVALID, -ENOMEM);
    6833           0 :                 return;
    6834             :         }
    6835         230 :         ctx->cpl.type = SPDK_BS_CPL_TYPE_BLOBID;
    6836         230 :         ctx->cpl.u.blobid.cb_fn = cb_fn;
    6837         230 :         ctx->cpl.u.blobid.cb_arg = cb_arg;
    6838         230 :         ctx->cpl.u.blobid.blobid = SPDK_BLOBID_INVALID;
    6839         230 :         ctx->bserrno = 0;
    6840         230 :         ctx->frozen = false;
    6841         230 :         ctx->original.id = blobid;
    6842         230 :         ctx->xattrs = snapshot_xattrs;
    6843             : 
    6844         230 :         spdk_bs_open_blob(bs, ctx->original.id, bs_snapshot_origblob_open_cpl, ctx);
    6845             : }
    6846             : /* END spdk_bs_create_snapshot */
    6847             : 
    6848             : /* START spdk_bs_create_clone */
    6849             : 
    6850             : static void
    6851          48 : bs_xattr_clone(void *arg, const char *name,
    6852             :                const void **value, size_t *value_len)
    6853             : {
    6854          48 :         assert(strncmp(name, BLOB_SNAPSHOT, sizeof(BLOB_SNAPSHOT)) == 0);
    6855             : 
    6856          48 :         struct spdk_blob *blob = (struct spdk_blob *)arg;
    6857          48 :         *value = &blob->id;
    6858          48 :         *value_len = sizeof(blob->id);
    6859          48 : }
    6860             : 
    6861             : static void
    6862          48 : bs_clone_newblob_open_cpl(void *cb_arg, struct spdk_blob *_blob, int bserrno)
    6863             : {
    6864          48 :         struct spdk_clone_snapshot_ctx *ctx = (struct spdk_clone_snapshot_ctx *)cb_arg;
    6865          48 :         struct spdk_blob *clone = _blob;
    6866             : 
    6867          48 :         ctx->new.blob = clone;
    6868          48 :         bs_blob_list_add(clone);
    6869             : 
    6870          48 :         spdk_blob_close(clone, bs_clone_snapshot_origblob_cleanup, ctx);
    6871          48 : }
    6872             : 
    6873             : static void
    6874          48 : bs_clone_newblob_create_cpl(void *cb_arg, spdk_blob_id blobid, int bserrno)
    6875             : {
    6876          48 :         struct spdk_clone_snapshot_ctx *ctx = (struct spdk_clone_snapshot_ctx *)cb_arg;
    6877             : 
    6878          48 :         ctx->cpl.u.blobid.blobid = blobid;
    6879          48 :         spdk_bs_open_blob(ctx->original.blob->bs, blobid, bs_clone_newblob_open_cpl, ctx);
    6880          48 : }
    6881             : 
    6882             : static void
    6883          52 : bs_clone_origblob_open_cpl(void *cb_arg, struct spdk_blob *_blob, int bserrno)
    6884             : {
    6885          52 :         struct spdk_clone_snapshot_ctx  *ctx = (struct spdk_clone_snapshot_ctx *)cb_arg;
    6886          52 :         struct spdk_blob_opts           opts;
    6887          52 :         struct spdk_blob_xattr_opts internal_xattrs;
    6888          52 :         char *xattr_names[] = { BLOB_SNAPSHOT };
    6889             : 
    6890          52 :         if (bserrno != 0) {
    6891           0 :                 bs_clone_snapshot_cleanup_finish(ctx, bserrno);
    6892           0 :                 return;
    6893             :         }
    6894             : 
    6895          52 :         ctx->original.blob = _blob;
    6896          52 :         ctx->original.md_ro = _blob->md_ro;
    6897             : 
    6898          52 :         if (!_blob->data_ro || !_blob->md_ro) {
    6899           4 :                 SPDK_DEBUGLOG(blob, "Clone not from read-only blob\n");
    6900           4 :                 ctx->bserrno = -EINVAL;
    6901           4 :                 spdk_blob_close(_blob, bs_clone_snapshot_cleanup_finish, ctx);
    6902           4 :                 return;
    6903             :         }
    6904             : 
    6905          48 :         if (_blob->locked_operation_in_progress) {
    6906           0 :                 SPDK_DEBUGLOG(blob, "Cannot create clone - another operation in progress\n");
    6907           0 :                 ctx->bserrno = -EBUSY;
    6908           0 :                 spdk_blob_close(_blob, bs_clone_snapshot_cleanup_finish, ctx);
    6909           0 :                 return;
    6910             :         }
    6911             : 
    6912          48 :         _blob->locked_operation_in_progress = true;
    6913             : 
    6914          48 :         spdk_blob_opts_init(&opts, sizeof(opts));
    6915          48 :         blob_xattrs_init(&internal_xattrs);
    6916             : 
    6917          48 :         opts.thin_provision = true;
    6918          48 :         opts.num_clusters = spdk_blob_get_num_clusters(_blob);
    6919          48 :         opts.use_extent_table = _blob->use_extent_table;
    6920          48 :         if (ctx->xattrs) {
    6921           4 :                 memcpy(&opts.xattrs, ctx->xattrs, sizeof(*ctx->xattrs));
    6922             :         }
    6923             : 
    6924             :         /* Set internal xattr BLOB_SNAPSHOT */
    6925          48 :         internal_xattrs.count = 1;
    6926          48 :         internal_xattrs.ctx = _blob;
    6927          48 :         internal_xattrs.names = xattr_names;
    6928          48 :         internal_xattrs.get_value = bs_xattr_clone;
    6929             : 
    6930          48 :         bs_create_blob(_blob->bs, &opts, &internal_xattrs,
    6931             :                        bs_clone_newblob_create_cpl, ctx);
    6932             : }
    6933             : 
    6934             : void
    6935          52 : spdk_bs_create_clone(struct spdk_blob_store *bs, spdk_blob_id blobid,
    6936             :                      const struct spdk_blob_xattr_opts *clone_xattrs,
    6937             :                      spdk_blob_op_with_id_complete cb_fn, void *cb_arg)
    6938             : {
    6939          52 :         struct spdk_clone_snapshot_ctx  *ctx = calloc(1, sizeof(*ctx));
    6940             : 
    6941          52 :         if (!ctx) {
    6942           0 :                 cb_fn(cb_arg, SPDK_BLOBID_INVALID, -ENOMEM);
    6943           0 :                 return;
    6944             :         }
    6945             : 
    6946          52 :         ctx->cpl.type = SPDK_BS_CPL_TYPE_BLOBID;
    6947          52 :         ctx->cpl.u.blobid.cb_fn = cb_fn;
    6948          52 :         ctx->cpl.u.blobid.cb_arg = cb_arg;
    6949          52 :         ctx->cpl.u.blobid.blobid = SPDK_BLOBID_INVALID;
    6950          52 :         ctx->bserrno = 0;
    6951          52 :         ctx->xattrs = clone_xattrs;
    6952          52 :         ctx->original.id = blobid;
    6953             : 
    6954          52 :         spdk_bs_open_blob(bs, ctx->original.id, bs_clone_origblob_open_cpl, ctx);
    6955             : }
    6956             : 
    6957             : /* END spdk_bs_create_clone */
    6958             : 
    6959             : /* START spdk_bs_inflate_blob */
    6960             : 
    6961             : static void
    6962          12 : bs_inflate_blob_set_parent_cpl(void *cb_arg, struct spdk_blob *_parent, int bserrno)
    6963             : {
    6964          12 :         struct spdk_clone_snapshot_ctx *ctx = (struct spdk_clone_snapshot_ctx *)cb_arg;
    6965          12 :         struct spdk_blob *_blob = ctx->original.blob;
    6966             : 
    6967          12 :         if (bserrno != 0) {
    6968           0 :                 bs_clone_snapshot_origblob_cleanup(ctx, bserrno);
    6969           0 :                 return;
    6970             :         }
    6971             : 
    6972             :         /* Temporarily override md_ro flag for MD modification */
    6973          12 :         _blob->md_ro = false;
    6974             : 
    6975          12 :         bserrno = blob_set_xattr(_blob, BLOB_SNAPSHOT, &_parent->id, sizeof(spdk_blob_id), true);
    6976          12 :         if (bserrno != 0) {
    6977           0 :                 bs_clone_snapshot_origblob_cleanup(ctx, bserrno);
    6978           0 :                 return;
    6979             :         }
    6980             : 
    6981          12 :         assert(_parent != NULL);
    6982             : 
    6983          12 :         bs_blob_list_remove(_blob);
    6984          12 :         _blob->parent_id = _parent->id;
    6985             : 
    6986          12 :         blob_back_bs_destroy(_blob);
    6987          12 :         _blob->back_bs_dev = bs_create_blob_bs_dev(_parent);
    6988          12 :         bs_blob_list_add(_blob);
    6989             : 
    6990          12 :         spdk_blob_sync_md(_blob, bs_clone_snapshot_origblob_cleanup, ctx);
    6991             : }
    6992             : /* Decouple step used when the blob's parent has parent_id == SPDK_BLOBID_EXTERNAL_SNAPSHOT: make the blob reference that external snapshot itself, then sync metadata. */
    6993             : static void
    6994           4 : bs_inflate_blob_set_esnap_refs(struct spdk_clone_snapshot_ctx *ctx)
    6995             : {
    6996           4 :         struct spdk_blob *_blob = ctx->original.blob;
    6997           4 :         struct spdk_blob *_parent = ((struct spdk_blob_bs_dev *)(_blob->back_bs_dev))->blob;
    6998             :         int bserrno;
    6999             : 
    7000           4 :         assert(_parent != NULL);
    7001           4 :         assert(_parent->parent_id == SPDK_BLOBID_EXTERNAL_SNAPSHOT);
    7002             : 
    7003             :         /* Temporarily override md_ro flag for MD modification */
    7004           4 :         _blob->md_ro = false;
    7005             :         /* Swap the BLOB_SNAPSHOT xattr for BLOB_EXTERNAL_SNAPSHOT_ID; the return value of blob_remove_xattr() is not checked */
    7006           4 :         blob_remove_xattr(_blob, BLOB_SNAPSHOT, true);
    7007           4 :         bserrno = bs_snapshot_copy_xattr(_blob, _parent, BLOB_EXTERNAL_SNAPSHOT_ID);
    7008           4 :         if (bserrno != 0) {
    7009           0 :                 bs_clone_snapshot_origblob_cleanup(ctx, bserrno);
    7010           0 :                 return;
    7011             :         }
    7012             : 
    7013           4 :         bs_blob_list_remove(_blob);
    7014             :         /* Mark the blob as having an external snapshot as its parent */
    7015           4 :         _blob->invalid_flags |= SPDK_BLOB_EXTERNAL_SNAPSHOT;
    7016           4 :         _blob->parent_id = SPDK_BLOBID_EXTERNAL_SNAPSHOT;
    7017             :         /* Drop the blob's current backing device and point at the parent's one (the pointer is shared, not duplicated) */
    7018           4 :         blob_back_bs_destroy(_blob);
    7019           4 :         _blob->back_bs_dev = _parent->back_bs_dev;
    7020             :         /* Insert the blob right after the parent in the list threaded through back_bs_dev_link */
    7021           4 :         LIST_INSERT_AFTER(_parent, _blob, back_bs_dev_link);
    7022             :         /* Persist the change; bs_clone_snapshot_origblob_cleanup() runs as the sync completion */
    7023           4 :         spdk_blob_sync_md(_blob, bs_clone_snapshot_origblob_cleanup, ctx);
    7024             : }
    7025             : /* Last stage of inflate/decouple, reached from bs_inflate_blob_touch_next() once no cluster is left to touch: rewrite the blob's parent linkage and sync metadata. */
    7026             : static void
    7027          60 : bs_inflate_blob_done(struct spdk_clone_snapshot_ctx *ctx)
    7028             : {
    7029          60 :         struct spdk_blob *_blob = ctx->original.blob;
    7030             :         struct spdk_blob *_parent;
    7031             : 
    7032          60 :         if (ctx->allocate_all) {
    7033             :                 /* Inflate: remove thin provisioning and every reference to a parent */
    7034          32 :                 bs_blob_list_remove(_blob);
    7035          32 :                 if (_blob->parent_id == SPDK_BLOBID_EXTERNAL_SNAPSHOT) {
    7036           8 :                         blob_remove_xattr(_blob, BLOB_EXTERNAL_SNAPSHOT_ID, true);
    7037           8 :                         _blob->invalid_flags &= ~SPDK_BLOB_EXTERNAL_SNAPSHOT;
    7038             :                 } else {
    7039          24 :                         blob_remove_xattr(_blob, BLOB_SNAPSHOT, true);
    7040             :                 }
    7041          32 :                 _blob->invalid_flags = _blob->invalid_flags & ~SPDK_BLOB_THIN_PROV;
    7042          32 :                 blob_back_bs_destroy(_blob);
    7043          32 :                 _blob->parent_id = SPDK_BLOBID_INVALID;
    7044             :         } else {
    7045             :                 /* For now, esnap clones always have allocate_all set. */
    7046          28 :                 assert(!blob_is_esnap_clone(_blob));
    7047             :                 /* Decouple: the new linkage depends on what the current parent itself is backed by */
    7048          28 :                 _parent = ((struct spdk_blob_bs_dev *)(_blob->back_bs_dev))->blob;
    7049          28 :                 switch (_parent->parent_id) {
    7050          12 :                 case SPDK_BLOBID_INVALID:
    7051          12 :                         bs_blob_list_remove(_blob);
    7052          12 :                         _blob->parent_id = SPDK_BLOBID_INVALID;
    7053          12 :                         blob_back_bs_destroy(_blob);
    7054          12 :                         _blob->back_bs_dev = bs_create_zeroes_dev();
    7055          12 :                         break;
    7056           4 :                 case SPDK_BLOBID_EXTERNAL_SNAPSHOT:
    7057           4 :                         bs_inflate_blob_set_esnap_refs(ctx);
    7058           4 :                         return;
    7059          12 :                 default:
    7060             :                         /* We must change the parent of the inflated blob */
    7061          12 :                         spdk_bs_open_blob(_blob->bs, _parent->parent_id,
    7062             :                                           bs_inflate_blob_set_parent_cpl, ctx);
    7063          12 :                         return;
    7064             :                 }
    7065             :         }
    7066             : 
    7067             :         /* Temporarily override md_ro flag for MD modification */
    7068          44 :         _blob->md_ro = false;
    7069          44 :         blob_remove_xattr(_blob, BLOB_SNAPSHOT, true);
    7070          44 :         _blob->state = SPDK_BLOB_STATE_DIRTY;
    7071             :         /* Only the paths that did not return above (allocate_all, or parent without a parent) sync from here */
    7072          44 :         spdk_blob_sync_md(_blob, bs_clone_snapshot_origblob_cleanup, ctx);
    7073             : }
    7074             : 
    7075             : /* Check if cluster needs allocation. Returns false if the blob already has it allocated; otherwise the answer depends on the blob's parent_id and on allocate_all (see the branches below). */
    7076             : static inline bool
    7077        1280 : bs_cluster_needs_allocation(struct spdk_blob *blob, uint64_t cluster, bool allocate_all)
    7078             : {
    7079             :         struct spdk_blob_bs_dev *b;
    7080             : 
    7081        1280 :         assert(blob != NULL);
    7082             : 
    7083        1280 :         if (blob->active.clusters[cluster] != 0) {
    7084             :                 /* Cluster is already allocated */
    7085          32 :                 return false;
    7086             :         }
    7087             : 
    7088        1248 :         if (blob->parent_id == SPDK_BLOBID_INVALID) {
    7089             :                 /* Blob has no parent blob */
    7090          80 :                 return allocate_all;
    7091             :         }
    7092             :         /* Parent is an external snapshot: every unallocated cluster is reported as needing allocation */
    7093        1168 :         if (blob->parent_id == SPDK_BLOBID_EXTERNAL_SNAPSHOT) {
    7094          64 :                 return true;
    7095             :         }
    7096             :         /* Parent is a blob: needed when allocating everything or when the parent has this cluster allocated */
    7097        1104 :         b = (struct spdk_blob_bs_dev *)blob->back_bs_dev;
    7098        1104 :         return (allocate_all || b->blob->active.clusters[cluster] != 0);
    7099             : }
    7100             : /* One step of the inflate loop: find the next cluster needing allocation and start allocate-and-copy for it with this function as the op's completion; call bs_inflate_blob_done() when none remain. */
    7101             : static void
    7102         512 : bs_inflate_blob_touch_next(void *cb_arg, int bserrno)
    7103             : {
    7104         512 :         struct spdk_clone_snapshot_ctx *ctx = (struct spdk_clone_snapshot_ctx *)cb_arg;
    7105         512 :         struct spdk_blob *_blob = ctx->original.blob;
    7106         512 :         struct spdk_bs_cpl cpl;
    7107             :         spdk_bs_user_op_t *op;
    7108             :         uint64_t offset;
    7109             : 
    7110         512 :         if (bserrno != 0) {
    7111           0 :                 bs_clone_snapshot_origblob_cleanup(ctx, bserrno);
    7112           0 :                 return;
    7113             :         }
    7114             :         /* Skip clusters that do not need allocation; ctx->cluster persists across invocations */
    7115         700 :         for (; ctx->cluster < _blob->active.num_clusters; ctx->cluster++) {
    7116         640 :                 if (bs_cluster_needs_allocation(_blob, ctx->cluster, ctx->allocate_all)) {
    7117         452 :                         break;
    7118             :                 }
    7119             :         }
    7120             : 
    7121         512 :         if (ctx->cluster < _blob->active.num_clusters) {
    7122         452 :                 offset = bs_cluster_to_lba(_blob->bs, ctx->cluster);
    7123             : 
    7124             :                 /* We may safely increment a cluster before copying */
    7125         452 :                 ctx->cluster++;
    7126             : 
    7127             :                 /* Use a dummy 0B read as a context for cluster copy */
    7128         452 :                 cpl.type = SPDK_BS_CPL_TYPE_BLOB_BASIC;
    7129         452 :                 cpl.u.blob_basic.cb_fn = bs_inflate_blob_touch_next;
    7130         452 :                 cpl.u.blob_basic.cb_arg = ctx;
    7131             : 
    7132         452 :                 op = bs_user_op_alloc(ctx->channel, &cpl, SPDK_BLOB_READ, _blob,
    7133             :                                       NULL, 0, offset, 0);
    7134         452 :                 if (!op) {
    7135           0 :                         bs_clone_snapshot_origblob_cleanup(ctx, -ENOMEM);
    7136           0 :                         return;
    7137             :                 }
    7138             : 
    7139         452 :                 bs_allocate_and_copy_cluster(_blob, ctx->channel, offset, op);
    7140             :         } else {
    7141          60 :                 bs_inflate_blob_done(ctx);
    7142             :         }
    7143             : }
    7144             : /* Blob-open completion for inflate/decouple: record the blob in ctx, mark a locked operation in progress, validate the request, check that enough free clusters exist, then start touching clusters. */
    7145             : static void
    7146          64 : bs_inflate_blob_open_cpl(void *cb_arg, struct spdk_blob *_blob, int bserrno)
    7147             : {
    7148          64 :         struct spdk_clone_snapshot_ctx *ctx = (struct spdk_clone_snapshot_ctx *)cb_arg;
    7149             :         uint64_t clusters_needed;
    7150             :         uint64_t i;
    7151             : 
    7152          64 :         if (bserrno != 0) {
    7153           0 :                 bs_clone_snapshot_cleanup_finish(ctx, bserrno);
    7154           0 :                 return;
    7155             :         }
    7156             :         /* Remember the blob and its current md_ro value in ctx */
    7157          64 :         ctx->original.blob = _blob;
    7158          64 :         ctx->original.md_ro = _blob->md_ro;
    7159             : 
    7160          64 :         if (_blob->locked_operation_in_progress) {
    7161           0 :                 SPDK_DEBUGLOG(blob, "Cannot inflate blob - another operation in progress\n");
    7162           0 :                 ctx->bserrno = -EBUSY;
    7163           0 :                 spdk_blob_close(_blob, bs_clone_snapshot_cleanup_finish, ctx);
    7164           0 :                 return;
    7165             :         }
    7166             : 
    7167          64 :         _blob->locked_operation_in_progress = true;
    7168             : 
    7169          64 :         switch (_blob->parent_id) {
    7170           8 :         case SPDK_BLOBID_INVALID:
    7171           8 :                 if (!ctx->allocate_all) {
    7172             :                         /* This blob has no parent, so we cannot decouple it. */
    7173           4 :                         SPDK_ERRLOG("Cannot decouple parent of blob with no parent.\n");
    7174           4 :                         bs_clone_snapshot_origblob_cleanup(ctx, -EINVAL);
    7175           4 :                         return;
    7176             :                 }
    7177           4 :                 break;
    7178           8 :         case SPDK_BLOBID_EXTERNAL_SNAPSHOT:
    7179             :                 /*
    7180             :                  * It would be better to rely on back_bs_dev->is_zeroes(), to determine which
    7181             :                  * clusters require allocation. Until there is a blobstore consumer that
    7182             :                  * uses esnaps with an spdk_bs_dev that implements a useful is_zeroes() it is not
    7183             :                  * worth the effort.
    7184             :                  */
    7185           8 :                 ctx->allocate_all = true;
    7186           8 :                 break;
    7187          48 :         default:
    7188          48 :                 break;
    7189             :         }
    7190             : 
    7191          60 :         if (spdk_blob_is_thin_provisioned(_blob) == false) {
    7192             :                 /* This is not thin provisioned blob. No need to inflate. */
    7193           0 :                 bs_clone_snapshot_origblob_cleanup(ctx, 0);
    7194           0 :                 return;
    7195             :         }
    7196             : 
    7197             :         /* Do two passes - one to verify that we can obtain enough clusters
    7198             :          * and another to actually claim them.
    7199             :          */
    7200          60 :         clusters_needed = 0;
    7201         700 :         for (i = 0; i < _blob->active.num_clusters; i++) {
    7202         640 :                 if (bs_cluster_needs_allocation(_blob, i, ctx->allocate_all)) {
    7203         452 :                         clusters_needed++;
    7204             :                 }
    7205             :         }
    7206             : 
    7207          60 :         if (clusters_needed > _blob->bs->num_free_clusters) {
    7208             :                 /* Not enough free clusters. Cannot satisfy the request. */
    7209           0 :                 bs_clone_snapshot_origblob_cleanup(ctx, -ENOSPC);
    7210           0 :                 return;
    7211             :         }
    7212             :         /* Second pass: bs_inflate_blob_touch_next() walks the clusters again starting from 0 */
    7213          60 :         ctx->cluster = 0;
    7214          60 :         bs_inflate_blob_touch_next(ctx, 0);
    7215             : }
    7216             : /* Common entry for inflate (allocate_all == true) and decouple-parent (false): allocate the context and open the blob; cb_fn is called directly with -ENOMEM if the context cannot be allocated. */
    7217             : static void
    7218          64 : bs_inflate_blob(struct spdk_blob_store *bs, struct spdk_io_channel *channel,
    7219             :                 spdk_blob_id blobid, bool allocate_all, spdk_blob_op_complete cb_fn, void *cb_arg)
    7220             : {
    7221          64 :         struct spdk_clone_snapshot_ctx *ctx = calloc(1, sizeof(*ctx));
    7222             : 
    7223          64 :         if (!ctx) {
    7224           0 :                 cb_fn(cb_arg, -ENOMEM);
    7225           0 :                 return;
    7226             :         }
    7227          64 :         ctx->cpl.type = SPDK_BS_CPL_TYPE_BLOB_BASIC;
    7228          64 :         ctx->cpl.u.bs_basic.cb_fn = cb_fn;
    7229          64 :         ctx->cpl.u.bs_basic.cb_arg = cb_arg;
    7230          64 :         ctx->bserrno = 0;
    7231          64 :         ctx->original.id = blobid;
    7232          64 :         ctx->channel = channel;
    7233          64 :         ctx->allocate_all = allocate_all;
    7234             :         /* NOTE(review): cpl.type is BLOB_BASIC but the bs_basic union member is written above - confirm both members share a layout */
    7235          64 :         spdk_bs_open_blob(bs, ctx->original.id, bs_inflate_blob_open_cpl, ctx);
    7236             : }
    7237             : /* Public API: inflate blob blobid by running bs_inflate_blob() with allocate_all = true; completion is reported through cb_fn(cb_arg, bserrno). */
    7238             : void
    7239          28 : spdk_bs_inflate_blob(struct spdk_blob_store *bs, struct spdk_io_channel *channel,
    7240             :                      spdk_blob_id blobid, spdk_blob_op_complete cb_fn, void *cb_arg)
    7241             : {
    7242          28 :         bs_inflate_blob(bs, channel, blobid, true, cb_fn, cb_arg);
    7243          28 : }
    7244             : /* Public API: decouple blob blobid from its parent by running bs_inflate_blob() with allocate_all = false; completion is reported through cb_fn(cb_arg, bserrno). */
    7245             : void
    7246          36 : spdk_bs_blob_decouple_parent(struct spdk_blob_store *bs, struct spdk_io_channel *channel,
    7247             :                              spdk_blob_id blobid, spdk_blob_op_complete cb_fn, void *cb_arg)
    7248             : {
    7249          36 :         bs_inflate_blob(bs, channel, blobid, false, cb_fn, cb_arg);
    7250          36 : }
    7251             : /* END spdk_bs_inflate_blob */
    7252             : 
    7253             : /* START spdk_bs_blob_shallow_copy */
    7254             : /* Per-operation state for spdk_bs_blob_shallow_copy(): allocated there with calloc() and freed in bs_shallow_copy_cleanup_finish(). */
    7255             : struct shallow_copy_ctx {
    7256             :         struct spdk_bs_cpl cpl;
    7257             :         int bserrno;
    7258             : 
    7259             :         /* Blob source for copy */
    7260             :         struct spdk_blob_store *bs;
    7261             :         spdk_blob_id blobid;
    7262             :         struct spdk_blob *blob;
    7263             :         struct spdk_io_channel *blob_channel;
    7264             : 
    7265             :         /* Destination device for copy */
    7266             :         struct spdk_bs_dev *ext_dev;
    7267             :         struct spdk_io_channel *ext_channel;
    7268             : 
    7269             :         /* Current cluster for copy operation */
    7270             :         uint64_t cluster;
    7271             : 
    7272             :         /* Buffer for blob reading (one cluster in size, allocated with spdk_malloc) */
    7273             :         uint8_t *read_buff;
    7274             : 
    7275             :         /* Struct for external device writing */
    7276             :         struct spdk_bs_dev_cb_args ext_args;
    7277             : 
    7278             :         /* Actual number of copied clusters (only advanced when status_cb is set) */
    7279             :         uint64_t copied_clusters_count;
    7280             : 
    7281             :         /* Status callback for updates about the ongoing operation */
    7282             :         spdk_blob_shallow_copy_status status_cb;
    7283             : 
    7284             :         /* Argument passed to function status_cb */
    7285             :         void *status_cb_arg;
    7286             : };
    7287             : /* Final step of a shallow copy (also used as the spdk_blob_close() completion): release the external channel and read buffer, report ctx->bserrno to the user callback, free ctx. */
    7288             : static void
    7289          16 : bs_shallow_copy_cleanup_finish(void *cb_arg, int bserrno)
    7290             : {
    7291          16 :         struct shallow_copy_ctx *ctx = cb_arg;
    7292          16 :         struct spdk_bs_cpl *cpl = &ctx->cpl;
    7293             :         /* Log ctx->blobid rather than ctx->blob->id: ctx->blob is still NULL when the blob was closed before being stored in ctx */
    7294          16 :         if (bserrno != 0) {
    7295           0 :                 SPDK_ERRLOG("blob 0x%" PRIx64 " shallow copy, cleanup error %d\n", ctx->blobid, bserrno);
    7296           0 :                 ctx->bserrno = bserrno;
    7297             :         }
    7298             : 
    7299          16 :         ctx->ext_dev->destroy_channel(ctx->ext_dev, ctx->ext_channel);
    7300          16 :         spdk_free(ctx->read_buff);
    7301             :         /* Report the final status before releasing the context */
    7302          16 :         cpl->u.blob_basic.cb_fn(cpl->u.blob_basic.cb_arg, ctx->bserrno);
    7303             : 
    7304          16 :         free(ctx);
    7305          16 : }
    7306             : /* Completion of the external-device write for one cluster: on error unlock and close the blob; otherwise advance to the next cluster, report progress and continue the copy loop. */
    7307             : static void
    7308           8 : bs_shallow_copy_bdev_write_cpl(struct spdk_io_channel *channel, void *cb_arg, int bserrno)
    7309             : {
    7310           8 :         struct shallow_copy_ctx *ctx = cb_arg;
    7311           8 :         struct spdk_blob *_blob = ctx->blob;
    7312             : 
    7313           8 :         if (bserrno != 0) {
    7314           0 :                 SPDK_ERRLOG("blob 0x%" PRIx64 " shallow copy, ext dev write error %d\n", ctx->blob->id, bserrno);
    7315           0 :                 ctx->bserrno = bserrno;
    7316           0 :                 _blob->locked_operation_in_progress = false;
    7317           0 :                 spdk_blob_close(_blob, bs_shallow_copy_cleanup_finish, ctx);
    7318           0 :                 return;
    7319             :         }
    7320             :         /* The copied-cluster counter is only maintained when a status callback was supplied */
    7321           8 :         ctx->cluster++;
    7322           8 :         if (ctx->status_cb) {
    7323           8 :                 ctx->copied_clusters_count++;
    7324           8 :                 ctx->status_cb(ctx->copied_clusters_count, ctx->status_cb_arg);
    7325             :         }
    7326             : 
    7327           8 :         bs_shallow_copy_cluster_find_next(ctx);
    7328             : }
    7329             : /* Completion of the blob read for one cluster: on error unlock and close the blob; otherwise write the buffer to the external device at the same cluster LBA. */
    7330             : static void
    7331           8 : bs_shallow_copy_blob_read_cpl(void *cb_arg, int bserrno)
    7332             : {
    7333           8 :         struct shallow_copy_ctx *ctx = cb_arg;
    7334           8 :         struct spdk_bs_dev *ext_dev = ctx->ext_dev;
    7335           8 :         struct spdk_blob *_blob = ctx->blob;
    7336             : 
    7337           8 :         if (bserrno != 0) {
    7338           0 :                 SPDK_ERRLOG("blob 0x%" PRIx64 " shallow copy, blob read error %d\n", ctx->blob->id, bserrno);
    7339           0 :                 ctx->bserrno = bserrno;
    7340           0 :                 _blob->locked_operation_in_progress = false;
    7341           0 :                 spdk_blob_close(_blob, bs_shallow_copy_cleanup_finish, ctx);
    7342           0 :                 return;
    7343             :         }
    7344             :         /* Completion arguments live in ctx so they stay valid until the write callback runs */
    7345           8 :         ctx->ext_args.channel = ctx->ext_channel;
    7346           8 :         ctx->ext_args.cb_fn = bs_shallow_copy_bdev_write_cpl;
    7347           8 :         ctx->ext_args.cb_arg = ctx;
    7348             :         /* Start LBA and length are both computed from the blobstore's geometry, not the external device's */
    7349           8 :         ext_dev->write(ext_dev, ctx->ext_channel, ctx->read_buff,
    7350           8 :                        bs_cluster_to_lba(_blob->bs, ctx->cluster),
    7351           8 :                        bs_dev_byte_to_lba(_blob->bs->dev, _blob->bs->cluster_sz),
    7352             :                        &ctx->ext_args);
    7353             : }
    7354             : /* Copy-loop step: skip clusters the blob has not allocated, then read the next allocated cluster into ctx->read_buff; when no cluster is left, unlock and close the blob. */
    7355             : static void
    7356          12 : bs_shallow_copy_cluster_find_next(void *cb_arg)
    7357             : {
    7358          12 :         struct shallow_copy_ctx *ctx = cb_arg;
    7359          12 :         struct spdk_blob *_blob = ctx->blob;
    7360             :         /* An entry of 0 in active.clusters is treated as "not allocated" and is skipped */
    7361          20 :         while (ctx->cluster < _blob->active.num_clusters) {
    7362          16 :                 if (_blob->active.clusters[ctx->cluster] != 0) {
    7363           8 :                         break;
    7364             :                 }
    7365             : 
    7366           8 :                 ctx->cluster++;
    7367             :         }
    7368             : 
    7369          12 :         if (ctx->cluster < _blob->active.num_clusters) {
    7370           8 :                 blob_request_submit_op_single(ctx->blob_channel, _blob, ctx->read_buff,
    7371           8 :                                               bs_cluster_to_lba(_blob->bs, ctx->cluster),
    7372           8 :                                               bs_dev_byte_to_lba(_blob->bs->dev, _blob->bs->cluster_sz),
    7373             :                                               bs_shallow_copy_blob_read_cpl, ctx, SPDK_BLOB_READ);
    7374             :         } else {
    7375           4 :                 _blob->locked_operation_in_progress = false;
    7376           4 :                 spdk_blob_close(_blob, bs_shallow_copy_cleanup_finish, ctx);
    7377             :         }
    7378          12 : }
    7379             : /* Blob-open completion for shallow copy: validate the blob (read only, not locked) and the external device (large enough, compatible block size), then mark the operation in progress and start the copy loop. */
    7380             : static void
    7381          16 : bs_shallow_copy_blob_open_cpl(void *cb_arg, struct spdk_blob *_blob, int bserrno)
    7382             : {
    7383          16 :         struct shallow_copy_ctx *ctx = cb_arg;
    7384          16 :         struct spdk_bs_dev *ext_dev = ctx->ext_dev;
    7385             :         uint32_t blob_block_size;
    7386             :         uint64_t blob_total_size;
    7387             : 
    7388          16 :         if (bserrno != 0) {
    7389           0 :                 SPDK_ERRLOG("Shallow copy blob open error %d\n", bserrno);
    7390           0 :                 ctx->bserrno = bserrno;
    7391           0 :                 bs_shallow_copy_cleanup_finish(ctx, 0);
    7392           0 :                 return;
    7393             :         }
    7394             :         /* NOTE(review): ctx->blob is assigned only after the checks below, so it is still NULL on their early-exit close paths */
    7395          16 :         if (!spdk_blob_is_read_only(_blob)) {
    7396           4 :                 SPDK_ERRLOG("blob 0x%" PRIx64 " shallow copy, blob must be read only\n", _blob->id);
    7397           4 :                 ctx->bserrno = -EPERM;
    7398           4 :                 spdk_blob_close(_blob, bs_shallow_copy_cleanup_finish, ctx);
    7399           4 :                 return;
    7400             :         }
    7401             : 
    7402          12 :         blob_block_size = _blob->bs->dev->blocklen;
    7403          12 :         blob_total_size = spdk_blob_get_num_clusters(_blob) * spdk_bs_get_cluster_size(_blob->bs);
    7404             :         /* The external device must be able to hold every cluster of the blob */
    7405          12 :         if (blob_total_size > ext_dev->blockcnt * ext_dev->blocklen) {
    7406           4 :                 SPDK_ERRLOG("blob 0x%" PRIx64 " shallow copy, external device must have at least blob size\n",
    7407             :                             _blob->id);
    7408           4 :                 ctx->bserrno = -EINVAL;
    7409           4 :                 spdk_blob_close(_blob, bs_shallow_copy_cleanup_finish, ctx);
    7410           4 :                 return;
    7411             :         }
    7412             :         /* The blobstore device block size must be a multiple of the external device block size */
    7413           8 :         if (blob_block_size % ext_dev->blocklen != 0) {
    7414           4 :                 SPDK_ERRLOG("blob 0x%" PRIx64 " shallow copy, external device block size is not compatible with \
    7415             : blobstore block size\n", _blob->id);
    7416           4 :                 ctx->bserrno = -EINVAL;
    7417           4 :                 spdk_blob_close(_blob, bs_shallow_copy_cleanup_finish, ctx);
    7418           4 :                 return;
    7419             :         }
    7420             : 
    7421           4 :         ctx->blob = _blob;
    7422             : 
    7423           4 :         if (_blob->locked_operation_in_progress) {
    7424           0 :                 SPDK_DEBUGLOG(blob, "blob 0x%" PRIx64 " shallow copy - another operation in progress\n", _blob->id);
    7425           0 :                 ctx->bserrno = -EBUSY;
    7426           0 :                 spdk_blob_close(_blob, bs_shallow_copy_cleanup_finish, ctx);
    7427           0 :                 return;
    7428             :         }
    7429             : 
    7430           4 :         _blob->locked_operation_in_progress = true;
    7431             : 
    7432           4 :         ctx->cluster = 0;
    7433           4 :         bs_shallow_copy_cluster_find_next(ctx);
    7434             : }
    7435             : /* Public API: copy the allocated clusters of blob blobid to ext_dev at the same LBAs. Returns 0 once the blob open has been issued (the result then arrives via cb_fn), or -ENOMEM on setup failure, in which case cb_fn is not called. */
    7436             : int
    7437          16 : spdk_bs_blob_shallow_copy(struct spdk_blob_store *bs, struct spdk_io_channel *channel,
    7438             :                           spdk_blob_id blobid, struct spdk_bs_dev *ext_dev,
    7439             :                           spdk_blob_shallow_copy_status status_cb_fn, void *status_cb_arg,
    7440             :                           spdk_blob_op_complete cb_fn, void *cb_arg)
    7441             : {
    7442             :         struct shallow_copy_ctx *ctx;
    7443             :         struct spdk_io_channel *ext_channel;
    7444             : 
    7445          16 :         ctx = calloc(1, sizeof(*ctx));
    7446          16 :         if (!ctx) {
    7447           0 :                 return -ENOMEM;
    7448             :         }
    7449             :         /* NOTE(review): cpl.type is BLOB_BASIC but the bs_basic union member is written below, while the cleanup path reads blob_basic - confirm both share a layout */
    7450          16 :         ctx->bs = bs;
    7451          16 :         ctx->blobid = blobid;
    7452          16 :         ctx->cpl.type = SPDK_BS_CPL_TYPE_BLOB_BASIC;
    7453          16 :         ctx->cpl.u.bs_basic.cb_fn = cb_fn;
    7454          16 :         ctx->cpl.u.bs_basic.cb_arg = cb_arg;
    7455          16 :         ctx->bserrno = 0;
    7456          16 :         ctx->blob_channel = channel;
    7457          16 :         ctx->status_cb = status_cb_fn;
    7458          16 :         ctx->status_cb_arg = status_cb_arg;
    7459          16 :         ctx->read_buff = spdk_malloc(bs->cluster_sz, bs->dev->blocklen, NULL,
    7460             :                                      SPDK_ENV_LCORE_ID_ANY, SPDK_MALLOC_DMA);
    7461          16 :         if (!ctx->read_buff) {
    7462           0 :                 free(ctx);
    7463           0 :                 return -ENOMEM;
    7464             :         }
    7465             :         /* A failure to create the external channel is also reported as -ENOMEM */
    7466          16 :         ext_channel = ext_dev->create_channel(ext_dev);
    7467          16 :         if (!ext_channel) {
    7468           0 :                 spdk_free(ctx->read_buff);
    7469           0 :                 free(ctx);
    7470           0 :                 return -ENOMEM;
    7471             :         }
    7472          16 :         ctx->ext_dev = ext_dev;
    7473          16 :         ctx->ext_channel = ext_channel;
    7474             : 
    7475          16 :         spdk_bs_open_blob(ctx->bs, ctx->blobid, bs_shallow_copy_blob_open_cpl, ctx);
    7476             : 
    7477          16 :         return 0;
    7478             : }
    7479             : /* END spdk_bs_blob_shallow_copy */
    7480             : 
    7481             : /* START spdk_bs_blob_set_parent */
    7482             : /* State carried through a set-parent operation; read and updated by the bs_set_parent_* callbacks and freed in bs_set_parent_cleanup_finish(). */
    7483             : struct set_parent_ctx {
    7484             :         struct spdk_blob_store *bs;
    7485             :         int                     bserrno;
    7486             :         spdk_bs_op_complete     cb_fn;
    7487             :         void                    *cb_arg;
    7488             :         /* Blob being re-parented and the md_ro value that bs_set_parent_close_blob() writes back to it */
    7489             :         struct spdk_blob        *blob;
    7490             :         bool                    blob_md_ro;
    7491             : 
    7492             :         struct blob_parent      parent;
    7493             : };
    7494             : /* Last step of set-parent: keep an already recorded error (bserrno is stored only if ctx->bserrno is still 0), call the user callback with ctx->bserrno, then free ctx. */
    7495             : static void
    7496          24 : bs_set_parent_cleanup_finish(void *cb_arg, int bserrno)
    7497             : {
    7498          24 :         struct set_parent_ctx *ctx = cb_arg;
    7499             : 
    7500          24 :         assert(ctx != NULL);
    7501             : 
    7502          24 :         if (bserrno != 0) {
    7503           0 :                 SPDK_ERRLOG("blob set parent finish error %d\n", bserrno);
    7504           0 :                 if (ctx->bserrno == 0) {
    7505           0 :                         ctx->bserrno = bserrno;
    7506             :                 }
    7507             :         }
    7508             : 
    7509          24 :         ctx->cb_fn(ctx->cb_arg, ctx->bserrno);
    7510             : 
    7511          24 :         free(ctx);
    7512          24 : }
    7513             : /* Completion callback near the end of set-parent: if ctx->bserrno is already set, close the snapshot blob and finish from that close; otherwise record bserrno (if any) and finish immediately. */
    7514             : static void
    7515          20 : bs_set_parent_close_snapshot(void *cb_arg, int bserrno)
    7516             : {
    7517          20 :         struct set_parent_ctx *ctx = cb_arg;
    7518             :         /* On this path the bserrno argument is not examined; only the earlier error in ctx is kept */
    7519          20 :         if (ctx->bserrno != 0) {
    7520           8 :                 spdk_blob_close(ctx->parent.u.snapshot.blob, bs_set_parent_cleanup_finish, ctx);
    7521           8 :                 return;
    7522             :         }
    7523             : 
    7524          12 :         if (bserrno != 0) {
    7525           0 :                 SPDK_ERRLOG("blob close error %d\n", bserrno);
    7526           0 :                 ctx->bserrno = bserrno;
    7527             :         }
    7528             :         /* The snapshot blob is not closed on this path */
    7529          12 :         bs_set_parent_cleanup_finish(ctx, ctx->bserrno);
    7530             : }
    7531             : /* Metadata-sync completion (also called directly on a back_bs_dev error): record the first error, restore md_ro, clear the locked-operation flag on blob and snapshot, then close the blob. */
    7532             : static void
    7533          12 : bs_set_parent_close_blob(void *cb_arg, int bserrno)
    7534             : {
    7535          12 :         struct set_parent_ctx *ctx = cb_arg;
    7536          12 :         struct spdk_blob *blob = ctx->blob;
    7537          12 :         struct spdk_blob *snapshot = ctx->parent.u.snapshot.blob;
    7538             :         /* An error already stored in ctx takes precedence over this one */
    7539          12 :         if (bserrno != 0 && ctx->bserrno == 0) {
    7540           0 :                 SPDK_ERRLOG("error %d in metadata sync\n", bserrno);
    7541           0 :                 ctx->bserrno = bserrno;
    7542             :         }
    7543             : 
    7544             :         /* Revert md_ro to original state */
    7545          12 :         blob->md_ro = ctx->blob_md_ro;
    7546             : 
    7547          12 :         blob->locked_operation_in_progress = false;
    7548          12 :         snapshot->locked_operation_in_progress = false;
    7549             : 
    7550          12 :         spdk_blob_close(blob, bs_set_parent_close_snapshot, ctx);
    7551          12 : }
    7552             : /* Completion of the back_bs_dev change: on error record it and go straight to bs_set_parent_close_blob(); on success sync the blob's metadata with that function as the completion. */
    7553             : static void
    7554          12 : bs_set_parent_set_back_bs_dev_done(void *cb_arg, int bserrno)
    7555             : {
    7556          12 :         struct set_parent_ctx *ctx = cb_arg;
    7557          12 :         struct spdk_blob *blob = ctx->blob;
    7558             : 
    7559          12 :         if (bserrno != 0) {
    7560           0 :                 SPDK_ERRLOG("error %d setting back_bs_dev\n", bserrno);
    7561           0 :                 ctx->bserrno = bserrno;
    7562           0 :                 bs_set_parent_close_blob(ctx, bserrno);
    7563           0 :                 return;
    7564             :         }
    7565             : 
    7566          12 :         spdk_blob_sync_md(blob, bs_set_parent_close_blob, ctx);
    7567             : }
    7568             : 
    7569             : static int
    7570          12 : bs_set_parent_refs(struct spdk_blob *blob, struct blob_parent *parent)
    7571             : {
                               /*
                                * Reference-update hook handed to blob_set_back_bs_dev() by
                                * bs_set_parent_snapshot_open_cpl(). Rewrites the blob's parent
                                * bookkeeping so that it points at parent->u.snapshot.id.
                                *
                                * Returns 0 on success, or the blob_set_xattr() error code.
                                */
    7572             :         int rc;
    7573             : 
                               /* Drop the blob from the blob list before parent_id changes; it is re-added below. */
    7574          12 :         bs_blob_list_remove(blob);
    7575             : 
                               /*
                                * NOTE(review): on failure this returns after bs_blob_list_remove()
                                * without a matching bs_blob_list_add() - confirm callers tolerate that.
                                */
    7576          12 :         rc = blob_set_xattr(blob, BLOB_SNAPSHOT, &parent->u.snapshot.id, sizeof(spdk_blob_id), true);
    7577          12 :         if (rc != 0) {
    7578           0 :                 SPDK_ERRLOG("error %d setting snapshot xattr\n", rc);
    7579           0 :                 return rc;
    7580             :         }
    7581          12 :         blob->parent_id = parent->u.snapshot.id;
    7582             : 
    7583          12 :         if (blob_is_esnap_clone(blob)) {
    7584             :                 /* Remove the xattr that references the external snapshot */
    7585           4 :                 blob->invalid_flags &= ~SPDK_BLOB_EXTERNAL_SNAPSHOT;
                                       /* NOTE(review): the return value of blob_remove_xattr() is ignored here. */
    7586           4 :                 blob_remove_xattr(blob, BLOB_EXTERNAL_SNAPSHOT_ID, true);
    7587             :         }
    7588             : 
    7589          12 :         bs_blob_list_add(blob);
    7590             : 
    7591          12 :         return 0;
    7592             : }
    7593             : 
    7594             : static void
    7595          20 : bs_set_parent_snapshot_open_cpl(void *cb_arg, struct spdk_blob *snapshot, int bserrno)
    7596             : {
                               /*
                                * Open completion for the prospective parent snapshot. The child
                                * blob (ctx->blob) is already open at this point.
                                *
                                * Validates the pair, takes the locked-operation flag on both
                                * blobs, and starts switching the child's back_bs_dev to one
                                * created from the snapshot. Failures are stored in ctx->bserrno
                                * and reported through the close chain.
                                */
    7597          20 :         struct set_parent_ctx *ctx = cb_arg;
    7598          20 :         struct spdk_blob *blob = ctx->blob;
    7599             :         struct spdk_bs_dev *back_bs_dev;
    7600             : 
                               /* Snapshot never opened: only the child needs closing, so finish right after it. */
    7601          20 :         if (bserrno != 0) {
    7602           0 :                 SPDK_ERRLOG("snapshot open error %d\n", bserrno);
    7603           0 :                 ctx->bserrno = bserrno;
    7604           0 :                 spdk_blob_close(blob, bs_set_parent_cleanup_finish, ctx);
    7605           0 :                 return;
    7606             :         }
    7607             : 
    7608          20 :         ctx->parent.u.snapshot.blob = snapshot;
    7609          20 :         ctx->parent.u.snapshot.id = snapshot->id;
    7610             : 
                               /*
                                * From here on both blobs are open. Each error path closes the
                                * child with bs_set_parent_close_snapshot as completion, which
                                * closes the snapshot when ctx->bserrno is set.
                                */
    7611          20 :         if (!spdk_blob_is_snapshot(snapshot)) {
    7612           4 :                 SPDK_ERRLOG("parent blob is not a snapshot\n");
    7613           4 :                 ctx->bserrno = -EINVAL;
    7614           4 :                 spdk_blob_close(blob, bs_set_parent_close_snapshot, ctx);
    7615           4 :                 return;
    7616             :         }
    7617             : 
                               /* Child and parent must have the same number of clusters. */
    7618          16 :         if (blob->active.num_clusters != snapshot->active.num_clusters) {
    7619           4 :                 SPDK_ERRLOG("parent blob has a number of clusters different from child's ones\n");
    7620           4 :                 ctx->bserrno = -EINVAL;
    7621           4 :                 spdk_blob_close(blob, bs_set_parent_close_snapshot, ctx);
    7622           4 :                 return;
    7623             :         }
    7624             : 
                               /* Refuse to run while another locked operation holds either blob. */
    7625          12 :         if (blob->locked_operation_in_progress || snapshot->locked_operation_in_progress) {
    7626           0 :                 SPDK_ERRLOG("cannot set parent of blob, another operation in progress\n");
    7627           0 :                 ctx->bserrno = -EBUSY;
    7628           0 :                 spdk_blob_close(blob, bs_set_parent_close_snapshot, ctx);
    7629           0 :                 return;
    7630             :         }
    7631             : 
    7632          12 :         blob->locked_operation_in_progress = true;
    7633          12 :         snapshot->locked_operation_in_progress = true;
    7634             : 
    7635             :         /* Temporarily override md_ro flag for MD modification */
    7636          12 :         blob->md_ro = false;
    7637             : 
                               /*
                                * NOTE(review): the result of bs_create_blob_bs_dev() is passed on
                                * unchecked - confirm whether it can return NULL.
                                */
    7638          12 :         back_bs_dev = bs_create_blob_bs_dev(snapshot);
    7639             : 
                               /* bs_set_parent_refs updates xattrs and list entries; the _done callback continues the flow. */
    7640          12 :         blob_set_back_bs_dev(blob, back_bs_dev, bs_set_parent_refs, &ctx->parent,
    7641             :                              bs_set_parent_set_back_bs_dev_done,
    7642             :                              ctx);
    7643             : }
    7644             : 
    7645             : static void
    7646          24 : bs_set_parent_blob_open_cpl(void *cb_arg, struct spdk_blob *blob, int bserrno)
    7647             : {
                               /*
                                * Open completion for the child blob, first asynchronous step of
                                * spdk_bs_blob_set_parent(). Checks that the blob is
                                * thin-provisioned, remembers it and its md_ro flag in the context,
                                * then opens the snapshot whose id was stored by the entry point.
                                */
    7648          24 :         struct set_parent_ctx *ctx = cb_arg;
    7649             : 
    7650          24 :         if (bserrno != 0) {
    7651           0 :                 SPDK_ERRLOG("blob open error %d\n", bserrno);
    7652           0 :                 ctx->bserrno = bserrno;
                                       /* Nothing was opened; the error is already in ctx, so pass 0 as the step status. */
    7653           0 :                 bs_set_parent_cleanup_finish(ctx, 0);
    7654           0 :                 return;
    7655             :         }
    7656             : 
    7657          24 :         if (!spdk_blob_is_thin_provisioned(blob)) {
    7658           4 :                 SPDK_ERRLOG("blob is not thin-provisioned\n");
    7659           4 :                 ctx->bserrno = -EINVAL;
    7660           4 :                 spdk_blob_close(blob, bs_set_parent_cleanup_finish, ctx);
    7661           4 :                 return;
    7662             :         }
    7663             : 
                               /* Save the original md_ro so bs_set_parent_close_blob() can restore it. */
    7664          20 :         ctx->blob = blob;
    7665          20 :         ctx->blob_md_ro = blob->md_ro;
    7666             : 
    7667          20 :         spdk_bs_open_blob(ctx->bs, ctx->parent.u.snapshot.id, bs_set_parent_snapshot_open_cpl, ctx);
    7668             : }
    7669             : 
    7670             : void
    7671          36 : spdk_bs_blob_set_parent(struct spdk_blob_store *bs, spdk_blob_id blob_id,
    7672             :                         spdk_blob_id snapshot_id, spdk_blob_op_complete cb_fn, void *cb_arg)
    7673             : {
                               /*
                                * Entry point: make the snapshot identified by snapshot_id the
                                * parent of the blob identified by blob_id.
                                *
                                * The result is always delivered through cb_fn(cb_arg, status).
                                * Statuses produced directly in this function, before any blob is
                                * opened:
                                *   -EINVAL  snapshot_id is SPDK_BLOBID_INVALID, or equals blob_id
                                *   -EEXIST  the snapshot is already the blob's parent
                                *   -ENOMEM  the operation context could not be allocated
                                * Otherwise the blob is opened and the flow continues in
                                * bs_set_parent_blob_open_cpl().
                                */
    7674             :         struct set_parent_ctx *ctx;
    7675             : 
    7676          36 :         if (snapshot_id == SPDK_BLOBID_INVALID) {
    7677           4 :                 SPDK_ERRLOG("snapshot id not valid\n");
    7678           4 :                 cb_fn(cb_arg, -EINVAL);
    7679           4 :                 return;
    7680             :         }
    7681             : 
    7682          32 :         if (blob_id == snapshot_id) {
    7683           4 :                 SPDK_ERRLOG("blob id and snapshot id cannot be the same\n");
    7684           4 :                 cb_fn(cb_arg, -EINVAL);
    7685           4 :                 return;
    7686             :         }
    7687             : 
                               /* Already the parent: logged at notice level only, but still reported as -EEXIST. */
    7688          28 :         if (spdk_blob_get_parent_snapshot(bs, blob_id) == snapshot_id) {
    7689           4 :                 SPDK_NOTICELOG("snapshot is already the parent of blob\n");
    7690           4 :                 cb_fn(cb_arg, -EEXIST);
    7691           4 :                 return;
    7692             :         }
    7693             : 
    7694          24 :         ctx = calloc(1, sizeof(*ctx));
    7695          24 :         if (!ctx) {
    7696           0 :                 cb_fn(cb_arg, -ENOMEM);
    7697           0 :                 return;
    7698             :         }
    7699             : 
    7700          24 :         ctx->bs = bs;
    7701          24 :         ctx->parent.u.snapshot.id = snapshot_id;
    7702          24 :         ctx->cb_fn = cb_fn;
    7703          24 :         ctx->cb_arg = cb_arg;
    7704          24 :         ctx->bserrno = 0;
    7705             : 
    7706          24 :         spdk_bs_open_blob(bs, blob_id, bs_set_parent_blob_open_cpl, ctx);
    7707             : }
    7708             : /* END spdk_bs_blob_set_parent */
    7709             : 
    7710             : /* START spdk_bs_blob_set_external_parent */
    7711             : 
    7712             : static void
    7713          16 : bs_set_external_parent_cleanup_finish(void *cb_arg, int bserrno)
    7714             : {
                               /*
                                * Final step of the set-external-parent flow. Records bserrno if
                                * no earlier error is stored, reports ctx->bserrno to the user
                                * callback, then frees the copied esnap id and the context itself.
                                */
    7715          16 :         struct set_parent_ctx *ctx = cb_arg;
    7716             : 
    7717          16 :         if (bserrno != 0) {
    7718           0 :                 SPDK_ERRLOG("blob set external parent finish error %d\n", bserrno);
                                       /* The first recorded error wins. */
    7719           0 :                 if (ctx->bserrno == 0) {
    7720           0 :                         ctx->bserrno = bserrno;
    7721             :                 }
    7722             :         }
    7723             : 
    7724          16 :         ctx->cb_fn(ctx->cb_arg, ctx->bserrno);
    7725             : 
                               /* The id buffer was allocated in spdk_bs_blob_set_external_parent(). */
    7726          16 :         free(ctx->parent.u.esnap.id);
    7727          16 :         free(ctx);
    7728          16 : }
    7729             : 
    7730             : static void
    7731           8 : bs_set_external_parent_close_blob(void *cb_arg, int bserrno)
    7732             : {
                               /*
                                * Used as the spdk_blob_sync_md() completion and also called
                                * directly when setting the back_bs_dev failed. Records the error
                                * if none is stored yet, restores the blob's md_ro flag, clears its
                                * locked-operation flag, and closes the blob; the flow ends in
                                * bs_set_external_parent_cleanup_finish().
                                */
    7733           8 :         struct set_parent_ctx *ctx = cb_arg;
    7734           8 :         struct spdk_blob *blob = ctx->blob;
    7735             : 
    7736           8 :         if (bserrno != 0 && ctx->bserrno == 0) {
    7737           0 :                 SPDK_ERRLOG("error %d in metadata sync\n", bserrno);
    7738           0 :                 ctx->bserrno = bserrno;
    7739             :         }
    7740             : 
    7741             :         /* Revert md_ro to original state */
    7742           8 :         blob->md_ro = ctx->blob_md_ro;
    7743             : 
    7744           8 :         blob->locked_operation_in_progress = false;
    7745             : 
    7746           8 :         spdk_blob_close(blob, bs_set_external_parent_cleanup_finish, ctx);
    7747           8 : }
    7748             : 
    7749             : static void
    7750           8 : bs_set_external_parent_unfrozen(void *cb_arg, int bserrno)
    7751             : {
                               /*
                                * Completion passed to blob_set_back_bs_dev() by
                                * bs_set_external_parent_blob_open_cpl(). On failure the error is
                                * stored and the metadata sync is skipped; on success the blob's
                                * metadata is synced. Both paths continue in
                                * bs_set_external_parent_close_blob().
                                */
    7752           8 :         struct set_parent_ctx *ctx = cb_arg;
    7753           8 :         struct spdk_blob *blob = ctx->blob;
    7754             : 
    7755           8 :         if (bserrno != 0) {
    7756           0 :                 SPDK_ERRLOG("error %d setting back_bs_dev\n", bserrno);
    7757           0 :                 ctx->bserrno = bserrno;
    7758           0 :                 bs_set_external_parent_close_blob(ctx, bserrno);
    7759           0 :                 return;
    7760             :         }
    7761             : 
    7762           8 :         spdk_blob_sync_md(blob, bs_set_external_parent_close_blob, ctx);
    7763             : }
    7764             : 
    7765             : static int
    7766           8 : bs_set_external_parent_refs(struct spdk_blob *blob, struct blob_parent *parent)
    7767             : {
                               /*
                                * Reference-update hook handed to blob_set_back_bs_dev() by
                                * bs_set_external_parent_blob_open_cpl(). Rewrites the blob's
                                * parent bookkeeping so that it refers to the external snapshot id
                                * held in parent->u.esnap.
                                *
                                * Returns 0 on success, or the blob_set_xattr() error code.
                                */
    7768             :         int rc;
    7769             : 
    7770           8 :         bs_blob_list_remove(blob);
    7771             : 
    7772           8 :         if (spdk_blob_is_clone(blob)) {
    7773             :                 /* Remove the xattr that references the snapshot */
    7774           0 :                 blob->parent_id = SPDK_BLOBID_INVALID;
    7775           0 :                 blob_remove_xattr(blob, BLOB_SNAPSHOT, true);
    7776             :         }
    7777             : 
                               /*
                                * NOTE(review): on failure this returns after bs_blob_list_remove()
                                * (and possibly after dropping the snapshot reference above)
                                * without re-adding the blob - confirm callers tolerate that.
                                */
    7778           8 :         rc = blob_set_xattr(blob, BLOB_EXTERNAL_SNAPSHOT_ID, parent->u.esnap.id,
    7779           8 :                             parent->u.esnap.id_len, true);
    7780           8 :         if (rc != 0) {
    7781           0 :                 SPDK_ERRLOG("error %d setting external snapshot xattr\n", rc);
    7782           0 :                 return rc;
    7783             :         }
                               /* Mark the blob as an esnap clone: flag bit plus the sentinel parent id. */
    7784           8 :         blob->invalid_flags |= SPDK_BLOB_EXTERNAL_SNAPSHOT;
    7785           8 :         blob->parent_id = SPDK_BLOBID_EXTERNAL_SNAPSHOT;
    7786             : 
    7787           8 :         bs_blob_list_add(blob);
    7788             : 
    7789           8 :         return 0;
    7790             : }
    7791             : 
    7792             : static void
    7793          16 : bs_set_external_parent_blob_open_cpl(void *cb_arg, struct spdk_blob *blob, int bserrno)
    7794             : {
                               /*
                                * Open completion for the blob whose external parent is being set.
                                *
                                * Rejects the request when the requested esnap id is already the
                                * blob's external snapshot (-EEXIST), when the blob is not
                                * thin-provisioned (-EINVAL), or when another locked operation is
                                * in progress (-EBUSY). Otherwise it takes the locked-operation
                                * flag, lifts md_ro, and switches the back_bs_dev to the caller's
                                * esnap device.
                                *
                                * Every exit ends in bs_set_external_parent_cleanup_finish(),
                                * which frees the esnap id copy stored in the context.
                                */
    7795          16 :         struct set_parent_ctx *ctx = cb_arg;
    7796          16 :         const void *esnap_id;
    7797          16 :         size_t esnap_id_len;
    7798             :         int rc;
    7799             : 
    7800          16 :         if (bserrno != 0) {
    7801           0 :                 SPDK_ERRLOG("blob open error %d\n", bserrno);
    7802           0 :                 ctx->bserrno = bserrno;
                                       /*
                                        * Use the external-parent finisher so the esnap id copy is
                                        * freed along with the context. The error is already in
                                        * ctx, so pass 0 as the step status.
                                        */
    7803           0 :                 bs_set_external_parent_cleanup_finish(ctx, 0);
    7804           0 :                 return;
    7805             :         }
    7806             : 
                               /* Save the original md_ro so bs_set_external_parent_close_blob() can restore it. */
    7807          16 :         ctx->blob = blob;
    7808          16 :         ctx->blob_md_ro = blob->md_ro;
    7809             : 
                               /* A failing lookup (rc != 0) is not an error here; the blob then has no matching esnap id. */
    7810          16 :         rc = spdk_blob_get_esnap_id(blob, &esnap_id, &esnap_id_len);
    7811          16 :         if (rc == 0 && esnap_id != NULL && esnap_id_len == ctx->parent.u.esnap.id_len &&
    7812           4 :             memcmp(esnap_id, ctx->parent.u.esnap.id, esnap_id_len) == 0) {
    7813           4 :                 SPDK_ERRLOG("external snapshot is already the parent of blob\n");
    7814           4 :                 ctx->bserrno = -EEXIST;
    7815           4 :                 goto error;
    7816             :         }
    7817             : 
    7818          12 :         if (!spdk_blob_is_thin_provisioned(blob)) {
    7819           4 :                 SPDK_ERRLOG("blob is not thin-provisioned\n");
    7820           4 :                 ctx->bserrno = -EINVAL;
    7821           4 :                 goto error;
    7822             :         }
    7823             : 
    7824           8 :         if (blob->locked_operation_in_progress) {
    7825           0 :                 SPDK_ERRLOG("cannot set external parent of blob, another operation in progress\n");
    7826           0 :                 ctx->bserrno = -EBUSY;
    7827           0 :                 goto error;
    7828             :         }
    7829             : 
    7830           8 :         blob->locked_operation_in_progress = true;
    7831             : 
    7832             :         /* Temporarily override md_ro flag for MD modification */
    7833           8 :         blob->md_ro = false;
    7834             : 
    7835           8 :         blob_set_back_bs_dev(blob, ctx->parent.u.esnap.back_bs_dev, bs_set_external_parent_refs,
    7836             :                              &ctx->parent, bs_set_external_parent_unfrozen, ctx);
    7837           8 :         return;
    7838             : 
    7839           8 : error:
                               /* The blob is open but untouched: close it and report ctx->bserrno. */
    7840           8 :         spdk_blob_close(blob, bs_set_external_parent_cleanup_finish, ctx);
    7841             : }
    7842             : 
    7843             : void
    7844          24 : spdk_bs_blob_set_external_parent(struct spdk_blob_store *bs, spdk_blob_id blob_id,
    7845             :                                  struct spdk_bs_dev *esnap_bs_dev, const void *esnap_id,
    7846             :                                  uint32_t esnap_id_len, spdk_blob_op_complete cb_fn, void *cb_arg)
    7847             : {
                               /*
                                * Entry point: make the external snapshot device esnap_bs_dev,
                                * identified by the esnap_id_len bytes at esnap_id, the parent of
                                * the blob identified by blob_id.
                                *
                                * The result is always delivered through cb_fn(cb_arg, status).
                                * Statuses produced directly in this function:
                                *   -EINVAL  esnap_id is byte-identical to blob_id, or the esnap
                                *            device size is not a multiple of the cluster size
                                *   -ENOMEM  the context or the id copy could not be allocated
                                * Otherwise the blob is opened and the flow continues in
                                * bs_set_external_parent_blob_open_cpl().
                                *
                                * esnap_id is copied, so the caller's buffer is not referenced
                                * after this function returns.
                                */
    7848             :         struct set_parent_ctx *ctx;
    7849             :         uint64_t esnap_dev_size, cluster_sz;
    7850             : 
                               /* An esnap id that is exactly the blob's own id would make the blob its own parent. */
    7851          24 :         if (sizeof(blob_id) == esnap_id_len && memcmp(&blob_id, esnap_id, sizeof(blob_id)) == 0) {
    7852           4 :                 SPDK_ERRLOG("blob id and external snapshot id cannot be the same\n");
    7853           4 :                 cb_fn(cb_arg, -EINVAL);
    7854           4 :                 return;
    7855             :         }
    7856             : 
                               /*
                                * NOTE(review): blockcnt * blocklen is not checked for overflow, and
                                * this assumes cluster_sz is non-zero - confirm both.
                                */
    7857          20 :         esnap_dev_size = esnap_bs_dev->blockcnt * esnap_bs_dev->blocklen;
    7858          20 :         cluster_sz = spdk_bs_get_cluster_size(bs);
    7859          20 :         if ((esnap_dev_size % cluster_sz) != 0) {
    7860           4 :                 SPDK_ERRLOG("Esnap device size %" PRIu64 " is not an integer multiple of "
    7861             :                             "cluster size %" PRIu64 "\n", esnap_dev_size, cluster_sz);
    7862           4 :                 cb_fn(cb_arg, -EINVAL);
    7863           4 :                 return;
    7864             :         }
    7865             : 
    7866          16 :         ctx = calloc(1, sizeof(*ctx));
    7867          16 :         if (!ctx) {
    7868           0 :                 cb_fn(cb_arg, -ENOMEM);
    7869           0 :                 return;
    7870             :         }
    7871             : 
                               /*
                                * NOTE(review): with esnap_id_len == 0, calloc() may return NULL and
                                * this would be reported as -ENOMEM - confirm zero-length ids are
                                * rejected by callers.
                                */
    7872          16 :         ctx->parent.u.esnap.id = calloc(1, esnap_id_len);
    7873          16 :         if (!ctx->parent.u.esnap.id) {
    7874           0 :                 free(ctx);
    7875           0 :                 cb_fn(cb_arg, -ENOMEM);
    7876           0 :                 return;
    7877             :         }
    7878             : 
    7879          16 :         ctx->bs = bs;
    7880          16 :         ctx->parent.u.esnap.back_bs_dev = esnap_bs_dev;
    7881          16 :         memcpy(ctx->parent.u.esnap.id, esnap_id, esnap_id_len);
    7882          16 :         ctx->parent.u.esnap.id_len = esnap_id_len;
    7883          16 :         ctx->cb_fn = cb_fn;
    7884          16 :         ctx->cb_arg = cb_arg;
    7885          16 :         ctx->bserrno = 0;
    7886             : 
    7887          16 :         spdk_bs_open_blob(bs, blob_id, bs_set_external_parent_blob_open_cpl, ctx);
    7888             : }
    7889             : /* END spdk_bs_blob_set_external_parent */
    7890             : 
    7891             : /* START spdk_blob_resize */
    7892             : struct spdk_bs_resize_ctx {
                               /* State carried through the freeze -> resize -> unfreeze steps of spdk_blob_resize(). */
                               /* User completion callback and its argument. */
    7893             :         spdk_blob_op_complete cb_fn;
    7894             :         void *cb_arg;
                               /* Blob being resized. */
    7895             :         struct spdk_blob *blob;
                               /* Requested new size, as passed to blob_resize(). */
    7896             :         uint64_t sz;
                               /* Return value of blob_resize(); stays 0 until the resize step has run. */
    7897             :         int rc;
    7898             : };
    7899             : 
    7900             : static void
    7901         202 : bs_resize_unfreeze_cpl(void *cb_arg, int rc)
    7902             : {
                               /*
                                * Completion of blob_unfreeze_io(), last step of spdk_blob_resize().
                                *
                                * rc is the unfreeze status; ctx->rc is the earlier blob_resize()
                                * result. A resize failure takes precedence over the unfreeze
                                * status in what is reported to the user callback. Clears the
                                * blob's locked-operation flag and frees the context.
                                */
    7903         202 :         struct spdk_bs_resize_ctx *ctx = (struct spdk_bs_resize_ctx *)cb_arg;
    7904             : 
    7905         202 :         if (rc != 0) {
    7906           0 :                 SPDK_ERRLOG("Unfreeze failed, rc=%d\n", rc);
    7907             :         }
    7908             : 
    7909         202 :         if (ctx->rc != 0) {
                                       /* ctx->rc comes from blob_resize(), so name the step that actually failed. */
    7910           4 :                 SPDK_ERRLOG("Resize failed, ctx->rc=%d\n", ctx->rc);
    7911           4 :                 rc = ctx->rc;
    7912             :         }
    7913             : 
    7914         202 :         ctx->blob->locked_operation_in_progress = false;
    7915             : 
    7916         202 :         ctx->cb_fn(ctx->cb_arg, rc);
    7917         202 :         free(ctx);
    7918         202 : }
    7919             : 
    7920             : static void
    7921         202 : bs_resize_freeze_cpl(void *cb_arg, int rc)
    7922             : {
                               /*
                                * Completion of blob_freeze_io(), started by spdk_blob_resize().
                                * If the freeze failed, the operation is aborted: the
                                * locked-operation flag is cleared, the user callback receives rc,
                                * and the context is freed. Otherwise the resize is performed and
                                * its result saved in ctx->rc, then I/O is unfrozen regardless of
                                * that result.
                                */
    7923         202 :         struct spdk_bs_resize_ctx *ctx = (struct spdk_bs_resize_ctx *)cb_arg;
    7924             : 
    7925         202 :         if (rc != 0) {
    7926           0 :                 ctx->blob->locked_operation_in_progress = false;
    7927           0 :                 ctx->cb_fn(ctx->cb_arg, rc);
    7928           0 :                 free(ctx);
    7929           0 :                 return;
    7930             :         }
    7931             : 
    7932         202 :         ctx->rc = blob_resize(ctx->blob, ctx->sz);
    7933             : 
                               /* The resize result is reported from bs_resize_unfreeze_cpl(). */
    7934         202 :         blob_unfreeze_io(ctx->blob, bs_resize_unfreeze_cpl, ctx);
    7935             : }
    7936             : 
    7937             : void
    7938         216 : spdk_blob_resize(struct spdk_blob *blob, uint64_t sz, spdk_blob_op_complete cb_fn, void *cb_arg)
    7939             : {
                               /*
                                * Entry point: resize an open blob to sz (logged below as a
                                * cluster count).
                                *
                                * The result is always delivered through cb_fn(cb_arg, status).
                                * Statuses produced directly in this function:
                                *   -EPERM   the blob's metadata is read-only (md_ro)
                                *   0        sz already equals the current number of clusters
                                *   -EBUSY   another locked operation is in progress on the blob
                                *   -ENOMEM  the operation context could not be allocated
                                * Otherwise the blob is marked locked, its I/O is frozen, and the
                                * flow continues in bs_resize_freeze_cpl().
                                */
    7940             :         struct spdk_bs_resize_ctx *ctx;
    7941             : 
    7942         216 :         blob_verify_md_op(blob);
    7943             : 
    7944         216 :         SPDK_DEBUGLOG(blob, "Resizing blob 0x%" PRIx64 " to %" PRIu64 " clusters\n", blob->id, sz);
    7945             : 
    7946         216 :         if (blob->md_ro) {
    7947           4 :                 cb_fn(cb_arg, -EPERM);
    7948           4 :                 return;
    7949             :         }
    7950             : 
                               /* Nothing to do: complete immediately without freezing I/O. */
    7951         212 :         if (sz == blob->active.num_clusters) {
    7952          10 :                 cb_fn(cb_arg, 0);
    7953          10 :                 return;
    7954             :         }
    7955             : 
    7956         202 :         if (blob->locked_operation_in_progress) {
    7957           0 :                 cb_fn(cb_arg, -EBUSY);
    7958           0 :                 return;
    7959             :         }
    7960             : 
    7961         202 :         ctx = calloc(1, sizeof(*ctx));
    7962         202 :         if (!ctx) {
    7963           0 :                 cb_fn(cb_arg, -ENOMEM);
    7964           0 :                 return;
    7965             :         }
    7966             : 
                               /* The flag is taken only after allocation succeeded, so the error paths above never leave it set. */
    7967         202 :         blob->locked_operation_in_progress = true;
    7968         202 :         ctx->cb_fn = cb_fn;
    7969         202 :         ctx->cb_arg = cb_arg;
    7970         202 :         ctx->blob = blob;
    7971         202 :         ctx->sz = sz;
    7972         202 :         blob_freeze_io(blob, bs_resize_freeze_cpl, ctx);
    7973             : }
    7974             : 
    7975             : /* END spdk_blob_resize */
    7976             : 
    7977             : 
    7978             : /* START spdk_bs_delete_blob */
    7979             : 
    7980             : static void
    7981        1492 : bs_delete_close_cpl(void *cb_arg, int bserrno)
    7982             : {
                               /*
                                * Close completion used by the delete path: cb_arg is the
                                * sequence, which is finished with bserrno.
                                */
    7983        1492 :         spdk_bs_sequence_t *seq = cb_arg;
    7984             : 
    7985        1492 :         bs_sequence_finish(seq, bserrno);
    7986        1492 : }
    7987             : 
    7988             : static void
    7989        1492 : bs_delete_persist_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
    7990             : {
                               /*
                                * Sequence callback of the delete path; cb_arg is the blob being
                                * deleted. On error the blob is freed here and the sequence is
                                * finished with that error; on success the blob is closed and the
                                * sequence is finished from bs_delete_close_cpl().
                                */
    7991        1492 :         struct spdk_blob *blob = cb_arg;
    7992             : 
    7993        1492 :         if (bserrno != 0) {
    7994             :                 /*
    7995             :                  * We already removed this blob from the blobstore tailq, so
    7996             :                  *  we need to free it here since this is the last reference
    7997             :                  *  to it.
    7998             :                  */
    7999           0 :                 blob_free(blob);
    8000           0 :                 bs_delete_close_cpl(seq, bserrno);
    8001           0 :                 return;
    8002             :         }
    8003             : 
    8004             :         /*
    8005             :          * This will immediately decrement the ref_count and call
    8006             :          *  the completion routine since the metadata state is clean.
    8007             :          *  By calling spdk_blob_close, we reduce the number of call
    8008             :          *  points into code that touches the blob->open_ref count
    8009             :          *  and the blobstore's blob list.
    8010             :          */
    8011        1492 :         spdk_blob_close(blob, bs_delete_close_cpl, seq);
    8012             : }
    8013             : 
    8014             : struct delete_snapshot_ctx {
                               /* State shared by the asynchronous steps of the snapshot-delete flow. */
                               /* Not referenced by the callbacks in this part of the file. */
    8015             :         struct spdk_blob_list *parent_snapshot_entry;
                               /* Snapshot being deleted; also handed to cb_fn on completion. */
    8016             :         struct spdk_blob *snapshot;
                               /* Released with spdk_free() in delete_blob_cleanup_finish(). */
    8017             :         struct spdk_blob_md_page *page;
                               /* md_ro value restored on the snapshot during cleanup. */
    8018             :         bool snapshot_md_ro;
                               /* The snapshot's clone that takes part in the operation. */
    8019             :         struct spdk_blob *clone;
                               /* md_ro value restored on the clone during cleanup. */
    8020             :         bool clone_md_ro;
                               /* User completion callback and its argument. */
    8021             :         spdk_blob_op_with_handle_complete cb_fn;
    8022             :         void *cb_arg;
                               /* Error recorded for the operation; 0 while no step has failed. */
    8023             :         int bserrno;
                               /* Not referenced by the callbacks in this part of the file. */
    8024             :         uint32_t next_extent_page;
    8025             : };
    8026             : 
    8027             : static void
    8028         110 : delete_blob_cleanup_finish(void *cb_arg, int bserrno)
    8029             : {
                               /*
                                * Final step of the delete flow. Records bserrno if no earlier
                                * error is stored, invokes the user callback with the snapshot
                                * handle and ctx->bserrno, then releases the metadata page and the
                                * context.
                                */
    8030         110 :         struct delete_snapshot_ctx *ctx = cb_arg;
    8031             : 
    8032         110 :         if (bserrno != 0) {
    8033           0 :                 SPDK_ERRLOG("Snapshot cleanup error %d\n", bserrno);
    8034             :         }
    8035             : 
    8036         110 :         assert(ctx != NULL);
    8037             : 
                               /* The first recorded error wins. */
    8038         110 :         if (bserrno != 0 && ctx->bserrno == 0) {
    8039           0 :                 ctx->bserrno = bserrno;
    8040             :         }
    8041             : 
    8042         110 :         ctx->cb_fn(ctx->cb_arg, ctx->snapshot, ctx->bserrno);
    8043         110 :         spdk_free(ctx->page);
    8044         110 :         free(ctx);
    8045         110 : }
    8046             : 
    8047             : static void
    8048          22 : delete_snapshot_cleanup_snapshot(void *cb_arg, int bserrno)
    8049             : {
                               /*
                                * Cleanup step for the snapshot; used as the close completion of
                                * the clone in delete_snapshot_cleanup_clone(). Restores the
                                * snapshot's state and closes it, ending in
                                * delete_blob_cleanup_finish().
                                */
    8050          22 :         struct delete_snapshot_ctx *ctx = cb_arg;
    8051             : 
                               /* Unlike the other steps, a non-zero bserrno here overwrites any earlier stored error. */
    8052          22 :         if (bserrno != 0) {
    8053           0 :                 ctx->bserrno = bserrno;
    8054           0 :                 SPDK_ERRLOG("Clone cleanup error %d\n", bserrno);
    8055             :         }
    8056             : 
                               /*
                                * The operation failed, so put the snapshot back among the
                                * blobstore's open blobs. The assert expects it to be absent from
                                * the lookup at this point; presumably it was removed earlier in
                                * the delete flow - confirm against the code outside this view.
                                */
    8057          22 :         if (ctx->bserrno != 0) {
    8058          22 :                 assert(blob_lookup(ctx->snapshot->bs, ctx->snapshot->id) == NULL);
    8059          22 :                 RB_INSERT(spdk_blob_tree, &ctx->snapshot->bs->open_blobs, ctx->snapshot);
    8060          22 :                 spdk_bit_array_set(ctx->snapshot->bs->open_blobids, ctx->snapshot->id);
    8061             :         }
    8062             : 
    8063          22 :         ctx->snapshot->locked_operation_in_progress = false;
    8064          22 :         ctx->snapshot->md_ro = ctx->snapshot_md_ro;
    8065             : 
    8066          22 :         spdk_blob_close(ctx->snapshot, delete_blob_cleanup_finish, ctx);
    8067          22 : }
    8068             : 
    8069             : static void
    8070          12 : delete_snapshot_cleanup_clone(void *cb_arg, int bserrno)
    8071             : {
                               /*
                                * Cleanup step for the clone: clears its locked-operation flag,
                                * restores its md_ro flag, and closes it, continuing in
                                * delete_snapshot_cleanup_snapshot().
                                *
                                * The bserrno argument is not examined here; the callers visible
                                * in this file store the failure in ctx->bserrno before invoking it.
                                */
    8072          12 :         struct delete_snapshot_ctx *ctx = cb_arg;
    8073             : 
    8074          12 :         ctx->clone->locked_operation_in_progress = false;
    8075          12 :         ctx->clone->md_ro = ctx->clone_md_ro;
    8076             : 
    8077          12 :         spdk_blob_close(ctx->clone, delete_snapshot_cleanup_snapshot, ctx);
    8078          12 : }
    8079             : 
    8080             : static void
    8081          48 : delete_snapshot_unfreeze_cpl(void *cb_arg, int bserrno)
    8082             : {
                               /*
                                * Completion of blob_unfreeze_io() on the clone, started by
                                * delete_snapshot_sync_snapshot_cpl(). On failure the error is
                                * stored and the clone/snapshot cleanup chain runs. On success
                                * the clone is unlocked and closed, finishing in
                                * delete_blob_cleanup_finish().
                                */
    8083          48 :         struct delete_snapshot_ctx *ctx = cb_arg;
    8084             : 
    8085          48 :         if (bserrno) {
    8086           0 :                 ctx->bserrno = bserrno;
    8087           0 :                 delete_snapshot_cleanup_clone(ctx, 0);
    8088           0 :                 return;
    8089             :         }
    8090             : 
    8091          48 :         ctx->clone->locked_operation_in_progress = false;
    8092          48 :         spdk_blob_close(ctx->clone, delete_blob_cleanup_finish, ctx);
    8093             : }
    8094             : 
    8095             : static void
    8096          52 : delete_snapshot_sync_snapshot_cpl(void *cb_arg, int bserrno)
    8097             : {
                               /*
                                * Completion callback taking the status of a metadata sync. On
                                * failure the error is stored and the cleanup chain runs.
                                * On success the in-memory snapshot/clone lists are updated so
                                * that the snapshot's single clone is detached from it (and
                                * re-attached to the snapshot's own parent, if it has one), the
                                * saved md_ro flags are restored, and I/O on the clone is unfrozen.
                                */
    8098          52 :         struct delete_snapshot_ctx *ctx = cb_arg;
    8099          52 :         struct spdk_blob_list *parent_snapshot_entry = NULL;
    8100          52 :         struct spdk_blob_list *snapshot_entry = NULL;
    8101          52 :         struct spdk_blob_list *clone_entry = NULL;
    8102          52 :         struct spdk_blob_list *snapshot_clone_entry = NULL;
    8103             : 
    8104          52 :         if (bserrno) {
    8105           4 :                 SPDK_ERRLOG("Failed to sync MD on blob\n");
    8106           4 :                 ctx->bserrno = bserrno;
    8107           4 :                 delete_snapshot_cleanup_clone(ctx, 0);
    8108           4 :                 return;
    8109             :         }
    8110             : 
    8111             :         /* Get snapshot entry for the snapshot we want to remove */
    8112          48 :         snapshot_entry = bs_get_snapshot_entry(ctx->snapshot->bs, ctx->snapshot->id);
    8113             : 
    8114          48 :         assert(snapshot_entry != NULL);
    8115             : 
    8116             :         /* Remove clone entry in this snapshot (at this point there can be only one clone) */
    8117          48 :         clone_entry = TAILQ_FIRST(&snapshot_entry->clones);
    8118          48 :         assert(clone_entry != NULL);
    8119          48 :         TAILQ_REMOVE(&snapshot_entry->clones, clone_entry, link);
    8120          48 :         snapshot_entry->clone_count--;
    8121          48 :         assert(TAILQ_EMPTY(&snapshot_entry->clones));
    8122             : 
    8123          48 :         switch (ctx->snapshot->parent_id) {
    8124          40 :         case SPDK_BLOBID_INVALID:
    8125             :         case SPDK_BLOBID_EXTERNAL_SNAPSHOT:
    8126             :                 /* No parent snapshot - just remove clone entry */
    8127          40 :                 free(clone_entry);
    8128          40 :                 break;
    8129           8 :         default:
    8130             :                 /* This snapshot is at the same time a clone of another snapshot - we need to
    8131             :                  * update parent snapshot (remove current clone, add new one inherited from
    8132             :                  * the snapshot that is being removed) */
    8133             : 
    8134             :                 /* Get snapshot entry for parent snapshot and clone entry within that snapshot for
    8135             :                  * snapshot that we are removing */
    8136           8 :                 blob_get_snapshot_and_clone_entries(ctx->snapshot, &parent_snapshot_entry,
    8137             :                                                     &snapshot_clone_entry);
    8138             : 
    8139             :                 /* Switch clone entry in parent snapshot */
                                       /*
                                        * One entry is added and one removed, so the parent's
                                        * clone_count is deliberately left unchanged.
                                        */
    8140           8 :                 TAILQ_INSERT_TAIL(&parent_snapshot_entry->clones, clone_entry, link);
    8141           8 :                 TAILQ_REMOVE(&parent_snapshot_entry->clones, snapshot_clone_entry, link);
    8142           8 :                 free(snapshot_clone_entry);
    8143             :         }
    8144             : 
    8145             :         /* Restore md_ro flags */
    8146          48 :         ctx->clone->md_ro = ctx->clone_md_ro;
    8147          48 :         ctx->snapshot->md_ro = ctx->snapshot_md_ro;
    8148             : 
    8149          48 :         blob_unfreeze_io(ctx->clone, delete_snapshot_unfreeze_cpl, ctx);
    8150             : }
    8151             : 
    8152             : static void
    8153          56 : delete_snapshot_sync_clone_cpl(void *cb_arg, int bserrno)
    8154             : {
    8155          56 :         struct delete_snapshot_ctx *ctx = cb_arg;
    8156             :         uint64_t i;
    8157             : 
    8158          56 :         ctx->snapshot->md_ro = false;
    8159             : 
    8160          56 :         if (bserrno) {
    8161           4 :                 SPDK_ERRLOG("Failed to sync MD on clone\n");
    8162           4 :                 ctx->bserrno = bserrno;
    8163             : 
    8164             :                 /* Restore snapshot to previous state */
    8165           4 :                 bserrno = blob_remove_xattr(ctx->snapshot, SNAPSHOT_PENDING_REMOVAL, true);
    8166           4 :                 if (bserrno != 0) {
    8167           0 :                         delete_snapshot_cleanup_clone(ctx, bserrno);
    8168           0 :                         return;
    8169             :                 }
    8170             : 
    8171           4 :                 spdk_blob_sync_md(ctx->snapshot, delete_snapshot_cleanup_clone, ctx);
    8172           4 :                 return;
    8173             :         }
    8174             : 
    8175             :         /* Clear cluster map entries for snapshot */
    8176         552 :         for (i = 0; i < ctx->snapshot->active.num_clusters && i < ctx->clone->active.num_clusters; i++) {
    8177         500 :                 if (ctx->clone->active.clusters[i] == ctx->snapshot->active.clusters[i]) {
    8178         492 :                         if (ctx->snapshot->active.clusters[i] != 0) {
    8179         328 :                                 ctx->snapshot->active.num_allocated_clusters--;
    8180             :                         }
    8181         492 :                         ctx->snapshot->active.clusters[i] = 0;
    8182             :                 }
    8183             :         }
    8184          78 :         for (i = 0; i < ctx->snapshot->active.num_extent_pages &&
    8185          52 :              i < ctx->clone->active.num_extent_pages; i++) {
    8186          26 :                 if (ctx->clone->active.extent_pages[i] == ctx->snapshot->active.extent_pages[i]) {
    8187          24 :                         ctx->snapshot->active.extent_pages[i] = 0;
    8188             :                 }
    8189             :         }
    8190             : 
    8191          52 :         blob_set_thin_provision(ctx->snapshot);
    8192          52 :         ctx->snapshot->state = SPDK_BLOB_STATE_DIRTY;
    8193             : 
    8194          52 :         if (ctx->parent_snapshot_entry != NULL) {
    8195           8 :                 ctx->snapshot->back_bs_dev = NULL;
    8196             :         }
    8197             : 
    8198          52 :         spdk_blob_sync_md(ctx->snapshot, delete_snapshot_sync_snapshot_cpl, ctx);
    8199             : }
    8200             : 
    8201             : static void
    8202          56 : delete_snapshot_update_extent_pages_cpl(struct delete_snapshot_ctx *ctx)
    8203             : {
    8204             :         int bserrno;
    8205             : 
    8206             :         /* Delete old backing bs_dev from clone (related to snapshot that will be removed) */
    8207          56 :         blob_back_bs_destroy(ctx->clone);
    8208             : 
    8209             :         /* Set/remove snapshot xattr and switch parent ID and backing bs_dev on clone... */
    8210          56 :         if (ctx->snapshot->parent_id == SPDK_BLOBID_EXTERNAL_SNAPSHOT) {
    8211           8 :                 bserrno = bs_snapshot_copy_xattr(ctx->clone, ctx->snapshot,
    8212             :                                                  BLOB_EXTERNAL_SNAPSHOT_ID);
    8213           8 :                 if (bserrno != 0) {
    8214           0 :                         ctx->bserrno = bserrno;
    8215             : 
    8216             :                         /* Restore snapshot to previous state */
    8217           0 :                         bserrno = blob_remove_xattr(ctx->snapshot, SNAPSHOT_PENDING_REMOVAL, true);
    8218           0 :                         if (bserrno != 0) {
    8219           0 :                                 delete_snapshot_cleanup_clone(ctx, bserrno);
    8220           0 :                                 return;
    8221             :                         }
    8222             : 
    8223           0 :                         spdk_blob_sync_md(ctx->snapshot, delete_snapshot_cleanup_clone, ctx);
    8224           0 :                         return;
    8225             :                 }
    8226           8 :                 ctx->clone->parent_id = SPDK_BLOBID_EXTERNAL_SNAPSHOT;
    8227           8 :                 ctx->clone->back_bs_dev = ctx->snapshot->back_bs_dev;
    8228             :                 /* Do not delete the external snapshot along with this snapshot */
    8229           8 :                 ctx->snapshot->back_bs_dev = NULL;
    8230           8 :                 ctx->clone->invalid_flags |= SPDK_BLOB_EXTERNAL_SNAPSHOT;
    8231          48 :         } else if (ctx->parent_snapshot_entry != NULL) {
    8232             :                 /* ...to parent snapshot */
    8233           8 :                 ctx->clone->parent_id = ctx->parent_snapshot_entry->id;
    8234           8 :                 ctx->clone->back_bs_dev = ctx->snapshot->back_bs_dev;
    8235           8 :                 blob_set_xattr(ctx->clone, BLOB_SNAPSHOT, &ctx->parent_snapshot_entry->id,
    8236             :                                sizeof(spdk_blob_id),
    8237             :                                true);
    8238             :         } else {
    8239             :                 /* ...to blobid invalid and zeroes dev */
    8240          40 :                 ctx->clone->parent_id = SPDK_BLOBID_INVALID;
    8241          40 :                 ctx->clone->back_bs_dev = bs_create_zeroes_dev();
    8242          40 :                 blob_remove_xattr(ctx->clone, BLOB_SNAPSHOT, true);
    8243             :         }
    8244             : 
    8245          56 :         spdk_blob_sync_md(ctx->clone, delete_snapshot_sync_clone_cpl, ctx);
    8246             : }
    8247             : 
    8248             : static void
    8249          58 : delete_snapshot_update_extent_pages(void *cb_arg, int bserrno)
    8250             : {
    8251          58 :         struct delete_snapshot_ctx *ctx = cb_arg;
    8252             :         uint32_t *extent_page;
    8253             :         uint64_t i;
    8254             : 
    8255          84 :         for (i = ctx->next_extent_page; i < ctx->snapshot->active.num_extent_pages &&
    8256          54 :              i < ctx->clone->active.num_extent_pages; i++) {
    8257          28 :                 if (ctx->snapshot->active.extent_pages[i] == 0) {
    8258             :                         /* No extent page to use from snapshot */
    8259           8 :                         continue;
    8260             :                 }
    8261             : 
    8262          20 :                 extent_page = &ctx->clone->active.extent_pages[i];
    8263          20 :                 if (*extent_page == 0) {
    8264             :                         /* Copy extent page from snapshot when clone did not have a matching one */
    8265          18 :                         *extent_page = ctx->snapshot->active.extent_pages[i];
    8266          18 :                         continue;
    8267             :                 }
    8268             : 
    8269             :                 /* Clone and snapshot both contain partially filled matching extent pages.
    8270             :                  * Update the clone extent page in place with cluster map containing the mix of both. */
    8271           2 :                 ctx->next_extent_page = i + 1;
    8272           2 :                 memset(ctx->page, 0, SPDK_BS_PAGE_SIZE);
    8273             : 
    8274           2 :                 blob_write_extent_page(ctx->clone, *extent_page, i * SPDK_EXTENTS_PER_EP, ctx->page,
    8275             :                                        delete_snapshot_update_extent_pages, ctx);
    8276           2 :                 return;
    8277             :         }
    8278          56 :         delete_snapshot_update_extent_pages_cpl(ctx);
    8279             : }
    8280             : 
    8281             : static void
    8282          60 : delete_snapshot_sync_snapshot_xattr_cpl(void *cb_arg, int bserrno)
    8283             : {
    8284          60 :         struct delete_snapshot_ctx *ctx = cb_arg;
    8285             :         uint64_t i;
    8286             : 
    8287             :         /* Temporarily override md_ro flag for clone for MD modification */
    8288          60 :         ctx->clone_md_ro = ctx->clone->md_ro;
    8289          60 :         ctx->clone->md_ro = false;
    8290             : 
    8291          60 :         if (bserrno) {
    8292           4 :                 SPDK_ERRLOG("Failed to sync MD with xattr on blob\n");
    8293           4 :                 ctx->bserrno = bserrno;
    8294           4 :                 delete_snapshot_cleanup_clone(ctx, 0);
    8295           4 :                 return;
    8296             :         }
    8297             : 
    8298             :         /* Copy snapshot map to clone map (only unallocated clusters in clone) */
    8299         596 :         for (i = 0; i < ctx->snapshot->active.num_clusters && i < ctx->clone->active.num_clusters; i++) {
    8300         540 :                 if (ctx->clone->active.clusters[i] == 0) {
    8301         532 :                         ctx->clone->active.clusters[i] = ctx->snapshot->active.clusters[i];
    8302         532 :                         if (ctx->clone->active.clusters[i] != 0) {
    8303         368 :                                 ctx->clone->active.num_allocated_clusters++;
    8304             :                         }
    8305             :                 }
    8306             :         }
    8307          56 :         ctx->next_extent_page = 0;
    8308          56 :         delete_snapshot_update_extent_pages(ctx, 0);
    8309             : }
    8310             : 
    8311             : static void
    8312           8 : delete_snapshot_esnap_channels_destroyed_cb(void *cb_arg, struct spdk_blob *blob, int bserrno)
    8313             : {
    8314           8 :         struct delete_snapshot_ctx *ctx = cb_arg;
    8315             : 
    8316           8 :         if (bserrno != 0) {
    8317           0 :                 SPDK_ERRLOG("blob 0x%" PRIx64 ": failed to destroy esnap channels: %d\n",
    8318             :                             blob->id, bserrno);
    8319             :                 /* That error should not stop us from syncing metadata. */
    8320             :         }
    8321             : 
    8322           8 :         spdk_blob_sync_md(ctx->snapshot, delete_snapshot_sync_snapshot_xattr_cpl, ctx);
    8323           8 : }
    8324             : 
    8325             : static void
    8326          60 : delete_snapshot_freeze_io_cb(void *cb_arg, int bserrno)
    8327             : {
    8328          60 :         struct delete_snapshot_ctx *ctx = cb_arg;
    8329             : 
    8330          60 :         if (bserrno) {
    8331           0 :                 SPDK_ERRLOG("Failed to freeze I/O on clone\n");
    8332           0 :                 ctx->bserrno = bserrno;
    8333           0 :                 delete_snapshot_cleanup_clone(ctx, 0);
    8334           0 :                 return;
    8335             :         }
    8336             : 
    8337             :         /* Temporarily override md_ro flag for snapshot for MD modification */
    8338          60 :         ctx->snapshot_md_ro = ctx->snapshot->md_ro;
    8339          60 :         ctx->snapshot->md_ro = false;
    8340             : 
    8341             :         /* Mark blob as pending for removal for power failure safety, use clone id for recovery */
    8342          60 :         ctx->bserrno = blob_set_xattr(ctx->snapshot, SNAPSHOT_PENDING_REMOVAL, &ctx->clone->id,
    8343             :                                       sizeof(spdk_blob_id), true);
    8344          60 :         if (ctx->bserrno != 0) {
    8345           0 :                 delete_snapshot_cleanup_clone(ctx, 0);
    8346           0 :                 return;
    8347             :         }
    8348             : 
    8349          60 :         if (blob_is_esnap_clone(ctx->snapshot)) {
    8350           8 :                 blob_esnap_destroy_bs_dev_channels(ctx->snapshot, false,
    8351             :                                                    delete_snapshot_esnap_channels_destroyed_cb,
    8352             :                                                    ctx);
    8353           8 :                 return;
    8354             :         }
    8355             : 
    8356          52 :         spdk_blob_sync_md(ctx->snapshot, delete_snapshot_sync_snapshot_xattr_cpl, ctx);
    8357             : }
    8358             : 
    8359             : static void
    8360          70 : delete_snapshot_open_clone_cb(void *cb_arg, struct spdk_blob *clone, int bserrno)
    8361             : {
    8362          70 :         struct delete_snapshot_ctx *ctx = cb_arg;
    8363             : 
    8364          70 :         if (bserrno) {
    8365          10 :                 SPDK_ERRLOG("Failed to open clone\n");
    8366          10 :                 ctx->bserrno = bserrno;
    8367          10 :                 delete_snapshot_cleanup_snapshot(ctx, 0);
    8368          10 :                 return;
    8369             :         }
    8370             : 
    8371          60 :         ctx->clone = clone;
    8372             : 
    8373          60 :         if (clone->locked_operation_in_progress) {
    8374           0 :                 SPDK_DEBUGLOG(blob, "Cannot remove blob - another operation in progress on its clone\n");
    8375           0 :                 ctx->bserrno = -EBUSY;
    8376           0 :                 spdk_blob_close(ctx->clone, delete_snapshot_cleanup_snapshot, ctx);
    8377           0 :                 return;
    8378             :         }
    8379             : 
    8380          60 :         clone->locked_operation_in_progress = true;
    8381             : 
    8382          60 :         blob_freeze_io(clone, delete_snapshot_freeze_io_cb, ctx);
    8383             : }
    8384             : 
    8385             : static void
    8386          70 : update_clone_on_snapshot_deletion(struct spdk_blob *snapshot, struct delete_snapshot_ctx *ctx)
    8387             : {
    8388          70 :         struct spdk_blob_list *snapshot_entry = NULL;
    8389          70 :         struct spdk_blob_list *clone_entry = NULL;
    8390          70 :         struct spdk_blob_list *snapshot_clone_entry = NULL;
    8391             : 
    8392             :         /* Get snapshot entry for the snapshot we want to remove */
    8393          70 :         snapshot_entry = bs_get_snapshot_entry(snapshot->bs, snapshot->id);
    8394             : 
    8395          70 :         assert(snapshot_entry != NULL);
    8396             : 
    8397             :         /* Get clone of the snapshot (at this point there can be only one clone) */
    8398          70 :         clone_entry = TAILQ_FIRST(&snapshot_entry->clones);
    8399          70 :         assert(snapshot_entry->clone_count == 1);
    8400          70 :         assert(clone_entry != NULL);
    8401             : 
    8402             :         /* Get snapshot entry for parent snapshot and clone entry within that snapshot for
    8403             :          * snapshot that we are removing */
    8404          70 :         blob_get_snapshot_and_clone_entries(snapshot, &ctx->parent_snapshot_entry,
    8405             :                                             &snapshot_clone_entry);
    8406             : 
    8407          70 :         spdk_bs_open_blob(snapshot->bs, clone_entry->id, delete_snapshot_open_clone_cb, ctx);
    8408          70 : }
    8409             : 
    8410             : static void
    8411        1554 : bs_delete_blob_finish(void *cb_arg, struct spdk_blob *blob, int bserrno)
    8412             : {
    8413        1554 :         spdk_bs_sequence_t *seq = cb_arg;
    8414        1554 :         struct spdk_blob_list *snapshot_entry = NULL;
    8415             :         uint32_t page_num;
    8416             : 
    8417        1554 :         if (bserrno) {
    8418          62 :                 SPDK_ERRLOG("Failed to remove blob\n");
    8419          62 :                 bs_sequence_finish(seq, bserrno);
    8420          62 :                 return;
    8421             :         }
    8422             : 
    8423             :         /* Remove snapshot from the list */
    8424        1492 :         snapshot_entry = bs_get_snapshot_entry(blob->bs, blob->id);
    8425        1492 :         if (snapshot_entry != NULL) {
    8426         144 :                 TAILQ_REMOVE(&blob->bs->snapshots, snapshot_entry, link);
    8427         144 :                 free(snapshot_entry);
    8428             :         }
    8429             : 
    8430        1492 :         page_num = bs_blobid_to_page(blob->id);
    8431        1492 :         spdk_bit_array_clear(blob->bs->used_blobids, page_num);
    8432        1492 :         blob->state = SPDK_BLOB_STATE_DIRTY;
    8433        1492 :         blob->active.num_pages = 0;
    8434        1492 :         blob_resize(blob, 0);
    8435             : 
    8436        1492 :         blob_persist(seq, blob, bs_delete_persist_cpl, blob);
    8437             : }
    8438             : 
    8439             : static int
    8440        1554 : bs_is_blob_deletable(struct spdk_blob *blob, bool *update_clone)
    8441             : {
    8442        1554 :         struct spdk_blob_list *snapshot_entry = NULL;
    8443        1554 :         struct spdk_blob_list *clone_entry = NULL;
    8444        1554 :         struct spdk_blob *clone = NULL;
    8445        1554 :         bool has_one_clone = false;
    8446             : 
    8447             :         /* Check if this is a snapshot with clones */
    8448        1554 :         snapshot_entry = bs_get_snapshot_entry(blob->bs, blob->id);
    8449        1554 :         if (snapshot_entry != NULL) {
    8450         194 :                 if (snapshot_entry->clone_count > 1) {
    8451          24 :                         SPDK_ERRLOG("Cannot remove snapshot with more than one clone\n");
    8452          24 :                         return -EBUSY;
    8453         170 :                 } else if (snapshot_entry->clone_count == 1) {
    8454          70 :                         has_one_clone = true;
    8455             :                 }
    8456             :         }
    8457             : 
    8458             :         /* Check if someone has this blob open (besides this delete context):
    8459             :          * - open_ref = 1 - only this context opened blob, so it is ok to remove it
    8460             :          * - open_ref <= 2 && has_one_clone = true - clone is holding snapshot
    8461             :          *      and that is ok, because we will update it accordingly */
    8462        1530 :         if (blob->open_ref <= 2 && has_one_clone) {
    8463          70 :                 clone_entry = TAILQ_FIRST(&snapshot_entry->clones);
    8464          70 :                 assert(clone_entry != NULL);
    8465          70 :                 clone = blob_lookup(blob->bs, clone_entry->id);
    8466             : 
    8467          70 :                 if (blob->open_ref == 2 && clone == NULL) {
    8468             :                         /* Clone is closed and someone else opened this blob */
    8469           0 :                         SPDK_ERRLOG("Cannot remove snapshot because it is open\n");
    8470           0 :                         return -EBUSY;
    8471             :                 }
    8472             : 
    8473          70 :                 *update_clone = true;
    8474          70 :                 return 0;
    8475             :         }
    8476             : 
    8477        1460 :         if (blob->open_ref > 1) {
    8478          16 :                 SPDK_ERRLOG("Cannot remove snapshot because it is open\n");
    8479          16 :                 return -EBUSY;
    8480             :         }
    8481             : 
    8482        1444 :         assert(has_one_clone == false);
    8483        1444 :         *update_clone = false;
    8484        1444 :         return 0;
    8485             : }
    8486             : 
    8487             : static void
    8488           0 : bs_delete_enomem_close_cpl(void *cb_arg, int bserrno)
    8489             : {
    8490           0 :         spdk_bs_sequence_t *seq = cb_arg;
    8491             : 
    8492           0 :         bs_sequence_finish(seq, -ENOMEM);
    8493           0 : }
    8494             : 
    8495             : static void
    8496        1564 : bs_delete_open_cpl(void *cb_arg, struct spdk_blob *blob, int bserrno)
    8497             : {
    8498        1564 :         spdk_bs_sequence_t *seq = cb_arg;
    8499             :         struct delete_snapshot_ctx *ctx;
    8500        1564 :         bool update_clone = false;
    8501             : 
    8502        1564 :         if (bserrno != 0) {
    8503          10 :                 bs_sequence_finish(seq, bserrno);
    8504          10 :                 return;
    8505             :         }
    8506             : 
    8507        1554 :         blob_verify_md_op(blob);
    8508             : 
    8509        1554 :         ctx = calloc(1, sizeof(*ctx));
    8510        1554 :         if (ctx == NULL) {
    8511           0 :                 spdk_blob_close(blob, bs_delete_enomem_close_cpl, seq);
    8512           0 :                 return;
    8513             :         }
    8514             : 
    8515        1554 :         ctx->snapshot = blob;
    8516        1554 :         ctx->cb_fn = bs_delete_blob_finish;
    8517        1554 :         ctx->cb_arg = seq;
    8518             : 
    8519             :         /* Check if blob can be removed and if it is a snapshot with clone on top of it */
    8520        1554 :         ctx->bserrno = bs_is_blob_deletable(blob, &update_clone);
    8521        1554 :         if (ctx->bserrno) {
    8522          40 :                 spdk_blob_close(blob, delete_blob_cleanup_finish, ctx);
    8523          40 :                 return;
    8524             :         }
    8525             : 
    8526        1514 :         if (blob->locked_operation_in_progress) {
    8527           0 :                 SPDK_DEBUGLOG(blob, "Cannot remove blob - another operation in progress\n");
    8528           0 :                 ctx->bserrno = -EBUSY;
    8529           0 :                 spdk_blob_close(blob, delete_blob_cleanup_finish, ctx);
    8530           0 :                 return;
    8531             :         }
    8532             : 
    8533        1514 :         blob->locked_operation_in_progress = true;
    8534             : 
    8535             :         /*
    8536             :          * Remove the blob from the blob_store list now, to ensure it does not
    8537             :          *  get returned after this point by blob_lookup().
    8538             :          */
    8539        1514 :         spdk_bit_array_clear(blob->bs->open_blobids, blob->id);
    8540        1514 :         RB_REMOVE(spdk_blob_tree, &blob->bs->open_blobs, blob);
    8541             : 
    8542        1514 :         if (update_clone) {
    8543          70 :                 ctx->page = spdk_zmalloc(SPDK_BS_PAGE_SIZE, 0, NULL, SPDK_ENV_SOCKET_ID_ANY, SPDK_MALLOC_DMA);
    8544          70 :                 if (!ctx->page) {
    8545           0 :                         ctx->bserrno = -ENOMEM;
    8546           0 :                         spdk_blob_close(blob, delete_blob_cleanup_finish, ctx);
    8547           0 :                         return;
    8548             :                 }
    8549             :                 /* This blob is a snapshot with active clone - update clone first */
    8550          70 :                 update_clone_on_snapshot_deletion(blob, ctx);
    8551             :         } else {
    8552             :                 /* This blob does not have any clones - just remove it */
    8553        1444 :                 bs_blob_list_remove(blob);
    8554        1444 :                 bs_delete_blob_finish(seq, blob, 0);
    8555        1444 :                 free(ctx);
    8556             :         }
    8557             : }
    8558             : 
    8559             : void
    8560        1564 : spdk_bs_delete_blob(struct spdk_blob_store *bs, spdk_blob_id blobid,
    8561             :                     spdk_blob_op_complete cb_fn, void *cb_arg)
    8562             : {
    8563        1564 :         struct spdk_bs_cpl      cpl;
    8564             :         spdk_bs_sequence_t      *seq;
    8565             : 
    8566        1564 :         SPDK_DEBUGLOG(blob, "Deleting blob 0x%" PRIx64 "\n", blobid);
    8567             : 
    8568        1564 :         assert(spdk_get_thread() == bs->md_thread);
    8569             : 
    8570        1564 :         cpl.type = SPDK_BS_CPL_TYPE_BLOB_BASIC;
    8571        1564 :         cpl.u.blob_basic.cb_fn = cb_fn;
    8572        1564 :         cpl.u.blob_basic.cb_arg = cb_arg;
    8573             : 
    8574        1564 :         seq = bs_sequence_start_bs(bs->md_channel, &cpl);
    8575        1564 :         if (!seq) {
    8576           0 :                 cb_fn(cb_arg, -ENOMEM);
    8577           0 :                 return;
    8578             :         }
    8579             : 
    8580        1564 :         spdk_bs_open_blob(bs, blobid, bs_delete_open_cpl, seq);
    8581             : }
    8582             : 
    8583             : /* END spdk_bs_delete_blob */
    8584             : 
    8585             : /* START spdk_bs_open_blob */
    8586             : 
    8587             : static void
    8588        3474 : bs_open_blob_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
    8589             : {
    8590        3474 :         struct spdk_blob *blob = cb_arg;
    8591             :         struct spdk_blob *existing;
    8592             : 
    8593        3474 :         if (bserrno != 0) {
    8594          64 :                 blob_free(blob);
    8595          64 :                 seq->cpl.u.blob_handle.blob = NULL;
    8596          64 :                 bs_sequence_finish(seq, bserrno);
    8597          64 :                 return;
    8598             :         }
    8599             : 
    8600        3410 :         existing = blob_lookup(blob->bs, blob->id);
    8601        3410 :         if (existing) {
    8602           4 :                 blob_free(blob);
    8603           4 :                 existing->open_ref++;
    8604           4 :                 seq->cpl.u.blob_handle.blob = existing;
    8605           4 :                 bs_sequence_finish(seq, 0);
    8606           4 :                 return;
    8607             :         }
    8608             : 
    8609        3406 :         blob->open_ref++;
    8610             : 
    8611        3406 :         spdk_bit_array_set(blob->bs->open_blobids, blob->id);
    8612        3406 :         RB_INSERT(spdk_blob_tree, &blob->bs->open_blobs, blob);
    8613             : 
    8614        3406 :         bs_sequence_finish(seq, bserrno);
    8615             : }
    8616             : 
    8617             : static inline void
    8618           4 : blob_open_opts_copy(const struct spdk_blob_open_opts *src, struct spdk_blob_open_opts *dst)
    8619             : {
    8620             : #define FIELD_OK(field) \
    8621             :         offsetof(struct spdk_blob_open_opts, field) + sizeof(src->field) <= src->opts_size
    8622             : 
    8623             : #define SET_FIELD(field) \
    8624             :         if (FIELD_OK(field)) { \
    8625             :                 dst->field = src->field; \
    8626             :         } \
    8627             : 
    8628           4 :         SET_FIELD(clear_method);
    8629           4 :         SET_FIELD(esnap_ctx);
    8630             : 
    8631           4 :         dst->opts_size = src->opts_size;
    8632             : 
    8633             :         /* You should not remove this statement, but need to update the assert statement
    8634             :          * if you add a new field, and also add a corresponding SET_FIELD statement */
    8635             :         SPDK_STATIC_ASSERT(sizeof(struct spdk_blob_open_opts) == 24, "Incorrect size");
    8636             : 
    8637             : #undef FIELD_OK
    8638             : #undef SET_FIELD
    8639           4 : }
    8640             : 
    8641             : static void
    8642        4279 : bs_open_blob(struct spdk_blob_store *bs,
    8643             :              spdk_blob_id blobid,
    8644             :              struct spdk_blob_open_opts *opts,
    8645             :              spdk_blob_op_with_handle_complete cb_fn,
    8646             :              void *cb_arg)
    8647             : {
    8648             :         struct spdk_blob                *blob;
    8649        4279 :         struct spdk_bs_cpl              cpl;
    8650        4279 :         struct spdk_blob_open_opts      opts_local;
    8651             :         spdk_bs_sequence_t              *seq;
    8652             :         uint32_t                        page_num;
    8653             : 
    8654        4279 :         SPDK_DEBUGLOG(blob, "Opening blob 0x%" PRIx64 "\n", blobid);
    8655        4279 :         assert(spdk_get_thread() == bs->md_thread);
    8656             : 
    8657        4279 :         page_num = bs_blobid_to_page(blobid);
    8658        4279 :         if (spdk_bit_array_get(bs->used_blobids, page_num) == false) {
    8659             :                 /* Invalid blobid */
    8660          48 :                 cb_fn(cb_arg, NULL, -ENOENT);
    8661          48 :                 return;
    8662             :         }
    8663             : 
    8664        4231 :         blob = blob_lookup(bs, blobid);
    8665        4231 :         if (blob) {
    8666         757 :                 blob->open_ref++;
    8667         757 :                 cb_fn(cb_arg, blob, 0);
    8668         757 :                 return;
    8669             :         }
    8670             : 
    8671        3474 :         blob = blob_alloc(bs, blobid);
    8672        3474 :         if (!blob) {
    8673           0 :                 cb_fn(cb_arg, NULL, -ENOMEM);
    8674           0 :                 return;
    8675             :         }
    8676             : 
    8677        3474 :         spdk_blob_open_opts_init(&opts_local, sizeof(opts_local));
    8678        3474 :         if (opts) {
    8679           4 :                 blob_open_opts_copy(opts, &opts_local);
    8680             :         }
    8681             : 
    8682        3474 :         blob->clear_method = opts_local.clear_method;
    8683             : 
    8684        3474 :         cpl.type = SPDK_BS_CPL_TYPE_BLOB_HANDLE;
    8685        3474 :         cpl.u.blob_handle.cb_fn = cb_fn;
    8686        3474 :         cpl.u.blob_handle.cb_arg = cb_arg;
    8687        3474 :         cpl.u.blob_handle.blob = blob;
    8688        3474 :         cpl.u.blob_handle.esnap_ctx = opts_local.esnap_ctx;
    8689             : 
    8690        3474 :         seq = bs_sequence_start_bs(bs->md_channel, &cpl);
    8691        3474 :         if (!seq) {
    8692           0 :                 blob_free(blob);
    8693           0 :                 cb_fn(cb_arg, NULL, -ENOMEM);
    8694           0 :                 return;
    8695             :         }
    8696             : 
    8697        3474 :         blob_load(seq, blob, bs_open_blob_cpl, blob);
    8698             : }
    8699             : 
    8700             : void
    8701        4275 : spdk_bs_open_blob(struct spdk_blob_store *bs, spdk_blob_id blobid,
    8702             :                   spdk_blob_op_with_handle_complete cb_fn, void *cb_arg)
    8703             : {
    8704        4275 :         bs_open_blob(bs, blobid, NULL, cb_fn, cb_arg);
    8705        4275 : }
    8706             : 
    8707             : void
    8708           4 : spdk_bs_open_blob_ext(struct spdk_blob_store *bs, spdk_blob_id blobid,
    8709             :                       struct spdk_blob_open_opts *opts, spdk_blob_op_with_handle_complete cb_fn, void *cb_arg)
    8710             : {
    8711           4 :         bs_open_blob(bs, blobid, opts, cb_fn, cb_arg);
    8712           4 : }
    8713             : 
    8714             : /* END spdk_bs_open_blob */
    8715             : 
    8716             : /* START spdk_blob_set_read_only */
    8717             : int
    8718         236 : spdk_blob_set_read_only(struct spdk_blob *blob)
    8719             : {
    8720         236 :         blob_verify_md_op(blob);
    8721             : 
    8722         236 :         blob->data_ro_flags |= SPDK_BLOB_READ_ONLY;
    8723             : 
    8724         236 :         blob->state = SPDK_BLOB_STATE_DIRTY;
    8725         236 :         return 0;
    8726             : }
    8727             : /* END spdk_blob_set_read_only */
    8728             : 
    8729             : /* START spdk_blob_sync_md */
    8730             : 
    8731             : static void
    8732        1607 : blob_sync_md_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
    8733             : {
    8734        1607 :         struct spdk_blob *blob = cb_arg;
    8735             : 
    8736        1607 :         if (bserrno == 0 && (blob->data_ro_flags & SPDK_BLOB_READ_ONLY)) {
    8737         404 :                 blob->data_ro = true;
    8738         404 :                 blob->md_ro = true;
    8739             :         }
    8740             : 
    8741        1607 :         bs_sequence_finish(seq, bserrno);
    8742        1607 : }
    8743             : 
    8744             : static void
    8745        1607 : blob_sync_md(struct spdk_blob *blob, spdk_blob_op_complete cb_fn, void *cb_arg)
    8746             : {
    8747        1607 :         struct spdk_bs_cpl      cpl;
    8748             :         spdk_bs_sequence_t      *seq;
    8749             : 
    8750        1607 :         cpl.type = SPDK_BS_CPL_TYPE_BLOB_BASIC;
    8751        1607 :         cpl.u.blob_basic.cb_fn = cb_fn;
    8752        1607 :         cpl.u.blob_basic.cb_arg = cb_arg;
    8753             : 
    8754        1607 :         seq = bs_sequence_start_bs(blob->bs->md_channel, &cpl);
    8755        1607 :         if (!seq) {
    8756           0 :                 cb_fn(cb_arg, -ENOMEM);
    8757           0 :                 return;
    8758             :         }
    8759             : 
    8760        1607 :         blob_persist(seq, blob, blob_sync_md_cpl, blob);
    8761             : }
    8762             : 
    8763             : void
    8764        1097 : spdk_blob_sync_md(struct spdk_blob *blob, spdk_blob_op_complete cb_fn, void *cb_arg)
    8765             : {
    8766        1097 :         blob_verify_md_op(blob);
    8767             : 
    8768        1097 :         SPDK_DEBUGLOG(blob, "Syncing blob 0x%" PRIx64 "\n", blob->id);
    8769             : 
    8770        1097 :         if (blob->md_ro) {
    8771           4 :                 assert(blob->state == SPDK_BLOB_STATE_CLEAN);
    8772           4 :                 cb_fn(cb_arg, 0);
    8773           4 :                 return;
    8774             :         }
    8775             : 
    8776        1093 :         blob_sync_md(blob, cb_fn, cb_arg);
    8777             : }
    8778             : 
    8779             : /* END spdk_blob_sync_md */
    8780             : 
    8781             : struct spdk_blob_cluster_op_ctx {
    8782             :         struct spdk_thread      *thread;
    8783             :         struct spdk_blob        *blob;
    8784             :         uint32_t                cluster_num;    /* cluster index in blob */
    8785             :         uint32_t                cluster;        /* cluster on disk */
    8786             :         uint32_t                extent_page;    /* extent page on disk */
    8787             :         struct spdk_blob_md_page *page; /* preallocated extent page */
    8788             :         int                     rc;
    8789             :         spdk_blob_op_complete   cb_fn;
    8790             :         void                    *cb_arg;
    8791             : };
    8792             : 
    8793             : static void
    8794         876 : blob_op_cluster_msg_cpl(void *arg)
    8795             : {
    8796         876 :         struct spdk_blob_cluster_op_ctx *ctx = arg;
    8797             : 
    8798         876 :         ctx->cb_fn(ctx->cb_arg, ctx->rc);
    8799         876 :         free(ctx);
    8800         876 : }
    8801             : 
    8802             : static void
    8803         846 : blob_op_cluster_msg_cb(void *arg, int bserrno)
    8804             : {
    8805         846 :         struct spdk_blob_cluster_op_ctx *ctx = arg;
    8806             : 
    8807         846 :         ctx->rc = bserrno;
    8808         846 :         spdk_thread_send_msg(ctx->thread, blob_op_cluster_msg_cpl, ctx);
    8809         846 : }
    8810             : 
    8811             : static void
    8812          82 : blob_insert_new_ep_cb(void *arg, int bserrno)
    8813             : {
    8814          82 :         struct spdk_blob_cluster_op_ctx *ctx = arg;
    8815             :         uint32_t *extent_page;
    8816             : 
    8817          82 :         extent_page = bs_cluster_to_extent_page(ctx->blob, ctx->cluster_num);
    8818          82 :         *extent_page = ctx->extent_page;
    8819          82 :         ctx->blob->state = SPDK_BLOB_STATE_DIRTY;
    8820          82 :         blob_sync_md(ctx->blob, blob_op_cluster_msg_cb, ctx);
    8821          82 : }
    8822             : 
    8823             : struct spdk_blob_write_extent_page_ctx {
    8824             :         struct spdk_blob_store          *bs;
    8825             : 
    8826             :         uint32_t                        extent;
    8827             :         struct spdk_blob_md_page        *page;
    8828             : };
    8829             : 
    8830             : static void
    8831          26 : blob_free_cluster_msg_cb(void *arg, int bserrno)
    8832             : {
    8833          26 :         struct spdk_blob_cluster_op_ctx *ctx = arg;
    8834             : 
    8835          26 :         spdk_spin_lock(&ctx->blob->bs->used_lock);
    8836          26 :         bs_release_cluster(ctx->blob->bs, ctx->cluster);
    8837          26 :         spdk_spin_unlock(&ctx->blob->bs->used_lock);
    8838             : 
    8839          26 :         ctx->rc = bserrno;
    8840          26 :         spdk_thread_send_msg(ctx->thread, blob_op_cluster_msg_cpl, ctx);
    8841          26 : }
    8842             : 
    8843             : static void
    8844          26 : blob_free_cluster_update_ep_cb(void *arg, int bserrno)
    8845             : {
    8846          26 :         struct spdk_blob_cluster_op_ctx *ctx = arg;
    8847             : 
    8848          26 :         if (bserrno != 0 || ctx->blob->bs->clean == 0) {
    8849          26 :                 blob_free_cluster_msg_cb(ctx, bserrno);
    8850          26 :                 return;
    8851             :         }
    8852             : 
    8853           0 :         ctx->blob->state = SPDK_BLOB_STATE_DIRTY;
    8854           0 :         blob_sync_md(ctx->blob, blob_free_cluster_msg_cb, ctx);
    8855             : }
    8856             : 
    8857             : static void
    8858           0 : blob_free_cluster_free_ep_cb(void *arg, int bserrno)
    8859             : {
    8860           0 :         struct spdk_blob_cluster_op_ctx *ctx = arg;
    8861             : 
    8862           0 :         spdk_spin_lock(&ctx->blob->bs->used_lock);
    8863           0 :         assert(spdk_bit_array_get(ctx->blob->bs->used_md_pages, ctx->extent_page) == true);
    8864           0 :         bs_release_md_page(ctx->blob->bs, ctx->extent_page);
    8865           0 :         spdk_spin_unlock(&ctx->blob->bs->used_lock);
    8866           0 :         ctx->blob->state = SPDK_BLOB_STATE_DIRTY;
    8867           0 :         blob_sync_md(ctx->blob, blob_free_cluster_msg_cb, ctx);
    8868           0 : }
    8869             : 
    8870             : static void
    8871         434 : blob_persist_extent_page_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
    8872             : {
    8873         434 :         struct spdk_blob_write_extent_page_ctx *ctx = cb_arg;
    8874             : 
    8875         434 :         free(ctx);
    8876         434 :         bs_sequence_finish(seq, bserrno);
    8877         434 : }
    8878             : 
    8879             : static void
    8880         434 : blob_write_extent_page_ready(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
    8881             : {
    8882         434 :         struct spdk_blob_write_extent_page_ctx *ctx = cb_arg;
    8883             : 
    8884         434 :         if (bserrno != 0) {
    8885           0 :                 blob_persist_extent_page_cpl(seq, ctx, bserrno);
    8886           0 :                 return;
    8887             :         }
    8888         434 :         bs_sequence_write_dev(seq, ctx->page, bs_md_page_to_lba(ctx->bs, ctx->extent),
    8889         434 :                               bs_byte_to_lba(ctx->bs, SPDK_BS_PAGE_SIZE),
    8890             :                               blob_persist_extent_page_cpl, ctx);
    8891             : }
    8892             : 
    8893             : static void
    8894         434 : blob_write_extent_page(struct spdk_blob *blob, uint32_t extent, uint64_t cluster_num,
    8895             :                        struct spdk_blob_md_page *page, spdk_blob_op_complete cb_fn, void *cb_arg)
    8896             : {
    8897             :         struct spdk_blob_write_extent_page_ctx  *ctx;
    8898             :         spdk_bs_sequence_t                      *seq;
    8899         434 :         struct spdk_bs_cpl                      cpl;
    8900             : 
    8901         434 :         ctx = calloc(1, sizeof(*ctx));
    8902         434 :         if (!ctx) {
    8903           0 :                 cb_fn(cb_arg, -ENOMEM);
    8904           0 :                 return;
    8905             :         }
    8906         434 :         ctx->bs = blob->bs;
    8907         434 :         ctx->extent = extent;
    8908         434 :         ctx->page = page;
    8909             : 
    8910         434 :         cpl.type = SPDK_BS_CPL_TYPE_BLOB_BASIC;
    8911         434 :         cpl.u.blob_basic.cb_fn = cb_fn;
    8912         434 :         cpl.u.blob_basic.cb_arg = cb_arg;
    8913             : 
    8914         434 :         seq = bs_sequence_start_bs(blob->bs->md_channel, &cpl);
    8915         434 :         if (!seq) {
    8916           0 :                 free(ctx);
    8917           0 :                 cb_fn(cb_arg, -ENOMEM);
    8918           0 :                 return;
    8919             :         }
    8920             : 
    8921         434 :         assert(page);
    8922         434 :         page->next = SPDK_INVALID_MD_PAGE;
    8923         434 :         page->id = blob->id;
    8924         434 :         page->sequence_num = 0;
    8925             : 
    8926         434 :         blob_serialize_extent_page(blob, cluster_num, page);
    8927             : 
    8928         434 :         page->crc = blob_md_page_calc_crc(page);
    8929             : 
    8930         434 :         assert(spdk_bit_array_get(blob->bs->used_md_pages, extent) == true);
    8931             : 
    8932         434 :         bs_mark_dirty(seq, blob->bs, blob_write_extent_page_ready, ctx);
    8933             : }
    8934             : 
    8935             : static void
    8936         816 : blob_insert_cluster_msg(void *arg)
    8937             : {
    8938         816 :         struct spdk_blob_cluster_op_ctx *ctx = arg;
    8939             :         uint32_t *extent_page;
    8940             : 
    8941         816 :         ctx->rc = blob_insert_cluster(ctx->blob, ctx->cluster_num, ctx->cluster);
    8942         816 :         if (ctx->rc != 0) {
    8943           4 :                 spdk_thread_send_msg(ctx->thread, blob_op_cluster_msg_cpl, ctx);
    8944           4 :                 return;
    8945             :         }
    8946             : 
    8947         812 :         if (ctx->blob->use_extent_table == false) {
    8948             :                 /* Extent table is not used, proceed with sync of md that will only use extents_rle. */
    8949         406 :                 ctx->blob->state = SPDK_BLOB_STATE_DIRTY;
    8950         406 :                 blob_sync_md(ctx->blob, blob_op_cluster_msg_cb, ctx);
    8951         406 :                 return;
    8952             :         }
    8953             : 
    8954         406 :         extent_page = bs_cluster_to_extent_page(ctx->blob, ctx->cluster_num);
    8955         406 :         if (*extent_page == 0) {
    8956             :                 /* Extent page requires allocation.
    8957             :                  * It was already claimed in the used_md_pages map and placed in ctx. */
    8958          82 :                 assert(ctx->extent_page != 0);
    8959          82 :                 assert(spdk_bit_array_get(ctx->blob->bs->used_md_pages, ctx->extent_page) == true);
    8960          82 :                 blob_write_extent_page(ctx->blob, ctx->extent_page, ctx->cluster_num, ctx->page,
    8961             :                                        blob_insert_new_ep_cb, ctx);
    8962             :         } else {
    8963             :                 /* It is possible for original thread to allocate extent page for
    8964             :                  * different cluster in the same extent page. In such case proceed with
    8965             :                  * updating the existing extent page, but release the additional one. */
    8966         324 :                 if (ctx->extent_page != 0) {
    8967           0 :                         spdk_spin_lock(&ctx->blob->bs->used_lock);
    8968           0 :                         assert(spdk_bit_array_get(ctx->blob->bs->used_md_pages, ctx->extent_page) == true);
    8969           0 :                         bs_release_md_page(ctx->blob->bs, ctx->extent_page);
    8970           0 :                         spdk_spin_unlock(&ctx->blob->bs->used_lock);
    8971           0 :                         ctx->extent_page = 0;
    8972             :                 }
    8973             :                 /* Extent page already allocated.
    8974             :                  * Every cluster allocation, requires just an update of single extent page. */
    8975         324 :                 blob_write_extent_page(ctx->blob, *extent_page, ctx->cluster_num, ctx->page,
    8976             :                                        blob_op_cluster_msg_cb, ctx);
    8977             :         }
    8978             : }
    8979             : 
    8980             : static void
    8981         816 : blob_insert_cluster_on_md_thread(struct spdk_blob *blob, uint32_t cluster_num,
    8982             :                                  uint64_t cluster, uint32_t extent_page, struct spdk_blob_md_page *page,
    8983             :                                  spdk_blob_op_complete cb_fn, void *cb_arg)
    8984             : {
    8985             :         struct spdk_blob_cluster_op_ctx *ctx;
    8986             : 
    8987         816 :         ctx = calloc(1, sizeof(*ctx));
    8988         816 :         if (ctx == NULL) {
    8989           0 :                 cb_fn(cb_arg, -ENOMEM);
    8990           0 :                 return;
    8991             :         }
    8992             : 
    8993         816 :         ctx->thread = spdk_get_thread();
    8994         816 :         ctx->blob = blob;
    8995         816 :         ctx->cluster_num = cluster_num;
    8996         816 :         ctx->cluster = cluster;
    8997         816 :         ctx->extent_page = extent_page;
    8998         816 :         ctx->page = page;
    8999         816 :         ctx->cb_fn = cb_fn;
    9000         816 :         ctx->cb_arg = cb_arg;
    9001             : 
    9002         816 :         spdk_thread_send_msg(blob->bs->md_thread, blob_insert_cluster_msg, ctx);
    9003             : }
    9004             : 
    9005             : static void
    9006          60 : blob_free_cluster_msg(void *arg)
    9007             : {
    9008          60 :         struct spdk_blob_cluster_op_ctx *ctx = arg;
    9009             :         uint32_t *extent_page;
    9010             :         uint32_t start_cluster_idx;
    9011          60 :         bool free_extent_page = true;
    9012             :         size_t i;
    9013             : 
    9014          60 :         ctx->cluster = bs_lba_to_cluster(ctx->blob->bs, ctx->blob->active.clusters[ctx->cluster_num]);
    9015             : 
    9016             :         /* There were concurrent unmaps to the same cluster, only release the cluster on the first one */
    9017          60 :         if (ctx->cluster == 0) {
    9018           8 :                 blob_op_cluster_msg_cb(ctx, 0);
    9019           8 :                 return;
    9020             :         }
    9021             : 
    9022          52 :         ctx->blob->active.clusters[ctx->cluster_num] = 0;
    9023          52 :         if (ctx->cluster != 0) {
    9024          52 :                 ctx->blob->active.num_allocated_clusters--;
    9025             :         }
    9026             : 
    9027          52 :         if (ctx->blob->use_extent_table == false) {
    9028             :                 /* Extent table is not used, proceed with sync of md that will only use extents_rle. */
    9029          26 :                 spdk_spin_lock(&ctx->blob->bs->used_lock);
    9030          26 :                 bs_release_cluster(ctx->blob->bs, ctx->cluster);
    9031          26 :                 spdk_spin_unlock(&ctx->blob->bs->used_lock);
    9032          26 :                 ctx->blob->state = SPDK_BLOB_STATE_DIRTY;
    9033          26 :                 blob_sync_md(ctx->blob, blob_op_cluster_msg_cb, ctx);
    9034          26 :                 return;
    9035             :         }
    9036             : 
    9037          26 :         extent_page = bs_cluster_to_extent_page(ctx->blob, ctx->cluster_num);
    9038             : 
    9039             :         /* There shouldn't be parallel release operations on same cluster */
    9040          26 :         assert(*extent_page == ctx->extent_page);
    9041             : 
    9042          26 :         start_cluster_idx = (ctx->cluster_num / SPDK_EXTENTS_PER_EP) * SPDK_EXTENTS_PER_EP;
    9043          48 :         for (i = 0; i < SPDK_EXTENTS_PER_EP; ++i) {
    9044          48 :                 if (ctx->blob->active.clusters[start_cluster_idx + i] != 0) {
    9045          26 :                         free_extent_page = false;
    9046          26 :                         break;
    9047             :                 }
    9048             :         }
    9049             : 
    9050          26 :         if (free_extent_page) {
    9051           0 :                 assert(ctx->extent_page != 0);
    9052           0 :                 assert(spdk_bit_array_get(ctx->blob->bs->used_md_pages, ctx->extent_page) == true);
    9053           0 :                 ctx->blob->active.extent_pages[bs_cluster_to_extent_table_id(ctx->cluster_num)] = 0;
    9054           0 :                 blob_write_extent_page(ctx->blob, ctx->extent_page, ctx->cluster_num, ctx->page,
    9055             :                                        blob_free_cluster_free_ep_cb, ctx);
    9056             :         } else {
    9057          26 :                 blob_write_extent_page(ctx->blob, *extent_page, ctx->cluster_num, ctx->page,
    9058             :                                        blob_free_cluster_update_ep_cb, ctx);
    9059             :         }
    9060             : }
    9061             : 
    9062             : 
    9063             : static void
    9064          60 : blob_free_cluster_on_md_thread(struct spdk_blob *blob, uint32_t cluster_num, uint32_t extent_page,
    9065             :                                struct spdk_blob_md_page *page, spdk_blob_op_complete cb_fn, void *cb_arg)
    9066             : {
    9067             :         struct spdk_blob_cluster_op_ctx *ctx;
    9068             : 
    9069          60 :         ctx = calloc(1, sizeof(*ctx));
    9070          60 :         if (ctx == NULL) {
    9071           0 :                 cb_fn(cb_arg, -ENOMEM);
    9072           0 :                 return;
    9073             :         }
    9074             : 
    9075          60 :         ctx->thread = spdk_get_thread();
    9076          60 :         ctx->blob = blob;
    9077          60 :         ctx->cluster_num = cluster_num;
    9078          60 :         ctx->extent_page = extent_page;
    9079          60 :         ctx->page = page;
    9080          60 :         ctx->cb_fn = cb_fn;
    9081          60 :         ctx->cb_arg = cb_arg;
    9082             : 
    9083          60 :         spdk_thread_send_msg(blob->bs->md_thread, blob_free_cluster_msg, ctx);
    9084             : }
    9085             : 
    9086             : /* START spdk_blob_close */
    9087             : 
    9088             : static void
    9089        4167 : blob_close_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
    9090             : {
    9091        4167 :         struct spdk_blob *blob = cb_arg;
    9092             : 
    9093        4167 :         if (bserrno == 0) {
    9094        4167 :                 blob->open_ref--;
    9095        4167 :                 if (blob->open_ref == 0) {
    9096             :                         /*
    9097             :                          * Blobs with active.num_pages == 0 are deleted blobs.
    9098             :                          *  these blobs are removed from the blob_store list
    9099             :                          *  when the deletion process starts - so don't try to
    9100             :                          *  remove them again.
    9101             :                          */
    9102        3406 :                         if (blob->active.num_pages > 0) {
    9103        1914 :                                 spdk_bit_array_clear(blob->bs->open_blobids, blob->id);
    9104        1914 :                                 RB_REMOVE(spdk_blob_tree, &blob->bs->open_blobs, blob);
    9105             :                         }
    9106        3406 :                         blob_free(blob);
    9107             :                 }
    9108             :         }
    9109             : 
    9110        4167 :         bs_sequence_finish(seq, bserrno);
    9111        4167 : }
    9112             : 
    9113             : static void
    9114         120 : blob_close_esnap_done(void *cb_arg, struct spdk_blob *blob, int bserrno)
    9115             : {
    9116         120 :         spdk_bs_sequence_t      *seq = cb_arg;
    9117             : 
    9118         120 :         if (bserrno != 0) {
    9119           0 :                 SPDK_DEBUGLOG(blob_esnap, "blob 0x%" PRIx64 ": close failed with error %d\n",
    9120             :                               blob->id, bserrno);
    9121           0 :                 bs_sequence_finish(seq, bserrno);
    9122           0 :                 return;
    9123             :         }
    9124             : 
    9125         120 :         SPDK_DEBUGLOG(blob_esnap, "blob 0x%" PRIx64 ": closed, syncing metadata on thread %s\n",
    9126             :                       blob->id, spdk_thread_get_name(spdk_get_thread()));
    9127             : 
    9128             :         /* Sync metadata */
    9129         120 :         blob_persist(seq, blob, blob_close_cpl, blob);
    9130             : }
    9131             : 
    9132             : void
    9133        4167 : spdk_blob_close(struct spdk_blob *blob, spdk_blob_op_complete cb_fn, void *cb_arg)
    9134             : {
    9135        4167 :         struct spdk_bs_cpl      cpl;
    9136             :         spdk_bs_sequence_t      *seq;
    9137             : 
    9138        4167 :         blob_verify_md_op(blob);
    9139             : 
    9140        4167 :         SPDK_DEBUGLOG(blob, "Closing blob 0x%" PRIx64 "\n", blob->id);
    9141             : 
    9142        4167 :         if (blob->open_ref == 0) {
    9143           0 :                 cb_fn(cb_arg, -EBADF);
    9144           0 :                 return;
    9145             :         }
    9146             : 
    9147        4167 :         cpl.type = SPDK_BS_CPL_TYPE_BLOB_BASIC;
    9148        4167 :         cpl.u.blob_basic.cb_fn = cb_fn;
    9149        4167 :         cpl.u.blob_basic.cb_arg = cb_arg;
    9150             : 
    9151        4167 :         seq = bs_sequence_start_bs(blob->bs->md_channel, &cpl);
    9152        4167 :         if (!seq) {
    9153           0 :                 cb_fn(cb_arg, -ENOMEM);
    9154           0 :                 return;
    9155             :         }
    9156             : 
    9157        4167 :         if (blob->open_ref == 1 && blob_is_esnap_clone(blob)) {
    9158         120 :                 blob_esnap_destroy_bs_dev_channels(blob, false, blob_close_esnap_done, seq);
    9159         120 :                 return;
    9160             :         }
    9161             : 
    9162             :         /* Sync metadata */
    9163        4047 :         blob_persist(seq, blob, blob_close_cpl, blob);
    9164             : }
    9165             : 
    9166             : /* END spdk_blob_close */
    9167             : 
    9168         233 : struct spdk_io_channel *spdk_bs_alloc_io_channel(struct spdk_blob_store *bs)
    9169             : {
    9170         233 :         return spdk_get_io_channel(bs);
    9171             : }
    9172             : 
    9173             : void
    9174         233 : spdk_bs_free_io_channel(struct spdk_io_channel *channel)
    9175             : {
    9176         233 :         blob_esnap_destroy_bs_channel(spdk_io_channel_get_ctx(channel));
    9177         233 :         spdk_put_io_channel(channel);
    9178         233 : }
    9179             : 
    9180             : void
    9181         108 : spdk_blob_io_unmap(struct spdk_blob *blob, struct spdk_io_channel *channel,
    9182             :                    uint64_t offset, uint64_t length, spdk_blob_op_complete cb_fn, void *cb_arg)
    9183             : {
    9184         108 :         blob_request_submit_op(blob, channel, NULL, offset, length, cb_fn, cb_arg,
    9185             :                                SPDK_BLOB_UNMAP);
    9186         108 : }
    9187             : 
    9188             : void
    9189          48 : spdk_blob_io_write_zeroes(struct spdk_blob *blob, struct spdk_io_channel *channel,
    9190             :                           uint64_t offset, uint64_t length, spdk_blob_op_complete cb_fn, void *cb_arg)
    9191             : {
    9192          48 :         blob_request_submit_op(blob, channel, NULL, offset, length, cb_fn, cb_arg,
    9193             :                                SPDK_BLOB_WRITE_ZEROES);
    9194          48 : }
    9195             : 
    9196             : void
    9197       20868 : spdk_blob_io_write(struct spdk_blob *blob, struct spdk_io_channel *channel,
    9198             :                    void *payload, uint64_t offset, uint64_t length,
    9199             :                    spdk_blob_op_complete cb_fn, void *cb_arg)
    9200             : {
    9201       20868 :         blob_request_submit_op(blob, channel, payload, offset, length, cb_fn, cb_arg,
    9202             :                                SPDK_BLOB_WRITE);
    9203       20868 : }
    9204             : 
    9205             : void
    9206       17500 : spdk_blob_io_read(struct spdk_blob *blob, struct spdk_io_channel *channel,
    9207             :                   void *payload, uint64_t offset, uint64_t length,
    9208             :                   spdk_blob_op_complete cb_fn, void *cb_arg)
    9209             : {
    9210       17500 :         blob_request_submit_op(blob, channel, payload, offset, length, cb_fn, cb_arg,
    9211             :                                SPDK_BLOB_READ);
    9212       17500 : }
    9213             : 
    9214             : void
    9215         140 : spdk_blob_io_writev(struct spdk_blob *blob, struct spdk_io_channel *channel,
    9216             :                     struct iovec *iov, int iovcnt, uint64_t offset, uint64_t length,
    9217             :                     spdk_blob_op_complete cb_fn, void *cb_arg)
    9218             : {
    9219         140 :         blob_request_submit_rw_iov(blob, channel, iov, iovcnt, offset, length, cb_fn, cb_arg, false, NULL);
    9220         140 : }
    9221             : 
    9222             : void
    9223         940 : spdk_blob_io_readv(struct spdk_blob *blob, struct spdk_io_channel *channel,
    9224             :                    struct iovec *iov, int iovcnt, uint64_t offset, uint64_t length,
    9225             :                    spdk_blob_op_complete cb_fn, void *cb_arg)
    9226             : {
    9227         940 :         blob_request_submit_rw_iov(blob, channel, iov, iovcnt, offset, length, cb_fn, cb_arg, true, NULL);
    9228         940 : }
    9229             : 
    9230             : void
    9231         208 : spdk_blob_io_writev_ext(struct spdk_blob *blob, struct spdk_io_channel *channel,
    9232             :                         struct iovec *iov, int iovcnt, uint64_t offset, uint64_t length,
    9233             :                         spdk_blob_op_complete cb_fn, void *cb_arg, struct spdk_blob_ext_io_opts *io_opts)
    9234             : {
    9235         208 :         blob_request_submit_rw_iov(blob, channel, iov, iovcnt, offset, length, cb_fn, cb_arg, false,
    9236             :                                    io_opts);
    9237         208 : }
    9238             : 
    9239             : void
    9240        1300 : spdk_blob_io_readv_ext(struct spdk_blob *blob, struct spdk_io_channel *channel,
    9241             :                        struct iovec *iov, int iovcnt, uint64_t offset, uint64_t length,
    9242             :                        spdk_blob_op_complete cb_fn, void *cb_arg, struct spdk_blob_ext_io_opts *io_opts)
    9243             : {
    9244        1300 :         blob_request_submit_rw_iov(blob, channel, iov, iovcnt, offset, length, cb_fn, cb_arg, true,
    9245             :                                    io_opts);
    9246        1300 : }
    9247             : 
    9248             : struct spdk_bs_iter_ctx {
    9249             :         int64_t page_num;
    9250             :         struct spdk_blob_store *bs;
    9251             : 
    9252             :         spdk_blob_op_with_handle_complete cb_fn;
    9253             :         void *cb_arg;
    9254             : };
    9255             : 
    9256             : static void
    9257        1164 : bs_iter_cpl(void *cb_arg, struct spdk_blob *_blob, int bserrno)
    9258             : {
    9259        1164 :         struct spdk_bs_iter_ctx *ctx = cb_arg;
    9260        1164 :         struct spdk_blob_store *bs = ctx->bs;
    9261             :         spdk_blob_id id;
    9262             : 
    9263        1164 :         if (bserrno == 0) {
    9264         444 :                 ctx->cb_fn(ctx->cb_arg, _blob, bserrno);
    9265         444 :                 free(ctx);
    9266         444 :                 return;
    9267             :         }
    9268             : 
    9269         720 :         ctx->page_num++;
    9270         720 :         ctx->page_num = spdk_bit_array_find_first_set(bs->used_blobids, ctx->page_num);
    9271         720 :         if (ctx->page_num >= spdk_bit_array_capacity(bs->used_blobids)) {
    9272         268 :                 ctx->cb_fn(ctx->cb_arg, NULL, -ENOENT);
    9273         268 :                 free(ctx);
    9274         268 :                 return;
    9275             :         }
    9276             : 
    9277         452 :         id = bs_page_to_blobid(ctx->page_num);
    9278             : 
    9279         452 :         spdk_bs_open_blob(bs, id, bs_iter_cpl, ctx);
    9280             : }
    9281             : 
    9282             : void
    9283         292 : spdk_bs_iter_first(struct spdk_blob_store *bs,
    9284             :                    spdk_blob_op_with_handle_complete cb_fn, void *cb_arg)
    9285             : {
    9286             :         struct spdk_bs_iter_ctx *ctx;
    9287             : 
    9288         292 :         ctx = calloc(1, sizeof(*ctx));
    9289         292 :         if (!ctx) {
    9290           0 :                 cb_fn(cb_arg, NULL, -ENOMEM);
    9291           0 :                 return;
    9292             :         }
    9293             : 
    9294         292 :         ctx->page_num = -1;
    9295         292 :         ctx->bs = bs;
    9296         292 :         ctx->cb_fn = cb_fn;
    9297         292 :         ctx->cb_arg = cb_arg;
    9298             : 
    9299         292 :         bs_iter_cpl(ctx, NULL, -1);
    9300             : }
    9301             : 
    9302             : static void
    9303         420 : bs_iter_close_cpl(void *cb_arg, int bserrno)
    9304             : {
    9305         420 :         struct spdk_bs_iter_ctx *ctx = cb_arg;
    9306             : 
    9307         420 :         bs_iter_cpl(ctx, NULL, -1);
    9308         420 : }
    9309             : 
    9310             : void
    9311         420 : spdk_bs_iter_next(struct spdk_blob_store *bs, struct spdk_blob *blob,
    9312             :                   spdk_blob_op_with_handle_complete cb_fn, void *cb_arg)
    9313             : {
    9314             :         struct spdk_bs_iter_ctx *ctx;
    9315             : 
    9316         420 :         assert(blob != NULL);
    9317             : 
    9318         420 :         ctx = calloc(1, sizeof(*ctx));
    9319         420 :         if (!ctx) {
    9320           0 :                 cb_fn(cb_arg, NULL, -ENOMEM);
    9321           0 :                 return;
    9322             :         }
    9323             : 
    9324         420 :         ctx->page_num = bs_blobid_to_page(blob->id);
    9325         420 :         ctx->bs = bs;
    9326         420 :         ctx->cb_fn = cb_fn;
    9327         420 :         ctx->cb_arg = cb_arg;
    9328             : 
    9329             :         /* Close the existing blob */
    9330         420 :         spdk_blob_close(blob, bs_iter_close_cpl, ctx);
    9331             : }
    9332             : 
    9333             : static int
    9334         959 : blob_set_xattr(struct spdk_blob *blob, const char *name, const void *value,
    9335             :                uint16_t value_len, bool internal)
    9336             : {
    9337             :         struct spdk_xattr_tailq *xattrs;
    9338             :         struct spdk_xattr       *xattr;
    9339             :         size_t                  desc_size;
    9340             :         void                    *tmp;
    9341             : 
    9342         959 :         blob_verify_md_op(blob);
    9343             : 
    9344         959 :         if (blob->md_ro) {
    9345           4 :                 return -EPERM;
    9346             :         }
    9347             : 
    9348         955 :         desc_size = sizeof(struct spdk_blob_md_descriptor_xattr) + strlen(name) + value_len;
    9349         955 :         if (desc_size > SPDK_BS_MAX_DESC_SIZE) {
    9350           4 :                 SPDK_DEBUGLOG(blob, "Xattr '%s' of size %zu does not fix into single page %zu\n", name,
    9351             :                               desc_size, SPDK_BS_MAX_DESC_SIZE);
    9352           4 :                 return -ENOMEM;
    9353             :         }
    9354             : 
    9355         951 :         if (internal) {
    9356         740 :                 xattrs = &blob->xattrs_internal;
    9357         740 :                 blob->invalid_flags |= SPDK_BLOB_INTERNAL_XATTR;
    9358             :         } else {
    9359         211 :                 xattrs = &blob->xattrs;
    9360             :         }
    9361             : 
    9362        1182 :         TAILQ_FOREACH(xattr, xattrs, link) {
    9363         340 :                 if (!strcmp(name, xattr->name)) {
    9364         109 :                         tmp = malloc(value_len);
    9365         109 :                         if (!tmp) {
    9366           0 :                                 return -ENOMEM;
    9367             :                         }
    9368             : 
    9369         109 :                         free(xattr->value);
    9370         109 :                         xattr->value_len = value_len;
    9371         109 :                         xattr->value = tmp;
    9372         109 :                         memcpy(xattr->value, value, value_len);
    9373             : 
    9374         109 :                         blob->state = SPDK_BLOB_STATE_DIRTY;
    9375             : 
    9376         109 :                         return 0;
    9377             :                 }
    9378             :         }
    9379             : 
    9380         842 :         xattr = calloc(1, sizeof(*xattr));
    9381         842 :         if (!xattr) {
    9382           0 :                 return -ENOMEM;
    9383             :         }
    9384             : 
    9385         842 :         xattr->name = strdup(name);
    9386         842 :         if (!xattr->name) {
    9387           0 :                 free(xattr);
    9388           0 :                 return -ENOMEM;
    9389             :         }
    9390             : 
    9391         842 :         xattr->value_len = value_len;
    9392         842 :         xattr->value = malloc(value_len);
    9393         842 :         if (!xattr->value) {
    9394           0 :                 free(xattr->name);
    9395           0 :                 free(xattr);
    9396           0 :                 return -ENOMEM;
    9397             :         }
    9398         842 :         memcpy(xattr->value, value, value_len);
    9399         842 :         TAILQ_INSERT_TAIL(xattrs, xattr, link);
    9400             : 
    9401         842 :         blob->state = SPDK_BLOB_STATE_DIRTY;
    9402             : 
    9403         842 :         return 0;
    9404             : }
    9405             : 
    9406             : int
    9407         183 : spdk_blob_set_xattr(struct spdk_blob *blob, const char *name, const void *value,
    9408             :                     uint16_t value_len)
    9409             : {
    9410         183 :         return blob_set_xattr(blob, name, value, value_len, false);
    9411             : }
    9412             : 
    9413             : static int
    9414         416 : blob_remove_xattr(struct spdk_blob *blob, const char *name, bool internal)
    9415             : {
    9416             :         struct spdk_xattr_tailq *xattrs;
    9417             :         struct spdk_xattr       *xattr;
    9418             : 
    9419         416 :         blob_verify_md_op(blob);
    9420             : 
    9421         416 :         if (blob->md_ro) {
    9422           4 :                 return -EPERM;
    9423             :         }
    9424         412 :         xattrs = internal ? &blob->xattrs_internal : &blob->xattrs;
    9425             : 
    9426         424 :         TAILQ_FOREACH(xattr, xattrs, link) {
    9427         372 :                 if (!strcmp(name, xattr->name)) {
    9428         360 :                         TAILQ_REMOVE(xattrs, xattr, link);
    9429         360 :                         free(xattr->value);
    9430         360 :                         free(xattr->name);
    9431         360 :                         free(xattr);
    9432             : 
    9433         360 :                         if (internal && TAILQ_EMPTY(&blob->xattrs_internal)) {
    9434         244 :                                 blob->invalid_flags &= ~SPDK_BLOB_INTERNAL_XATTR;
    9435             :                         }
    9436         360 :                         blob->state = SPDK_BLOB_STATE_DIRTY;
    9437             : 
    9438         360 :                         return 0;
    9439             :                 }
    9440             :         }
    9441             : 
    9442          52 :         return -ENOENT;
    9443             : }
    9444             : 
    9445             : int
    9446          36 : spdk_blob_remove_xattr(struct spdk_blob *blob, const char *name)
    9447             : {
    9448          36 :         return blob_remove_xattr(blob, name, false);
    9449             : }
    9450             : 
    9451             : static int
    9452        2292 : blob_get_xattr_value(struct spdk_blob *blob, const char *name,
    9453             :                      const void **value, size_t *value_len, bool internal)
    9454             : {
    9455             :         struct spdk_xattr       *xattr;
    9456             :         struct spdk_xattr_tailq *xattrs;
    9457             : 
    9458        2292 :         xattrs = internal ? &blob->xattrs_internal : &blob->xattrs;
    9459             : 
    9460        2922 :         TAILQ_FOREACH(xattr, xattrs, link) {
    9461        1396 :                 if (!strcmp(name, xattr->name)) {
    9462         766 :                         *value = xattr->value;
    9463         766 :                         *value_len = xattr->value_len;
    9464         766 :                         return 0;
    9465             :                 }
    9466             :         }
    9467        1526 :         return -ENOENT;
    9468             : }
    9469             : 
    9470             : int
    9471         154 : spdk_blob_get_xattr_value(struct spdk_blob *blob, const char *name,
    9472             :                           const void **value, size_t *value_len)
    9473             : {
    9474         154 :         blob_verify_md_op(blob);
    9475             : 
    9476         154 :         return blob_get_xattr_value(blob, name, value, value_len, false);
    9477             : }
    9478             : 
    9479             : struct spdk_xattr_names {
    9480             :         uint32_t        count;
    9481             :         const char      *names[0];
    9482             : };
    9483             : 
    9484             : static int
    9485           4 : blob_get_xattr_names(struct spdk_xattr_tailq *xattrs, struct spdk_xattr_names **names)
    9486             : {
    9487             :         struct spdk_xattr       *xattr;
    9488           4 :         int                     count = 0;
    9489             : 
    9490          12 :         TAILQ_FOREACH(xattr, xattrs, link) {
    9491           8 :                 count++;
    9492             :         }
    9493             : 
    9494           4 :         *names = calloc(1, sizeof(struct spdk_xattr_names) + count * sizeof(char *));
    9495           4 :         if (*names == NULL) {
    9496           0 :                 return -ENOMEM;
    9497             :         }
    9498             : 
    9499          12 :         TAILQ_FOREACH(xattr, xattrs, link) {
    9500           8 :                 (*names)->names[(*names)->count++] = xattr->name;
    9501             :         }
    9502             : 
    9503           4 :         return 0;
    9504             : }
    9505             : 
    9506             : int
    9507           4 : spdk_blob_get_xattr_names(struct spdk_blob *blob, struct spdk_xattr_names **names)
    9508             : {
    9509           4 :         blob_verify_md_op(blob);
    9510             : 
    9511           4 :         return blob_get_xattr_names(&blob->xattrs, names);
    9512             : }
    9513             : 
    9514             : uint32_t
    9515           4 : spdk_xattr_names_get_count(struct spdk_xattr_names *names)
    9516             : {
    9517           4 :         assert(names != NULL);
    9518             : 
    9519           4 :         return names->count;
    9520             : }
    9521             : 
    9522             : const char *
    9523           8 : spdk_xattr_names_get_name(struct spdk_xattr_names *names, uint32_t index)
    9524             : {
    9525           8 :         if (index >= names->count) {
    9526           0 :                 return NULL;
    9527             :         }
    9528             : 
    9529           8 :         return names->names[index];
    9530             : }
    9531             : 
    9532             : void
    9533           4 : spdk_xattr_names_free(struct spdk_xattr_names *names)
    9534             : {
    9535           4 :         free(names);
    9536           4 : }
    9537             : 
    9538             : struct spdk_bs_type
    9539           2 : spdk_bs_get_bstype(struct spdk_blob_store *bs)
    9540             : {
    9541           2 :         return bs->bstype;
    9542             : }
    9543             : 
    9544             : void
    9545           0 : spdk_bs_set_bstype(struct spdk_blob_store *bs, struct spdk_bs_type bstype)
    9546             : {
    9547           0 :         memcpy(&bs->bstype, &bstype, sizeof(bstype));
    9548           0 : }
    9549             : 
    9550             : bool
    9551          48 : spdk_blob_is_read_only(struct spdk_blob *blob)
    9552             : {
    9553          48 :         assert(blob != NULL);
    9554          48 :         return (blob->data_ro || blob->md_ro);
    9555             : }
    9556             : 
    9557             : bool
    9558          52 : spdk_blob_is_snapshot(struct spdk_blob *blob)
    9559             : {
    9560             :         struct spdk_blob_list *snapshot_entry;
    9561             : 
    9562          52 :         assert(blob != NULL);
    9563             : 
    9564          52 :         snapshot_entry = bs_get_snapshot_entry(blob->bs, blob->id);
    9565          52 :         if (snapshot_entry == NULL) {
    9566          28 :                 return false;
    9567             :         }
    9568             : 
    9569          24 :         return true;
    9570             : }
    9571             : 
    9572             : bool
    9573          68 : spdk_blob_is_clone(struct spdk_blob *blob)
    9574             : {
    9575          68 :         assert(blob != NULL);
    9576             : 
    9577          68 :         if (blob->parent_id != SPDK_BLOBID_INVALID &&
    9578          52 :             blob->parent_id != SPDK_BLOBID_EXTERNAL_SNAPSHOT) {
    9579          40 :                 assert(spdk_blob_is_thin_provisioned(blob));
    9580          40 :                 return true;
    9581             :         }
    9582             : 
    9583          28 :         return false;
    9584             : }
    9585             : 
    9586             : bool
    9587       36536 : spdk_blob_is_thin_provisioned(struct spdk_blob *blob)
    9588             : {
    9589       36536 :         assert(blob != NULL);
    9590       36536 :         return !!(blob->invalid_flags & SPDK_BLOB_THIN_PROV);
    9591             : }
    9592             : 
    9593             : bool
    9594       40888 : spdk_blob_is_esnap_clone(const struct spdk_blob *blob)
    9595             : {
    9596       40888 :         return blob_is_esnap_clone(blob);
    9597             : }
    9598             : 
    9599             : static void
    9600        3434 : blob_update_clear_method(struct spdk_blob *blob)
    9601             : {
    9602             :         enum blob_clear_method stored_cm;
    9603             : 
    9604        3434 :         assert(blob != NULL);
    9605             : 
    9606             :         /* If BLOB_CLEAR_WITH_DEFAULT was passed in, use the setting stored
    9607             :          * in metadata previously.  If something other than the default was
    9608             :          * specified, ignore stored value and used what was passed in.
    9609             :          */
    9610        3434 :         stored_cm = ((blob->md_ro_flags & SPDK_BLOB_CLEAR_METHOD) >> SPDK_BLOB_CLEAR_METHOD_SHIFT);
    9611             : 
    9612        3434 :         if (blob->clear_method == BLOB_CLEAR_WITH_DEFAULT) {
    9613        3434 :                 blob->clear_method = stored_cm;
    9614           0 :         } else if (blob->clear_method != stored_cm) {
    9615           0 :                 SPDK_WARNLOG("Using passed in clear method 0x%x instead of stored value of 0x%x\n",
    9616             :                              blob->clear_method, stored_cm);
    9617             :         }
    9618        3434 : }
    9619             : 
    9620             : spdk_blob_id
    9621         258 : spdk_blob_get_parent_snapshot(struct spdk_blob_store *bs, spdk_blob_id blob_id)
    9622             : {
    9623         258 :         struct spdk_blob_list *snapshot_entry = NULL;
    9624         258 :         struct spdk_blob_list *clone_entry = NULL;
    9625             : 
    9626         494 :         TAILQ_FOREACH(snapshot_entry, &bs->snapshots, link) {
    9627         732 :                 TAILQ_FOREACH(clone_entry, &snapshot_entry->clones, link) {
    9628         496 :                         if (clone_entry->id == blob_id) {
    9629         168 :                                 return snapshot_entry->id;
    9630             :                         }
    9631             :                 }
    9632             :         }
    9633             : 
    9634          90 :         return SPDK_BLOBID_INVALID;
    9635             : }
    9636             : 
    9637             : int
    9638         196 : spdk_blob_get_clones(struct spdk_blob_store *bs, spdk_blob_id blobid, spdk_blob_id *ids,
    9639             :                      size_t *count)
    9640             : {
    9641             :         struct spdk_blob_list *snapshot_entry, *clone_entry;
    9642             :         size_t n;
    9643             : 
    9644         196 :         snapshot_entry = bs_get_snapshot_entry(bs, blobid);
    9645         196 :         if (snapshot_entry == NULL) {
    9646          28 :                 *count = 0;
    9647          28 :                 return 0;
    9648             :         }
    9649             : 
    9650         168 :         if (ids == NULL || *count < snapshot_entry->clone_count) {
    9651           8 :                 *count = snapshot_entry->clone_count;
    9652           8 :                 return -ENOMEM;
    9653             :         }
    9654         160 :         *count = snapshot_entry->clone_count;
    9655             : 
    9656         160 :         n = 0;
    9657         340 :         TAILQ_FOREACH(clone_entry, &snapshot_entry->clones, link) {
    9658         180 :                 ids[n++] = clone_entry->id;
    9659             :         }
    9660             : 
    9661         160 :         return 0;
    9662             : }
    9663             : 
    9664             : static void
    9665           4 : bs_load_grow_continue(struct spdk_bs_load_ctx *ctx)
    9666             : {
    9667             :         int rc;
    9668             : 
    9669           4 :         if (ctx->super->size == 0) {
    9670           0 :                 ctx->super->size = ctx->bs->dev->blockcnt * ctx->bs->dev->blocklen;
    9671             :         }
    9672             : 
    9673           4 :         if (ctx->super->io_unit_size == 0) {
    9674           0 :                 ctx->super->io_unit_size = SPDK_BS_PAGE_SIZE;
    9675             :         }
    9676             : 
    9677             :         /* Parse the super block */
    9678           4 :         ctx->bs->clean = 1;
    9679           4 :         ctx->bs->cluster_sz = ctx->super->cluster_size;
    9680           4 :         ctx->bs->total_clusters = ctx->super->size / ctx->super->cluster_size;
    9681           4 :         ctx->bs->pages_per_cluster = ctx->bs->cluster_sz / SPDK_BS_PAGE_SIZE;
    9682           4 :         if (spdk_u32_is_pow2(ctx->bs->pages_per_cluster)) {
    9683           4 :                 ctx->bs->pages_per_cluster_shift = spdk_u32log2(ctx->bs->pages_per_cluster);
    9684             :         }
    9685           4 :         ctx->bs->io_unit_size = ctx->super->io_unit_size;
    9686           4 :         rc = spdk_bit_array_resize(&ctx->used_clusters, ctx->bs->total_clusters);
    9687           4 :         if (rc < 0) {
    9688           0 :                 bs_load_ctx_fail(ctx, -ENOMEM);
    9689           0 :                 return;
    9690             :         }
    9691           4 :         ctx->bs->md_start = ctx->super->md_start;
    9692           4 :         ctx->bs->md_len = ctx->super->md_len;
    9693           4 :         rc = spdk_bit_array_resize(&ctx->bs->open_blobids, ctx->bs->md_len);
    9694           4 :         if (rc < 0) {
    9695           0 :                 bs_load_ctx_fail(ctx, -ENOMEM);
    9696           0 :                 return;
    9697             :         }
    9698             : 
    9699           8 :         ctx->bs->total_data_clusters = ctx->bs->total_clusters - spdk_divide_round_up(
    9700           4 :                                                ctx->bs->md_start + ctx->bs->md_len, ctx->bs->pages_per_cluster);
    9701           4 :         ctx->bs->super_blob = ctx->super->super_blob;
    9702           4 :         memcpy(&ctx->bs->bstype, &ctx->super->bstype, sizeof(ctx->super->bstype));
    9703             : 
    9704           4 :         if (ctx->super->used_blobid_mask_len == 0 || ctx->super->clean == 0) {
    9705           0 :                 SPDK_ERRLOG("Can not grow an unclean blobstore, please load it normally to clean it.\n");
    9706           0 :                 bs_load_ctx_fail(ctx, -EIO);
    9707           0 :                 return;
    9708             :         } else {
    9709           4 :                 bs_load_read_used_pages(ctx);
    9710             :         }
    9711             : }
    9712             : 
    9713             : static void
    9714           4 : bs_load_grow_super_write_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
    9715             : {
    9716           4 :         struct spdk_bs_load_ctx *ctx = cb_arg;
    9717             : 
    9718           4 :         if (bserrno != 0) {
    9719           0 :                 bs_load_ctx_fail(ctx, bserrno);
    9720           0 :                 return;
    9721             :         }
    9722           4 :         bs_load_grow_continue(ctx);
    9723             : }
    9724             : 
    9725             : static void
    9726           4 : bs_load_grow_used_clusters_write_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
    9727             : {
    9728           4 :         struct spdk_bs_load_ctx *ctx = cb_arg;
    9729             : 
    9730           4 :         if (bserrno != 0) {
    9731           0 :                 bs_load_ctx_fail(ctx, bserrno);
    9732           0 :                 return;
    9733             :         }
    9734             : 
    9735           4 :         spdk_free(ctx->mask);
    9736             : 
    9737           4 :         bs_sequence_write_dev(ctx->seq, ctx->super, bs_page_to_lba(ctx->bs, 0),
    9738           4 :                               bs_byte_to_lba(ctx->bs, sizeof(*ctx->super)),
    9739             :                               bs_load_grow_super_write_cpl, ctx);
    9740             : }
    9741             : 
    9742             : static void
    9743           4 : bs_load_grow_used_clusters_read_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
    9744             : {
    9745           4 :         struct spdk_bs_load_ctx *ctx = cb_arg;
    9746             :         uint64_t                lba, lba_count;
    9747             :         uint64_t                dev_size;
    9748             :         uint64_t                total_clusters;
    9749             : 
    9750           4 :         if (bserrno != 0) {
    9751           0 :                 bs_load_ctx_fail(ctx, bserrno);
    9752           0 :                 return;
    9753             :         }
    9754             : 
    9755             :         /* The type must be correct */
    9756           4 :         assert(ctx->mask->type == SPDK_MD_MASK_TYPE_USED_CLUSTERS);
    9757             :         /* The length of the mask (in bits) must not be greater than the length of the buffer (converted to bits) */
    9758           4 :         assert(ctx->mask->length <= (ctx->super->used_cluster_mask_len * sizeof(
    9759             :                                              struct spdk_blob_md_page) * 8));
    9760           4 :         dev_size = ctx->bs->dev->blockcnt * ctx->bs->dev->blocklen;
    9761           4 :         total_clusters = dev_size / ctx->super->cluster_size;
    9762           4 :         ctx->mask->length = total_clusters;
    9763             : 
    9764           4 :         lba = bs_page_to_lba(ctx->bs, ctx->super->used_cluster_mask_start);
    9765           4 :         lba_count = bs_page_to_lba(ctx->bs, ctx->super->used_cluster_mask_len);
    9766           4 :         bs_sequence_write_dev(ctx->seq, ctx->mask, lba, lba_count,
    9767             :                               bs_load_grow_used_clusters_write_cpl, ctx);
    9768             : }
    9769             : 
    9770             : static void
    9771           4 : bs_load_try_to_grow(struct spdk_bs_load_ctx *ctx)
    9772             : {
    9773             :         uint64_t dev_size, total_clusters, used_cluster_mask_len, max_used_cluster_mask;
    9774             :         uint64_t lba, lba_count, mask_size;
    9775             : 
    9776           4 :         dev_size = ctx->bs->dev->blockcnt * ctx->bs->dev->blocklen;
    9777           4 :         total_clusters = dev_size / ctx->super->cluster_size;
    9778           4 :         used_cluster_mask_len = spdk_divide_round_up(sizeof(struct spdk_bs_md_mask) +
    9779           4 :                                 spdk_divide_round_up(total_clusters, 8),
    9780             :                                 SPDK_BS_PAGE_SIZE);
    9781           4 :         max_used_cluster_mask = ctx->super->used_blobid_mask_start - ctx->super->used_cluster_mask_start;
    9782             :         /* No necessary to grow or no space to grow */
    9783           4 :         if (ctx->super->size >= dev_size || used_cluster_mask_len > max_used_cluster_mask) {
    9784           0 :                 SPDK_DEBUGLOG(blob, "No grow\n");
    9785           0 :                 bs_load_grow_continue(ctx);
    9786           0 :                 return;
    9787             :         }
    9788             : 
    9789           4 :         SPDK_DEBUGLOG(blob, "Resize blobstore\n");
    9790             : 
    9791           4 :         ctx->super->size = dev_size;
    9792           4 :         ctx->super->used_cluster_mask_len = used_cluster_mask_len;
    9793           4 :         ctx->super->crc = blob_md_page_calc_crc(ctx->super);
    9794             : 
    9795           4 :         mask_size = used_cluster_mask_len * SPDK_BS_PAGE_SIZE;
    9796           4 :         ctx->mask = spdk_zmalloc(mask_size, 0x1000, NULL, SPDK_ENV_SOCKET_ID_ANY,
    9797             :                                  SPDK_MALLOC_DMA);
    9798           4 :         if (!ctx->mask) {
    9799           0 :                 bs_load_ctx_fail(ctx, -ENOMEM);
    9800           0 :                 return;
    9801             :         }
    9802           4 :         lba = bs_page_to_lba(ctx->bs, ctx->super->used_cluster_mask_start);
    9803           4 :         lba_count = bs_page_to_lba(ctx->bs, ctx->super->used_cluster_mask_len);
    9804           4 :         bs_sequence_read_dev(ctx->seq, ctx->mask, lba, lba_count,
    9805             :                              bs_load_grow_used_clusters_read_cpl, ctx);
    9806             : }
    9807             : 
    9808             : static void
    9809           4 : bs_grow_load_super_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
    9810             : {
    9811           4 :         struct spdk_bs_load_ctx *ctx = cb_arg;
    9812             :         int rc;
    9813             : 
    9814           4 :         rc = bs_super_validate(ctx->super, ctx->bs);
    9815           4 :         if (rc != 0) {
    9816           0 :                 bs_load_ctx_fail(ctx, rc);
    9817           0 :                 return;
    9818             :         }
    9819             : 
    9820           4 :         bs_load_try_to_grow(ctx);
    9821             : }
    9822             : 
    9823             : struct spdk_bs_grow_ctx {
    9824             :         struct spdk_blob_store          *bs;
    9825             :         struct spdk_bs_super_block      *super;
    9826             : 
    9827             :         struct spdk_bit_pool            *new_used_clusters;
    9828             :         struct spdk_bs_md_mask          *new_used_clusters_mask;
    9829             : 
    9830             :         spdk_bs_sequence_t              *seq;
    9831             : };
    9832             : 
    9833             : static void
    9834          32 : bs_grow_live_done(struct spdk_bs_grow_ctx *ctx, int bserrno)
    9835             : {
    9836          32 :         if (bserrno != 0) {
    9837           8 :                 spdk_bit_pool_free(&ctx->new_used_clusters);
    9838             :         }
    9839             : 
    9840          32 :         bs_sequence_finish(ctx->seq, bserrno);
    9841          32 :         free(ctx->new_used_clusters_mask);
    9842          32 :         spdk_free(ctx->super);
    9843          32 :         free(ctx);
    9844          32 : }
    9845             : 
    9846             : static void
    9847           8 : bs_grow_live_super_write_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
    9848             : {
    9849           8 :         struct spdk_bs_grow_ctx *ctx = cb_arg;
    9850           8 :         struct spdk_blob_store *bs = ctx->bs;
    9851             :         uint64_t total_clusters;
    9852             : 
    9853           8 :         if (bserrno != 0) {
    9854           0 :                 bs_grow_live_done(ctx, bserrno);
    9855           0 :                 return;
    9856             :         }
    9857             : 
    9858             :         /*
    9859             :          * Blobstore is not clean until unload, for now only the super block is up to date.
    9860             :          * This is similar to state right after blobstore init, when bs_write_used_md() didn't
    9861             :          * yet execute.
    9862             :          * When cleanly unloaded, the used md pages will be written out.
    9863             :          * In case of unclean shutdown, loading blobstore will go through recovery path correctly
    9864             :          * filling out the used_clusters with new size and writing it out.
    9865             :          */
    9866           8 :         bs->clean = 0;
    9867             : 
    9868             :         /* Reverting the super->size past this point is complex, avoid any error paths
    9869             :          * that require to do so. */
    9870           8 :         spdk_spin_lock(&bs->used_lock);
    9871             : 
    9872           8 :         total_clusters = ctx->super->size / ctx->super->cluster_size;
    9873             : 
    9874           8 :         assert(total_clusters >= spdk_bit_pool_capacity(bs->used_clusters));
    9875           8 :         spdk_bit_pool_store_mask(bs->used_clusters, ctx->new_used_clusters_mask);
    9876             : 
    9877           8 :         assert(total_clusters == spdk_bit_pool_capacity(ctx->new_used_clusters));
    9878           8 :         spdk_bit_pool_load_mask(ctx->new_used_clusters, ctx->new_used_clusters_mask);
    9879             : 
    9880           8 :         spdk_bit_pool_free(&bs->used_clusters);
    9881           8 :         bs->used_clusters = ctx->new_used_clusters;
    9882             : 
    9883           8 :         bs->total_clusters = total_clusters;
    9884          16 :         bs->total_data_clusters = bs->total_clusters - spdk_divide_round_up(
    9885           8 :                                           bs->md_start + bs->md_len, bs->pages_per_cluster);
    9886             : 
    9887           8 :         bs->num_free_clusters = spdk_bit_pool_count_free(bs->used_clusters);
    9888           8 :         assert(ctx->bs->num_free_clusters <= ctx->bs->total_clusters);
    9889           8 :         spdk_spin_unlock(&bs->used_lock);
    9890             : 
    9891           8 :         bs_grow_live_done(ctx, 0);
    9892             : }
    9893             : 
    9894             : static void
    9895          32 : bs_grow_live_load_super_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
    9896             : {
    9897          32 :         struct spdk_bs_grow_ctx *ctx = cb_arg;
    9898             :         uint64_t dev_size, total_clusters, used_cluster_mask_len, max_used_cluster_mask;
    9899             :         int rc;
    9900             : 
    9901          32 :         if (bserrno != 0) {
    9902           0 :                 bs_grow_live_done(ctx, bserrno);
    9903           0 :                 return;
    9904             :         }
    9905             : 
    9906          32 :         rc = bs_super_validate(ctx->super, ctx->bs);
    9907          32 :         if (rc != 0) {
    9908           4 :                 bs_grow_live_done(ctx, rc);
    9909           4 :                 return;
    9910             :         }
    9911             : 
    9912          28 :         dev_size = ctx->bs->dev->blockcnt * ctx->bs->dev->blocklen;
    9913          28 :         total_clusters = dev_size / ctx->super->cluster_size;
    9914          28 :         used_cluster_mask_len = spdk_divide_round_up(sizeof(struct spdk_bs_md_mask) +
    9915          28 :                                 spdk_divide_round_up(total_clusters, 8),
    9916             :                                 SPDK_BS_PAGE_SIZE);
    9917          28 :         max_used_cluster_mask = ctx->super->used_blobid_mask_start - ctx->super->used_cluster_mask_start;
    9918             :         /* Only checking dev_size. Since it can change, but total_clusters remain the same. */
    9919          28 :         if (dev_size == ctx->super->size) {
    9920          16 :                 SPDK_DEBUGLOG(blob, "No need to grow blobstore\n");
    9921          16 :                 bs_grow_live_done(ctx, 0);
    9922          16 :                 return;
    9923             :         }
    9924             :         /*
    9925             :          * Blobstore cannot be shrunk, so check before if:
    9926             :          * - new size of the device is smaller than size in super_block
    9927             :          * - new total number of clusters is smaller than used_clusters bit_pool
    9928             :          * - there is enough space in metadata for used_cluster_mask to be written out
    9929             :          */
    9930          12 :         if (dev_size < ctx->super->size ||
    9931          12 :             total_clusters < spdk_bit_pool_capacity(ctx->bs->used_clusters) ||
    9932             :             used_cluster_mask_len > max_used_cluster_mask) {
    9933           4 :                 SPDK_DEBUGLOG(blob, "No space to grow blobstore\n");
    9934           4 :                 bs_grow_live_done(ctx, -ENOSPC);
    9935           4 :                 return;
    9936             :         }
    9937             : 
    9938           8 :         SPDK_DEBUGLOG(blob, "Resizing blobstore\n");
    9939             : 
    9940           8 :         ctx->new_used_clusters_mask = calloc(1, total_clusters);
    9941           8 :         if (!ctx->new_used_clusters_mask) {
    9942           0 :                 bs_grow_live_done(ctx, -ENOMEM);
    9943           0 :                 return;
    9944             :         }
    9945           8 :         ctx->new_used_clusters = spdk_bit_pool_create(total_clusters);
    9946           8 :         if (!ctx->new_used_clusters) {
    9947           0 :                 bs_grow_live_done(ctx, -ENOMEM);
    9948           0 :                 return;
    9949             :         }
    9950             : 
    9951           8 :         ctx->super->clean = 0;
    9952           8 :         ctx->super->size = dev_size;
    9953           8 :         ctx->super->used_cluster_mask_len = used_cluster_mask_len;
    9954           8 :         bs_write_super(seq, ctx->bs, ctx->super, bs_grow_live_super_write_cpl, ctx);
    9955             : }
    9956             : 
    9957             : void
    9958          32 : spdk_bs_grow_live(struct spdk_blob_store *bs,
    9959             :                   spdk_bs_op_complete cb_fn, void *cb_arg)
    9960             : {
    9961          32 :         struct spdk_bs_cpl      cpl;
    9962             :         struct spdk_bs_grow_ctx *ctx;
    9963             : 
    9964          32 :         assert(spdk_get_thread() == bs->md_thread);
    9965             : 
    9966          32 :         SPDK_DEBUGLOG(blob, "Growing blobstore on dev %p\n", bs->dev);
    9967             : 
    9968          32 :         cpl.type = SPDK_BS_CPL_TYPE_BS_BASIC;
    9969          32 :         cpl.u.bs_basic.cb_fn = cb_fn;
    9970          32 :         cpl.u.bs_basic.cb_arg = cb_arg;
    9971             : 
    9972          32 :         ctx = calloc(1, sizeof(struct spdk_bs_grow_ctx));
    9973          32 :         if (!ctx) {
    9974           0 :                 cb_fn(cb_arg, -ENOMEM);
    9975           0 :                 return;
    9976             :         }
    9977          32 :         ctx->bs = bs;
    9978             : 
    9979          32 :         ctx->super = spdk_zmalloc(sizeof(*ctx->super), 0x1000, NULL,
    9980             :                                   SPDK_ENV_SOCKET_ID_ANY, SPDK_MALLOC_DMA);
    9981          32 :         if (!ctx->super) {
    9982           0 :                 free(ctx);
    9983           0 :                 cb_fn(cb_arg, -ENOMEM);
    9984           0 :                 return;
    9985             :         }
    9986             : 
    9987          32 :         ctx->seq = bs_sequence_start_bs(bs->md_channel, &cpl);
    9988          32 :         if (!ctx->seq) {
    9989           0 :                 spdk_free(ctx->super);
    9990           0 :                 free(ctx);
    9991           0 :                 cb_fn(cb_arg, -ENOMEM);
    9992           0 :                 return;
    9993             :         }
    9994             : 
    9995             :         /* Read the super block */
    9996          32 :         bs_sequence_read_dev(ctx->seq, ctx->super, bs_page_to_lba(bs, 0),
    9997          32 :                              bs_byte_to_lba(bs, sizeof(*ctx->super)),
    9998             :                              bs_grow_live_load_super_cpl, ctx);
    9999             : }
   10000             : 
   10001             : void
   10002           4 : spdk_bs_grow(struct spdk_bs_dev *dev, struct spdk_bs_opts *o,
   10003             :              spdk_bs_op_with_handle_complete cb_fn, void *cb_arg)
   10004             : {
   10005           4 :         struct spdk_blob_store  *bs;
   10006           4 :         struct spdk_bs_cpl      cpl;
   10007           4 :         struct spdk_bs_load_ctx *ctx;
   10008           4 :         struct spdk_bs_opts     opts = {};
   10009             :         int err;
   10010             : 
   10011           4 :         SPDK_DEBUGLOG(blob, "Loading blobstore from dev %p\n", dev);
   10012             : 
   10013           4 :         if ((SPDK_BS_PAGE_SIZE % dev->blocklen) != 0) {
   10014           0 :                 SPDK_DEBUGLOG(blob, "unsupported dev block length of %d\n", dev->blocklen);
   10015           0 :                 dev->destroy(dev);
   10016           0 :                 cb_fn(cb_arg, NULL, -EINVAL);
   10017           0 :                 return;
   10018             :         }
   10019             : 
   10020           4 :         spdk_bs_opts_init(&opts, sizeof(opts));
   10021           4 :         if (o) {
   10022           4 :                 if (bs_opts_copy(o, &opts)) {
   10023           0 :                         return;
   10024             :                 }
   10025             :         }
   10026             : 
   10027           4 :         if (opts.max_md_ops == 0 || opts.max_channel_ops == 0) {
   10028           0 :                 dev->destroy(dev);
   10029           0 :                 cb_fn(cb_arg, NULL, -EINVAL);
   10030           0 :                 return;
   10031             :         }
   10032             : 
   10033           4 :         err = bs_alloc(dev, &opts, &bs, &ctx);
   10034           4 :         if (err) {
   10035           0 :                 dev->destroy(dev);
   10036           0 :                 cb_fn(cb_arg, NULL, err);
   10037           0 :                 return;
   10038             :         }
   10039             : 
   10040           4 :         cpl.type = SPDK_BS_CPL_TYPE_BS_HANDLE;
   10041           4 :         cpl.u.bs_handle.cb_fn = cb_fn;
   10042           4 :         cpl.u.bs_handle.cb_arg = cb_arg;
   10043           4 :         cpl.u.bs_handle.bs = bs;
   10044             : 
   10045           4 :         ctx->seq = bs_sequence_start_bs(bs->md_channel, &cpl);
   10046           4 :         if (!ctx->seq) {
   10047           0 :                 spdk_free(ctx->super);
   10048           0 :                 free(ctx);
   10049           0 :                 bs_free(bs);
   10050           0 :                 cb_fn(cb_arg, NULL, -ENOMEM);
   10051           0 :                 return;
   10052             :         }
   10053             : 
   10054             :         /* Read the super block */
   10055           4 :         bs_sequence_read_dev(ctx->seq, ctx->super, bs_page_to_lba(bs, 0),
   10056           4 :                              bs_byte_to_lba(bs, sizeof(*ctx->super)),
   10057             :                              bs_grow_load_super_cpl, ctx);
   10058             : }
   10059             : 
   10060             : int
   10061          24 : spdk_blob_get_esnap_id(struct spdk_blob *blob, const void **id, size_t *len)
   10062             : {
   10063          24 :         if (!blob_is_esnap_clone(blob)) {
   10064          12 :                 return -EINVAL;
   10065             :         }
   10066             : 
   10067          12 :         return blob_get_xattr_value(blob, BLOB_EXTERNAL_SNAPSHOT_ID, id, len, true);
   10068             : }
   10069             : 
   10070             : struct spdk_io_channel *
   10071        8840 : blob_esnap_get_io_channel(struct spdk_io_channel *ch, struct spdk_blob *blob)
   10072             : {
   10073        8840 :         struct spdk_bs_channel          *bs_channel = spdk_io_channel_get_ctx(ch);
   10074        8840 :         struct spdk_bs_dev              *bs_dev = blob->back_bs_dev;
   10075        8840 :         struct blob_esnap_channel       find = {};
   10076             :         struct blob_esnap_channel       *esnap_channel, *existing;
   10077             : 
   10078        8840 :         find.blob_id = blob->id;
   10079        8840 :         esnap_channel = RB_FIND(blob_esnap_channel_tree, &bs_channel->esnap_channels, &find);
   10080        8840 :         if (spdk_likely(esnap_channel != NULL)) {
   10081        8796 :                 SPDK_DEBUGLOG(blob_esnap, "blob 0x%" PRIx64 ": using cached channel on thread %s\n",
   10082             :                               blob->id, spdk_thread_get_name(spdk_get_thread()));
   10083        8796 :                 return esnap_channel->channel;
   10084             :         }
   10085             : 
   10086          44 :         SPDK_DEBUGLOG(blob_esnap, "blob 0x%" PRIx64 ": allocating channel on thread %s\n",
   10087             :                       blob->id, spdk_thread_get_name(spdk_get_thread()));
   10088             : 
   10089          44 :         esnap_channel = calloc(1, sizeof(*esnap_channel));
   10090          44 :         if (esnap_channel == NULL) {
   10091           0 :                 SPDK_NOTICELOG("blob 0x%" PRIx64 " channel allocation failed: no memory\n",
   10092             :                                find.blob_id);
   10093           0 :                 return NULL;
   10094             :         }
   10095          44 :         esnap_channel->channel = bs_dev->create_channel(bs_dev);
   10096          44 :         if (esnap_channel->channel == NULL) {
   10097           0 :                 SPDK_NOTICELOG("blob 0x%" PRIx64 " back channel allocation failed\n", blob->id);
   10098           0 :                 free(esnap_channel);
   10099           0 :                 return NULL;
   10100             :         }
   10101          44 :         esnap_channel->blob_id = find.blob_id;
   10102          44 :         existing = RB_INSERT(blob_esnap_channel_tree, &bs_channel->esnap_channels, esnap_channel);
   10103          44 :         if (spdk_unlikely(existing != NULL)) {
   10104             :                 /*
   10105             :                  * This should be unreachable: all modifications to this tree happen on this thread.
   10106             :                  */
   10107           0 :                 SPDK_ERRLOG("blob 0x%" PRIx64 "lost race to allocate a channel\n", find.blob_id);
   10108           0 :                 assert(false);
   10109             : 
   10110             :                 bs_dev->destroy_channel(bs_dev, esnap_channel->channel);
   10111             :                 free(esnap_channel);
   10112             : 
   10113             :                 return existing->channel;
   10114             :         }
   10115             : 
   10116          44 :         return esnap_channel->channel;
   10117             : }
   10118             : 
   10119             : static int
   10120        8816 : blob_esnap_channel_compare(struct blob_esnap_channel *c1, struct blob_esnap_channel *c2)
   10121             : {
   10122        8816 :         return (c1->blob_id < c2->blob_id ? -1 : c1->blob_id > c2->blob_id);
   10123             : }
   10124             : 
   10125             : struct blob_esnap_destroy_ctx {
   10126             :         spdk_blob_op_with_handle_complete       cb_fn;
   10127             :         void                                    *cb_arg;
   10128             :         struct spdk_blob                        *blob;
   10129             :         struct spdk_bs_dev                      *back_bs_dev;
   10130             :         bool                                    abort_io;
   10131             : };
   10132             : 
   10133             : static void
   10134         152 : blob_esnap_destroy_channels_done(struct spdk_io_channel_iter *i, int status)
   10135             : {
   10136         152 :         struct blob_esnap_destroy_ctx   *ctx = spdk_io_channel_iter_get_ctx(i);
   10137         152 :         struct spdk_blob                *blob = ctx->blob;
   10138         152 :         struct spdk_blob_store          *bs = blob->bs;
   10139             : 
   10140         152 :         SPDK_DEBUGLOG(blob_esnap, "blob 0x%" PRIx64 ": done destroying channels for this blob\n",
   10141             :                       blob->id);
   10142             : 
   10143         152 :         if (ctx->cb_fn != NULL) {
   10144         136 :                 ctx->cb_fn(ctx->cb_arg, blob, status);
   10145             :         }
   10146         152 :         free(ctx);
   10147             : 
   10148         152 :         bs->esnap_channels_unloading--;
   10149         152 :         if (bs->esnap_channels_unloading == 0 && bs->esnap_unload_cb_fn != NULL) {
   10150           4 :                 spdk_bs_unload(bs, bs->esnap_unload_cb_fn, bs->esnap_unload_cb_arg);
   10151             :         }
   10152         152 : }
   10153             : 
   10154             : static void
   10155         160 : blob_esnap_destroy_one_channel(struct spdk_io_channel_iter *i)
   10156             : {
   10157         160 :         struct blob_esnap_destroy_ctx   *ctx = spdk_io_channel_iter_get_ctx(i);
   10158         160 :         struct spdk_blob                *blob = ctx->blob;
   10159         160 :         struct spdk_bs_dev              *bs_dev = ctx->back_bs_dev;
   10160         160 :         struct spdk_io_channel          *channel = spdk_io_channel_iter_get_channel(i);
   10161         160 :         struct spdk_bs_channel          *bs_channel = spdk_io_channel_get_ctx(channel);
   10162             :         struct blob_esnap_channel       *esnap_channel;
   10163         160 :         struct blob_esnap_channel       find = {};
   10164             : 
   10165         160 :         assert(spdk_get_thread() == spdk_io_channel_get_thread(channel));
   10166             : 
   10167         160 :         find.blob_id = blob->id;
   10168         160 :         esnap_channel = RB_FIND(blob_esnap_channel_tree, &bs_channel->esnap_channels, &find);
   10169         160 :         if (esnap_channel != NULL) {
   10170          12 :                 SPDK_DEBUGLOG(blob_esnap, "blob 0x%" PRIx64 ": destroying channel on thread %s\n",
   10171             :                               blob->id, spdk_thread_get_name(spdk_get_thread()));
   10172          12 :                 RB_REMOVE(blob_esnap_channel_tree, &bs_channel->esnap_channels, esnap_channel);
   10173             : 
   10174          12 :                 if (ctx->abort_io) {
   10175             :                         spdk_bs_user_op_t *op, *tmp;
   10176             : 
   10177           8 :                         TAILQ_FOREACH_SAFE(op, &bs_channel->queued_io, link, tmp) {
   10178           0 :                                 if (op->back_channel == esnap_channel->channel) {
   10179           0 :                                         TAILQ_REMOVE(&bs_channel->queued_io, op, link);
   10180           0 :                                         bs_user_op_abort(op, -EIO);
   10181             :                                 }
   10182             :                         }
   10183             :                 }
   10184             : 
   10185          12 :                 bs_dev->destroy_channel(bs_dev, esnap_channel->channel);
   10186          12 :                 free(esnap_channel);
   10187             :         }
   10188             : 
   10189         160 :         spdk_for_each_channel_continue(i, 0);
   10190         160 : }
   10191             : 
   10192             : /*
   10193             :  * Destroy the channels for a specific blob on each thread with a blobstore channel. This should be
   10194             :  * used when closing an esnap clone blob and after decoupling from the parent.
   10195             :  */
   10196             : static void
   10197         500 : blob_esnap_destroy_bs_dev_channels(struct spdk_blob *blob, bool abort_io,
   10198             :                                    spdk_blob_op_with_handle_complete cb_fn, void *cb_arg)
   10199             : {
   10200             :         struct blob_esnap_destroy_ctx   *ctx;
   10201             : 
   10202         500 :         if (!blob_is_esnap_clone(blob) || blob->back_bs_dev == NULL) {
   10203         348 :                 if (cb_fn != NULL) {
   10204         348 :                         cb_fn(cb_arg, blob, 0);
   10205             :                 }
   10206         348 :                 return;
   10207             :         }
   10208             : 
   10209         152 :         ctx = calloc(1, sizeof(*ctx));
   10210         152 :         if (ctx == NULL) {
   10211           0 :                 if (cb_fn != NULL) {
   10212           0 :                         cb_fn(cb_arg, blob, -ENOMEM);
   10213             :                 }
   10214           0 :                 return;
   10215             :         }
   10216         152 :         ctx->cb_fn = cb_fn;
   10217         152 :         ctx->cb_arg = cb_arg;
   10218         152 :         ctx->blob = blob;
   10219         152 :         ctx->back_bs_dev = blob->back_bs_dev;
   10220         152 :         ctx->abort_io = abort_io;
   10221             : 
   10222         152 :         SPDK_DEBUGLOG(blob_esnap, "blob 0x%" PRIx64 ": destroying channels for this blob\n",
   10223             :                       blob->id);
   10224             : 
   10225         152 :         blob->bs->esnap_channels_unloading++;
   10226         152 :         spdk_for_each_channel(blob->bs, blob_esnap_destroy_one_channel, ctx,
   10227             :                               blob_esnap_destroy_channels_done);
   10228             : }
   10229             : 
   10230             : /*
   10231             :  * Destroy all bs_dev channels on a specific blobstore channel. This should be used when a
   10232             :  * bs_channel is destroyed.
   10233             :  */
   10234             : static void
   10235        1029 : blob_esnap_destroy_bs_channel(struct spdk_bs_channel *ch)
   10236             : {
   10237             :         struct blob_esnap_channel *esnap_channel, *esnap_channel_tmp;
   10238             : 
   10239        1029 :         assert(spdk_get_thread() == spdk_io_channel_get_thread(spdk_io_channel_from_ctx(ch)));
   10240             : 
   10241        1029 :         SPDK_DEBUGLOG(blob_esnap, "destroying channels on thread %s\n",
   10242             :                       spdk_thread_get_name(spdk_get_thread()));
   10243        1061 :         RB_FOREACH_SAFE(esnap_channel, blob_esnap_channel_tree, &ch->esnap_channels,
   10244             :                         esnap_channel_tmp) {
   10245          32 :                 SPDK_DEBUGLOG(blob_esnap, "blob 0x%" PRIx64
   10246             :                               ": destroying one channel in thread %s\n",
   10247             :                               esnap_channel->blob_id, spdk_thread_get_name(spdk_get_thread()));
   10248          32 :                 RB_REMOVE(blob_esnap_channel_tree, &ch->esnap_channels, esnap_channel);
   10249          32 :                 spdk_put_io_channel(esnap_channel->channel);
   10250          32 :                 free(esnap_channel);
   10251             :         }
   10252        1029 :         SPDK_DEBUGLOG(blob_esnap, "done destroying channels on thread %s\n",
   10253             :                       spdk_thread_get_name(spdk_get_thread()));
   10254        1029 : }
   10255             : 
   10256             : static void
   10257          28 : blob_set_back_bs_dev_done(void *_ctx, int bserrno)
   10258             : {
   10259          28 :         struct set_bs_dev_ctx   *ctx = _ctx;
   10260             : 
   10261          28 :         if (bserrno != 0) {
   10262             :                 /* Even though the unfreeze failed, the update may have succeed. */
   10263           0 :                 SPDK_ERRLOG("blob 0x%" PRIx64 ": unfreeze failed with error %d\n", ctx->blob->id,
   10264             :                             bserrno);
   10265             :         }
   10266          28 :         ctx->cb_fn(ctx->cb_arg, ctx->bserrno);
   10267          28 :         free(ctx);
   10268          28 : }
   10269             : 
   10270             : static void
   10271          28 : blob_frozen_set_back_bs_dev(void *_ctx, struct spdk_blob *blob, int bserrno)
   10272             : {
   10273          28 :         struct set_bs_dev_ctx   *ctx = _ctx;
   10274             :         int rc;
   10275             : 
   10276          28 :         if (bserrno != 0) {
   10277           0 :                 SPDK_ERRLOG("blob 0x%" PRIx64 ": failed to release old back_bs_dev with error %d\n",
   10278             :                             blob->id, bserrno);
   10279           0 :                 ctx->bserrno = bserrno;
   10280           0 :                 blob_unfreeze_io(blob, blob_set_back_bs_dev_done, ctx);
   10281           0 :                 return;
   10282             :         }
   10283             : 
   10284          28 :         if (blob->back_bs_dev != NULL) {
   10285          28 :                 blob_back_bs_dev_unref(blob);
   10286             :         }
   10287             : 
   10288          28 :         if (ctx->parent_refs_cb_fn) {
   10289          20 :                 rc = ctx->parent_refs_cb_fn(blob, ctx->parent_refs_cb_arg);
   10290          20 :                 if (rc != 0) {
   10291           0 :                         ctx->bserrno = rc;
   10292           0 :                         blob_unfreeze_io(blob, blob_set_back_bs_dev_done, ctx);
   10293           0 :                         return;
   10294             :                 }
   10295             :         }
   10296             : 
   10297          28 :         SPDK_NOTICELOG("blob 0x%" PRIx64 ": hotplugged back_bs_dev\n", blob->id);
   10298          28 :         blob->back_bs_dev = ctx->back_bs_dev;
   10299          28 :         ctx->bserrno = 0;
   10300             : 
   10301          28 :         blob_unfreeze_io(blob, blob_set_back_bs_dev_done, ctx);
   10302             : }
   10303             : 
   10304             : static void
   10305          28 : blob_set_back_bs_dev_frozen(void *_ctx, int bserrno)
   10306             : {
   10307          28 :         struct set_bs_dev_ctx   *ctx = _ctx;
   10308          28 :         struct spdk_blob        *blob = ctx->blob;
   10309             : 
   10310          28 :         if (bserrno != 0) {
   10311           0 :                 SPDK_ERRLOG("blob 0x%" PRIx64 ": failed to freeze with error %d\n", blob->id,
   10312             :                             bserrno);
   10313           0 :                 ctx->cb_fn(ctx->cb_arg, bserrno);
   10314           0 :                 free(ctx);
   10315           0 :                 return;
   10316             :         }
   10317             : 
   10318             :         /*
   10319             :          * This does not prevent future reads from the esnap device because any future IO will
   10320             :          * lazily create a new esnap IO channel.
   10321             :          */
   10322          28 :         blob_esnap_destroy_bs_dev_channels(blob, true, blob_frozen_set_back_bs_dev, ctx);
   10323             : }
   10324             : 
   10325             : void
   10326           8 : spdk_blob_set_esnap_bs_dev(struct spdk_blob *blob, struct spdk_bs_dev *back_bs_dev,
   10327             :                            spdk_blob_op_complete cb_fn, void *cb_arg)
   10328             : {
   10329           8 :         if (!blob_is_esnap_clone(blob)) {
   10330           0 :                 SPDK_ERRLOG("blob 0x%" PRIx64 ": not an esnap clone\n", blob->id);
   10331           0 :                 cb_fn(cb_arg, -EINVAL);
   10332           0 :                 return;
   10333             :         }
   10334             : 
   10335           8 :         blob_set_back_bs_dev(blob, back_bs_dev, NULL, NULL, cb_fn, cb_arg);
   10336             : }
   10337             : 
   10338             : struct spdk_bs_dev *
   10339           4 : spdk_blob_get_esnap_bs_dev(const struct spdk_blob *blob)
   10340             : {
   10341           4 :         if (!blob_is_esnap_clone(blob)) {
   10342           0 :                 SPDK_ERRLOG("blob 0x%" PRIx64 ": not an esnap clone\n", blob->id);
   10343           0 :                 return NULL;
   10344             :         }
   10345             : 
   10346           4 :         return blob->back_bs_dev;
   10347             : }
   10348             : 
   10349             : bool
   10350          28 : spdk_blob_is_degraded(const struct spdk_blob *blob)
   10351             : {
   10352          28 :         if (blob->bs->dev->is_degraded != NULL && blob->bs->dev->is_degraded(blob->bs->dev)) {
   10353           4 :                 return true;
   10354             :         }
   10355          24 :         if (blob->back_bs_dev == NULL || blob->back_bs_dev->is_degraded == NULL) {
   10356          12 :                 return false;
   10357             :         }
   10358             : 
   10359          12 :         return blob->back_bs_dev->is_degraded(blob->back_bs_dev);
   10360             : }
   10361             : 
/* Register the log components named by the SPDK_DEBUGLOG() calls in this file. */
SPDK_LOG_REGISTER_COMPONENT(blob)
SPDK_LOG_REGISTER_COMPONENT(blob_esnap)

Generated by: LCOV version 1.15