LCOV - code coverage report
Current view: top level - lib/blob - blobstore.c (source / functions) Hit Total Coverage
Test: ut_cov_unit.info Lines: 3697 4615 80.1 %
Date: 2024-07-14 05:23:08 Functions: 305 326 93.6 %

          Line data    Source code
       1             : /*   SPDX-License-Identifier: BSD-3-Clause
       2             :  *   Copyright (C) 2017 Intel Corporation.
       3             :  *   All rights reserved.
       4             :  *   Copyright (c) 2021-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
       5             :  */
       6             : 
       7             : #include "spdk/stdinc.h"
       8             : 
       9             : #include "spdk/blob.h"
      10             : #include "spdk/crc32.h"
      11             : #include "spdk/env.h"
      12             : #include "spdk/queue.h"
      13             : #include "spdk/thread.h"
      14             : #include "spdk/bit_array.h"
      15             : #include "spdk/bit_pool.h"
      16             : #include "spdk/likely.h"
      17             : #include "spdk/util.h"
      18             : #include "spdk/string.h"
      19             : 
      20             : #include "spdk_internal/assert.h"
      21             : #include "spdk/log.h"
      22             : 
      23             : #include "blobstore.h"
      24             : 
      25             : #define BLOB_CRC32C_INITIAL    0xffffffffUL
      26             : 
      27             : static int bs_register_md_thread(struct spdk_blob_store *bs);
      28             : static int bs_unregister_md_thread(struct spdk_blob_store *bs);
      29             : static void blob_close_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno);
      30             : static void blob_insert_cluster_on_md_thread(struct spdk_blob *blob, uint32_t cluster_num,
      31             :                 uint64_t cluster, uint32_t extent, struct spdk_blob_md_page *page,
      32             :                 spdk_blob_op_complete cb_fn, void *cb_arg);
      33             : 
      34             : static int blob_set_xattr(struct spdk_blob *blob, const char *name, const void *value,
      35             :                           uint16_t value_len, bool internal);
      36             : static int blob_get_xattr_value(struct spdk_blob *blob, const char *name,
      37             :                                 const void **value, size_t *value_len, bool internal);
      38             : static int blob_remove_xattr(struct spdk_blob *blob, const char *name, bool internal);
      39             : 
      40             : static void blob_write_extent_page(struct spdk_blob *blob, uint32_t extent, uint64_t cluster_num,
      41             :                                    struct spdk_blob_md_page *page, spdk_blob_op_complete cb_fn, void *cb_arg);
      42             : 
      43             : /*
      44             :  * External snapshots require a channel per thread per esnap bdev.  The tree
      45             :  * is populated lazily as blob IOs are handled by the back_bs_dev. When this
      46             :  * channel is destroyed, all the channels in the tree are destroyed.
      47             :  */
      48             : 
      49             : struct blob_esnap_channel {
      50             :         RB_ENTRY(blob_esnap_channel)    node;
      51             :         spdk_blob_id                    blob_id;
      52             :         struct spdk_io_channel          *channel;
      53             : };
      54             : 
      55             : static int blob_esnap_channel_compare(struct blob_esnap_channel *c1, struct blob_esnap_channel *c2);
      56             : static void blob_esnap_destroy_bs_dev_channels(struct spdk_blob *blob, bool abort_io,
      57             :                 spdk_blob_op_with_handle_complete cb_fn, void *cb_arg);
      58             : static void blob_esnap_destroy_bs_channel(struct spdk_bs_channel *ch);
      59        6005 : RB_GENERATE_STATIC(blob_esnap_channel_tree, blob_esnap_channel, node, blob_esnap_channel_compare)
      60             : 
      61             : static inline bool
      62       36034 : blob_is_esnap_clone(const struct spdk_blob *blob)
      63             : {
      64       36034 :         assert(blob != NULL);
      65       36034 :         return !!(blob->invalid_flags & SPDK_BLOB_EXTERNAL_SNAPSHOT);
      66             : }
      67             : 
      68             : static int
      69        1809 : blob_id_cmp(struct spdk_blob *blob1, struct spdk_blob *blob2)
      70             : {
      71        1809 :         assert(blob1 != NULL && blob2 != NULL);
      72        1809 :         return (blob1->id < blob2->id ? -1 : blob1->id > blob2->id);
      73             : }
      74             : 
      75       13877 : RB_GENERATE_STATIC(spdk_blob_tree, spdk_blob, link, blob_id_cmp);
      76             : 
      77             : static void
      78       34483 : blob_verify_md_op(struct spdk_blob *blob)
      79             : {
      80       34483 :         assert(blob != NULL);
      81       34483 :         assert(spdk_get_thread() == blob->bs->md_thread);
      82       34483 :         assert(blob->state != SPDK_BLOB_STATE_LOADING);
      83       34483 : }
      84             : 
      85             : static struct spdk_blob_list *
      86        3596 : bs_get_snapshot_entry(struct spdk_blob_store *bs, spdk_blob_id blobid)
      87             : {
      88        3596 :         struct spdk_blob_list *snapshot_entry = NULL;
      89             : 
      90        4316 :         TAILQ_FOREACH(snapshot_entry, &bs->snapshots, link) {
      91        1404 :                 if (snapshot_entry->id == blobid) {
      92         684 :                         break;
      93             :                 }
      94             :         }
      95             : 
      96        3596 :         return snapshot_entry;
      97             : }
      98             : 
      99             : static void
     100        2798 : bs_claim_md_page(struct spdk_blob_store *bs, uint32_t page)
     101             : {
     102        2798 :         assert(spdk_spin_held(&bs->used_lock));
     103        2798 :         assert(page < spdk_bit_array_capacity(bs->used_md_pages));
     104        2798 :         assert(spdk_bit_array_get(bs->used_md_pages, page) == false);
     105             : 
     106        2798 :         spdk_bit_array_set(bs->used_md_pages, page);
     107        2798 : }
     108             : 
     109             : static void
     110        2122 : bs_release_md_page(struct spdk_blob_store *bs, uint32_t page)
     111             : {
     112        2122 :         assert(spdk_spin_held(&bs->used_lock));
     113        2122 :         assert(page < spdk_bit_array_capacity(bs->used_md_pages));
     114        2122 :         assert(spdk_bit_array_get(bs->used_md_pages, page) == true);
     115             : 
     116        2122 :         spdk_bit_array_clear(bs->used_md_pages, page);
     117        2122 : }
     118             : 
     119             : static uint32_t
     120        7976 : bs_claim_cluster(struct spdk_blob_store *bs)
     121             : {
     122             :         uint32_t cluster_num;
     123             : 
     124        7976 :         assert(spdk_spin_held(&bs->used_lock));
     125             : 
     126        7976 :         cluster_num = spdk_bit_pool_allocate_bit(bs->used_clusters);
     127        7976 :         if (cluster_num == UINT32_MAX) {
     128           0 :                 return UINT32_MAX;
     129             :         }
     130             : 
     131        7976 :         SPDK_DEBUGLOG(blob, "Claiming cluster %u\n", cluster_num);
     132        7976 :         bs->num_free_clusters--;
     133             : 
     134        7976 :         return cluster_num;
     135             : }
     136             : 
     137             : static void
     138        2315 : bs_release_cluster(struct spdk_blob_store *bs, uint32_t cluster_num)
     139             : {
     140        2315 :         assert(spdk_spin_held(&bs->used_lock));
     141        2315 :         assert(cluster_num < spdk_bit_pool_capacity(bs->used_clusters));
     142        2315 :         assert(spdk_bit_pool_is_allocated(bs->used_clusters, cluster_num) == true);
     143        2315 :         assert(bs->num_free_clusters < bs->total_clusters);
     144             : 
     145        2315 :         SPDK_DEBUGLOG(blob, "Releasing cluster %u\n", cluster_num);
     146             : 
     147        2315 :         spdk_bit_pool_free_bit(bs->used_clusters, cluster_num);
     148        2315 :         bs->num_free_clusters++;
     149        2315 : }
     150             : 
     151             : static int
     152        7976 : blob_insert_cluster(struct spdk_blob *blob, uint32_t cluster_num, uint64_t cluster)
     153             : {
     154        7976 :         uint64_t *cluster_lba = &blob->active.clusters[cluster_num];
     155             : 
     156        7976 :         blob_verify_md_op(blob);
     157             : 
     158        7976 :         if (*cluster_lba != 0) {
     159           4 :                 return -EEXIST;
     160             :         }
     161             : 
     162        7972 :         *cluster_lba = bs_cluster_to_lba(blob->bs, cluster);
     163        7972 :         return 0;
     164             : }
     165             : 
     166             : static int
     167        7976 : bs_allocate_cluster(struct spdk_blob *blob, uint32_t cluster_num,
     168             :                     uint64_t *cluster, uint32_t *lowest_free_md_page, bool update_map)
     169             : {
     170        7976 :         uint32_t *extent_page = 0;
     171             : 
     172        7976 :         assert(spdk_spin_held(&blob->bs->used_lock));
     173             : 
     174        7976 :         *cluster = bs_claim_cluster(blob->bs);
     175        7976 :         if (*cluster == UINT32_MAX) {
     176             :                 /* No more free clusters. Cannot satisfy the request */
     177           0 :                 return -ENOSPC;
     178             :         }
     179             : 
     180        7976 :         if (blob->use_extent_table) {
     181        4046 :                 extent_page = bs_cluster_to_extent_page(blob, cluster_num);
     182        4046 :                 if (*extent_page == 0) {
     183             :                         /* Extent page shall never occupy md_page so start the search from 1 */
     184         714 :                         if (*lowest_free_md_page == 0) {
     185         712 :                                 *lowest_free_md_page = 1;
     186             :                         }
     187             :                         /* No extent_page is allocated for the cluster */
     188         714 :                         *lowest_free_md_page = spdk_bit_array_find_first_clear(blob->bs->used_md_pages,
     189             :                                                *lowest_free_md_page);
     190         714 :                         if (*lowest_free_md_page == UINT32_MAX) {
     191             :                                 /* No more free md pages. Cannot satisfy the request */
     192           0 :                                 bs_release_cluster(blob->bs, *cluster);
     193           0 :                                 return -ENOSPC;
     194             :                         }
     195         714 :                         bs_claim_md_page(blob->bs, *lowest_free_md_page);
     196             :                 }
     197             :         }
     198             : 
     199        7976 :         SPDK_DEBUGLOG(blob, "Claiming cluster %" PRIu64 " for blob 0x%" PRIx64 "\n", *cluster,
     200             :                       blob->id);
     201             : 
     202        7976 :         if (update_map) {
     203        7364 :                 blob_insert_cluster(blob, cluster_num, *cluster);
     204        7364 :                 if (blob->use_extent_table && *extent_page == 0) {
     205         642 :                         *extent_page = *lowest_free_md_page;
     206             :                 }
     207             :         }
     208             : 
     209        7976 :         return 0;
     210             : }
     211             : 
     212             : static void
     213        5306 : blob_xattrs_init(struct spdk_blob_xattr_opts *xattrs)
     214             : {
     215        5306 :         xattrs->count = 0;
     216        5306 :         xattrs->names = NULL;
     217        5306 :         xattrs->ctx = NULL;
     218        5306 :         xattrs->get_value = NULL;
     219        5306 : }
     220             : 
     221             : void
     222        3504 : spdk_blob_opts_init(struct spdk_blob_opts *opts, size_t opts_size)
     223             : {
     224        3504 :         if (!opts) {
     225           0 :                 SPDK_ERRLOG("opts should not be NULL\n");
     226           0 :                 return;
     227             :         }
     228             : 
     229        3504 :         if (!opts_size) {
     230           0 :                 SPDK_ERRLOG("opts_size should not be zero value\n");
     231           0 :                 return;
     232             :         }
     233             : 
     234        3504 :         memset(opts, 0, opts_size);
     235        3504 :         opts->opts_size = opts_size;
     236             : 
     237             : #define FIELD_OK(field) \
     238             :         offsetof(struct spdk_blob_opts, field) + sizeof(opts->field) <= opts_size
     239             : 
     240             : #define SET_FIELD(field, value) \
     241             :         if (FIELD_OK(field)) { \
     242             :                 opts->field = value; \
     243             :         } \
     244             : 
     245        3504 :         SET_FIELD(num_clusters, 0);
     246        3504 :         SET_FIELD(thin_provision, false);
     247        3504 :         SET_FIELD(clear_method, BLOB_CLEAR_WITH_DEFAULT);
     248             : 
     249        3504 :         if (FIELD_OK(xattrs)) {
     250        3504 :                 blob_xattrs_init(&opts->xattrs);
     251             :         }
     252             : 
     253        3504 :         SET_FIELD(use_extent_table, true);
     254             : 
     255             : #undef FIELD_OK
     256             : #undef SET_FIELD
     257             : }
     258             : 
     259             : void
     260        3282 : spdk_blob_open_opts_init(struct spdk_blob_open_opts *opts, size_t opts_size)
     261             : {
     262        3282 :         if (!opts) {
     263           0 :                 SPDK_ERRLOG("opts should not be NULL\n");
     264           0 :                 return;
     265             :         }
     266             : 
     267        3282 :         if (!opts_size) {
     268           0 :                 SPDK_ERRLOG("opts_size should not be zero value\n");
     269           0 :                 return;
     270             :         }
     271             : 
     272        3282 :         memset(opts, 0, opts_size);
     273        3282 :         opts->opts_size = opts_size;
     274             : 
     275             : #define FIELD_OK(field) \
     276             :         offsetof(struct spdk_blob_open_opts, field) + sizeof(opts->field) <= opts_size
     277             : 
     278             : #define SET_FIELD(field, value) \
     279             :         if (FIELD_OK(field)) { \
     280             :                 opts->field = value; \
     281             :         } \
     282             : 
     283        3282 :         SET_FIELD(clear_method, BLOB_CLEAR_WITH_DEFAULT);
     284             : 
     285             : #undef FIELD_OK
     286             : #undef SET_FILED
     287             : }
     288             : 
     289             : static struct spdk_blob *
     290        5080 : blob_alloc(struct spdk_blob_store *bs, spdk_blob_id id)
     291             : {
     292             :         struct spdk_blob *blob;
     293             : 
     294        5080 :         blob = calloc(1, sizeof(*blob));
     295        5080 :         if (!blob) {
     296           0 :                 return NULL;
     297             :         }
     298             : 
     299        5080 :         blob->id = id;
     300        5080 :         blob->bs = bs;
     301             : 
     302        5080 :         blob->parent_id = SPDK_BLOBID_INVALID;
     303             : 
     304        5080 :         blob->state = SPDK_BLOB_STATE_DIRTY;
     305        5080 :         blob->extent_rle_found = false;
     306        5080 :         blob->extent_table_found = false;
     307        5080 :         blob->active.num_pages = 1;
     308        5080 :         blob->active.pages = calloc(1, sizeof(*blob->active.pages));
     309        5080 :         if (!blob->active.pages) {
     310           0 :                 free(blob);
     311           0 :                 return NULL;
     312             :         }
     313             : 
     314        5080 :         blob->active.pages[0] = bs_blobid_to_page(id);
     315             : 
     316        5080 :         TAILQ_INIT(&blob->xattrs);
     317        5080 :         TAILQ_INIT(&blob->xattrs_internal);
     318        5080 :         TAILQ_INIT(&blob->pending_persists);
     319        5080 :         TAILQ_INIT(&blob->persists_to_complete);
     320             : 
     321        5080 :         return blob;
     322             : }
     323             : 
     324             : static void
     325       10160 : xattrs_free(struct spdk_xattr_tailq *xattrs)
     326             : {
     327             :         struct spdk_xattr       *xattr, *xattr_tmp;
     328             : 
     329       11762 :         TAILQ_FOREACH_SAFE(xattr, xattrs, link, xattr_tmp) {
     330        1602 :                 TAILQ_REMOVE(xattrs, xattr, link);
     331        1602 :                 free(xattr->name);
     332        1602 :                 free(xattr->value);
     333        1602 :                 free(xattr);
     334             :         }
     335       10160 : }
     336             : 
     337             : static void
     338        5080 : blob_free(struct spdk_blob *blob)
     339             : {
     340        5080 :         assert(blob != NULL);
     341        5080 :         assert(TAILQ_EMPTY(&blob->pending_persists));
     342        5080 :         assert(TAILQ_EMPTY(&blob->persists_to_complete));
     343             : 
     344        5080 :         free(blob->active.extent_pages);
     345        5080 :         free(blob->clean.extent_pages);
     346        5080 :         free(blob->active.clusters);
     347        5080 :         free(blob->clean.clusters);
     348        5080 :         free(blob->active.pages);
     349        5080 :         free(blob->clean.pages);
     350             : 
     351        5080 :         xattrs_free(&blob->xattrs);
     352        5080 :         xattrs_free(&blob->xattrs_internal);
     353             : 
     354        5080 :         if (blob->back_bs_dev) {
     355         944 :                 blob->back_bs_dev->destroy(blob->back_bs_dev);
     356             :         }
     357             : 
     358        5080 :         free(blob);
     359        5080 : }
     360             : 
     361             : static void
     362         288 : blob_back_bs_destroy_esnap_done(void *ctx, struct spdk_blob *blob, int bserrno)
     363             : {
     364         288 :         struct spdk_bs_dev      *bs_dev = ctx;
     365             : 
     366         288 :         if (bserrno != 0) {
     367             :                 /*
     368             :                  * This is probably due to a memory allocation failure when creating the
     369             :                  * blob_esnap_destroy_ctx before iterating threads.
     370             :                  */
     371           0 :                 SPDK_ERRLOG("blob 0x%" PRIx64 ": Unable to destroy bs dev channels: error %d\n",
     372             :                             blob->id, bserrno);
     373           0 :                 assert(false);
     374             :         }
     375             : 
     376         288 :         if (bs_dev == NULL) {
     377             :                 /*
     378             :                  * This check exists to make scanbuild happy.
     379             :                  *
     380             :                  * blob->back_bs_dev for an esnap is NULL during the first iteration of blobs while
     381             :                  * the blobstore is being loaded. It could also be NULL if there was an error
     382             :                  * opening the esnap device. In each of these cases, no channels could have been
     383             :                  * created because back_bs_dev->create_channel() would have led to a NULL pointer
     384             :                  * deref.
     385             :                  */
     386           0 :                 assert(false);
     387             :                 return;
     388             :         }
     389             : 
     390         288 :         SPDK_DEBUGLOG(blob_esnap, "blob 0x%" PRIx64 ": calling destroy on back_bs_dev\n", blob->id);
     391         288 :         bs_dev->destroy(bs_dev);
     392             : }
     393             : 
     394             : static void
     395         288 : blob_back_bs_destroy(struct spdk_blob *blob)
     396             : {
     397         288 :         SPDK_DEBUGLOG(blob_esnap, "blob 0x%" PRIx64 ": preparing to destroy back_bs_dev\n",
     398             :                       blob->id);
     399             : 
     400         288 :         blob_esnap_destroy_bs_dev_channels(blob, false, blob_back_bs_destroy_esnap_done,
     401         288 :                                            blob->back_bs_dev);
     402         288 :         blob->back_bs_dev = NULL;
     403         288 : }
     404             : 
     405             : struct freeze_io_ctx {
     406             :         struct spdk_bs_cpl cpl;
     407             :         struct spdk_blob *blob;
     408             : };
     409             : 
     410             : static void
     411         458 : blob_io_sync(struct spdk_io_channel_iter *i)
     412             : {
     413         458 :         spdk_for_each_channel_continue(i, 0);
     414         458 : }
     415             : 
     416             : static void
     417         446 : blob_execute_queued_io(struct spdk_io_channel_iter *i)
     418             : {
     419         446 :         struct spdk_io_channel *_ch = spdk_io_channel_iter_get_channel(i);
     420         446 :         struct spdk_bs_channel *ch = spdk_io_channel_get_ctx(_ch);
     421         446 :         struct freeze_io_ctx *ctx = spdk_io_channel_iter_get_ctx(i);
     422             :         struct spdk_bs_request_set      *set;
     423             :         struct spdk_bs_user_op_args     *args;
     424             :         spdk_bs_user_op_t *op, *tmp;
     425             : 
     426         450 :         TAILQ_FOREACH_SAFE(op, &ch->queued_io, link, tmp) {
     427           4 :                 set = (struct spdk_bs_request_set *)op;
     428           4 :                 args = &set->u.user_op;
     429             : 
     430           4 :                 if (args->blob == ctx->blob) {
     431           4 :                         TAILQ_REMOVE(&ch->queued_io, op, link);
     432           4 :                         bs_user_op_execute(op);
     433             :                 }
     434             :         }
     435             : 
     436         446 :         spdk_for_each_channel_continue(i, 0);
     437         446 : }
     438             : 
     439             : static void
     440         872 : blob_io_cpl(struct spdk_io_channel_iter *i, int status)
     441             : {
     442         872 :         struct freeze_io_ctx *ctx = spdk_io_channel_iter_get_ctx(i);
     443             : 
     444         872 :         ctx->cpl.u.blob_basic.cb_fn(ctx->cpl.u.blob_basic.cb_arg, 0);
     445             : 
     446         872 :         free(ctx);
     447         872 : }
     448             : 
     449             : static void
     450         442 : blob_freeze_io(struct spdk_blob *blob, spdk_blob_op_complete cb_fn, void *cb_arg)
     451             : {
     452             :         struct freeze_io_ctx *ctx;
     453             : 
     454         442 :         blob_verify_md_op(blob);
     455             : 
     456         442 :         ctx = calloc(1, sizeof(*ctx));
     457         442 :         if (!ctx) {
     458           0 :                 cb_fn(cb_arg, -ENOMEM);
     459           0 :                 return;
     460             :         }
     461             : 
     462         442 :         ctx->cpl.type = SPDK_BS_CPL_TYPE_BS_BASIC;
     463         442 :         ctx->cpl.u.blob_basic.cb_fn = cb_fn;
     464         442 :         ctx->cpl.u.blob_basic.cb_arg = cb_arg;
     465         442 :         ctx->blob = blob;
     466             : 
     467             :         /* Freeze I/O on blob */
     468         442 :         blob->frozen_refcnt++;
     469             : 
     470         442 :         spdk_for_each_channel(blob->bs, blob_io_sync, ctx, blob_io_cpl);
     471             : }
     472             : 
     473             : static void
     474         430 : blob_unfreeze_io(struct spdk_blob *blob, spdk_blob_op_complete cb_fn, void *cb_arg)
     475             : {
     476             :         struct freeze_io_ctx *ctx;
     477             : 
     478         430 :         blob_verify_md_op(blob);
     479             : 
     480         430 :         ctx = calloc(1, sizeof(*ctx));
     481         430 :         if (!ctx) {
     482           0 :                 cb_fn(cb_arg, -ENOMEM);
     483           0 :                 return;
     484             :         }
     485             : 
     486         430 :         ctx->cpl.type = SPDK_BS_CPL_TYPE_BS_BASIC;
     487         430 :         ctx->cpl.u.blob_basic.cb_fn = cb_fn;
     488         430 :         ctx->cpl.u.blob_basic.cb_arg = cb_arg;
     489         430 :         ctx->blob = blob;
     490             : 
     491         430 :         assert(blob->frozen_refcnt > 0);
     492             : 
     493         430 :         blob->frozen_refcnt--;
     494             : 
     495         430 :         spdk_for_each_channel(blob->bs, blob_execute_queued_io, ctx, blob_io_cpl);
     496             : }
     497             : 
     498             : static int
     499        7838 : blob_mark_clean(struct spdk_blob *blob)
     500             : {
     501        7838 :         uint32_t *extent_pages = NULL;
     502        7838 :         uint64_t *clusters = NULL;
     503        7838 :         uint32_t *pages = NULL;
     504             : 
     505        7838 :         assert(blob != NULL);
     506             : 
     507        7838 :         if (blob->active.num_extent_pages) {
     508        2731 :                 assert(blob->active.extent_pages);
     509        2731 :                 extent_pages = calloc(blob->active.num_extent_pages, sizeof(*blob->active.extent_pages));
     510        2731 :                 if (!extent_pages) {
     511           0 :                         return -ENOMEM;
     512             :                 }
     513        2731 :                 memcpy(extent_pages, blob->active.extent_pages,
     514        2731 :                        blob->active.num_extent_pages * sizeof(*extent_pages));
     515             :         }
     516             : 
     517        7838 :         if (blob->active.num_clusters) {
     518        5574 :                 assert(blob->active.clusters);
     519        5574 :                 clusters = calloc(blob->active.num_clusters, sizeof(*blob->active.clusters));
     520        5574 :                 if (!clusters) {
     521           0 :                         free(extent_pages);
     522           0 :                         return -ENOMEM;
     523             :                 }
     524        5574 :                 memcpy(clusters, blob->active.clusters, blob->active.num_clusters * sizeof(*blob->active.clusters));
     525             :         }
     526             : 
     527        7838 :         if (blob->active.num_pages) {
     528        6422 :                 assert(blob->active.pages);
     529        6422 :                 pages = calloc(blob->active.num_pages, sizeof(*blob->active.pages));
     530        6422 :                 if (!pages) {
     531           0 :                         free(extent_pages);
     532           0 :                         free(clusters);
     533           0 :                         return -ENOMEM;
     534             :                 }
     535        6422 :                 memcpy(pages, blob->active.pages, blob->active.num_pages * sizeof(*blob->active.pages));
     536             :         }
     537             : 
     538        7838 :         free(blob->clean.extent_pages);
     539        7838 :         free(blob->clean.clusters);
     540        7838 :         free(blob->clean.pages);
     541             : 
     542        7838 :         blob->clean.num_extent_pages = blob->active.num_extent_pages;
     543        7838 :         blob->clean.extent_pages = blob->active.extent_pages;
     544        7838 :         blob->clean.num_clusters = blob->active.num_clusters;
     545        7838 :         blob->clean.clusters = blob->active.clusters;
     546        7838 :         blob->clean.num_pages = blob->active.num_pages;
     547        7838 :         blob->clean.pages = blob->active.pages;
     548             : 
     549        7838 :         blob->active.extent_pages = extent_pages;
     550        7838 :         blob->active.clusters = clusters;
     551        7838 :         blob->active.pages = pages;
     552             : 
     553             :         /* If the metadata was dirtied again while the metadata was being written to disk,
     554             :          *  we do not want to revert the DIRTY state back to CLEAN here.
     555             :          */
     556        7838 :         if (blob->state == SPDK_BLOB_STATE_LOADING) {
     557        3214 :                 blob->state = SPDK_BLOB_STATE_CLEAN;
     558             :         }
     559             : 
     560        7838 :         return 0;
     561             : }
     562             : 
     563             : static int
     564        1176 : blob_deserialize_xattr(struct spdk_blob *blob,
     565             :                        struct spdk_blob_md_descriptor_xattr *desc_xattr, bool internal)
     566             : {
     567             :         struct spdk_xattr                       *xattr;
     568             : 
     569        1176 :         if (desc_xattr->length != sizeof(desc_xattr->name_length) +
     570             :             sizeof(desc_xattr->value_length) +
     571        1176 :             desc_xattr->name_length + desc_xattr->value_length) {
     572           0 :                 return -EINVAL;
     573             :         }
     574             : 
     575        1176 :         xattr = calloc(1, sizeof(*xattr));
     576        1176 :         if (xattr == NULL) {
     577           0 :                 return -ENOMEM;
     578             :         }
     579             : 
     580        1176 :         xattr->name = malloc(desc_xattr->name_length + 1);
     581        1176 :         if (xattr->name == NULL) {
     582           0 :                 free(xattr);
     583           0 :                 return -ENOMEM;
     584             :         }
     585             : 
     586        1176 :         xattr->value = malloc(desc_xattr->value_length);
     587        1176 :         if (xattr->value == NULL) {
     588           0 :                 free(xattr->name);
     589           0 :                 free(xattr);
     590           0 :                 return -ENOMEM;
     591             :         }
     592             : 
     593        1176 :         memcpy(xattr->name, desc_xattr->name, desc_xattr->name_length);
     594        1176 :         xattr->name[desc_xattr->name_length] = '\0';
     595        1176 :         xattr->value_len = desc_xattr->value_length;
     596        1176 :         memcpy(xattr->value,
     597        1176 :                (void *)((uintptr_t)desc_xattr->name + desc_xattr->name_length),
     598        1176 :                desc_xattr->value_length);
     599             : 
     600        1176 :         TAILQ_INSERT_TAIL(internal ? &blob->xattrs_internal : &blob->xattrs, xattr, link);
     601             : 
     602        1176 :         return 0;
     603             : }
     604             : 
     605             : 
     606             : static int
     607        4380 : blob_parse_page(const struct spdk_blob_md_page *page, struct spdk_blob *blob)
     608             : {
     609             :         struct spdk_blob_md_descriptor *desc;
     610        4380 :         size_t  cur_desc = 0;
     611             :         void *tmp;
     612             : 
     613        4380 :         desc = (struct spdk_blob_md_descriptor *)page->descriptors;
     614       12804 :         while (cur_desc < sizeof(page->descriptors)) {
     615       12804 :                 if (desc->type == SPDK_MD_DESCRIPTOR_TYPE_PADDING) {
     616        4332 :                         if (desc->length == 0) {
     617             :                                 /* If padding and length are 0, this terminates the page */
     618        4332 :                                 break;
     619             :                         }
     620        8472 :                 } else if (desc->type == SPDK_MD_DESCRIPTOR_TYPE_FLAGS) {
     621             :                         struct spdk_blob_md_descriptor_flags    *desc_flags;
     622             : 
     623        3246 :                         desc_flags = (struct spdk_blob_md_descriptor_flags *)desc;
     624             : 
     625        3246 :                         if (desc_flags->length != sizeof(*desc_flags) - sizeof(*desc)) {
     626           0 :                                 return -EINVAL;
     627             :                         }
     628             : 
     629        3246 :                         if ((desc_flags->invalid_flags | SPDK_BLOB_INVALID_FLAGS_MASK) !=
     630             :                             SPDK_BLOB_INVALID_FLAGS_MASK) {
     631           8 :                                 return -EINVAL;
     632             :                         }
     633             : 
     634        3238 :                         if ((desc_flags->data_ro_flags | SPDK_BLOB_DATA_RO_FLAGS_MASK) !=
     635             :                             SPDK_BLOB_DATA_RO_FLAGS_MASK) {
     636          12 :                                 blob->data_ro = true;
     637          12 :                                 blob->md_ro = true;
     638             :                         }
     639             : 
     640        3238 :                         if ((desc_flags->md_ro_flags | SPDK_BLOB_MD_RO_FLAGS_MASK) !=
     641             :                             SPDK_BLOB_MD_RO_FLAGS_MASK) {
     642          12 :                                 blob->md_ro = true;
     643             :                         }
     644             : 
     645        3238 :                         if ((desc_flags->data_ro_flags & SPDK_BLOB_READ_ONLY)) {
     646         514 :                                 blob->data_ro = true;
     647         514 :                                 blob->md_ro = true;
     648             :                         }
     649             : 
     650        3238 :                         blob->invalid_flags = desc_flags->invalid_flags;
     651        3238 :                         blob->data_ro_flags = desc_flags->data_ro_flags;
     652        3238 :                         blob->md_ro_flags = desc_flags->md_ro_flags;
     653             : 
     654        5226 :                 } else if (desc->type == SPDK_MD_DESCRIPTOR_TYPE_EXTENT_RLE) {
     655             :                         struct spdk_blob_md_descriptor_extent_rle       *desc_extent_rle;
     656             :                         unsigned int                            i, j;
     657        1346 :                         unsigned int                            cluster_count = blob->active.num_clusters;
     658             : 
     659        1346 :                         if (blob->extent_table_found) {
     660             :                                 /* Extent Table already present in the md,
     661             :                                  * both descriptors should never be at the same time. */
     662           0 :                                 return -EINVAL;
     663             :                         }
     664        1346 :                         blob->extent_rle_found = true;
     665             : 
     666        1346 :                         desc_extent_rle = (struct spdk_blob_md_descriptor_extent_rle *)desc;
     667             : 
     668        1346 :                         if (desc_extent_rle->length == 0 ||
     669        1346 :                             (desc_extent_rle->length % sizeof(desc_extent_rle->extents[0]) != 0)) {
     670           0 :                                 return -EINVAL;
     671             :                         }
     672             : 
     673        2854 :                         for (i = 0; i < desc_extent_rle->length / sizeof(desc_extent_rle->extents[0]); i++) {
     674       20894 :                                 for (j = 0; j < desc_extent_rle->extents[i].length; j++) {
     675       19386 :                                         if (desc_extent_rle->extents[i].cluster_idx != 0) {
     676        6638 :                                                 if (!spdk_bit_pool_is_allocated(blob->bs->used_clusters,
     677        6638 :                                                                                 desc_extent_rle->extents[i].cluster_idx + j)) {
     678           0 :                                                         return -EINVAL;
     679             :                                                 }
     680             :                                         }
     681       19386 :                                         cluster_count++;
     682             :                                 }
     683             :                         }
     684             : 
     685        1346 :                         if (cluster_count == 0) {
     686           0 :                                 return -EINVAL;
     687             :                         }
     688        1346 :                         tmp = realloc(blob->active.clusters, cluster_count * sizeof(*blob->active.clusters));
     689        1346 :                         if (tmp == NULL) {
     690           0 :                                 return -ENOMEM;
     691             :                         }
     692        1346 :                         blob->active.clusters = tmp;
     693        1346 :                         blob->active.cluster_array_size = cluster_count;
     694             : 
     695        2854 :                         for (i = 0; i < desc_extent_rle->length / sizeof(desc_extent_rle->extents[0]); i++) {
     696       20894 :                                 for (j = 0; j < desc_extent_rle->extents[i].length; j++) {
     697       19386 :                                         if (desc_extent_rle->extents[i].cluster_idx != 0) {
     698        6638 :                                                 blob->active.clusters[blob->active.num_clusters++] = bs_cluster_to_lba(blob->bs,
     699        6638 :                                                                 desc_extent_rle->extents[i].cluster_idx + j);
     700       12748 :                                         } else if (spdk_blob_is_thin_provisioned(blob)) {
     701       12748 :                                                 blob->active.clusters[blob->active.num_clusters++] = 0;
     702             :                                         } else {
     703           0 :                                                 return -EINVAL;
     704             :                                         }
     705             :                                 }
     706             :                         }
     707        3880 :                 } else if (desc->type == SPDK_MD_DESCRIPTOR_TYPE_EXTENT_TABLE) {
     708             :                         struct spdk_blob_md_descriptor_extent_table *desc_extent_table;
     709        1670 :                         uint32_t num_extent_pages = blob->active.num_extent_pages;
     710             :                         uint32_t i, j;
     711             :                         size_t extent_pages_length;
     712             : 
     713        1670 :                         desc_extent_table = (struct spdk_blob_md_descriptor_extent_table *)desc;
     714        1670 :                         extent_pages_length = desc_extent_table->length - sizeof(desc_extent_table->num_clusters);
     715             : 
     716        1670 :                         if (blob->extent_rle_found) {
     717             :                                 /* This means that Extent RLE is present in MD,
     718             :                                  * both should never be at the same time. */
     719           0 :                                 return -EINVAL;
     720        1670 :                         } else if (blob->extent_table_found &&
     721           0 :                                    desc_extent_table->num_clusters != blob->remaining_clusters_in_et) {
     722             :                                 /* Number of clusters in this ET does not match number
     723             :                                  * from previously read EXTENT_TABLE. */
     724           0 :                                 return -EINVAL;
     725             :                         }
     726             : 
     727        1670 :                         if (desc_extent_table->length == 0 ||
     728        1670 :                             (extent_pages_length % sizeof(desc_extent_table->extent_page[0]) != 0)) {
     729           0 :                                 return -EINVAL;
     730             :                         }
     731             : 
     732        1670 :                         blob->extent_table_found = true;
     733             : 
     734        3098 :                         for (i = 0; i < extent_pages_length / sizeof(desc_extent_table->extent_page[0]); i++) {
     735        1428 :                                 num_extent_pages += desc_extent_table->extent_page[i].num_pages;
     736             :                         }
     737             : 
     738        1670 :                         if (num_extent_pages > 0) {
     739        1412 :                                 tmp = realloc(blob->active.extent_pages, num_extent_pages * sizeof(uint32_t));
     740        1412 :                                 if (tmp == NULL) {
     741           0 :                                         return -ENOMEM;
     742             :                                 }
     743        1412 :                                 blob->active.extent_pages = tmp;
     744             :                         }
     745        1670 :                         blob->active.extent_pages_array_size = num_extent_pages;
     746             : 
     747        1670 :                         blob->remaining_clusters_in_et = desc_extent_table->num_clusters;
     748             : 
     749             :                         /* Extent table entries contain md page numbers for extent pages.
     750             :                          * Zeroes represent unallocated extent pages, those are run-length-encoded.
     751             :                          */
     752        3098 :                         for (i = 0; i < extent_pages_length / sizeof(desc_extent_table->extent_page[0]); i++) {
     753        1428 :                                 if (desc_extent_table->extent_page[i].page_idx != 0) {
     754        1040 :                                         assert(desc_extent_table->extent_page[i].num_pages == 1);
     755        1040 :                                         blob->active.extent_pages[blob->active.num_extent_pages++] =
     756        1040 :                                                 desc_extent_table->extent_page[i].page_idx;
     757         388 :                                 } else if (spdk_blob_is_thin_provisioned(blob)) {
     758         776 :                                         for (j = 0; j < desc_extent_table->extent_page[i].num_pages; j++) {
     759         388 :                                                 blob->active.extent_pages[blob->active.num_extent_pages++] = 0;
     760             :                                         }
     761             :                                 } else {
     762           0 :                                         return -EINVAL;
     763             :                                 }
     764             :                         }
     765        2210 :                 } else if (desc->type == SPDK_MD_DESCRIPTOR_TYPE_EXTENT_PAGE) {
     766             :                         struct spdk_blob_md_descriptor_extent_page      *desc_extent;
     767             :                         unsigned int                                    i;
     768        1034 :                         unsigned int                                    cluster_count = 0;
     769             :                         size_t                                          cluster_idx_length;
     770             : 
     771        1034 :                         if (blob->extent_rle_found) {
     772             :                                 /* This means that Extent RLE is present in MD,
     773             :                                  * both should never be at the same time. */
     774           0 :                                 return -EINVAL;
     775             :                         }
     776             : 
     777        1034 :                         desc_extent = (struct spdk_blob_md_descriptor_extent_page *)desc;
     778        1034 :                         cluster_idx_length = desc_extent->length - sizeof(desc_extent->start_cluster_idx);
     779             : 
     780        1034 :                         if (desc_extent->length <= sizeof(desc_extent->start_cluster_idx) ||
     781        1034 :                             (cluster_idx_length % sizeof(desc_extent->cluster_idx[0]) != 0)) {
     782           0 :                                 return -EINVAL;
     783             :                         }
     784             : 
     785       16266 :                         for (i = 0; i < cluster_idx_length / sizeof(desc_extent->cluster_idx[0]); i++) {
     786       15232 :                                 if (desc_extent->cluster_idx[i] != 0) {
     787        6908 :                                         if (!spdk_bit_pool_is_allocated(blob->bs->used_clusters, desc_extent->cluster_idx[i])) {
     788           0 :                                                 return -EINVAL;
     789             :                                         }
     790             :                                 }
     791       15232 :                                 cluster_count++;
     792             :                         }
     793             : 
     794        1034 :                         if (cluster_count == 0) {
     795           0 :                                 return -EINVAL;
     796             :                         }
     797             : 
     798             :                         /* When reading extent pages sequentially starting cluster idx should match
     799             :                          * current size of a blob.
     800             :                          * If changed to batch reading, this check shall be removed. */
     801        1034 :                         if (desc_extent->start_cluster_idx != blob->active.num_clusters) {
     802           0 :                                 return -EINVAL;
     803             :                         }
     804             : 
     805        1034 :                         tmp = realloc(blob->active.clusters,
     806        1034 :                                       (cluster_count + blob->active.num_clusters) * sizeof(*blob->active.clusters));
     807        1034 :                         if (tmp == NULL) {
     808           0 :                                 return -ENOMEM;
     809             :                         }
     810        1034 :                         blob->active.clusters = tmp;
     811        1034 :                         blob->active.cluster_array_size = (cluster_count + blob->active.num_clusters);
     812             : 
     813       16266 :                         for (i = 0; i < cluster_idx_length / sizeof(desc_extent->cluster_idx[0]); i++) {
     814       15232 :                                 if (desc_extent->cluster_idx[i] != 0) {
     815        6908 :                                         blob->active.clusters[blob->active.num_clusters++] = bs_cluster_to_lba(blob->bs,
     816             :                                                         desc_extent->cluster_idx[i]);
     817        8324 :                                 } else if (spdk_blob_is_thin_provisioned(blob)) {
     818        8324 :                                         blob->active.clusters[blob->active.num_clusters++] = 0;
     819             :                                 } else {
     820           0 :                                         return -EINVAL;
     821             :                                 }
     822             :                         }
     823        1034 :                         assert(desc_extent->start_cluster_idx + cluster_count == blob->active.num_clusters);
     824        1034 :                         assert(blob->remaining_clusters_in_et >= cluster_count);
     825        1034 :                         blob->remaining_clusters_in_et -= cluster_count;
     826        1176 :                 } else if (desc->type == SPDK_MD_DESCRIPTOR_TYPE_XATTR) {
     827             :                         int rc;
     828             : 
     829         394 :                         rc = blob_deserialize_xattr(blob,
     830             :                                                     (struct spdk_blob_md_descriptor_xattr *) desc, false);
     831         394 :                         if (rc != 0) {
     832           0 :                                 return rc;
     833             :                         }
     834         782 :                 } else if (desc->type == SPDK_MD_DESCRIPTOR_TYPE_XATTR_INTERNAL) {
     835             :                         int rc;
     836             : 
     837         782 :                         rc = blob_deserialize_xattr(blob,
     838             :                                                     (struct spdk_blob_md_descriptor_xattr *) desc, true);
     839         782 :                         if (rc != 0) {
     840           0 :                                 return rc;
     841             :                         }
     842             :                 } else {
     843             :                         /* Unrecognized descriptor type.  Do not fail - just continue to the
     844             :                          *  next descriptor.  If this descriptor is associated with some feature
     845             :                          *  defined in a newer version of blobstore, that version of blobstore
     846             :                          *  should create and set an associated feature flag to specify if this
     847             :                          *  blob can be loaded or not.
     848             :                          */
     849             :                 }
     850             : 
     851             :                 /* Advance to the next descriptor */
     852        8464 :                 cur_desc += sizeof(*desc) + desc->length;
     853        8464 :                 if (cur_desc + sizeof(*desc) > sizeof(page->descriptors)) {
     854          40 :                         break;
     855             :                 }
     856        8424 :                 desc = (struct spdk_blob_md_descriptor *)((uintptr_t)page->descriptors + cur_desc);
     857             :         }
     858             : 
     859        4372 :         return 0;
     860             : }
     861             : 
     862             : static bool bs_load_cur_extent_page_valid(struct spdk_blob_md_page *page);
     863             : 
     864             : static int
     865        1034 : blob_parse_extent_page(struct spdk_blob_md_page *extent_page, struct spdk_blob *blob)
     866             : {
     867        1034 :         assert(blob != NULL);
     868        1034 :         assert(blob->state == SPDK_BLOB_STATE_LOADING);
     869             : 
     870        1034 :         if (bs_load_cur_extent_page_valid(extent_page) == false) {
     871           0 :                 return -ENOENT;
     872             :         }
     873             : 
     874        1034 :         return blob_parse_page(extent_page, blob);
     875             : }
     876             : 
     877             : static int
     878        3250 : blob_parse(const struct spdk_blob_md_page *pages, uint32_t page_count,
     879             :            struct spdk_blob *blob)
     880             : {
     881             :         const struct spdk_blob_md_page *page;
     882             :         uint32_t i;
     883             :         int rc;
     884             :         void *tmp;
     885             : 
     886        3250 :         assert(page_count > 0);
     887        3250 :         assert(pages[0].sequence_num == 0);
     888        3250 :         assert(blob != NULL);
     889        3250 :         assert(blob->state == SPDK_BLOB_STATE_LOADING);
     890        3250 :         assert(blob->active.clusters == NULL);
     891             : 
     892             :         /* The blobid provided doesn't match what's in the MD, this can
     893             :          * happen for example if a bogus blobid is passed in through open.
     894             :          */
     895        3250 :         if (blob->id != pages[0].id) {
     896           4 :                 SPDK_ERRLOG("Blobid (0x%" PRIx64 ") doesn't match what's in metadata "
     897             :                             "(0x%" PRIx64 ")\n", blob->id, pages[0].id);
     898           4 :                 return -ENOENT;
     899             :         }
     900             : 
     901        3246 :         tmp = realloc(blob->active.pages, page_count * sizeof(*blob->active.pages));
     902        3246 :         if (!tmp) {
     903           0 :                 return -ENOMEM;
     904             :         }
     905        3246 :         blob->active.pages = tmp;
     906             : 
     907        3246 :         blob->active.pages[0] = pages[0].id;
     908             : 
     909        3346 :         for (i = 1; i < page_count; i++) {
     910         100 :                 assert(spdk_bit_array_get(blob->bs->used_md_pages, pages[i - 1].next));
     911         100 :                 blob->active.pages[i] = pages[i - 1].next;
     912             :         }
     913        3246 :         blob->active.num_pages = page_count;
     914             : 
     915        6584 :         for (i = 0; i < page_count; i++) {
     916        3346 :                 page = &pages[i];
     917             : 
     918        3346 :                 assert(page->id == blob->id);
     919        3346 :                 assert(page->sequence_num == i);
     920             : 
     921        3346 :                 rc = blob_parse_page(page, blob);
     922        3346 :                 if (rc != 0) {
     923           8 :                         return rc;
     924             :                 }
     925             :         }
     926             : 
     927        3238 :         return 0;
     928             : }
     929             : 
     930             : static int
     931        3994 : blob_serialize_add_page(const struct spdk_blob *blob,
     932             :                         struct spdk_blob_md_page **pages,
     933             :                         uint32_t *page_count,
     934             :                         struct spdk_blob_md_page **last_page)
     935             : {
     936             :         struct spdk_blob_md_page *page, *tmp_pages;
     937             : 
     938        3994 :         assert(pages != NULL);
     939        3994 :         assert(page_count != NULL);
     940             : 
     941        3994 :         *last_page = NULL;
     942        3994 :         if (*page_count == 0) {
     943        3906 :                 assert(*pages == NULL);
     944        3906 :                 *pages = spdk_malloc(SPDK_BS_PAGE_SIZE, 0,
     945             :                                      NULL, SPDK_ENV_SOCKET_ID_ANY, SPDK_MALLOC_DMA);
     946        3906 :                 if (*pages == NULL) {
     947           0 :                         return -ENOMEM;
     948             :                 }
     949        3906 :                 *page_count = 1;
     950             :         } else {
     951          88 :                 assert(*pages != NULL);
     952          88 :                 tmp_pages = spdk_realloc(*pages, SPDK_BS_PAGE_SIZE * (*page_count + 1), 0);
     953          88 :                 if (tmp_pages == NULL) {
     954           0 :                         return -ENOMEM;
     955             :                 }
     956          88 :                 (*page_count)++;
     957          88 :                 *pages = tmp_pages;
     958             :         }
     959             : 
     960        3994 :         page = &(*pages)[*page_count - 1];
     961        3994 :         memset(page, 0, sizeof(*page));
     962        3994 :         page->id = blob->id;
     963        3994 :         page->sequence_num = *page_count - 1;
     964        3994 :         page->next = SPDK_INVALID_MD_PAGE;
     965        3994 :         *last_page = page;
     966             : 
     967        3994 :         return 0;
     968             : }
     969             : 
     970             : /* Transform the in-memory representation 'xattr' into an on-disk xattr descriptor.
     971             :  * Update required_sz on both success and failure.
     972             :  *
     973             :  */
     974             : static int
     975        1561 : blob_serialize_xattr(const struct spdk_xattr *xattr,
     976             :                      uint8_t *buf, size_t buf_sz,
     977             :                      size_t *required_sz, bool internal)
     978             : {
     979             :         struct spdk_blob_md_descriptor_xattr    *desc;
     980             : 
     981        1561 :         *required_sz = sizeof(struct spdk_blob_md_descriptor_xattr) +
     982        1561 :                        strlen(xattr->name) +
     983        1561 :                        xattr->value_len;
     984             : 
     985        1561 :         if (buf_sz < *required_sz) {
     986          48 :                 return -1;
     987             :         }
     988             : 
     989        1513 :         desc = (struct spdk_blob_md_descriptor_xattr *)buf;
     990             : 
     991        1513 :         desc->type = internal ? SPDK_MD_DESCRIPTOR_TYPE_XATTR_INTERNAL : SPDK_MD_DESCRIPTOR_TYPE_XATTR;
     992        1513 :         desc->length = sizeof(desc->name_length) +
     993             :                        sizeof(desc->value_length) +
     994        1513 :                        strlen(xattr->name) +
     995        1513 :                        xattr->value_len;
     996        1513 :         desc->name_length = strlen(xattr->name);
     997        1513 :         desc->value_length = xattr->value_len;
     998             : 
     999        1513 :         memcpy(desc->name, xattr->name, desc->name_length);
    1000        1513 :         memcpy((void *)((uintptr_t)desc->name + desc->name_length),
    1001        1513 :                xattr->value,
    1002        1513 :                desc->value_length);
    1003             : 
    1004        1513 :         return 0;
    1005             : }
    1006             : 
    1007             : static void
    1008        1569 : blob_serialize_extent_table_entry(const struct spdk_blob *blob,
    1009             :                                   uint64_t start_ep, uint64_t *next_ep,
    1010             :                                   uint8_t **buf, size_t *remaining_sz)
    1011             : {
    1012             :         struct spdk_blob_md_descriptor_extent_table *desc;
    1013             :         size_t cur_sz;
    1014             :         uint64_t i, et_idx;
    1015             :         uint32_t extent_page, ep_len;
    1016             : 
    1017             :         /* The buffer must have room for at least num_clusters entry */
    1018        1569 :         cur_sz = sizeof(struct spdk_blob_md_descriptor) + sizeof(desc->num_clusters);
    1019        1569 :         if (*remaining_sz < cur_sz) {
    1020          20 :                 *next_ep = start_ep;
    1021          20 :                 return;
    1022             :         }
    1023             : 
    1024        1549 :         desc = (struct spdk_blob_md_descriptor_extent_table *)*buf;
    1025        1549 :         desc->type = SPDK_MD_DESCRIPTOR_TYPE_EXTENT_TABLE;
    1026             : 
    1027        1549 :         desc->num_clusters = blob->active.num_clusters;
    1028             : 
    1029        1549 :         ep_len = 1;
    1030        1549 :         et_idx = 0;
    1031        4052 :         for (i = start_ep; i < blob->active.num_extent_pages; i++) {
    1032        2503 :                 if (*remaining_sz < cur_sz  + sizeof(desc->extent_page[0])) {
    1033             :                         /* If we ran out of buffer space, return */
    1034           0 :                         break;
    1035             :                 }
    1036             : 
    1037        2503 :                 extent_page = blob->active.extent_pages[i];
    1038             :                 /* Verify that next extent_page is unallocated */
    1039        2503 :                 if (extent_page == 0 &&
    1040        1478 :                     (i + 1 < blob->active.num_extent_pages && blob->active.extent_pages[i + 1] == 0)) {
    1041        1078 :                         ep_len++;
    1042        1078 :                         continue;
    1043             :                 }
    1044        1425 :                 desc->extent_page[et_idx].page_idx = extent_page;
    1045        1425 :                 desc->extent_page[et_idx].num_pages = ep_len;
    1046        1425 :                 et_idx++;
    1047             : 
    1048        1425 :                 ep_len = 1;
    1049        1425 :                 cur_sz += sizeof(desc->extent_page[et_idx]);
    1050             :         }
    1051        1549 :         *next_ep = i;
    1052             : 
    1053        1549 :         desc->length = sizeof(desc->num_clusters) + sizeof(desc->extent_page[0]) * et_idx;
    1054        1549 :         *remaining_sz -= sizeof(struct spdk_blob_md_descriptor) + desc->length;
    1055        1549 :         *buf += sizeof(struct spdk_blob_md_descriptor) + desc->length;
    1056             : }
    1057             : 
    1058             : static int
    1059        1551 : blob_serialize_extent_table(const struct spdk_blob *blob,
    1060             :                             struct spdk_blob_md_page **pages,
    1061             :                             struct spdk_blob_md_page *cur_page,
    1062             :                             uint32_t *page_count, uint8_t **buf,
    1063             :                             size_t *remaining_sz)
    1064             : {
    1065        1551 :         uint64_t                                last_extent_page;
    1066             :         int                                     rc;
    1067             : 
    1068        1551 :         last_extent_page = 0;
    1069             :         /* At least single extent table entry has to be always persisted.
    1070             :          * Such case occurs with num_extent_pages == 0. */
    1071        1569 :         while (last_extent_page <= blob->active.num_extent_pages) {
    1072        1569 :                 blob_serialize_extent_table_entry(blob, last_extent_page, &last_extent_page, buf,
    1073             :                                                   remaining_sz);
    1074             : 
    1075        1569 :                 if (last_extent_page == blob->active.num_extent_pages) {
    1076        1551 :                         break;
    1077             :                 }
    1078             : 
    1079          18 :                 rc = blob_serialize_add_page(blob, pages, page_count, &cur_page);
    1080          18 :                 if (rc < 0) {
    1081           0 :                         return rc;
    1082             :                 }
    1083             : 
    1084          18 :                 *buf = (uint8_t *)cur_page->descriptors;
    1085          18 :                 *remaining_sz = sizeof(cur_page->descriptors);
    1086             :         }
    1087             : 
    1088        1551 :         return 0;
    1089             : }
    1090             : 
    1091             : static void
    1092        1553 : blob_serialize_extent_rle(const struct spdk_blob *blob,
    1093             :                           uint64_t start_cluster, uint64_t *next_cluster,
    1094             :                           uint8_t **buf, size_t *buf_sz)
    1095             : {
    1096             :         struct spdk_blob_md_descriptor_extent_rle *desc_extent_rle;
    1097             :         size_t cur_sz;
    1098             :         uint64_t i, extent_idx;
    1099             :         uint64_t lba, lba_per_cluster, lba_count;
    1100             : 
    1101             :         /* The buffer must have room for at least one extent */
    1102        1553 :         cur_sz = sizeof(struct spdk_blob_md_descriptor) + sizeof(desc_extent_rle->extents[0]);
    1103        1553 :         if (*buf_sz < cur_sz) {
    1104          18 :                 *next_cluster = start_cluster;
    1105          18 :                 return;
    1106             :         }
    1107             : 
    1108        1535 :         desc_extent_rle = (struct spdk_blob_md_descriptor_extent_rle *)*buf;
    1109        1535 :         desc_extent_rle->type = SPDK_MD_DESCRIPTOR_TYPE_EXTENT_RLE;
    1110             : 
    1111        1535 :         lba_per_cluster = bs_cluster_to_lba(blob->bs, 1);
    1112             :         /* Assert for scan-build false positive */
    1113        1535 :         assert(lba_per_cluster > 0);
    1114             : 
    1115        1535 :         lba = blob->active.clusters[start_cluster];
    1116        1535 :         lba_count = lba_per_cluster;
    1117        1535 :         extent_idx = 0;
    1118      677848 :         for (i = start_cluster + 1; i < blob->active.num_clusters; i++) {
    1119      676317 :                 if ((lba + lba_count) == blob->active.clusters[i] && lba != 0) {
    1120             :                         /* Run-length encode sequential non-zero LBA */
    1121        6706 :                         lba_count += lba_per_cluster;
    1122        6706 :                         continue;
    1123      669611 :                 } else if (lba == 0 && blob->active.clusters[i] == 0) {
    1124             :                         /* Run-length encode unallocated clusters */
    1125      668966 :                         lba_count += lba_per_cluster;
    1126      668966 :                         continue;
    1127             :                 }
    1128         645 :                 desc_extent_rle->extents[extent_idx].cluster_idx = lba / lba_per_cluster;
    1129         645 :                 desc_extent_rle->extents[extent_idx].length = lba_count / lba_per_cluster;
    1130         645 :                 extent_idx++;
    1131             : 
    1132         645 :                 cur_sz += sizeof(desc_extent_rle->extents[extent_idx]);
    1133             : 
    1134         645 :                 if (*buf_sz < cur_sz) {
    1135             :                         /* If we ran out of buffer space, return */
    1136           4 :                         *next_cluster = i;
    1137           4 :                         break;
    1138             :                 }
    1139             : 
    1140         641 :                 lba = blob->active.clusters[i];
    1141         641 :                 lba_count = lba_per_cluster;
    1142             :         }
    1143             : 
    1144        1535 :         if (*buf_sz >= cur_sz) {
    1145        1531 :                 desc_extent_rle->extents[extent_idx].cluster_idx = lba / lba_per_cluster;
    1146        1531 :                 desc_extent_rle->extents[extent_idx].length = lba_count / lba_per_cluster;
    1147        1531 :                 extent_idx++;
    1148             : 
    1149        1531 :                 *next_cluster = blob->active.num_clusters;
    1150             :         }
    1151             : 
    1152        1535 :         desc_extent_rle->length = sizeof(desc_extent_rle->extents[0]) * extent_idx;
    1153        1535 :         *buf_sz -= sizeof(struct spdk_blob_md_descriptor) + desc_extent_rle->length;
    1154        1535 :         *buf += sizeof(struct spdk_blob_md_descriptor) + desc_extent_rle->length;
    1155             : }
    1156             : 
    1157             : static int
    1158        1697 : blob_serialize_extents_rle(const struct spdk_blob *blob,
    1159             :                            struct spdk_blob_md_page **pages,
    1160             :                            struct spdk_blob_md_page *cur_page,
    1161             :                            uint32_t *page_count, uint8_t **buf,
    1162             :                            size_t *remaining_sz)
    1163             : {
    1164        1697 :         uint64_t                                last_cluster;
    1165             :         int                                     rc;
    1166             : 
    1167        1697 :         last_cluster = 0;
    1168        1719 :         while (last_cluster < blob->active.num_clusters) {
    1169        1553 :                 blob_serialize_extent_rle(blob, last_cluster, &last_cluster, buf, remaining_sz);
    1170             : 
    1171        1553 :                 if (last_cluster == blob->active.num_clusters) {
    1172        1531 :                         break;
    1173             :                 }
    1174             : 
    1175          22 :                 rc = blob_serialize_add_page(blob, pages, page_count, &cur_page);
    1176          22 :                 if (rc < 0) {
    1177           0 :                         return rc;
    1178             :                 }
    1179             : 
    1180          22 :                 *buf = (uint8_t *)cur_page->descriptors;
    1181          22 :                 *remaining_sz = sizeof(cur_page->descriptors);
    1182             :         }
    1183             : 
    1184        1697 :         return 0;
    1185             : }
    1186             : 
    1187             : static void
    1188         964 : blob_serialize_extent_page(const struct spdk_blob *blob,
    1189             :                            uint64_t cluster, struct spdk_blob_md_page *page)
    1190             : {
    1191             :         struct spdk_blob_md_descriptor_extent_page *desc_extent;
    1192             :         uint64_t i, extent_idx;
    1193             :         uint64_t lba, lba_per_cluster;
    1194         964 :         uint64_t start_cluster_idx = (cluster / SPDK_EXTENTS_PER_EP) * SPDK_EXTENTS_PER_EP;
    1195             : 
    1196         964 :         desc_extent = (struct spdk_blob_md_descriptor_extent_page *) page->descriptors;
    1197         964 :         desc_extent->type = SPDK_MD_DESCRIPTOR_TYPE_EXTENT_PAGE;
    1198             : 
    1199         964 :         lba_per_cluster = bs_cluster_to_lba(blob->bs, 1);
    1200             : 
    1201         964 :         desc_extent->start_cluster_idx = start_cluster_idx;
    1202         964 :         extent_idx = 0;
    1203       24786 :         for (i = start_cluster_idx; i < blob->active.num_clusters; i++) {
    1204       23856 :                 lba = blob->active.clusters[i];
    1205       23856 :                 desc_extent->cluster_idx[extent_idx++] = lba / lba_per_cluster;
    1206       23856 :                 if (extent_idx >= SPDK_EXTENTS_PER_EP) {
    1207          34 :                         break;
    1208             :                 }
    1209             :         }
    1210         964 :         desc_extent->length = sizeof(desc_extent->start_cluster_idx) +
    1211             :                               sizeof(desc_extent->cluster_idx[0]) * extent_idx;
    1212         964 : }
    1213             : 
    1214             : static void
    1215        3248 : blob_serialize_flags(const struct spdk_blob *blob,
    1216             :                      uint8_t *buf, size_t *buf_sz)
    1217             : {
    1218             :         struct spdk_blob_md_descriptor_flags *desc;
    1219             : 
    1220             :         /*
    1221             :          * Flags get serialized first, so we should always have room for the flags
    1222             :          *  descriptor.
    1223             :          */
    1224        3248 :         assert(*buf_sz >= sizeof(*desc));
    1225             : 
    1226        3248 :         desc = (struct spdk_blob_md_descriptor_flags *)buf;
    1227        3248 :         desc->type = SPDK_MD_DESCRIPTOR_TYPE_FLAGS;
    1228        3248 :         desc->length = sizeof(*desc) - sizeof(struct spdk_blob_md_descriptor);
    1229        3248 :         desc->invalid_flags = blob->invalid_flags;
    1230        3248 :         desc->data_ro_flags = blob->data_ro_flags;
    1231        3248 :         desc->md_ro_flags = blob->md_ro_flags;
    1232             : 
    1233        3248 :         *buf_sz -= sizeof(*desc);
    1234        3248 : }
    1235             : 
    1236             : static int
    1237        6496 : blob_serialize_xattrs(const struct spdk_blob *blob,
    1238             :                       const struct spdk_xattr_tailq *xattrs, bool internal,
    1239             :                       struct spdk_blob_md_page **pages,
    1240             :                       struct spdk_blob_md_page *cur_page,
    1241             :                       uint32_t *page_count, uint8_t **buf,
    1242             :                       size_t *remaining_sz)
    1243             : {
    1244             :         const struct spdk_xattr *xattr;
    1245             :         int     rc;
    1246             : 
    1247        8009 :         TAILQ_FOREACH(xattr, xattrs, link) {
    1248        1513 :                 size_t required_sz = 0;
    1249             : 
    1250        1513 :                 rc = blob_serialize_xattr(xattr,
    1251             :                                           *buf, *remaining_sz,
    1252             :                                           &required_sz, internal);
    1253        1513 :                 if (rc < 0) {
    1254             :                         /* Need to add a new page to the chain */
    1255          48 :                         rc = blob_serialize_add_page(blob, pages, page_count,
    1256             :                                                      &cur_page);
    1257          48 :                         if (rc < 0) {
    1258           0 :                                 spdk_free(*pages);
    1259           0 :                                 *pages = NULL;
    1260           0 :                                 *page_count = 0;
    1261           0 :                                 return rc;
    1262             :                         }
    1263             : 
    1264          48 :                         *buf = (uint8_t *)cur_page->descriptors;
    1265          48 :                         *remaining_sz = sizeof(cur_page->descriptors);
    1266             : 
    1267             :                         /* Try again */
    1268          48 :                         required_sz = 0;
    1269          48 :                         rc = blob_serialize_xattr(xattr,
    1270             :                                                   *buf, *remaining_sz,
    1271             :                                                   &required_sz, internal);
    1272             : 
    1273          48 :                         if (rc < 0) {
    1274           0 :                                 spdk_free(*pages);
    1275           0 :                                 *pages = NULL;
    1276           0 :                                 *page_count = 0;
    1277           0 :                                 return rc;
    1278             :                         }
    1279             :                 }
    1280             : 
    1281        1513 :                 *remaining_sz -= required_sz;
    1282        1513 :                 *buf += required_sz;
    1283             :         }
    1284             : 
    1285        6496 :         return 0;
    1286             : }
    1287             : 
    1288             : static int
    1289        3248 : blob_serialize(const struct spdk_blob *blob, struct spdk_blob_md_page **pages,
    1290             :                uint32_t *page_count)
    1291             : {
    1292        3248 :         struct spdk_blob_md_page                *cur_page;
    1293             :         int                                     rc;
    1294        3248 :         uint8_t                                 *buf;
    1295        3248 :         size_t                                  remaining_sz;
    1296             : 
    1297        3248 :         assert(pages != NULL);
    1298        3248 :         assert(page_count != NULL);
    1299        3248 :         assert(blob != NULL);
    1300        3248 :         assert(blob->state == SPDK_BLOB_STATE_DIRTY);
    1301             : 
    1302        3248 :         *pages = NULL;
    1303        3248 :         *page_count = 0;
    1304             : 
    1305             :         /* A blob always has at least 1 page, even if it has no descriptors */
    1306        3248 :         rc = blob_serialize_add_page(blob, pages, page_count, &cur_page);
    1307        3248 :         if (rc < 0) {
    1308           0 :                 return rc;
    1309             :         }
    1310             : 
    1311        3248 :         buf = (uint8_t *)cur_page->descriptors;
    1312        3248 :         remaining_sz = sizeof(cur_page->descriptors);
    1313             : 
    1314             :         /* Serialize flags */
    1315        3248 :         blob_serialize_flags(blob, buf, &remaining_sz);
    1316        3248 :         buf += sizeof(struct spdk_blob_md_descriptor_flags);
    1317             : 
    1318             :         /* Serialize xattrs */
    1319        3248 :         rc = blob_serialize_xattrs(blob, &blob->xattrs, false,
    1320             :                                    pages, cur_page, page_count, &buf, &remaining_sz);
    1321        3248 :         if (rc < 0) {
    1322           0 :                 return rc;
    1323             :         }
    1324             : 
    1325             :         /* Serialize internal xattrs */
    1326        3248 :         rc = blob_serialize_xattrs(blob, &blob->xattrs_internal, true,
    1327             :                                    pages, cur_page, page_count, &buf, &remaining_sz);
    1328        3248 :         if (rc < 0) {
    1329           0 :                 return rc;
    1330             :         }
    1331             : 
    1332        3248 :         if (blob->use_extent_table) {
    1333             :                 /* Serialize extent table */
    1334        1551 :                 rc = blob_serialize_extent_table(blob, pages, cur_page, page_count, &buf, &remaining_sz);
    1335             :         } else {
    1336             :                 /* Serialize extents */
    1337        1697 :                 rc = blob_serialize_extents_rle(blob, pages, cur_page, page_count, &buf, &remaining_sz);
    1338             :         }
    1339             : 
    1340        3248 :         return rc;
    1341             : }
    1342             : 
    1343             : struct spdk_blob_load_ctx {
    1344             :         struct spdk_blob                *blob;
    1345             : 
    1346             :         struct spdk_blob_md_page        *pages;
    1347             :         uint32_t                        num_pages;
    1348             :         uint32_t                        next_extent_page;
    1349             :         spdk_bs_sequence_t              *seq;
    1350             : 
    1351             :         spdk_bs_sequence_cpl            cb_fn;
    1352             :         void                            *cb_arg;
    1353             : };
    1354             : 
    1355             : static uint32_t
    1356       19150 : blob_md_page_calc_crc(void *page)
    1357             : {
    1358             :         uint32_t                crc;
    1359             : 
    1360       19150 :         crc = BLOB_CRC32C_INITIAL;
    1361       19150 :         crc = spdk_crc32c_update(page, SPDK_BS_PAGE_SIZE - 4, crc);
    1362       19150 :         crc ^= BLOB_CRC32C_INITIAL;
    1363             : 
    1364       19150 :         return crc;
    1365             : 
    1366             : }
    1367             : 
    1368             : static void
    1369        3278 : blob_load_final(struct spdk_blob_load_ctx *ctx, int bserrno)
    1370             : {
    1371        3278 :         struct spdk_blob                *blob = ctx->blob;
    1372             : 
    1373        3278 :         if (bserrno == 0) {
    1374        3214 :                 blob_mark_clean(blob);
    1375             :         }
    1376             : 
    1377        3278 :         ctx->cb_fn(ctx->seq, ctx->cb_arg, bserrno);
    1378             : 
    1379             :         /* Free the memory */
    1380        3278 :         spdk_free(ctx->pages);
    1381        3278 :         free(ctx);
    1382        3278 : }
    1383             : 
    1384             : static void
    1385         410 : blob_load_snapshot_cpl(void *cb_arg, struct spdk_blob *snapshot, int bserrno)
    1386             : {
    1387         410 :         struct spdk_blob_load_ctx       *ctx = cb_arg;
    1388         410 :         struct spdk_blob                *blob = ctx->blob;
    1389             : 
    1390         410 :         if (bserrno == 0) {
    1391         404 :                 blob->back_bs_dev = bs_create_blob_bs_dev(snapshot);
    1392         404 :                 if (blob->back_bs_dev == NULL) {
    1393           0 :                         bserrno = -ENOMEM;
    1394             :                 }
    1395             :         }
    1396         410 :         if (bserrno != 0) {
    1397           6 :                 SPDK_ERRLOG("Snapshot fail\n");
    1398             :         }
    1399             : 
    1400         410 :         blob_load_final(ctx, bserrno);
    1401         410 : }
    1402             : 
    1403             : static void blob_update_clear_method(struct spdk_blob *blob);
    1404             : 
    1405             : static int
    1406          96 : blob_load_esnap(struct spdk_blob *blob, void *blob_ctx)
    1407             : {
    1408          96 :         struct spdk_blob_store *bs = blob->bs;
    1409          96 :         struct spdk_bs_dev *bs_dev = NULL;
    1410          96 :         const void *esnap_id = NULL;
    1411          96 :         size_t id_len = 0;
    1412             :         int rc;
    1413             : 
    1414          96 :         if (bs->esnap_bs_dev_create == NULL) {
    1415           8 :                 SPDK_NOTICELOG("blob 0x%" PRIx64 " is an esnap clone but the blobstore was opened "
    1416             :                                "without support for esnap clones\n", blob->id);
    1417           8 :                 return -ENOTSUP;
    1418             :         }
    1419          88 :         assert(blob->back_bs_dev == NULL);
    1420             : 
    1421          88 :         rc = blob_get_xattr_value(blob, BLOB_EXTERNAL_SNAPSHOT_ID, &esnap_id, &id_len, true);
    1422          88 :         if (rc != 0) {
    1423           0 :                 SPDK_ERRLOG("blob 0x%" PRIx64 " is an esnap clone but has no esnap ID\n", blob->id);
    1424           0 :                 return -EINVAL;
    1425             :         }
    1426          88 :         assert(id_len > 0 && id_len < UINT32_MAX);
    1427             : 
    1428          88 :         SPDK_INFOLOG(blob, "Creating external snapshot device\n");
    1429             : 
    1430          88 :         rc = bs->esnap_bs_dev_create(bs->esnap_ctx, blob_ctx, blob, esnap_id, (uint32_t)id_len,
    1431             :                                      &bs_dev);
    1432          88 :         if (rc != 0) {
    1433           0 :                 SPDK_DEBUGLOG(blob_esnap, "blob 0x%" PRIx64 ": failed to load back_bs_dev "
    1434             :                               "with error %d\n", blob->id, rc);
    1435           0 :                 return rc;
    1436             :         }
    1437             : 
    1438             :         /*
    1439             :          * Note: bs_dev might be NULL if the consumer chose to not open the external snapshot.
    1440             :          * This especially might happen during spdk_bs_load() iteration.
    1441             :          */
    1442          88 :         if (bs_dev != NULL) {
    1443          88 :                 SPDK_DEBUGLOG(blob_esnap, "blob 0x%" PRIx64 ": loaded back_bs_dev\n", blob->id);
    1444          88 :                 if ((bs->io_unit_size % bs_dev->blocklen) != 0) {
    1445           4 :                         SPDK_NOTICELOG("blob 0x%" PRIx64 " external snapshot device block size %u "
    1446             :                                        "is not compatible with blobstore block size %u\n",
    1447             :                                        blob->id, bs_dev->blocklen, bs->io_unit_size);
    1448           4 :                         bs_dev->destroy(bs_dev);
    1449           4 :                         return -EINVAL;
    1450             :                 }
    1451             :         }
    1452             : 
    1453          84 :         blob->back_bs_dev = bs_dev;
    1454          84 :         blob->parent_id = SPDK_BLOBID_EXTERNAL_SNAPSHOT;
    1455             : 
    1456          84 :         return 0;
    1457             : }
    1458             : 
    1459             : static void
    1460        3232 : blob_load_backing_dev(spdk_bs_sequence_t *seq, void *cb_arg)
    1461             : {
    1462        3232 :         struct spdk_blob_load_ctx       *ctx = cb_arg;
    1463        3232 :         struct spdk_blob                *blob = ctx->blob;
    1464        3232 :         const void                      *value;
    1465        3232 :         size_t                          len;
    1466             :         int                             rc;
    1467             : 
    1468        3232 :         if (blob_is_esnap_clone(blob)) {
    1469          96 :                 rc = blob_load_esnap(blob, seq->cpl.u.blob_handle.esnap_ctx);
    1470          96 :                 blob_load_final(ctx, rc);
    1471          96 :                 return;
    1472             :         }
    1473             : 
    1474        3136 :         if (spdk_blob_is_thin_provisioned(blob)) {
    1475         914 :                 rc = blob_get_xattr_value(blob, BLOB_SNAPSHOT, &value, &len, true);
    1476         914 :                 if (rc == 0) {
    1477         410 :                         if (len != sizeof(spdk_blob_id)) {
    1478           0 :                                 blob_load_final(ctx, -EINVAL);
    1479           0 :                                 return;
    1480             :                         }
    1481             :                         /* open snapshot blob and continue in the callback function */
    1482         410 :                         blob->parent_id = *(spdk_blob_id *)value;
    1483         410 :                         spdk_bs_open_blob(blob->bs, blob->parent_id,
    1484             :                                           blob_load_snapshot_cpl, ctx);
    1485         410 :                         return;
    1486             :                 } else {
    1487             :                         /* add zeroes_dev for thin provisioned blob */
    1488         504 :                         blob->back_bs_dev = bs_create_zeroes_dev();
    1489             :                 }
    1490             :         } else {
    1491             :                 /* standard blob */
    1492        2222 :                 blob->back_bs_dev = NULL;
    1493             :         }
    1494        2726 :         blob_load_final(ctx, 0);
    1495             : }
    1496             : 
    1497             : static void
    1498        2710 : blob_load_cpl_extents_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
    1499             : {
    1500        2710 :         struct spdk_blob_load_ctx       *ctx = cb_arg;
    1501        2710 :         struct spdk_blob                *blob = ctx->blob;
    1502             :         struct spdk_blob_md_page        *page;
    1503             :         uint64_t                        i;
    1504             :         uint32_t                        crc;
    1505             :         uint64_t                        lba;
    1506             :         void                            *tmp;
    1507             :         uint64_t                        sz;
    1508             : 
    1509        2710 :         if (bserrno) {
    1510           6 :                 SPDK_ERRLOG("Extent page read failed: %d\n", bserrno);
    1511           6 :                 blob_load_final(ctx, bserrno);
    1512           6 :                 return;
    1513             :         }
    1514             : 
    1515        2704 :         if (ctx->pages == NULL) {
    1516             :                 /* First iteration of this function, allocate buffer for single EXTENT_PAGE */
    1517        1670 :                 ctx->pages = spdk_zmalloc(SPDK_BS_PAGE_SIZE, 0,
    1518             :                                           NULL, SPDK_ENV_SOCKET_ID_ANY, SPDK_MALLOC_DMA);
    1519        1670 :                 if (!ctx->pages) {
    1520           0 :                         blob_load_final(ctx, -ENOMEM);
    1521           0 :                         return;
    1522             :                 }
    1523        1670 :                 ctx->num_pages = 1;
    1524        1670 :                 ctx->next_extent_page = 0;
    1525             :         } else {
    1526        1034 :                 page = &ctx->pages[0];
    1527        1034 :                 crc = blob_md_page_calc_crc(page);
    1528        1034 :                 if (crc != page->crc) {
    1529           0 :                         blob_load_final(ctx, -EINVAL);
    1530           0 :                         return;
    1531             :                 }
    1532             : 
    1533        1034 :                 if (page->next != SPDK_INVALID_MD_PAGE) {
    1534           0 :                         blob_load_final(ctx, -EINVAL);
    1535           0 :                         return;
    1536             :                 }
    1537             : 
    1538        1034 :                 bserrno = blob_parse_extent_page(page, blob);
    1539        1034 :                 if (bserrno) {
    1540           0 :                         blob_load_final(ctx, bserrno);
    1541           0 :                         return;
    1542             :                 }
    1543             :         }
    1544             : 
    1545        3092 :         for (i = ctx->next_extent_page; i < blob->active.num_extent_pages; i++) {
    1546        1428 :                 if (blob->active.extent_pages[i] != 0) {
    1547             :                         /* Extent page was allocated, read and parse it. */
    1548        1040 :                         lba = bs_md_page_to_lba(blob->bs, blob->active.extent_pages[i]);
    1549        1040 :                         ctx->next_extent_page = i + 1;
    1550             : 
    1551        1040 :                         bs_sequence_read_dev(seq, &ctx->pages[0], lba,
    1552        1040 :                                              bs_byte_to_lba(blob->bs, SPDK_BS_PAGE_SIZE),
    1553             :                                              blob_load_cpl_extents_cpl, ctx);
    1554        1040 :                         return;
    1555             :                 } else {
    1556             :                         /* Thin provisioned blobs can point to unallocated extent pages.
    1557             :                          * In this case blob size should be increased by up to the amount left in remaining_clusters_in_et. */
    1558             : 
    1559         388 :                         sz = spdk_min(blob->remaining_clusters_in_et, SPDK_EXTENTS_PER_EP);
    1560         388 :                         blob->active.num_clusters += sz;
    1561         388 :                         blob->remaining_clusters_in_et -= sz;
    1562             : 
    1563         388 :                         assert(spdk_blob_is_thin_provisioned(blob));
    1564         388 :                         assert(i + 1 < blob->active.num_extent_pages || blob->remaining_clusters_in_et == 0);
    1565             : 
    1566         388 :                         tmp = realloc(blob->active.clusters, blob->active.num_clusters * sizeof(*blob->active.clusters));
    1567         388 :                         if (tmp == NULL) {
    1568           0 :                                 blob_load_final(ctx, -ENOMEM);
    1569           0 :                                 return;
    1570             :                         }
    1571         388 :                         memset(tmp + sizeof(*blob->active.clusters) * blob->active.cluster_array_size, 0,
    1572         388 :                                sizeof(*blob->active.clusters) * (blob->active.num_clusters - blob->active.cluster_array_size));
    1573         388 :                         blob->active.clusters = tmp;
    1574         388 :                         blob->active.cluster_array_size = blob->active.num_clusters;
    1575             :                 }
    1576             :         }
    1577             : 
    1578        1664 :         blob_load_backing_dev(seq, ctx);
    1579             : }
    1580             : 
    1581             : static void
    1582        3378 : blob_load_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
    1583             : {
    1584        3378 :         struct spdk_blob_load_ctx       *ctx = cb_arg;
    1585        3378 :         struct spdk_blob                *blob = ctx->blob;
    1586             :         struct spdk_blob_md_page        *page;
    1587             :         int                             rc;
    1588             :         uint32_t                        crc;
    1589             :         uint32_t                        current_page;
    1590             : 
    1591        3378 :         if (ctx->num_pages == 1) {
    1592        3278 :                 current_page = bs_blobid_to_page(blob->id);
    1593             :         } else {
    1594         100 :                 assert(ctx->num_pages != 0);
    1595         100 :                 page = &ctx->pages[ctx->num_pages - 2];
    1596         100 :                 current_page = page->next;
    1597             :         }
    1598             : 
    1599        3378 :         if (bserrno) {
    1600          20 :                 SPDK_ERRLOG("Metadata page %d read failed for blobid 0x%" PRIx64 ": %d\n",
    1601             :                             current_page, blob->id, bserrno);
    1602          20 :                 blob_load_final(ctx, bserrno);
    1603          20 :                 return;
    1604             :         }
    1605             : 
    1606        3358 :         page = &ctx->pages[ctx->num_pages - 1];
    1607        3358 :         crc = blob_md_page_calc_crc(page);
    1608        3358 :         if (crc != page->crc) {
    1609           8 :                 SPDK_ERRLOG("Metadata page %d crc mismatch for blobid 0x%" PRIx64 "\n",
    1610             :                             current_page, blob->id);
    1611           8 :                 blob_load_final(ctx, -EINVAL);
    1612           8 :                 return;
    1613             :         }
    1614             : 
    1615        3350 :         if (page->next != SPDK_INVALID_MD_PAGE) {
    1616             :                 struct spdk_blob_md_page *tmp_pages;
    1617         100 :                 uint32_t next_page = page->next;
    1618         100 :                 uint64_t next_lba = bs_md_page_to_lba(blob->bs, next_page);
    1619             : 
    1620             :                 /* Read the next page */
    1621         100 :                 tmp_pages = spdk_realloc(ctx->pages, (sizeof(*page) * (ctx->num_pages + 1)), 0);
    1622         100 :                 if (tmp_pages == NULL) {
    1623           0 :                         blob_load_final(ctx, -ENOMEM);
    1624           0 :                         return;
    1625             :                 }
    1626         100 :                 ctx->num_pages++;
    1627         100 :                 ctx->pages = tmp_pages;
    1628             : 
    1629         100 :                 bs_sequence_read_dev(seq, &ctx->pages[ctx->num_pages - 1],
    1630             :                                      next_lba,
    1631         100 :                                      bs_byte_to_lba(blob->bs, sizeof(*page)),
    1632             :                                      blob_load_cpl, ctx);
    1633         100 :                 return;
    1634             :         }
    1635             : 
    1636             :         /* Parse the pages */
    1637        3250 :         rc = blob_parse(ctx->pages, ctx->num_pages, blob);
    1638        3250 :         if (rc) {
    1639          12 :                 blob_load_final(ctx, rc);
    1640          12 :                 return;
    1641             :         }
    1642             : 
    1643        3238 :         if (blob->extent_table_found == true) {
    1644             :                 /* If EXTENT_TABLE was found, that means support for it should be enabled. */
    1645        1670 :                 assert(blob->extent_rle_found == false);
    1646        1670 :                 blob->use_extent_table = true;
    1647             :         } else {
    1648             :                 /* If EXTENT_RLE or no extent_* descriptor was found disable support
    1649             :                  * for extent table. No extent_* descriptors means that blob has length of 0
    1650             :                  * and no extent_rle descriptors were persisted for it.
    1651             :                  * EXTENT_TABLE if used, is always present in metadata regardless of length. */
    1652        1568 :                 blob->use_extent_table = false;
    1653             :         }
    1654             : 
    1655             :         /* Check the clear_method stored in metadata vs what may have been passed
    1656             :          * via spdk_bs_open_blob_ext() and update accordingly.
    1657             :          */
    1658        3238 :         blob_update_clear_method(blob);
    1659             : 
    1660        3238 :         spdk_free(ctx->pages);
    1661        3238 :         ctx->pages = NULL;
    1662             : 
    1663        3238 :         if (blob->extent_table_found) {
    1664        1670 :                 blob_load_cpl_extents_cpl(seq, ctx, 0);
    1665             :         } else {
    1666        1568 :                 blob_load_backing_dev(seq, ctx);
    1667             :         }
    1668             : }
    1669             : 
    1670             : /* Load a blob from disk given a blobid */
    1671             : static void
    1672        3278 : blob_load(spdk_bs_sequence_t *seq, struct spdk_blob *blob,
    1673             :           spdk_bs_sequence_cpl cb_fn, void *cb_arg)
    1674             : {
    1675             :         struct spdk_blob_load_ctx *ctx;
    1676             :         struct spdk_blob_store *bs;
    1677             :         uint32_t page_num;
    1678             :         uint64_t lba;
    1679             : 
    1680        3278 :         blob_verify_md_op(blob);
    1681             : 
    1682        3278 :         bs = blob->bs;
    1683             : 
    1684        3278 :         ctx = calloc(1, sizeof(*ctx));
    1685        3278 :         if (!ctx) {
    1686           0 :                 cb_fn(seq, cb_arg, -ENOMEM);
    1687           0 :                 return;
    1688             :         }
    1689             : 
    1690        3278 :         ctx->blob = blob;
    1691        3278 :         ctx->pages = spdk_realloc(ctx->pages, SPDK_BS_PAGE_SIZE, 0);
    1692        3278 :         if (!ctx->pages) {
    1693           0 :                 free(ctx);
    1694           0 :                 cb_fn(seq, cb_arg, -ENOMEM);
    1695           0 :                 return;
    1696             :         }
    1697        3278 :         ctx->num_pages = 1;
    1698        3278 :         ctx->cb_fn = cb_fn;
    1699        3278 :         ctx->cb_arg = cb_arg;
    1700        3278 :         ctx->seq = seq;
    1701             : 
    1702        3278 :         page_num = bs_blobid_to_page(blob->id);
    1703        3278 :         lba = bs_md_page_to_lba(blob->bs, page_num);
    1704             : 
    1705        3278 :         blob->state = SPDK_BLOB_STATE_LOADING;
    1706             : 
    1707        3278 :         bs_sequence_read_dev(seq, &ctx->pages[0], lba,
    1708        3278 :                              bs_byte_to_lba(bs, SPDK_BS_PAGE_SIZE),
    1709             :                              blob_load_cpl, ctx);
    1710             : }
    1711             : 
    1712             : struct spdk_blob_persist_ctx {
    1713             :         struct spdk_blob                *blob;
    1714             : 
    1715             :         struct spdk_blob_md_page        *pages;
    1716             :         uint32_t                        next_extent_page;
    1717             :         struct spdk_blob_md_page        *extent_page;
    1718             : 
    1719             :         spdk_bs_sequence_t              *seq;
    1720             :         spdk_bs_sequence_cpl            cb_fn;
    1721             :         void                            *cb_arg;
    1722             :         TAILQ_ENTRY(spdk_blob_persist_ctx) link;
    1723             : };
    1724             : 
    1725             : static void
    1726        1242 : bs_batch_clear_dev(struct spdk_blob_persist_ctx *ctx, spdk_bs_batch_t *batch, uint64_t lba,
    1727             :                    uint64_t lba_count)
    1728             : {
    1729        1242 :         switch (ctx->blob->clear_method) {
    1730        1242 :         case BLOB_CLEAR_WITH_DEFAULT:
    1731             :         case BLOB_CLEAR_WITH_UNMAP:
    1732        1242 :                 bs_batch_unmap_dev(batch, lba, lba_count);
    1733        1242 :                 break;
    1734           0 :         case BLOB_CLEAR_WITH_WRITE_ZEROES:
    1735           0 :                 bs_batch_write_zeroes_dev(batch, lba, lba_count);
    1736           0 :                 break;
    1737           0 :         case BLOB_CLEAR_WITH_NONE:
    1738             :         default:
    1739           0 :                 break;
    1740             :         }
    1741        1242 : }
    1742             : 
    1743             : static int
    1744        1124 : bs_super_validate(struct spdk_bs_super_block *super, struct spdk_blob_store *bs)
    1745             : {
    1746             :         uint32_t        crc;
    1747             :         static const char zeros[SPDK_BLOBSTORE_TYPE_LENGTH];
    1748             : 
    1749        1124 :         if (super->version > SPDK_BS_VERSION ||
    1750        1120 :             super->version < SPDK_BS_INITIAL_VERSION) {
    1751           8 :                 return -EILSEQ;
    1752             :         }
    1753             : 
    1754        1116 :         if (memcmp(super->signature, SPDK_BS_SUPER_BLOCK_SIG,
    1755             :                    sizeof(super->signature)) != 0) {
    1756           0 :                 return -EILSEQ;
    1757             :         }
    1758             : 
    1759        1116 :         crc = blob_md_page_calc_crc(super);
    1760        1116 :         if (crc != super->crc) {
    1761           4 :                 return -EILSEQ;
    1762             :         }
    1763             : 
    1764        1112 :         if (memcmp(&bs->bstype, &super->bstype, SPDK_BLOBSTORE_TYPE_LENGTH) == 0) {
    1765        1098 :                 SPDK_DEBUGLOG(blob, "Bstype matched - loading blobstore\n");
    1766          14 :         } else if (memcmp(&bs->bstype, zeros, SPDK_BLOBSTORE_TYPE_LENGTH) == 0) {
    1767           6 :                 SPDK_DEBUGLOG(blob, "Bstype wildcard used - loading blobstore regardless bstype\n");
    1768             :         } else {
    1769           8 :                 SPDK_DEBUGLOG(blob, "Unexpected bstype\n");
    1770           8 :                 SPDK_LOGDUMP(blob, "Expected:", bs->bstype.bstype, SPDK_BLOBSTORE_TYPE_LENGTH);
    1771           8 :                 SPDK_LOGDUMP(blob, "Found:", super->bstype.bstype, SPDK_BLOBSTORE_TYPE_LENGTH);
    1772           8 :                 return -ENXIO;
    1773             :         }
    1774             : 
    1775        1104 :         if (super->size > bs->dev->blockcnt * bs->dev->blocklen) {
    1776           8 :                 SPDK_NOTICELOG("Size mismatch, dev size: %" PRIu64 ", blobstore size: %" PRIu64 "\n",
    1777             :                                bs->dev->blockcnt * bs->dev->blocklen, super->size);
    1778           8 :                 return -EILSEQ;
    1779             :         }
    1780             : 
    1781        1096 :         return 0;
    1782             : }
    1783             : 
    1784             : static void bs_mark_dirty(spdk_bs_sequence_t *seq, struct spdk_blob_store *bs,
    1785             :                           spdk_bs_sequence_cpl cb_fn, void *cb_arg);
    1786             : 
    1787             : static void
    1788        4676 : blob_persist_complete_cb(void *arg)
    1789             : {
    1790        4676 :         struct spdk_blob_persist_ctx *ctx = arg;
    1791             : 
    1792             :         /* Call user callback */
    1793        4676 :         ctx->cb_fn(ctx->seq, ctx->cb_arg, 0);
    1794             : 
    1795             :         /* Free the memory */
    1796        4676 :         spdk_free(ctx->pages);
    1797        4676 :         free(ctx);
    1798        4676 : }
    1799             : 
    1800             : static void blob_persist_start(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno);
    1801             : 
    1802             : static void
    1803        4676 : blob_persist_complete(spdk_bs_sequence_t *seq, struct spdk_blob_persist_ctx *ctx, int bserrno)
    1804             : {
    1805             :         struct spdk_blob_persist_ctx    *next_persist, *tmp;
    1806        4676 :         struct spdk_blob                *blob = ctx->blob;
    1807             : 
    1808        4676 :         if (bserrno == 0) {
    1809        4624 :                 blob_mark_clean(blob);
    1810             :         }
    1811             : 
    1812        4676 :         assert(ctx == TAILQ_FIRST(&blob->persists_to_complete));
    1813             : 
    1814             :         /* Complete all persists that were pending when the current persist started */
    1815        9352 :         TAILQ_FOREACH_SAFE(next_persist, &blob->persists_to_complete, link, tmp) {
    1816        4676 :                 TAILQ_REMOVE(&blob->persists_to_complete, next_persist, link);
    1817        4676 :                 spdk_thread_send_msg(spdk_get_thread(), blob_persist_complete_cb, next_persist);
    1818             :         }
    1819             : 
    1820        4676 :         if (TAILQ_EMPTY(&blob->pending_persists)) {
    1821        4655 :                 return;
    1822             :         }
    1823             : 
    1824             :         /* Queue up all pending persists for completion and start blob persist with first one */
    1825          21 :         TAILQ_SWAP(&blob->persists_to_complete, &blob->pending_persists, spdk_blob_persist_ctx, link);
    1826          21 :         next_persist = TAILQ_FIRST(&blob->persists_to_complete);
    1827             : 
    1828          21 :         blob->state = SPDK_BLOB_STATE_DIRTY;
    1829          21 :         bs_mark_dirty(seq, blob->bs, blob_persist_start, next_persist);
    1830             : }
    1831             : 
    1832             : static void
    1833        4624 : blob_persist_clear_extents_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
    1834             : {
    1835        4624 :         struct spdk_blob_persist_ctx    *ctx = cb_arg;
    1836        4624 :         struct spdk_blob                *blob = ctx->blob;
    1837        4624 :         struct spdk_blob_store          *bs = blob->bs;
    1838             :         size_t                          i;
    1839             : 
    1840        4624 :         if (bserrno != 0) {
    1841           0 :                 blob_persist_complete(seq, ctx, bserrno);
    1842           0 :                 return;
    1843             :         }
    1844             : 
    1845        4624 :         spdk_spin_lock(&bs->used_lock);
    1846             : 
    1847             :         /* Release all extent_pages that were truncated */
    1848        6342 :         for (i = blob->active.num_extent_pages; i < blob->active.extent_pages_array_size; i++) {
    1849             :                 /* Nothing to release if it was not allocated */
    1850        1718 :                 if (blob->active.extent_pages[i] != 0) {
    1851         620 :                         bs_release_md_page(bs, blob->active.extent_pages[i]);
    1852             :                 }
    1853             :         }
    1854             : 
    1855        4624 :         spdk_spin_unlock(&bs->used_lock);
    1856             : 
    1857        4624 :         if (blob->active.num_extent_pages == 0) {
    1858        3289 :                 free(blob->active.extent_pages);
    1859        3289 :                 blob->active.extent_pages = NULL;
    1860        3289 :                 blob->active.extent_pages_array_size = 0;
    1861        1335 :         } else if (blob->active.num_extent_pages != blob->active.extent_pages_array_size) {
    1862             : #ifndef __clang_analyzer__
    1863             :                 void *tmp;
    1864             : 
    1865             :                 /* scan-build really can't figure reallocs, workaround it */
    1866           2 :                 tmp = realloc(blob->active.extent_pages, sizeof(uint32_t) * blob->active.num_extent_pages);
    1867           2 :                 assert(tmp != NULL);
    1868           2 :                 blob->active.extent_pages = tmp;
    1869             : #endif
    1870           2 :                 blob->active.extent_pages_array_size = blob->active.num_extent_pages;
    1871             :         }
    1872             : 
    1873        4624 :         blob_persist_complete(seq, ctx, bserrno);
    1874             : }
    1875             : 
    1876             : static void
    1877        4624 : blob_persist_clear_extents(spdk_bs_sequence_t *seq, struct spdk_blob_persist_ctx *ctx)
    1878             : {
    1879        4624 :         struct spdk_blob                *blob = ctx->blob;
    1880        4624 :         struct spdk_blob_store          *bs = blob->bs;
    1881             :         size_t                          i;
    1882             :         uint64_t                        lba;
    1883             :         uint64_t                        lba_count;
    1884             :         spdk_bs_batch_t                 *batch;
    1885             : 
    1886        4624 :         batch = bs_sequence_to_batch(seq, blob_persist_clear_extents_cpl, ctx);
    1887        4624 :         lba_count = bs_byte_to_lba(bs, SPDK_BS_PAGE_SIZE);
    1888             : 
    1889             :         /* Clear all extent_pages that were truncated */
    1890        6342 :         for (i = blob->active.num_extent_pages; i < blob->active.extent_pages_array_size; i++) {
    1891             :                 /* Nothing to clear if it was not allocated */
    1892        1718 :                 if (blob->active.extent_pages[i] != 0) {
    1893         620 :                         lba = bs_md_page_to_lba(bs, blob->active.extent_pages[i]);
    1894         620 :                         bs_batch_write_zeroes_dev(batch, lba, lba_count);
    1895             :                 }
    1896             :         }
    1897             : 
    1898        4624 :         bs_batch_close(batch);
    1899        4624 : }
    1900             : 
    1901             : static void
    1902        4624 : blob_persist_clear_clusters_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
    1903             : {
    1904        4624 :         struct spdk_blob_persist_ctx    *ctx = cb_arg;
    1905        4624 :         struct spdk_blob                *blob = ctx->blob;
    1906        4624 :         struct spdk_blob_store          *bs = blob->bs;
    1907             :         size_t                          i;
    1908             : 
    1909        4624 :         if (bserrno != 0) {
    1910           0 :                 blob_persist_complete(seq, ctx, bserrno);
    1911           0 :                 return;
    1912             :         }
    1913             : 
    1914        4624 :         spdk_spin_lock(&bs->used_lock);
    1915             :         /* Release all clusters that were truncated */
    1916     1073475 :         for (i = blob->active.num_clusters; i < blob->active.cluster_array_size; i++) {
    1917     1068851 :                 uint32_t cluster_num = bs_lba_to_cluster(bs, blob->active.clusters[i]);
    1918             : 
    1919             :                 /* Nothing to release if it was not allocated */
    1920     1068851 :                 if (blob->active.clusters[i] != 0) {
    1921        2311 :                         bs_release_cluster(bs, cluster_num);
    1922             :                 }
    1923             :         }
    1924        4624 :         spdk_spin_unlock(&bs->used_lock);
    1925             : 
    1926        4624 :         if (blob->active.num_clusters == 0) {
    1927        1776 :                 free(blob->active.clusters);
    1928        1776 :                 blob->active.clusters = NULL;
    1929        1776 :                 blob->active.cluster_array_size = 0;
    1930        2848 :         } else if (blob->active.num_clusters != blob->active.cluster_array_size) {
    1931             : #ifndef __clang_analyzer__
    1932             :                 void *tmp;
    1933             : 
    1934             :                 /* scan-build really can't figure reallocs, workaround it */
    1935          10 :                 tmp = realloc(blob->active.clusters, sizeof(*blob->active.clusters) * blob->active.num_clusters);
    1936          10 :                 assert(tmp != NULL);
    1937          10 :                 blob->active.clusters = tmp;
    1938             : 
    1939             : #endif
    1940          10 :                 blob->active.cluster_array_size = blob->active.num_clusters;
    1941             :         }
    1942             : 
    1943             :         /* Move on to clearing extent pages */
    1944        4624 :         blob_persist_clear_extents(seq, ctx);
    1945             : }
    1946             : 
    1947             : static void
    1948        4624 : blob_persist_clear_clusters(spdk_bs_sequence_t *seq, struct spdk_blob_persist_ctx *ctx)
    1949             : {
    1950        4624 :         struct spdk_blob                *blob = ctx->blob;
    1951        4624 :         struct spdk_blob_store          *bs = blob->bs;
    1952             :         spdk_bs_batch_t                 *batch;
    1953             :         size_t                          i;
    1954             :         uint64_t                        lba;
    1955             :         uint64_t                        lba_count;
    1956             : 
    1957             :         /* Clusters don't move around in blobs. The list shrinks or grows
    1958             :          * at the end, but no changes ever occur in the middle of the list.
    1959             :          */
    1960             : 
    1961        4624 :         batch = bs_sequence_to_batch(seq, blob_persist_clear_clusters_cpl, ctx);
    1962             : 
    1963             :         /* Clear all clusters that were truncated */
    1964        4624 :         lba = 0;
    1965        4624 :         lba_count = 0;
    1966     1073475 :         for (i = blob->active.num_clusters; i < blob->active.cluster_array_size; i++) {
    1967     1068851 :                 uint64_t next_lba = blob->active.clusters[i];
    1968     1068851 :                 uint64_t next_lba_count = bs_cluster_to_lba(bs, 1);
    1969             : 
    1970     1068851 :                 if (next_lba > 0 && (lba + lba_count) == next_lba) {
    1971             :                         /* This cluster is contiguous with the previous one. */
    1972        1069 :                         lba_count += next_lba_count;
    1973        1069 :                         continue;
    1974     1067782 :                 } else if (next_lba == 0) {
    1975     1066540 :                         continue;
    1976             :                 }
    1977             : 
    1978             :                 /* This cluster is not contiguous with the previous one. */
    1979             : 
    1980             :                 /* If a run of LBAs previously existing, clear them now */
    1981        1242 :                 if (lba_count > 0) {
    1982          36 :                         bs_batch_clear_dev(ctx, batch, lba, lba_count);
    1983             :                 }
    1984             : 
    1985             :                 /* Start building the next batch */
    1986        1242 :                 lba = next_lba;
    1987        1242 :                 if (next_lba > 0) {
    1988        1242 :                         lba_count = next_lba_count;
    1989             :                 } else {
    1990           0 :                         lba_count = 0;
    1991             :                 }
    1992             :         }
    1993             : 
    1994             :         /* If we ended with a contiguous set of LBAs, clear them now */
    1995        4624 :         if (lba_count > 0) {
    1996        1206 :                 bs_batch_clear_dev(ctx, batch, lba, lba_count);
    1997             :         }
    1998             : 
    1999        4624 :         bs_batch_close(batch);
    2000        4624 : }
    2001             : 
    2002             : static void
    2003        4628 : blob_persist_zero_pages_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
    2004             : {
    2005        4628 :         struct spdk_blob_persist_ctx    *ctx = cb_arg;
    2006        4628 :         struct spdk_blob                *blob = ctx->blob;
    2007        4628 :         struct spdk_blob_store          *bs = blob->bs;
    2008             :         size_t                          i;
    2009             : 
    2010        4628 :         if (bserrno != 0) {
    2011           4 :                 blob_persist_complete(seq, ctx, bserrno);
    2012           4 :                 return;
    2013             :         }
    2014             : 
    2015        4624 :         spdk_spin_lock(&bs->used_lock);
    2016             : 
    2017             :         /* This loop starts at 1 because the first page is special and handled
    2018             :          * below. The pages (except the first) are never written in place,
    2019             :          * so any pages in the clean list must be zeroed.
    2020             :          */
    2021        4692 :         for (i = 1; i < blob->clean.num_pages; i++) {
    2022          68 :                 bs_release_md_page(bs, blob->clean.pages[i]);
    2023             :         }
    2024             : 
    2025        4624 :         if (blob->active.num_pages == 0) {
    2026             :                 uint32_t page_num;
    2027             : 
    2028        1416 :                 page_num = bs_blobid_to_page(blob->id);
    2029        1416 :                 bs_release_md_page(bs, page_num);
    2030             :         }
    2031             : 
    2032        4624 :         spdk_spin_unlock(&bs->used_lock);
    2033             : 
    2034             :         /* Move on to clearing clusters */
    2035        4624 :         blob_persist_clear_clusters(seq, ctx);
    2036             : }
    2037             : 
    2038             : static void
    2039        4668 : blob_persist_zero_pages(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
    2040             : {
    2041        4668 :         struct spdk_blob_persist_ctx    *ctx = cb_arg;
    2042        4668 :         struct spdk_blob                *blob = ctx->blob;
    2043        4668 :         struct spdk_blob_store          *bs = blob->bs;
    2044             :         uint64_t                        lba;
    2045             :         uint64_t                        lba_count;
    2046             :         spdk_bs_batch_t                 *batch;
    2047             :         size_t                          i;
    2048             : 
    2049        4668 :         if (bserrno != 0) {
    2050          40 :                 blob_persist_complete(seq, ctx, bserrno);
    2051          40 :                 return;
    2052             :         }
    2053             : 
    2054        4628 :         batch = bs_sequence_to_batch(seq, blob_persist_zero_pages_cpl, ctx);
    2055             : 
    2056        4628 :         lba_count = bs_byte_to_lba(bs, SPDK_BS_PAGE_SIZE);
    2057             : 
    2058             :         /* This loop starts at 1 because the first page is special and handled
    2059             :          * below. The pages (except the first) are never written in place,
    2060             :          * so any pages in the clean list must be zeroed.
    2061             :          */
    2062        4696 :         for (i = 1; i < blob->clean.num_pages; i++) {
    2063          68 :                 lba = bs_md_page_to_lba(bs, blob->clean.pages[i]);
    2064             : 
    2065          68 :                 bs_batch_write_zeroes_dev(batch, lba, lba_count);
    2066             :         }
    2067             : 
    2068             :         /* The first page will only be zeroed if this is a delete. */
    2069        4628 :         if (blob->active.num_pages == 0) {
    2070             :                 uint32_t page_num;
    2071             : 
    2072             :                 /* The first page in the metadata goes where the blobid indicates */
    2073        1420 :                 page_num = bs_blobid_to_page(blob->id);
    2074        1420 :                 lba = bs_md_page_to_lba(bs, page_num);
    2075             : 
    2076        1420 :                 bs_batch_write_zeroes_dev(batch, lba, lba_count);
    2077             :         }
    2078             : 
    2079        4628 :         bs_batch_close(batch);
    2080             : }
    2081             : 
    2082             : static void
    2083        3248 : blob_persist_write_page_root(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
    2084             : {
    2085        3248 :         struct spdk_blob_persist_ctx    *ctx = cb_arg;
    2086        3248 :         struct spdk_blob                *blob = ctx->blob;
    2087        3248 :         struct spdk_blob_store          *bs = blob->bs;
    2088             :         uint64_t                        lba;
    2089             :         uint32_t                        lba_count;
    2090             :         struct spdk_blob_md_page        *page;
    2091             : 
    2092        3248 :         if (bserrno != 0) {
    2093           0 :                 blob_persist_complete(seq, ctx, bserrno);
    2094           0 :                 return;
    2095             :         }
    2096             : 
    2097        3248 :         if (blob->active.num_pages == 0) {
    2098             :                 /* Move on to the next step */
    2099           0 :                 blob_persist_zero_pages(seq, ctx, 0);
    2100           0 :                 return;
    2101             :         }
    2102             : 
    2103        3248 :         lba_count = bs_byte_to_lba(bs, sizeof(*page));
    2104             : 
    2105        3248 :         page = &ctx->pages[0];
    2106             :         /* The first page in the metadata goes where the blobid indicates */
    2107        3248 :         lba = bs_md_page_to_lba(bs, bs_blobid_to_page(blob->id));
    2108             : 
    2109        3248 :         bs_sequence_write_dev(seq, page, lba, lba_count,
    2110             :                               blob_persist_zero_pages, ctx);
    2111             : }
    2112             : 
    2113             : static void
    2114        3248 : blob_persist_write_page_chain(spdk_bs_sequence_t *seq, struct spdk_blob_persist_ctx *ctx)
    2115             : {
    2116        3248 :         struct spdk_blob                *blob = ctx->blob;
    2117        3248 :         struct spdk_blob_store          *bs = blob->bs;
    2118             :         uint64_t                        lba;
    2119             :         uint32_t                        lba_count;
    2120             :         struct spdk_blob_md_page        *page;
    2121             :         spdk_bs_batch_t                 *batch;
    2122             :         size_t                          i;
    2123             : 
    2124             :         /* Clusters don't move around in blobs. The list shrinks or grows
    2125             :          * at the end, but no changes ever occur in the middle of the list.
    2126             :          */
    2127             : 
    2128        3248 :         lba_count = bs_byte_to_lba(bs, sizeof(*page));
    2129             : 
    2130        3248 :         batch = bs_sequence_to_batch(seq, blob_persist_write_page_root, ctx);
    2131             : 
    2132             :         /* This starts at 1. The root page is not written until
    2133             :          * all of the others are finished
    2134             :          */
    2135        3336 :         for (i = 1; i < blob->active.num_pages; i++) {
    2136          88 :                 page = &ctx->pages[i];
    2137          88 :                 assert(page->sequence_num == i);
    2138             : 
    2139          88 :                 lba = bs_md_page_to_lba(bs, blob->active.pages[i]);
    2140             : 
    2141          88 :                 bs_batch_write_dev(batch, page, lba, lba_count);
    2142             :         }
    2143             : 
    2144        3248 :         bs_batch_close(batch);
    2145        3248 : }
    2146             : 
    2147             : static int
    2148        3392 : blob_resize(struct spdk_blob *blob, uint64_t sz)
    2149             : {
    2150             :         uint64_t        i;
    2151             :         uint64_t        *tmp;
    2152        3392 :         uint64_t        cluster;
    2153        3392 :         uint32_t        lfmd; /*  lowest free md page */
    2154             :         uint64_t        num_clusters;
    2155             :         uint32_t        *ep_tmp;
    2156        3392 :         uint64_t        new_num_ep = 0, current_num_ep = 0;
    2157             :         struct spdk_blob_store *bs;
    2158             :         int             rc;
    2159             : 
    2160        3392 :         bs = blob->bs;
    2161             : 
    2162        3392 :         blob_verify_md_op(blob);
    2163             : 
    2164        3392 :         if (blob->active.num_clusters == sz) {
    2165         376 :                 return 0;
    2166             :         }
    2167             : 
    2168        3016 :         if (blob->active.num_clusters < blob->active.cluster_array_size) {
    2169             :                 /* If this blob was resized to be larger, then smaller, then
    2170             :                  * larger without syncing, then the cluster array already
    2171             :                  * contains spare assigned clusters we can use.
    2172             :                  */
    2173           0 :                 num_clusters = spdk_min(blob->active.cluster_array_size,
    2174             :                                         sz);
    2175             :         } else {
    2176        3016 :                 num_clusters = blob->active.num_clusters;
    2177             :         }
    2178             : 
    2179        3016 :         if (blob->use_extent_table) {
    2180             :                 /* Round up since every cluster beyond current Extent Table size,
    2181             :                  * requires new extent page. */
    2182        1530 :                 new_num_ep = spdk_divide_round_up(sz, SPDK_EXTENTS_PER_EP);
    2183        1530 :                 current_num_ep = spdk_divide_round_up(num_clusters, SPDK_EXTENTS_PER_EP);
    2184             :         }
    2185             : 
    2186        3016 :         assert(!spdk_spin_held(&bs->used_lock));
    2187             : 
    2188             :         /* Check first that we have enough clusters and md pages before we start claiming them.
    2189             :          * bs->used_lock is held to ensure that clusters we think are free are still free when we go
    2190             :          * to claim them later in this function.
    2191             :          */
    2192        3016 :         if (sz > num_clusters && spdk_blob_is_thin_provisioned(blob) == false) {
    2193        1298 :                 spdk_spin_lock(&bs->used_lock);
    2194        1298 :                 if ((sz - num_clusters) > bs->num_free_clusters) {
    2195           8 :                         rc = -ENOSPC;
    2196           8 :                         goto out;
    2197             :                 }
    2198        1290 :                 lfmd = 0;
    2199        1932 :                 for (i = current_num_ep; i < new_num_ep ; i++) {
    2200         642 :                         lfmd = spdk_bit_array_find_first_clear(blob->bs->used_md_pages, lfmd);
    2201         642 :                         if (lfmd == UINT32_MAX) {
    2202             :                                 /* No more free md pages. Cannot satisfy the request */
    2203           0 :                                 rc = -ENOSPC;
    2204           0 :                                 goto out;
    2205             :                         }
    2206             :                 }
    2207             :         }
    2208             : 
    2209        3008 :         if (sz > num_clusters) {
    2210             :                 /* Expand the cluster array if necessary.
    2211             :                  * We only shrink the array when persisting.
    2212             :                  */
    2213        1642 :                 tmp = realloc(blob->active.clusters, sizeof(*blob->active.clusters) * sz);
    2214        1642 :                 if (sz > 0 && tmp == NULL) {
    2215           0 :                         rc = -ENOMEM;
    2216           0 :                         goto out;
    2217             :                 }
    2218        1642 :                 memset(tmp + blob->active.cluster_array_size, 0,
    2219        1642 :                        sizeof(*blob->active.clusters) * (sz - blob->active.cluster_array_size));
    2220        1642 :                 blob->active.clusters = tmp;
    2221        1642 :                 blob->active.cluster_array_size = sz;
    2222             : 
    2223             :                 /* Expand the extents table, only if enough clusters were added */
    2224        1642 :                 if (new_num_ep > current_num_ep && blob->use_extent_table) {
    2225         814 :                         ep_tmp = realloc(blob->active.extent_pages, sizeof(*blob->active.extent_pages) * new_num_ep);
    2226         814 :                         if (new_num_ep > 0 && ep_tmp == NULL) {
    2227           0 :                                 rc = -ENOMEM;
    2228           0 :                                 goto out;
    2229             :                         }
    2230         814 :                         memset(ep_tmp + blob->active.extent_pages_array_size, 0,
    2231         814 :                                sizeof(*blob->active.extent_pages) * (new_num_ep - blob->active.extent_pages_array_size));
    2232         814 :                         blob->active.extent_pages = ep_tmp;
    2233         814 :                         blob->active.extent_pages_array_size = new_num_ep;
    2234             :                 }
    2235             :         }
    2236             : 
    2237        3008 :         blob->state = SPDK_BLOB_STATE_DIRTY;
    2238             : 
    2239        3008 :         if (spdk_blob_is_thin_provisioned(blob) == false) {
    2240        2424 :                 cluster = 0;
    2241        2424 :                 lfmd = 0;
    2242        9788 :                 for (i = num_clusters; i < sz; i++) {
    2243        7364 :                         bs_allocate_cluster(blob, i, &cluster, &lfmd, true);
    2244             :                         /* Do not increment lfmd here.  lfmd will get updated
    2245             :                          * to the md_page allocated (if any) when a new extent
    2246             :                          * page is needed.  Just pass that value again,
    2247             :                          * bs_allocate_cluster will just start at that index
    2248             :                          * to find the next free md_page when needed.
    2249             :                          */
    2250             :                 }
    2251             :         }
    2252             : 
    2253        3008 :         blob->active.num_clusters = sz;
    2254        3008 :         blob->active.num_extent_pages = new_num_ep;
    2255             : 
    2256        3008 :         rc = 0;
    2257        3016 : out:
    2258        3016 :         if (spdk_spin_held(&bs->used_lock)) {
    2259        1298 :                 spdk_spin_unlock(&bs->used_lock);
    2260             :         }
    2261             : 
    2262        3016 :         return rc;
    2263             : }
    2264             : 
    2265             : static void
    2266        3248 : blob_persist_generate_new_md(struct spdk_blob_persist_ctx *ctx)
    2267             : {
    2268        3248 :         spdk_bs_sequence_t *seq = ctx->seq;
    2269        3248 :         struct spdk_blob *blob = ctx->blob;
    2270        3248 :         struct spdk_blob_store *bs = blob->bs;
    2271             :         uint64_t i;
    2272             :         uint32_t page_num;
    2273             :         void *tmp;
    2274             :         int rc;
    2275             : 
    2276             :         /* Generate the new metadata */
    2277        3248 :         rc = blob_serialize(blob, &ctx->pages, &blob->active.num_pages);
    2278        3248 :         if (rc < 0) {
    2279           0 :                 blob_persist_complete(seq, ctx, rc);
    2280           0 :                 return;
    2281             :         }
    2282             : 
    2283        3248 :         assert(blob->active.num_pages >= 1);
    2284             : 
    2285             :         /* Resize the cache of page indices */
    2286        3248 :         tmp = realloc(blob->active.pages, blob->active.num_pages * sizeof(*blob->active.pages));
    2287        3248 :         if (!tmp) {
    2288           0 :                 blob_persist_complete(seq, ctx, -ENOMEM);
    2289           0 :                 return;
    2290             :         }
    2291        3248 :         blob->active.pages = tmp;
    2292             : 
    2293             :         /* Assign this metadata to pages. This requires two passes - one to verify that there are
    2294             :          * enough pages and a second to actually claim them. The used_lock is held across
    2295             :          * both passes to ensure things don't change in the middle.
    2296             :          */
    2297        3248 :         spdk_spin_lock(&bs->used_lock);
    2298        3248 :         page_num = 0;
    2299             :         /* Note that this loop starts at one. The first page location is fixed by the blobid. */
    2300        3336 :         for (i = 1; i < blob->active.num_pages; i++) {
    2301          88 :                 page_num = spdk_bit_array_find_first_clear(bs->used_md_pages, page_num);
    2302          88 :                 if (page_num == UINT32_MAX) {
    2303           0 :                         spdk_spin_unlock(&bs->used_lock);
    2304           0 :                         blob_persist_complete(seq, ctx, -ENOMEM);
    2305           0 :                         return;
    2306             :                 }
    2307          88 :                 page_num++;
    2308             :         }
    2309             : 
    2310        3248 :         page_num = 0;
    2311        3248 :         blob->active.pages[0] = bs_blobid_to_page(blob->id);
    2312        3336 :         for (i = 1; i < blob->active.num_pages; i++) {
    2313          88 :                 page_num = spdk_bit_array_find_first_clear(bs->used_md_pages, page_num);
    2314          88 :                 ctx->pages[i - 1].next = page_num;
    2315             :                 /* Now that previous metadata page is complete, calculate the crc for it. */
    2316          88 :                 ctx->pages[i - 1].crc = blob_md_page_calc_crc(&ctx->pages[i - 1]);
    2317          88 :                 blob->active.pages[i] = page_num;
    2318          88 :                 bs_claim_md_page(bs, page_num);
    2319          88 :                 SPDK_DEBUGLOG(blob, "Claiming page %u for blob 0x%" PRIx64 "\n", page_num,
    2320             :                               blob->id);
    2321          88 :                 page_num++;
    2322             :         }
    2323        3248 :         spdk_spin_unlock(&bs->used_lock);
    2324        3248 :         ctx->pages[i - 1].crc = blob_md_page_calc_crc(&ctx->pages[i - 1]);
    2325             :         /* Start writing the metadata from last page to first */
    2326        3248 :         blob->state = SPDK_BLOB_STATE_CLEAN;
    2327        3248 :         blob_persist_write_page_chain(seq, ctx);
    2328             : }
    2329             : 
    2330             : static void
    2331        2282 : blob_persist_write_extent_pages(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
    2332             : {
    2333        2282 :         struct spdk_blob_persist_ctx    *ctx = cb_arg;
    2334        2282 :         struct spdk_blob                *blob = ctx->blob;
    2335             :         size_t                          i;
    2336             :         uint32_t                        extent_page_id;
    2337        2282 :         uint32_t                        page_count = 0;
    2338             :         int                             rc;
    2339             : 
    2340        2282 :         if (ctx->extent_page != NULL) {
    2341         658 :                 spdk_free(ctx->extent_page);
    2342         658 :                 ctx->extent_page = NULL;
    2343             :         }
    2344             : 
    2345        2282 :         if (bserrno != 0) {
    2346           0 :                 blob_persist_complete(seq, ctx, bserrno);
    2347           0 :                 return;
    2348             :         }
    2349             : 
    2350             :         /* Only write out Extent Pages when blob was resized. */
    2351        4512 :         for (i = ctx->next_extent_page; i < blob->active.extent_pages_array_size; i++) {
    2352        2888 :                 extent_page_id = blob->active.extent_pages[i];
    2353        2888 :                 if (extent_page_id == 0) {
    2354             :                         /* No Extent Page to persist */
    2355        2230 :                         assert(spdk_blob_is_thin_provisioned(blob));
    2356        2230 :                         continue;
    2357             :                 }
    2358         658 :                 assert(spdk_bit_array_get(blob->bs->used_md_pages, extent_page_id));
    2359         658 :                 ctx->next_extent_page = i + 1;
    2360         658 :                 rc = blob_serialize_add_page(ctx->blob, &ctx->extent_page, &page_count, &ctx->extent_page);
    2361         658 :                 if (rc < 0) {
    2362           0 :                         blob_persist_complete(seq, ctx, rc);
    2363           0 :                         return;
    2364             :                 }
    2365             : 
    2366         658 :                 blob->state = SPDK_BLOB_STATE_DIRTY;
    2367         658 :                 blob_serialize_extent_page(blob, i * SPDK_EXTENTS_PER_EP, ctx->extent_page);
    2368             : 
    2369         658 :                 ctx->extent_page->crc = blob_md_page_calc_crc(ctx->extent_page);
    2370             : 
    2371         658 :                 bs_sequence_write_dev(seq, ctx->extent_page, bs_md_page_to_lba(blob->bs, extent_page_id),
    2372         658 :                                       bs_byte_to_lba(blob->bs, SPDK_BS_PAGE_SIZE),
    2373             :                                       blob_persist_write_extent_pages, ctx);
    2374         658 :                 return;
    2375             :         }
    2376             : 
    2377        1624 :         blob_persist_generate_new_md(ctx);
    2378             : }
    2379             : 
    2380             : static void
    2381        4676 : blob_persist_start(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
    2382             : {
    2383        4676 :         struct spdk_blob_persist_ctx *ctx = cb_arg;
    2384        4676 :         struct spdk_blob *blob = ctx->blob;
    2385             : 
    2386        4676 :         if (bserrno != 0) {
    2387           8 :                 blob_persist_complete(seq, ctx, bserrno);
    2388           8 :                 return;
    2389             :         }
    2390             : 
    2391        4668 :         if (blob->active.num_pages == 0) {
    2392             :                 /* This is the signal that the blob should be deleted.
    2393             :                  * Immediately jump to the clean up routine. */
    2394        1420 :                 assert(blob->clean.num_pages > 0);
    2395        1420 :                 blob->state = SPDK_BLOB_STATE_CLEAN;
    2396        1420 :                 blob_persist_zero_pages(seq, ctx, 0);
    2397        1420 :                 return;
    2398             : 
    2399             :         }
    2400             : 
    2401        3248 :         if (blob->clean.num_clusters < blob->active.num_clusters) {
    2402             :                 /* Blob was resized up */
    2403        1614 :                 assert(blob->clean.num_extent_pages <= blob->active.num_extent_pages);
    2404        1614 :                 ctx->next_extent_page = spdk_max(1, blob->clean.num_extent_pages) - 1;
    2405        1634 :         } else if (blob->active.num_clusters < blob->active.cluster_array_size) {
    2406             :                 /* Blob was resized down */
    2407          10 :                 assert(blob->clean.num_extent_pages >= blob->active.num_extent_pages);
    2408          10 :                 ctx->next_extent_page = spdk_max(1, blob->active.num_extent_pages) - 1;
    2409             :         } else {
    2410             :                 /* No change in size occurred */
    2411        1624 :                 blob_persist_generate_new_md(ctx);
    2412        1624 :                 return;
    2413             :         }
    2414             : 
    2415        1624 :         blob_persist_write_extent_pages(seq, ctx, 0);
    2416             : }
    2417             : 
    2418             : struct spdk_bs_mark_dirty {
    2419             :         struct spdk_blob_store          *bs;
    2420             :         struct spdk_bs_super_block      *super;
    2421             :         spdk_bs_sequence_cpl            cb_fn;
    2422             :         void                            *cb_arg;
    2423             : };
    2424             : 
    2425             : static void
    2426         158 : bs_mark_dirty_write_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
    2427             : {
    2428         158 :         struct spdk_bs_mark_dirty *ctx = cb_arg;
    2429             : 
    2430         158 :         if (bserrno == 0) {
    2431         150 :                 ctx->bs->clean = 0;
    2432             :         }
    2433             : 
    2434         158 :         ctx->cb_fn(seq, ctx->cb_arg, bserrno);
    2435             : 
    2436         158 :         spdk_free(ctx->super);
    2437         158 :         free(ctx);
    2438         158 : }
    2439             : 
    2440             : static void bs_write_super(spdk_bs_sequence_t *seq, struct spdk_blob_store *bs,
    2441             :                            struct spdk_bs_super_block *super, spdk_bs_sequence_cpl cb_fn, void *cb_arg);
    2442             : 
    2443             : 
    2444             : static void
    2445         158 : bs_mark_dirty_write(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
    2446             : {
    2447         158 :         struct spdk_bs_mark_dirty *ctx = cb_arg;
    2448             :         int rc;
    2449             : 
    2450         158 :         if (bserrno != 0) {
    2451           4 :                 bs_mark_dirty_write_cpl(seq, ctx, bserrno);
    2452           4 :                 return;
    2453             :         }
    2454             : 
    2455         154 :         rc = bs_super_validate(ctx->super, ctx->bs);
    2456         154 :         if (rc != 0) {
    2457           0 :                 bs_mark_dirty_write_cpl(seq, ctx, rc);
    2458           0 :                 return;
    2459             :         }
    2460             : 
    2461         154 :         ctx->super->clean = 0;
    2462         154 :         if (ctx->super->size == 0) {
    2463           4 :                 ctx->super->size = ctx->bs->dev->blockcnt * ctx->bs->dev->blocklen;
    2464             :         }
    2465             : 
    2466         154 :         bs_write_super(seq, ctx->bs, ctx->super, bs_mark_dirty_write_cpl, ctx);
    2467             : }
    2468             : 
    2469             : static void
    2470        4982 : bs_mark_dirty(spdk_bs_sequence_t *seq, struct spdk_blob_store *bs,
    2471             :               spdk_bs_sequence_cpl cb_fn, void *cb_arg)
    2472             : {
    2473             :         struct spdk_bs_mark_dirty *ctx;
    2474             : 
    2475             :         /* Blobstore is already marked dirty */
    2476        4982 :         if (bs->clean == 0) {
    2477        4824 :                 cb_fn(seq, cb_arg, 0);
    2478        4824 :                 return;
    2479             :         }
    2480             : 
    2481         158 :         ctx = calloc(1, sizeof(*ctx));
    2482         158 :         if (!ctx) {
    2483           0 :                 cb_fn(seq, cb_arg, -ENOMEM);
    2484           0 :                 return;
    2485             :         }
    2486         158 :         ctx->bs = bs;
    2487         158 :         ctx->cb_fn = cb_fn;
    2488         158 :         ctx->cb_arg = cb_arg;
    2489             : 
    2490         158 :         ctx->super = spdk_zmalloc(sizeof(*ctx->super), 0x1000, NULL,
    2491             :                                   SPDK_ENV_SOCKET_ID_ANY, SPDK_MALLOC_DMA);
    2492         158 :         if (!ctx->super) {
    2493           0 :                 free(ctx);
    2494           0 :                 cb_fn(seq, cb_arg, -ENOMEM);
    2495           0 :                 return;
    2496             :         }
    2497             : 
    2498         158 :         bs_sequence_read_dev(seq, ctx->super, bs_page_to_lba(bs, 0),
    2499         158 :                              bs_byte_to_lba(bs, sizeof(*ctx->super)),
    2500             :                              bs_mark_dirty_write, ctx);
    2501             : }
    2502             : 
    2503             : /* Write a blob to disk */
    2504             : static void
    2505        8364 : blob_persist(spdk_bs_sequence_t *seq, struct spdk_blob *blob,
    2506             :              spdk_bs_sequence_cpl cb_fn, void *cb_arg)
    2507             : {
    2508             :         struct spdk_blob_persist_ctx *ctx;
    2509             : 
    2510        8364 :         blob_verify_md_op(blob);
    2511             : 
    2512        8364 :         if (blob->state == SPDK_BLOB_STATE_CLEAN && TAILQ_EMPTY(&blob->persists_to_complete)) {
    2513        3688 :                 cb_fn(seq, cb_arg, 0);
    2514        3688 :                 return;
    2515             :         }
    2516             : 
    2517        4676 :         ctx = calloc(1, sizeof(*ctx));
    2518        4676 :         if (!ctx) {
    2519           0 :                 cb_fn(seq, cb_arg, -ENOMEM);
    2520           0 :                 return;
    2521             :         }
    2522        4676 :         ctx->blob = blob;
    2523        4676 :         ctx->seq = seq;
    2524        4676 :         ctx->cb_fn = cb_fn;
    2525        4676 :         ctx->cb_arg = cb_arg;
    2526             : 
    2527             :         /* Multiple blob persists can affect one another, via blob->state or
    2528             :          * blob mutable data changes. To prevent it, queue up the persists. */
    2529        4676 :         if (!TAILQ_EMPTY(&blob->persists_to_complete)) {
    2530          21 :                 TAILQ_INSERT_TAIL(&blob->pending_persists, ctx, link);
    2531          21 :                 return;
    2532             :         }
    2533        4655 :         TAILQ_INSERT_HEAD(&blob->persists_to_complete, ctx, link);
    2534             : 
    2535        4655 :         bs_mark_dirty(seq, blob->bs, blob_persist_start, ctx);
    2536             : }
    2537             : 
    2538             : struct spdk_blob_copy_cluster_ctx {
    2539             :         struct spdk_blob *blob;
    2540             :         uint8_t *buf;
    2541             :         uint64_t page;
    2542             :         uint64_t new_cluster;
    2543             :         uint32_t new_extent_page;
    2544             :         spdk_bs_sequence_t *seq;
    2545             :         struct spdk_blob_md_page *new_cluster_page;
    2546             : };
    2547             : 
    2548             : static void
    2549         608 : blob_allocate_and_copy_cluster_cpl(void *cb_arg, int bserrno)
    2550             : {
    2551         608 :         struct spdk_blob_copy_cluster_ctx *ctx = cb_arg;
    2552         608 :         struct spdk_bs_request_set *set = (struct spdk_bs_request_set *)ctx->seq;
    2553         608 :         TAILQ_HEAD(, spdk_bs_request_set) requests;
    2554             :         spdk_bs_user_op_t *op;
    2555             : 
    2556         608 :         TAILQ_INIT(&requests);
    2557         608 :         TAILQ_SWAP(&set->channel->need_cluster_alloc, &requests, spdk_bs_request_set, link);
    2558             : 
    2559        1216 :         while (!TAILQ_EMPTY(&requests)) {
    2560         608 :                 op = TAILQ_FIRST(&requests);
    2561         608 :                 TAILQ_REMOVE(&requests, op, link);
    2562         608 :                 if (bserrno == 0) {
    2563         608 :                         bs_user_op_execute(op);
    2564             :                 } else {
    2565           0 :                         bs_user_op_abort(op, bserrno);
    2566             :                 }
    2567             :         }
    2568             : 
    2569         608 :         spdk_free(ctx->buf);
    2570         608 :         free(ctx);
    2571         608 : }
    2572             : 
    2573             : static void
    2574         608 : blob_insert_cluster_cpl(void *cb_arg, int bserrno)
    2575             : {
    2576         608 :         struct spdk_blob_copy_cluster_ctx *ctx = cb_arg;
    2577             : 
    2578         608 :         if (bserrno) {
    2579           4 :                 if (bserrno == -EEXIST) {
    2580             :                         /* The metadata insert failed because another thread
    2581             :                          * allocated the cluster first. Free our cluster
    2582             :                          * but continue without error. */
    2583           4 :                         bserrno = 0;
    2584             :                 }
    2585           4 :                 spdk_spin_lock(&ctx->blob->bs->used_lock);
    2586           4 :                 bs_release_cluster(ctx->blob->bs, ctx->new_cluster);
    2587           4 :                 if (ctx->new_extent_page != 0) {
    2588           2 :                         bs_release_md_page(ctx->blob->bs, ctx->new_extent_page);
    2589             :                 }
    2590           4 :                 spdk_spin_unlock(&ctx->blob->bs->used_lock);
    2591             :         }
    2592             : 
    2593         608 :         bs_sequence_finish(ctx->seq, bserrno);
    2594         608 : }
    2595             : 
    2596             : static void
    2597         296 : blob_write_copy_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
    2598             : {
    2599         296 :         struct spdk_blob_copy_cluster_ctx *ctx = cb_arg;
    2600             :         uint32_t cluster_number;
    2601             : 
    2602         296 :         if (bserrno) {
    2603             :                 /* The write failed, so jump to the final completion handler */
    2604           0 :                 bs_sequence_finish(seq, bserrno);
    2605           0 :                 return;
    2606             :         }
    2607             : 
    2608         296 :         cluster_number = bs_page_to_cluster(ctx->blob->bs, ctx->page);
    2609             : 
    2610         296 :         blob_insert_cluster_on_md_thread(ctx->blob, cluster_number, ctx->new_cluster,
    2611             :                                          ctx->new_extent_page, ctx->new_cluster_page, blob_insert_cluster_cpl, ctx);
    2612             : }
    2613             : 
    2614             : static void
    2615         188 : blob_write_copy(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
    2616             : {
    2617         188 :         struct spdk_blob_copy_cluster_ctx *ctx = cb_arg;
    2618             : 
    2619         188 :         if (bserrno != 0) {
    2620             :                 /* The read failed, so jump to the final completion handler */
    2621           0 :                 bs_sequence_finish(seq, bserrno);
    2622           0 :                 return;
    2623             :         }
    2624             : 
    2625             :         /* Write whole cluster */
    2626         188 :         bs_sequence_write_dev(seq, ctx->buf,
    2627         188 :                               bs_cluster_to_lba(ctx->blob->bs, ctx->new_cluster),
    2628         188 :                               bs_cluster_to_lba(ctx->blob->bs, 1),
    2629             :                               blob_write_copy_cpl, ctx);
    2630             : }
    2631             : 
    2632             : static bool
    2633         608 : blob_can_copy(struct spdk_blob *blob, uint32_t cluster_start_page, uint64_t *base_lba)
    2634             : {
    2635         608 :         uint64_t lba = bs_dev_page_to_lba(blob->back_bs_dev, cluster_start_page);
    2636             : 
    2637         872 :         return (!blob_is_esnap_clone(blob) && blob->bs->dev->copy != NULL) &&
    2638         264 :                blob->back_bs_dev->translate_lba(blob->back_bs_dev, lba, base_lba);
    2639             : }
    2640             : 
    2641             : static void
    2642         108 : blob_copy(struct spdk_blob_copy_cluster_ctx *ctx, spdk_bs_user_op_t *op, uint64_t src_lba)
    2643             : {
    2644         108 :         struct spdk_blob *blob = ctx->blob;
    2645         108 :         uint64_t lba_count = bs_dev_byte_to_lba(blob->back_bs_dev, blob->bs->cluster_sz);
    2646             : 
    2647         108 :         bs_sequence_copy_dev(ctx->seq,
    2648         108 :                              bs_cluster_to_lba(blob->bs, ctx->new_cluster),
    2649             :                              src_lba,
    2650             :                              lba_count,
    2651             :                              blob_write_copy_cpl, ctx);
    2652         108 : }
    2653             : 
    2654             : static void
    2655         608 : bs_allocate_and_copy_cluster(struct spdk_blob *blob,
    2656             :                              struct spdk_io_channel *_ch,
    2657             :                              uint64_t io_unit, spdk_bs_user_op_t *op)
    2658             : {
    2659         608 :         struct spdk_bs_cpl cpl;
    2660             :         struct spdk_bs_channel *ch;
    2661             :         struct spdk_blob_copy_cluster_ctx *ctx;
    2662             :         uint32_t cluster_start_page;
    2663             :         uint32_t cluster_number;
    2664             :         bool is_zeroes;
    2665             :         bool can_copy;
    2666         608 :         uint64_t copy_src_lba;
    2667             :         int rc;
    2668             : 
    2669         608 :         ch = spdk_io_channel_get_ctx(_ch);
    2670             : 
    2671         608 :         if (!TAILQ_EMPTY(&ch->need_cluster_alloc)) {
    2672             :                 /* There are already operations pending. Queue this user op
    2673             :                  * and return because it will be re-executed when the outstanding
    2674             :                  * cluster allocation completes. */
    2675           0 :                 TAILQ_INSERT_TAIL(&ch->need_cluster_alloc, op, link);
    2676           0 :                 return;
    2677             :         }
    2678             : 
    2679             :         /* Round the io_unit offset down to the first page in the cluster */
    2680         608 :         cluster_start_page = bs_io_unit_to_cluster_start(blob, io_unit);
    2681             : 
    2682             :         /* Calculate which index in the metadata cluster array the corresponding
    2683             :          * cluster is supposed to be at. */
    2684         608 :         cluster_number = bs_io_unit_to_cluster_number(blob, io_unit);
    2685             : 
    2686         608 :         ctx = calloc(1, sizeof(*ctx));
    2687         608 :         if (!ctx) {
    2688           0 :                 bs_user_op_abort(op, -ENOMEM);
    2689           0 :                 return;
    2690             :         }
    2691             : 
    2692         608 :         assert(blob->bs->cluster_sz % blob->back_bs_dev->blocklen == 0);
    2693             : 
    2694         608 :         ctx->blob = blob;
    2695         608 :         ctx->page = cluster_start_page;
    2696         608 :         ctx->new_cluster_page = ch->new_cluster_page;
    2697         608 :         memset(ctx->new_cluster_page, 0, SPDK_BS_PAGE_SIZE);
    2698         608 :         can_copy = blob_can_copy(blob, cluster_start_page, &copy_src_lba);
    2699             : 
    2700         608 :         is_zeroes = blob->back_bs_dev->is_zeroes(blob->back_bs_dev,
    2701             :                         bs_dev_page_to_lba(blob->back_bs_dev, cluster_start_page),
    2702         608 :                         bs_dev_byte_to_lba(blob->back_bs_dev, blob->bs->cluster_sz));
    2703         608 :         if (blob->parent_id != SPDK_BLOBID_INVALID && !is_zeroes && !can_copy) {
    2704         188 :                 ctx->buf = spdk_malloc(blob->bs->cluster_sz, blob->back_bs_dev->blocklen,
    2705             :                                        NULL, SPDK_ENV_SOCKET_ID_ANY, SPDK_MALLOC_DMA);
    2706         188 :                 if (!ctx->buf) {
    2707           0 :                         SPDK_ERRLOG("DMA allocation for cluster of size = %" PRIu32 " failed.\n",
    2708             :                                     blob->bs->cluster_sz);
    2709           0 :                         free(ctx);
    2710           0 :                         bs_user_op_abort(op, -ENOMEM);
    2711           0 :                         return;
    2712             :                 }
    2713             :         }
    2714             : 
    2715         608 :         spdk_spin_lock(&blob->bs->used_lock);
    2716         608 :         rc = bs_allocate_cluster(blob, cluster_number, &ctx->new_cluster, &ctx->new_extent_page,
    2717             :                                  false);
    2718         608 :         spdk_spin_unlock(&blob->bs->used_lock);
    2719         608 :         if (rc != 0) {
    2720           0 :                 spdk_free(ctx->buf);
    2721           0 :                 free(ctx);
    2722           0 :                 bs_user_op_abort(op, rc);
    2723           0 :                 return;
    2724             :         }
    2725             : 
    2726         608 :         cpl.type = SPDK_BS_CPL_TYPE_BLOB_BASIC;
    2727         608 :         cpl.u.blob_basic.cb_fn = blob_allocate_and_copy_cluster_cpl;
    2728         608 :         cpl.u.blob_basic.cb_arg = ctx;
    2729             : 
    2730         608 :         ctx->seq = bs_sequence_start_blob(_ch, &cpl, blob);
    2731         608 :         if (!ctx->seq) {
    2732           0 :                 spdk_spin_lock(&blob->bs->used_lock);
    2733           0 :                 bs_release_cluster(blob->bs, ctx->new_cluster);
    2734           0 :                 spdk_spin_unlock(&blob->bs->used_lock);
    2735           0 :                 spdk_free(ctx->buf);
    2736           0 :                 free(ctx);
    2737           0 :                 bs_user_op_abort(op, -ENOMEM);
    2738           0 :                 return;
    2739             :         }
    2740             : 
    2741             :         /* Queue the user op to block other incoming operations */
    2742         608 :         TAILQ_INSERT_TAIL(&ch->need_cluster_alloc, op, link);
    2743             : 
    2744         608 :         if (blob->parent_id != SPDK_BLOBID_INVALID && !is_zeroes) {
    2745         296 :                 if (can_copy) {
    2746         108 :                         blob_copy(ctx, op, copy_src_lba);
    2747             :                 } else {
    2748             :                         /* Read cluster from backing device */
    2749         188 :                         bs_sequence_read_bs_dev(ctx->seq, blob->back_bs_dev, ctx->buf,
    2750             :                                                 bs_dev_page_to_lba(blob->back_bs_dev, cluster_start_page),
    2751         188 :                                                 bs_dev_byte_to_lba(blob->back_bs_dev, blob->bs->cluster_sz),
    2752             :                                                 blob_write_copy, ctx);
    2753             :                 }
    2754             : 
    2755             :         } else {
    2756         312 :                 blob_insert_cluster_on_md_thread(ctx->blob, cluster_number, ctx->new_cluster,
    2757             :                                                  ctx->new_extent_page, ctx->new_cluster_page, blob_insert_cluster_cpl, ctx);
    2758             :         }
    2759             : }
    2760             : 
    2761             : static inline bool
    2762       27542 : blob_calculate_lba_and_lba_count(struct spdk_blob *blob, uint64_t io_unit, uint64_t length,
    2763             :                                  uint64_t *lba, uint64_t *lba_count)
    2764             : {
    2765       27542 :         *lba_count = length;
    2766             : 
    2767       27542 :         if (!bs_io_unit_is_allocated(blob, io_unit)) {
    2768        2760 :                 assert(blob->back_bs_dev != NULL);
    2769        2760 :                 *lba = bs_io_unit_to_back_dev_lba(blob, io_unit);
    2770        2760 :                 *lba_count = bs_io_unit_to_back_dev_lba(blob, *lba_count);
    2771        2760 :                 return false;
    2772             :         } else {
    2773       24782 :                 *lba = bs_blob_io_unit_to_lba(blob, io_unit);
    2774       24782 :                 return true;
    2775             :         }
    2776             : }
    2777             : 
    2778             : struct op_split_ctx {
    2779             :         struct spdk_blob *blob;
    2780             :         struct spdk_io_channel *channel;
    2781             :         uint64_t io_unit_offset;
    2782             :         uint64_t io_units_remaining;
    2783             :         void *curr_payload;
    2784             :         enum spdk_blob_op_type op_type;
    2785             :         spdk_bs_sequence_t *seq;
    2786             :         bool in_submit_ctx;
    2787             :         bool completed_in_submit_ctx;
    2788             :         bool done;
    2789             : };
    2790             : 
    2791             : static void
    2792         758 : blob_request_submit_op_split_next(void *cb_arg, int bserrno)
    2793             : {
    2794         758 :         struct op_split_ctx     *ctx = cb_arg;
    2795         758 :         struct spdk_blob        *blob = ctx->blob;
    2796         758 :         struct spdk_io_channel  *ch = ctx->channel;
    2797         758 :         enum spdk_blob_op_type  op_type = ctx->op_type;
    2798             :         uint8_t                 *buf;
    2799             :         uint64_t                offset;
    2800             :         uint64_t                length;
    2801             :         uint64_t                op_length;
    2802             : 
    2803         758 :         if (bserrno != 0 || ctx->io_units_remaining == 0) {
    2804         174 :                 bs_sequence_finish(ctx->seq, bserrno);
    2805         174 :                 if (ctx->in_submit_ctx) {
    2806             :                         /* Defer freeing of the ctx object, since it will be
    2807             :                          * accessed when this unwinds back to the submisison
    2808             :                          * context.
    2809             :                          */
    2810          40 :                         ctx->done = true;
    2811             :                 } else {
    2812         134 :                         free(ctx);
    2813             :                 }
    2814         174 :                 return;
    2815             :         }
    2816             : 
    2817         584 :         if (ctx->in_submit_ctx) {
    2818             :                 /* If this split operation completed in the context
    2819             :                  * of its submission, mark the flag and return immediately
    2820             :                  * to avoid recursion.
    2821             :                  */
    2822          68 :                 ctx->completed_in_submit_ctx = true;
    2823          68 :                 return;
    2824             :         }
    2825             : 
    2826             :         while (true) {
    2827         584 :                 ctx->completed_in_submit_ctx = false;
    2828             : 
    2829         584 :                 offset = ctx->io_unit_offset;
    2830         584 :                 length = ctx->io_units_remaining;
    2831         584 :                 buf = ctx->curr_payload;
    2832         584 :                 op_length = spdk_min(length, bs_num_io_units_to_cluster_boundary(blob,
    2833             :                                      offset));
    2834             : 
    2835             :                 /* Update length and payload for next operation */
    2836         584 :                 ctx->io_units_remaining -= op_length;
    2837         584 :                 ctx->io_unit_offset += op_length;
    2838         584 :                 if (op_type == SPDK_BLOB_WRITE || op_type == SPDK_BLOB_READ) {
    2839         528 :                         ctx->curr_payload += op_length * blob->bs->io_unit_size;
    2840             :                 }
    2841             : 
    2842         584 :                 assert(!ctx->in_submit_ctx);
    2843         584 :                 ctx->in_submit_ctx = true;
    2844             : 
    2845         584 :                 switch (op_type) {
    2846         418 :                 case SPDK_BLOB_READ:
    2847         418 :                         spdk_blob_io_read(blob, ch, buf, offset, op_length,
    2848             :                                           blob_request_submit_op_split_next, ctx);
    2849         418 :                         break;
    2850         110 :                 case SPDK_BLOB_WRITE:
    2851         110 :                         spdk_blob_io_write(blob, ch, buf, offset, op_length,
    2852             :                                            blob_request_submit_op_split_next, ctx);
    2853         110 :                         break;
    2854          24 :                 case SPDK_BLOB_UNMAP:
    2855          24 :                         spdk_blob_io_unmap(blob, ch, offset, op_length,
    2856             :                                            blob_request_submit_op_split_next, ctx);
    2857          24 :                         break;
    2858          32 :                 case SPDK_BLOB_WRITE_ZEROES:
    2859          32 :                         spdk_blob_io_write_zeroes(blob, ch, offset, op_length,
    2860             :                                                   blob_request_submit_op_split_next, ctx);
    2861          32 :                         break;
    2862           0 :                 case SPDK_BLOB_READV:
    2863             :                 case SPDK_BLOB_WRITEV:
    2864           0 :                         SPDK_ERRLOG("readv/write not valid\n");
    2865           0 :                         bs_sequence_finish(ctx->seq, -EINVAL);
    2866           0 :                         free(ctx);
    2867           0 :                         return;
    2868             :                 }
    2869             : 
    2870             : #ifndef __clang_analyzer__
    2871             :                 /* scan-build reports a false positive around accessing the ctx here. It
    2872             :                  * forms a path that recursively calls this function, but then says
    2873             :                  * "assuming ctx->in_submit_ctx is false", when that isn't possible.
    2874             :                  * This path does free(ctx), returns to here, and reports a use-after-free
    2875             :                  * bug.  Wrapping this bit of code so that scan-build doesn't see it
    2876             :                  * works around the scan-build bug.
    2877             :                  */
    2878         584 :                 assert(ctx->in_submit_ctx);
    2879         584 :                 ctx->in_submit_ctx = false;
    2880             : 
    2881             :                 /* If the operation completed immediately, loop back and submit the
    2882             :                  * next operation.  Otherwise we can return and the next split
    2883             :                  * operation will get submitted when this current operation is
    2884             :                  * later completed asynchronously.
    2885             :                  */
    2886         584 :                 if (ctx->completed_in_submit_ctx) {
    2887          68 :                         continue;
    2888         516 :                 } else if (ctx->done) {
    2889          40 :                         free(ctx);
    2890             :                 }
    2891             : #endif
    2892         516 :                 break;
    2893             :         }
    2894             : }
    2895             : 
    2896             : static void
    2897         174 : blob_request_submit_op_split(struct spdk_io_channel *ch, struct spdk_blob *blob,
    2898             :                              void *payload, uint64_t offset, uint64_t length,
    2899             :                              spdk_blob_op_complete cb_fn, void *cb_arg, enum spdk_blob_op_type op_type)
    2900             : {
    2901             :         struct op_split_ctx *ctx;
    2902             :         spdk_bs_sequence_t *seq;
    2903         174 :         struct spdk_bs_cpl cpl;
    2904             : 
    2905         174 :         assert(blob != NULL);
    2906             : 
    2907         174 :         ctx = calloc(1, sizeof(struct op_split_ctx));
    2908         174 :         if (ctx == NULL) {
    2909           0 :                 cb_fn(cb_arg, -ENOMEM);
    2910           0 :                 return;
    2911             :         }
    2912             : 
    2913         174 :         cpl.type = SPDK_BS_CPL_TYPE_BLOB_BASIC;
    2914         174 :         cpl.u.blob_basic.cb_fn = cb_fn;
    2915         174 :         cpl.u.blob_basic.cb_arg = cb_arg;
    2916             : 
    2917         174 :         seq = bs_sequence_start_blob(ch, &cpl, blob);
    2918         174 :         if (!seq) {
    2919           0 :                 free(ctx);
    2920           0 :                 cb_fn(cb_arg, -ENOMEM);
    2921           0 :                 return;
    2922             :         }
    2923             : 
    2924         174 :         ctx->blob = blob;
    2925         174 :         ctx->channel = ch;
    2926         174 :         ctx->curr_payload = payload;
    2927         174 :         ctx->io_unit_offset = offset;
    2928         174 :         ctx->io_units_remaining = length;
    2929         174 :         ctx->op_type = op_type;
    2930         174 :         ctx->seq = seq;
    2931             : 
    2932         174 :         blob_request_submit_op_split_next(ctx, 0);
    2933             : }
    2934             : 
    2935             : static void
    2936       25170 : blob_request_submit_op_single(struct spdk_io_channel *_ch, struct spdk_blob *blob,
    2937             :                               void *payload, uint64_t offset, uint64_t length,
    2938             :                               spdk_blob_op_complete cb_fn, void *cb_arg, enum spdk_blob_op_type op_type)
    2939             : {
    2940       25170 :         struct spdk_bs_cpl cpl;
    2941       25170 :         uint64_t lba;
    2942       25170 :         uint64_t lba_count;
    2943             :         bool is_allocated;
    2944             : 
    2945       25170 :         assert(blob != NULL);
    2946             : 
    2947       25170 :         cpl.type = SPDK_BS_CPL_TYPE_BLOB_BASIC;
    2948       25170 :         cpl.u.blob_basic.cb_fn = cb_fn;
    2949       25170 :         cpl.u.blob_basic.cb_arg = cb_arg;
    2950             : 
    2951       25170 :         if (blob->frozen_refcnt) {
    2952             :                 /* This blob I/O is frozen */
    2953             :                 spdk_bs_user_op_t *op;
    2954           4 :                 struct spdk_bs_channel *bs_channel = spdk_io_channel_get_ctx(_ch);
    2955             : 
    2956           4 :                 op = bs_user_op_alloc(_ch, &cpl, op_type, blob, payload, 0, offset, length);
    2957           4 :                 if (!op) {
    2958           0 :                         cb_fn(cb_arg, -ENOMEM);
    2959           0 :                         return;
    2960             :                 }
    2961             : 
    2962           4 :                 TAILQ_INSERT_TAIL(&bs_channel->queued_io, op, link);
    2963             : 
    2964           4 :                 return;
    2965             :         }
    2966             : 
    2967       25166 :         is_allocated = blob_calculate_lba_and_lba_count(blob, offset, length, &lba, &lba_count);
    2968             : 
    2969       25166 :         switch (op_type) {
    2970       13707 :         case SPDK_BLOB_READ: {
    2971             :                 spdk_bs_batch_t *batch;
    2972             : 
    2973       13707 :                 batch = bs_batch_open(_ch, &cpl, blob);
    2974       13707 :                 if (!batch) {
    2975           0 :                         cb_fn(cb_arg, -ENOMEM);
    2976           0 :                         return;
    2977             :                 }
    2978             : 
    2979       13707 :                 if (is_allocated) {
    2980             :                         /* Read from the blob */
    2981       12719 :                         bs_batch_read_dev(batch, payload, lba, lba_count);
    2982             :                 } else {
    2983             :                         /* Read from the backing block device */
    2984         988 :                         bs_batch_read_bs_dev(batch, blob->back_bs_dev, payload, lba, lba_count);
    2985             :                 }
    2986             : 
    2987       13707 :                 bs_batch_close(batch);
    2988       13707 :                 break;
    2989             :         }
    2990       11435 :         case SPDK_BLOB_WRITE:
    2991             :         case SPDK_BLOB_WRITE_ZEROES: {
    2992       11435 :                 if (is_allocated) {
    2993             :                         /* Write to the blob */
    2994             :                         spdk_bs_batch_t *batch;
    2995             : 
    2996       11223 :                         if (lba_count == 0) {
    2997           0 :                                 cb_fn(cb_arg, 0);
    2998           0 :                                 return;
    2999             :                         }
    3000             : 
    3001       11223 :                         batch = bs_batch_open(_ch, &cpl, blob);
    3002       11223 :                         if (!batch) {
    3003           0 :                                 cb_fn(cb_arg, -ENOMEM);
    3004           0 :                                 return;
    3005             :                         }
    3006             : 
    3007       11223 :                         if (op_type == SPDK_BLOB_WRITE) {
    3008       11191 :                                 bs_batch_write_dev(batch, payload, lba, lba_count);
    3009             :                         } else {
    3010          32 :                                 bs_batch_write_zeroes_dev(batch, lba, lba_count);
    3011             :                         }
    3012             : 
    3013       11223 :                         bs_batch_close(batch);
    3014             :                 } else {
    3015             :                         /* Queue this operation and allocate the cluster */
    3016             :                         spdk_bs_user_op_t *op;
    3017             : 
    3018         212 :                         op = bs_user_op_alloc(_ch, &cpl, op_type, blob, payload, 0, offset, length);
    3019         212 :                         if (!op) {
    3020           0 :                                 cb_fn(cb_arg, -ENOMEM);
    3021           0 :                                 return;
    3022             :                         }
    3023             : 
    3024         212 :                         bs_allocate_and_copy_cluster(blob, _ch, offset, op);
    3025             :                 }
    3026       11435 :                 break;
    3027             :         }
    3028          24 :         case SPDK_BLOB_UNMAP: {
    3029             :                 spdk_bs_batch_t *batch;
    3030             : 
    3031          24 :                 batch = bs_batch_open(_ch, &cpl, blob);
    3032          24 :                 if (!batch) {
    3033           0 :                         cb_fn(cb_arg, -ENOMEM);
    3034           0 :                         return;
    3035             :                 }
    3036             : 
    3037          24 :                 if (is_allocated) {
    3038          24 :                         bs_batch_unmap_dev(batch, lba, lba_count);
    3039             :                 }
    3040             : 
    3041          24 :                 bs_batch_close(batch);
    3042          24 :                 break;
    3043             :         }
    3044           0 :         case SPDK_BLOB_READV:
    3045             :         case SPDK_BLOB_WRITEV:
    3046           0 :                 SPDK_ERRLOG("readv/write not valid\n");
    3047           0 :                 cb_fn(cb_arg, -EINVAL);
    3048           0 :                 break;
    3049             :         }
    3050       25166 : }
    3051             : 
    3052             : static void
    3053       25752 : blob_request_submit_op(struct spdk_blob *blob, struct spdk_io_channel *_channel,
    3054             :                        void *payload, uint64_t offset, uint64_t length,
    3055             :                        spdk_blob_op_complete cb_fn, void *cb_arg, enum spdk_blob_op_type op_type)
    3056             : {
    3057       25752 :         assert(blob != NULL);
    3058             : 
    3059       25752 :         if (blob->data_ro && op_type != SPDK_BLOB_READ) {
    3060           4 :                 cb_fn(cb_arg, -EPERM);
    3061           4 :                 return;
    3062             :         }
    3063             : 
    3064       25748 :         if (length == 0) {
    3065         380 :                 cb_fn(cb_arg, 0);
    3066         380 :                 return;
    3067             :         }
    3068             : 
    3069       25368 :         if (offset + length > bs_cluster_to_lba(blob->bs, blob->active.num_clusters)) {
    3070          24 :                 cb_fn(cb_arg, -EINVAL);
    3071          24 :                 return;
    3072             :         }
    3073       25344 :         if (length <= bs_num_io_units_to_cluster_boundary(blob, offset)) {
    3074       25170 :                 blob_request_submit_op_single(_channel, blob, payload, offset, length,
    3075             :                                               cb_fn, cb_arg, op_type);
    3076             :         } else {
    3077         174 :                 blob_request_submit_op_split(_channel, blob, payload, offset, length,
    3078             :                                              cb_fn, cb_arg, op_type);
    3079             :         }
    3080             : }
    3081             : 
    3082             : struct rw_iov_ctx {
    3083             :         struct spdk_blob *blob;
    3084             :         struct spdk_io_channel *channel;
    3085             :         spdk_blob_op_complete cb_fn;
    3086             :         void *cb_arg;
    3087             :         bool read;
    3088             :         int iovcnt;
    3089             :         struct iovec *orig_iov;
    3090             :         uint64_t io_unit_offset;
    3091             :         uint64_t io_units_remaining;
    3092             :         uint64_t io_units_done;
    3093             :         struct spdk_blob_ext_io_opts *ext_io_opts;
    3094             :         struct iovec iov[0];
    3095             : };
    3096             : 
    3097             : static void
    3098        2360 : rw_iov_done(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
    3099             : {
    3100        2360 :         assert(cb_arg == NULL);
    3101        2360 :         bs_sequence_finish(seq, bserrno);
    3102        2360 : }
    3103             : 
    3104             : static void
    3105         744 : rw_iov_split_next(void *cb_arg, int bserrno)
    3106             : {
    3107         744 :         struct rw_iov_ctx *ctx = cb_arg;
    3108         744 :         struct spdk_blob *blob = ctx->blob;
    3109             :         struct iovec *iov, *orig_iov;
    3110             :         int iovcnt;
    3111             :         size_t orig_iovoff;
    3112             :         uint64_t io_units_count, io_units_to_boundary, io_unit_offset;
    3113             :         uint64_t byte_count;
    3114             : 
    3115         744 :         if (bserrno != 0 || ctx->io_units_remaining == 0) {
    3116         204 :                 ctx->cb_fn(ctx->cb_arg, bserrno);
    3117         204 :                 free(ctx);
    3118         204 :                 return;
    3119             :         }
    3120             : 
    3121         540 :         io_unit_offset = ctx->io_unit_offset;
    3122         540 :         io_units_to_boundary = bs_num_io_units_to_cluster_boundary(blob, io_unit_offset);
    3123         540 :         io_units_count = spdk_min(ctx->io_units_remaining, io_units_to_boundary);
    3124             :         /*
    3125             :          * Get index and offset into the original iov array for our current position in the I/O sequence.
    3126             :          *  byte_count will keep track of how many bytes remaining until orig_iov and orig_iovoff will
    3127             :          *  point to the current position in the I/O sequence.
    3128             :          */
    3129         540 :         byte_count = ctx->io_units_done * blob->bs->io_unit_size;
    3130         540 :         orig_iov = &ctx->orig_iov[0];
    3131         540 :         orig_iovoff = 0;
    3132        1148 :         while (byte_count > 0) {
    3133         608 :                 if (byte_count >= orig_iov->iov_len) {
    3134         352 :                         byte_count -= orig_iov->iov_len;
    3135         352 :                         orig_iov++;
    3136             :                 } else {
    3137         256 :                         orig_iovoff = byte_count;
    3138         256 :                         byte_count = 0;
    3139             :                 }
    3140             :         }
    3141             : 
    3142             :         /*
    3143             :          * Build an iov array for the next I/O in the sequence.  byte_count will keep track of how many
    3144             :          *  bytes of this next I/O remain to be accounted for in the new iov array.
    3145             :          */
    3146         540 :         byte_count = io_units_count * blob->bs->io_unit_size;
    3147         540 :         iov = &ctx->iov[0];
    3148         540 :         iovcnt = 0;
    3149        1380 :         while (byte_count > 0) {
    3150         840 :                 assert(iovcnt < ctx->iovcnt);
    3151         840 :                 iov->iov_len = spdk_min(byte_count, orig_iov->iov_len - orig_iovoff);
    3152         840 :                 iov->iov_base = orig_iov->iov_base + orig_iovoff;
    3153         840 :                 byte_count -= iov->iov_len;
    3154         840 :                 orig_iovoff = 0;
    3155         840 :                 orig_iov++;
    3156         840 :                 iov++;
    3157         840 :                 iovcnt++;
    3158             :         }
    3159             : 
    3160         540 :         ctx->io_unit_offset += io_units_count;
    3161         540 :         ctx->io_units_remaining -= io_units_count;
    3162         540 :         ctx->io_units_done += io_units_count;
    3163         540 :         iov = &ctx->iov[0];
    3164             : 
    3165         540 :         if (ctx->read) {
    3166         408 :                 spdk_blob_io_readv_ext(ctx->blob, ctx->channel, iov, iovcnt, io_unit_offset,
    3167             :                                        io_units_count, rw_iov_split_next, ctx, ctx->ext_io_opts);
    3168             :         } else {
    3169         132 :                 spdk_blob_io_writev_ext(ctx->blob, ctx->channel, iov, iovcnt, io_unit_offset,
    3170             :                                         io_units_count, rw_iov_split_next, ctx, ctx->ext_io_opts);
    3171             :         }
    3172             : }
    3173             : 
    3174             : static void
    3175        2588 : blob_request_submit_rw_iov(struct spdk_blob *blob, struct spdk_io_channel *_channel,
    3176             :                            struct iovec *iov, int iovcnt,
    3177             :                            uint64_t offset, uint64_t length, spdk_blob_op_complete cb_fn, void *cb_arg, bool read,
    3178             :                            struct spdk_blob_ext_io_opts *ext_io_opts)
    3179             : {
    3180        2588 :         struct spdk_bs_cpl      cpl;
    3181             : 
    3182        2588 :         assert(blob != NULL);
    3183             : 
    3184        2588 :         if (!read && blob->data_ro) {
    3185           4 :                 cb_fn(cb_arg, -EPERM);
    3186           4 :                 return;
    3187             :         }
    3188             : 
    3189        2584 :         if (length == 0) {
    3190           0 :                 cb_fn(cb_arg, 0);
    3191           0 :                 return;
    3192             :         }
    3193             : 
    3194        2584 :         if (offset + length > bs_cluster_to_lba(blob->bs, blob->active.num_clusters)) {
    3195           0 :                 cb_fn(cb_arg, -EINVAL);
    3196           0 :                 return;
    3197             :         }
    3198             : 
    3199             :         /*
    3200             :          * For now, we implement readv/writev using a sequence (instead of a batch) to account for having
    3201             :          *  to split a request that spans a cluster boundary.  For I/O that do not span a cluster boundary,
    3202             :          *  there will be no noticeable difference compared to using a batch.  For I/O that do span a cluster
    3203             :          *  boundary, the target LBAs (after blob offset to LBA translation) may not be contiguous, so we need
    3204             :          *  to allocate a separate iov array and split the I/O such that none of the resulting
    3205             :          *  smaller I/O cross a cluster boundary.  These smaller I/O will be issued in sequence (not in parallel)
    3206             :          *  but since this case happens very infrequently, any performance impact will be negligible.
    3207             :          *
    3208             :          * This could be optimized in the future to allocate a big enough iov array to account for all of the iovs
    3209             :          *  for all of the smaller I/Os, pre-build all of the iov arrays for the smaller I/Os, then issue them
    3210             :          *  in a batch.  That would also require creating an intermediate spdk_bs_cpl that would get called
    3211             :          *  when the batch was completed, to allow for freeing the memory for the iov arrays.
    3212             :          */
    3213        2584 :         if (spdk_likely(length <= bs_num_io_units_to_cluster_boundary(blob, offset))) {
    3214        2376 :                 uint64_t lba_count;
    3215        2376 :                 uint64_t lba;
    3216             :                 bool is_allocated;
    3217             : 
    3218        2376 :                 cpl.type = SPDK_BS_CPL_TYPE_BLOB_BASIC;
    3219        2376 :                 cpl.u.blob_basic.cb_fn = cb_fn;
    3220        2376 :                 cpl.u.blob_basic.cb_arg = cb_arg;
    3221             : 
    3222        2376 :                 if (blob->frozen_refcnt) {
    3223             :                         /* This blob I/O is frozen */
    3224             :                         enum spdk_blob_op_type op_type;
    3225             :                         spdk_bs_user_op_t *op;
    3226           0 :                         struct spdk_bs_channel *bs_channel = spdk_io_channel_get_ctx(_channel);
    3227             : 
    3228           0 :                         op_type = read ? SPDK_BLOB_READV : SPDK_BLOB_WRITEV;
    3229           0 :                         op = bs_user_op_alloc(_channel, &cpl, op_type, blob, iov, iovcnt, offset, length);
    3230           0 :                         if (!op) {
    3231           0 :                                 cb_fn(cb_arg, -ENOMEM);
    3232           0 :                                 return;
    3233             :                         }
    3234             : 
    3235           0 :                         TAILQ_INSERT_TAIL(&bs_channel->queued_io, op, link);
    3236             : 
    3237           0 :                         return;
    3238             :                 }
    3239             : 
    3240        2376 :                 is_allocated = blob_calculate_lba_and_lba_count(blob, offset, length, &lba, &lba_count);
    3241             : 
    3242        2376 :                 if (read) {
    3243             :                         spdk_bs_sequence_t *seq;
    3244             : 
    3245        2084 :                         seq = bs_sequence_start_blob(_channel, &cpl, blob);
    3246        2084 :                         if (!seq) {
    3247           0 :                                 cb_fn(cb_arg, -ENOMEM);
    3248           0 :                                 return;
    3249             :                         }
    3250             : 
    3251        2084 :                         seq->ext_io_opts = ext_io_opts;
    3252             : 
    3253        2084 :                         if (is_allocated) {
    3254         540 :                                 bs_sequence_readv_dev(seq, iov, iovcnt, lba, lba_count, rw_iov_done, NULL);
    3255             :                         } else {
    3256        1544 :                                 bs_sequence_readv_bs_dev(seq, blob->back_bs_dev, iov, iovcnt, lba, lba_count,
    3257             :                                                          rw_iov_done, NULL);
    3258             :                         }
    3259             :                 } else {
    3260         292 :                         if (is_allocated) {
    3261             :                                 spdk_bs_sequence_t *seq;
    3262             : 
    3263         276 :                                 seq = bs_sequence_start_blob(_channel, &cpl, blob);
    3264         276 :                                 if (!seq) {
    3265           0 :                                         cb_fn(cb_arg, -ENOMEM);
    3266           0 :                                         return;
    3267             :                                 }
    3268             : 
    3269         276 :                                 seq->ext_io_opts = ext_io_opts;
    3270             : 
    3271         276 :                                 bs_sequence_writev_dev(seq, iov, iovcnt, lba, lba_count, rw_iov_done, NULL);
    3272             :                         } else {
    3273             :                                 /* Queue this operation and allocate the cluster */
    3274             :                                 spdk_bs_user_op_t *op;
    3275             : 
    3276          16 :                                 op = bs_user_op_alloc(_channel, &cpl, SPDK_BLOB_WRITEV, blob, iov, iovcnt, offset,
    3277             :                                                       length);
    3278          16 :                                 if (!op) {
    3279           0 :                                         cb_fn(cb_arg, -ENOMEM);
    3280           0 :                                         return;
    3281             :                                 }
    3282             : 
    3283          16 :                                 op->ext_io_opts = ext_io_opts;
    3284             : 
    3285          16 :                                 bs_allocate_and_copy_cluster(blob, _channel, offset, op);
    3286             :                         }
    3287             :                 }
    3288             :         } else {
    3289             :                 struct rw_iov_ctx *ctx;
    3290             : 
    3291         208 :                 ctx = calloc(1, sizeof(struct rw_iov_ctx) + iovcnt * sizeof(struct iovec));
    3292         208 :                 if (ctx == NULL) {
    3293           4 :                         cb_fn(cb_arg, -ENOMEM);
    3294           4 :                         return;
    3295             :                 }
    3296             : 
    3297         204 :                 ctx->blob = blob;
    3298         204 :                 ctx->channel = _channel;
    3299         204 :                 ctx->cb_fn = cb_fn;
    3300         204 :                 ctx->cb_arg = cb_arg;
    3301         204 :                 ctx->read = read;
    3302         204 :                 ctx->orig_iov = iov;
    3303         204 :                 ctx->iovcnt = iovcnt;
    3304         204 :                 ctx->io_unit_offset = offset;
    3305         204 :                 ctx->io_units_remaining = length;
    3306         204 :                 ctx->io_units_done = 0;
    3307         204 :                 ctx->ext_io_opts = ext_io_opts;
    3308             : 
    3309         204 :                 rw_iov_split_next(ctx, 0);
    3310             :         }
    3311             : }
    3312             : 
    3313             : static struct spdk_blob *
    3314        7197 : blob_lookup(struct spdk_blob_store *bs, spdk_blob_id blobid)
    3315             : {
    3316        7197 :         struct spdk_blob find;
    3317             : 
    3318        7197 :         if (spdk_bit_array_get(bs->open_blobids, blobid) == 0) {
    3319        6556 :                 return NULL;
    3320             :         }
    3321             : 
    3322         641 :         find.id = blobid;
    3323         641 :         return RB_FIND(spdk_blob_tree, &bs->open_blobs, &find);
    3324             : }
    3325             : 
    3326             : static void
    3327        1678 : blob_get_snapshot_and_clone_entries(struct spdk_blob *blob,
    3328             :                                     struct spdk_blob_list **snapshot_entry, struct spdk_blob_list **clone_entry)
    3329             : {
    3330        1678 :         assert(blob != NULL);
    3331        1678 :         *snapshot_entry = NULL;
    3332        1678 :         *clone_entry = NULL;
    3333             : 
    3334        1678 :         if (blob->parent_id == SPDK_BLOBID_INVALID) {
    3335        1462 :                 return;
    3336             :         }
    3337             : 
    3338         308 :         TAILQ_FOREACH(*snapshot_entry, &blob->bs->snapshots, link) {
    3339         272 :                 if ((*snapshot_entry)->id == blob->parent_id) {
    3340         180 :                         break;
    3341             :                 }
    3342             :         }
    3343             : 
    3344         216 :         if (*snapshot_entry != NULL) {
    3345         216 :                 TAILQ_FOREACH(*clone_entry, &(*snapshot_entry)->clones, link) {
    3346         216 :                         if ((*clone_entry)->id == blob->id) {
    3347         180 :                                 break;
    3348             :                         }
    3349             :                 }
    3350             : 
    3351         180 :                 assert(*clone_entry != NULL);
    3352             :         }
    3353             : }
    3354             : 
    3355             : static int
    3356         768 : bs_channel_create(void *io_device, void *ctx_buf)
    3357             : {
    3358         768 :         struct spdk_blob_store          *bs = io_device;
    3359         768 :         struct spdk_bs_channel          *channel = ctx_buf;
    3360             :         struct spdk_bs_dev              *dev;
    3361         768 :         uint32_t                        max_ops = bs->max_channel_ops;
    3362             :         uint32_t                        i;
    3363             : 
    3364         768 :         dev = bs->dev;
    3365             : 
    3366         768 :         channel->req_mem = calloc(max_ops, sizeof(struct spdk_bs_request_set));
    3367         768 :         if (!channel->req_mem) {
    3368           0 :                 return -1;
    3369             :         }
    3370             : 
    3371         768 :         TAILQ_INIT(&channel->reqs);
    3372             : 
    3373      393984 :         for (i = 0; i < max_ops; i++) {
    3374      393216 :                 TAILQ_INSERT_TAIL(&channel->reqs, &channel->req_mem[i], link);
    3375             :         }
    3376             : 
    3377         768 :         channel->bs = bs;
    3378         768 :         channel->dev = dev;
    3379         768 :         channel->dev_channel = dev->create_channel(dev);
    3380             : 
    3381         768 :         if (!channel->dev_channel) {
    3382           0 :                 SPDK_ERRLOG("Failed to create device channel.\n");
    3383           0 :                 free(channel->req_mem);
    3384           0 :                 return -1;
    3385             :         }
    3386             : 
    3387         768 :         channel->new_cluster_page = spdk_zmalloc(SPDK_BS_PAGE_SIZE, 0, NULL, SPDK_ENV_SOCKET_ID_ANY,
    3388             :                                     SPDK_MALLOC_DMA);
    3389         768 :         if (!channel->new_cluster_page) {
    3390           0 :                 SPDK_ERRLOG("Failed to allocate new cluster page\n");
    3391           0 :                 free(channel->req_mem);
    3392           0 :                 channel->dev->destroy_channel(channel->dev, channel->dev_channel);
    3393           0 :                 return -1;
    3394             :         }
    3395             : 
    3396         768 :         TAILQ_INIT(&channel->need_cluster_alloc);
    3397         768 :         TAILQ_INIT(&channel->queued_io);
    3398         768 :         RB_INIT(&channel->esnap_channels);
    3399             : 
    3400         768 :         return 0;
    3401             : }
    3402             : 
    3403             : static void
    3404         768 : bs_channel_destroy(void *io_device, void *ctx_buf)
    3405             : {
    3406         768 :         struct spdk_bs_channel *channel = ctx_buf;
    3407             :         spdk_bs_user_op_t *op;
    3408             : 
    3409         768 :         while (!TAILQ_EMPTY(&channel->need_cluster_alloc)) {
    3410           0 :                 op = TAILQ_FIRST(&channel->need_cluster_alloc);
    3411           0 :                 TAILQ_REMOVE(&channel->need_cluster_alloc, op, link);
    3412           0 :                 bs_user_op_abort(op, -EIO);
    3413             :         }
    3414             : 
    3415         768 :         while (!TAILQ_EMPTY(&channel->queued_io)) {
    3416           0 :                 op = TAILQ_FIRST(&channel->queued_io);
    3417           0 :                 TAILQ_REMOVE(&channel->queued_io, op, link);
    3418           0 :                 bs_user_op_abort(op, -EIO);
    3419             :         }
    3420             : 
    3421         768 :         blob_esnap_destroy_bs_channel(channel);
    3422             : 
    3423         768 :         free(channel->req_mem);
    3424         768 :         spdk_free(channel->new_cluster_page);
    3425         768 :         channel->dev->destroy_channel(channel->dev, channel->dev_channel);
    3426         768 : }
    3427             : 
    3428             : static void
    3429         752 : bs_dev_destroy(void *io_device)
    3430             : {
    3431         752 :         struct spdk_blob_store *bs = io_device;
    3432             :         struct spdk_blob        *blob, *blob_tmp;
    3433             : 
    3434         752 :         bs->dev->destroy(bs->dev);
    3435             : 
    3436         752 :         RB_FOREACH_SAFE(blob, spdk_blob_tree, &bs->open_blobs, blob_tmp) {
    3437           0 :                 RB_REMOVE(spdk_blob_tree, &bs->open_blobs, blob);
    3438           0 :                 spdk_bit_array_clear(bs->open_blobids, blob->id);
    3439           0 :                 blob_free(blob);
    3440             :         }
    3441             : 
    3442         752 :         spdk_spin_destroy(&bs->used_lock);
    3443             : 
    3444         752 :         spdk_bit_array_free(&bs->open_blobids);
    3445         752 :         spdk_bit_array_free(&bs->used_blobids);
    3446         752 :         spdk_bit_array_free(&bs->used_md_pages);
    3447         752 :         spdk_bit_pool_free(&bs->used_clusters);
    3448             :         /*
    3449             :          * If this function is called for any reason except a successful unload,
    3450             :          * the unload_cpl type will be NONE and this will be a nop.
    3451             :          */
    3452         752 :         bs_call_cpl(&bs->unload_cpl, bs->unload_err);
    3453             : 
    3454         752 :         free(bs);
    3455         752 : }
    3456             : 
    3457             : static int
    3458         820 : bs_blob_list_add(struct spdk_blob *blob)
    3459             : {
    3460             :         spdk_blob_id snapshot_id;
    3461         820 :         struct spdk_blob_list *snapshot_entry = NULL;
    3462         820 :         struct spdk_blob_list *clone_entry = NULL;
    3463             : 
    3464         820 :         assert(blob != NULL);
    3465             : 
    3466         820 :         snapshot_id = blob->parent_id;
    3467         820 :         if (snapshot_id == SPDK_BLOBID_INVALID ||
    3468             :             snapshot_id == SPDK_BLOBID_EXTERNAL_SNAPSHOT) {
    3469         460 :                 return 0;
    3470             :         }
    3471             : 
    3472         360 :         snapshot_entry = bs_get_snapshot_entry(blob->bs, snapshot_id);
    3473         360 :         if (snapshot_entry == NULL) {
    3474             :                 /* Snapshot not found */
    3475         256 :                 snapshot_entry = calloc(1, sizeof(struct spdk_blob_list));
    3476         256 :                 if (snapshot_entry == NULL) {
    3477           0 :                         return -ENOMEM;
    3478             :                 }
    3479         256 :                 snapshot_entry->id = snapshot_id;
    3480         256 :                 TAILQ_INIT(&snapshot_entry->clones);
    3481         256 :                 TAILQ_INSERT_TAIL(&blob->bs->snapshots, snapshot_entry, link);
    3482             :         } else {
    3483         164 :                 TAILQ_FOREACH(clone_entry, &snapshot_entry->clones, link) {
    3484          60 :                         if (clone_entry->id == blob->id) {
    3485           0 :                                 break;
    3486             :                         }
    3487             :                 }
    3488             :         }
    3489             : 
    3490         360 :         if (clone_entry == NULL) {
    3491             :                 /* Clone not found */
    3492         360 :                 clone_entry = calloc(1, sizeof(struct spdk_blob_list));
    3493         360 :                 if (clone_entry == NULL) {
    3494           0 :                         return -ENOMEM;
    3495             :                 }
    3496         360 :                 clone_entry->id = blob->id;
    3497         360 :                 TAILQ_INIT(&clone_entry->clones);
    3498         360 :                 TAILQ_INSERT_TAIL(&snapshot_entry->clones, clone_entry, link);
    3499         360 :                 snapshot_entry->clone_count++;
    3500             :         }
    3501             : 
    3502         360 :         return 0;
    3503             : }
    3504             : 
    3505             : static void
    3506        1600 : bs_blob_list_remove(struct spdk_blob *blob)
    3507             : {
    3508        1600 :         struct spdk_blob_list *snapshot_entry = NULL;
    3509        1600 :         struct spdk_blob_list *clone_entry = NULL;
    3510             : 
    3511        1600 :         blob_get_snapshot_and_clone_entries(blob, &snapshot_entry, &clone_entry);
    3512             : 
    3513        1600 :         if (snapshot_entry == NULL) {
    3514        1436 :                 return;
    3515             :         }
    3516             : 
    3517         164 :         blob->parent_id = SPDK_BLOBID_INVALID;
    3518         164 :         TAILQ_REMOVE(&snapshot_entry->clones, clone_entry, link);
    3519         164 :         free(clone_entry);
    3520             : 
    3521         164 :         snapshot_entry->clone_count--;
    3522             : }
    3523             : 
    3524             : static int
    3525         752 : bs_blob_list_free(struct spdk_blob_store *bs)
    3526             : {
    3527             :         struct spdk_blob_list *snapshot_entry;
    3528             :         struct spdk_blob_list *snapshot_entry_tmp;
    3529             :         struct spdk_blob_list *clone_entry;
    3530             :         struct spdk_blob_list *clone_entry_tmp;
    3531             : 
    3532         884 :         TAILQ_FOREACH_SAFE(snapshot_entry, &bs->snapshots, link, snapshot_entry_tmp) {
    3533         280 :                 TAILQ_FOREACH_SAFE(clone_entry, &snapshot_entry->clones, link, clone_entry_tmp) {
    3534         148 :                         TAILQ_REMOVE(&snapshot_entry->clones, clone_entry, link);
    3535         148 :                         free(clone_entry);
    3536             :                 }
    3537         132 :                 TAILQ_REMOVE(&bs->snapshots, snapshot_entry, link);
    3538         132 :                 free(snapshot_entry);
    3539             :         }
    3540             : 
    3541         752 :         return 0;
    3542             : }
    3543             : 
    3544             : static void
    3545         752 : bs_free(struct spdk_blob_store *bs)
    3546             : {
    3547         752 :         bs_blob_list_free(bs);
    3548             : 
    3549         752 :         bs_unregister_md_thread(bs);
    3550         752 :         spdk_io_device_unregister(bs, bs_dev_destroy);
    3551         752 : }
    3552             : 
    3553             : void
    3554        1004 : spdk_bs_opts_init(struct spdk_bs_opts *opts, size_t opts_size)
    3555             : {
    3556             : 
    3557        1004 :         if (!opts) {
    3558           0 :                 SPDK_ERRLOG("opts should not be NULL\n");
    3559           0 :                 return;
    3560             :         }
    3561             : 
    3562        1004 :         if (!opts_size) {
    3563           0 :                 SPDK_ERRLOG("opts_size should not be zero value\n");
    3564           0 :                 return;
    3565             :         }
    3566             : 
    3567        1004 :         memset(opts, 0, opts_size);
    3568        1004 :         opts->opts_size = opts_size;
    3569             : 
    3570             : #define FIELD_OK(field) \
    3571             :         offsetof(struct spdk_bs_opts, field) + sizeof(opts->field) <= opts_size
    3572             : 
    3573             : #define SET_FIELD(field, value) \
    3574             :         if (FIELD_OK(field)) { \
    3575             :                 opts->field = value; \
    3576             :         } \
    3577             : 
    3578        1004 :         SET_FIELD(cluster_sz, SPDK_BLOB_OPTS_CLUSTER_SZ);
    3579        1004 :         SET_FIELD(num_md_pages, SPDK_BLOB_OPTS_NUM_MD_PAGES);
    3580        1004 :         SET_FIELD(max_md_ops, SPDK_BLOB_OPTS_NUM_MD_PAGES);
    3581        1004 :         SET_FIELD(max_channel_ops, SPDK_BLOB_OPTS_DEFAULT_CHANNEL_OPS);
    3582        1004 :         SET_FIELD(clear_method,  BS_CLEAR_WITH_UNMAP);
    3583             : 
    3584        1004 :         if (FIELD_OK(bstype)) {
    3585        1004 :                 memset(&opts->bstype, 0, sizeof(opts->bstype));
    3586             :         }
    3587             : 
    3588        1004 :         SET_FIELD(iter_cb_fn, NULL);
    3589        1004 :         SET_FIELD(iter_cb_arg, NULL);
    3590        1004 :         SET_FIELD(force_recover, false);
    3591        1004 :         SET_FIELD(esnap_bs_dev_create, NULL);
    3592        1004 :         SET_FIELD(esnap_ctx, NULL);
    3593             : 
    3594             : #undef FIELD_OK
    3595             : #undef SET_FIELD
    3596             : }
    3597             : 
    3598             : static int
    3599         456 : bs_opts_verify(struct spdk_bs_opts *opts)
    3600             : {
    3601         456 :         if (opts->cluster_sz == 0 || opts->num_md_pages == 0 || opts->max_md_ops == 0 ||
    3602         452 :             opts->max_channel_ops == 0) {
    3603           4 :                 SPDK_ERRLOG("Blobstore options cannot be set to 0\n");
    3604           4 :                 return -1;
    3605             :         }
    3606             : 
    3607         452 :         return 0;
    3608             : }
    3609             : 
    3610             : /* START spdk_bs_load */
    3611             : 
    3612             : /* spdk_bs_load_ctx is used for init, load, unload and dump code paths. */
    3613             : 
    3614             : struct spdk_bs_load_ctx {
    3615             :         struct spdk_blob_store          *bs;
    3616             :         struct spdk_bs_super_block      *super;
    3617             : 
    3618             :         struct spdk_bs_md_mask          *mask;
    3619             :         bool                            in_page_chain;
    3620             :         uint32_t                        page_index;
    3621             :         uint32_t                        cur_page;
    3622             :         struct spdk_blob_md_page        *page;
    3623             : 
    3624             :         uint64_t                        num_extent_pages;
    3625             :         uint32_t                        *extent_page_num;
    3626             :         struct spdk_blob_md_page        *extent_pages;
    3627             :         struct spdk_bit_array           *used_clusters;
    3628             : 
    3629             :         spdk_bs_sequence_t                      *seq;
    3630             :         spdk_blob_op_with_handle_complete       iter_cb_fn;
    3631             :         void                                    *iter_cb_arg;
    3632             :         struct spdk_blob                        *blob;
    3633             :         spdk_blob_id                            blobid;
    3634             : 
    3635             :         bool                                    force_recover;
    3636             : 
    3637             :         /* These fields are used in the spdk_bs_dump path. */
    3638             :         bool                                    dumping;
    3639             :         FILE                                    *fp;
    3640             :         spdk_bs_dump_print_xattr                print_xattr_fn;
    3641             :         char                                    xattr_name[4096];
    3642             : };
    3643             : 
    3644             : static int
    3645         756 : bs_alloc(struct spdk_bs_dev *dev, struct spdk_bs_opts *opts, struct spdk_blob_store **_bs,
    3646             :          struct spdk_bs_load_ctx **_ctx)
    3647             : {
    3648             :         struct spdk_blob_store  *bs;
    3649             :         struct spdk_bs_load_ctx *ctx;
    3650             :         uint64_t dev_size;
    3651             :         int rc;
    3652             : 
    3653         756 :         dev_size = dev->blocklen * dev->blockcnt;
    3654         756 :         if (dev_size < opts->cluster_sz) {
    3655             :                 /* Device size cannot be smaller than cluster size of blobstore */
    3656           0 :                 SPDK_INFOLOG(blob, "Device size %" PRIu64 " is smaller than cluster size %" PRIu32 "\n",
    3657             :                              dev_size, opts->cluster_sz);
    3658           0 :                 return -ENOSPC;
    3659             :         }
    3660         756 :         if (opts->cluster_sz < SPDK_BS_PAGE_SIZE) {
    3661             :                 /* Cluster size cannot be smaller than page size */
    3662           4 :                 SPDK_ERRLOG("Cluster size %" PRIu32 " is smaller than page size %d\n",
    3663             :                             opts->cluster_sz, SPDK_BS_PAGE_SIZE);
    3664           4 :                 return -EINVAL;
    3665             :         }
    3666         752 :         bs = calloc(1, sizeof(struct spdk_blob_store));
    3667         752 :         if (!bs) {
    3668           0 :                 return -ENOMEM;
    3669             :         }
    3670             : 
    3671         752 :         ctx = calloc(1, sizeof(struct spdk_bs_load_ctx));
    3672         752 :         if (!ctx) {
    3673           0 :                 free(bs);
    3674           0 :                 return -ENOMEM;
    3675             :         }
    3676             : 
    3677         752 :         ctx->bs = bs;
    3678         752 :         ctx->iter_cb_fn = opts->iter_cb_fn;
    3679         752 :         ctx->iter_cb_arg = opts->iter_cb_arg;
    3680         752 :         ctx->force_recover = opts->force_recover;
    3681             : 
    3682         752 :         ctx->super = spdk_zmalloc(sizeof(*ctx->super), 0x1000, NULL,
    3683             :                                   SPDK_ENV_SOCKET_ID_ANY, SPDK_MALLOC_DMA);
    3684         752 :         if (!ctx->super) {
    3685           0 :                 free(ctx);
    3686           0 :                 free(bs);
    3687           0 :                 return -ENOMEM;
    3688             :         }
    3689             : 
    3690         752 :         RB_INIT(&bs->open_blobs);
    3691         752 :         TAILQ_INIT(&bs->snapshots);
    3692         752 :         bs->dev = dev;
    3693         752 :         bs->md_thread = spdk_get_thread();
    3694         752 :         assert(bs->md_thread != NULL);
    3695             : 
    3696             :         /*
    3697             :          * Do not use bs_lba_to_cluster() here since blockcnt may not be an
    3698             :          *  even multiple of the cluster size.
    3699             :          */
    3700         752 :         bs->cluster_sz = opts->cluster_sz;
    3701         752 :         bs->total_clusters = dev->blockcnt / (bs->cluster_sz / dev->blocklen);
    3702         752 :         ctx->used_clusters = spdk_bit_array_create(bs->total_clusters);
    3703         752 :         if (!ctx->used_clusters) {
    3704           0 :                 spdk_free(ctx->super);
    3705           0 :                 free(ctx);
    3706           0 :                 free(bs);
    3707           0 :                 return -ENOMEM;
    3708             :         }
    3709             : 
    3710         752 :         bs->pages_per_cluster = bs->cluster_sz / SPDK_BS_PAGE_SIZE;
    3711         752 :         if (spdk_u32_is_pow2(bs->pages_per_cluster)) {
    3712         752 :                 bs->pages_per_cluster_shift = spdk_u32log2(bs->pages_per_cluster);
    3713             :         }
    3714         752 :         bs->num_free_clusters = bs->total_clusters;
    3715         752 :         bs->io_unit_size = dev->blocklen;
    3716             : 
    3717         752 :         bs->max_channel_ops = opts->max_channel_ops;
    3718         752 :         bs->super_blob = SPDK_BLOBID_INVALID;
    3719         752 :         memcpy(&bs->bstype, &opts->bstype, sizeof(opts->bstype));
    3720         752 :         bs->esnap_bs_dev_create = opts->esnap_bs_dev_create;
    3721         752 :         bs->esnap_ctx = opts->esnap_ctx;
    3722             : 
    3723             :         /* The metadata is assumed to be at least 1 page */
    3724         752 :         bs->used_md_pages = spdk_bit_array_create(1);
    3725         752 :         bs->used_blobids = spdk_bit_array_create(0);
    3726         752 :         bs->open_blobids = spdk_bit_array_create(0);
    3727             : 
    3728         752 :         spdk_spin_init(&bs->used_lock);
    3729             : 
    3730         752 :         spdk_io_device_register(bs, bs_channel_create, bs_channel_destroy,
    3731             :                                 sizeof(struct spdk_bs_channel), "blobstore");
    3732         752 :         rc = bs_register_md_thread(bs);
    3733         752 :         if (rc == -1) {
    3734           0 :                 spdk_io_device_unregister(bs, NULL);
    3735           0 :                 spdk_spin_destroy(&bs->used_lock);
    3736           0 :                 spdk_bit_array_free(&bs->open_blobids);
    3737           0 :                 spdk_bit_array_free(&bs->used_blobids);
    3738           0 :                 spdk_bit_array_free(&bs->used_md_pages);
    3739           0 :                 spdk_bit_array_free(&ctx->used_clusters);
    3740           0 :                 spdk_free(ctx->super);
    3741           0 :                 free(ctx);
    3742           0 :                 free(bs);
    3743             :                 /* FIXME: this is a lie but don't know how to get a proper error code here */
    3744           0 :                 return -ENOMEM;
    3745             :         }
    3746             : 
    3747         752 :         *_ctx = ctx;
    3748         752 :         *_bs = bs;
    3749         752 :         return 0;
    3750             : }
    3751             : 
    3752             : static void
    3753          24 : bs_load_ctx_fail(struct spdk_bs_load_ctx *ctx, int bserrno)
    3754             : {
    3755          24 :         assert(bserrno != 0);
    3756             : 
    3757          24 :         spdk_free(ctx->super);
    3758          24 :         bs_sequence_finish(ctx->seq, bserrno);
    3759          24 :         bs_free(ctx->bs);
    3760          24 :         spdk_bit_array_free(&ctx->used_clusters);
    3761          24 :         free(ctx);
    3762          24 : }
    3763             : 
    3764             : static void
    3765         796 : bs_write_super(spdk_bs_sequence_t *seq, struct spdk_blob_store *bs,
    3766             :                struct spdk_bs_super_block *super, spdk_bs_sequence_cpl cb_fn, void *cb_arg)
    3767             : {
    3768             :         /* Update the values in the super block */
    3769         796 :         super->super_blob = bs->super_blob;
    3770         796 :         memcpy(&super->bstype, &bs->bstype, sizeof(bs->bstype));
    3771         796 :         super->crc = blob_md_page_calc_crc(super);
    3772         796 :         bs_sequence_write_dev(seq, super, bs_page_to_lba(bs, 0),
    3773         796 :                               bs_byte_to_lba(bs, sizeof(*super)),
    3774             :                               cb_fn, cb_arg);
    3775         796 : }
    3776             : 
    3777             : static void
    3778         732 : bs_write_used_clusters(spdk_bs_sequence_t *seq, void *arg, spdk_bs_sequence_cpl cb_fn)
    3779             : {
    3780         732 :         struct spdk_bs_load_ctx *ctx = arg;
    3781             :         uint64_t        mask_size, lba, lba_count;
    3782             : 
    3783             :         /* Write out the used clusters mask */
    3784         732 :         mask_size = ctx->super->used_cluster_mask_len * SPDK_BS_PAGE_SIZE;
    3785         732 :         ctx->mask = spdk_zmalloc(mask_size, 0x1000, NULL,
    3786             :                                  SPDK_ENV_SOCKET_ID_ANY, SPDK_MALLOC_DMA);
    3787         732 :         if (!ctx->mask) {
    3788           0 :                 bs_load_ctx_fail(ctx, -ENOMEM);
    3789           0 :                 return;
    3790             :         }
    3791             : 
    3792         732 :         ctx->mask->type = SPDK_MD_MASK_TYPE_USED_CLUSTERS;
    3793         732 :         ctx->mask->length = ctx->bs->total_clusters;
    3794             :         /* We could get here through the normal unload path, or through dirty
    3795             :          * shutdown recovery.  For the normal unload path, we use the mask from
    3796             :          * the bit pool.  For dirty shutdown recovery, we don't have a bit pool yet -
    3797             :          * only the bit array from the load ctx.
    3798             :          */
    3799         732 :         if (ctx->bs->used_clusters) {
    3800         626 :                 assert(ctx->mask->length == spdk_bit_pool_capacity(ctx->bs->used_clusters));
    3801         626 :                 spdk_bit_pool_store_mask(ctx->bs->used_clusters, ctx->mask->mask);
    3802             :         } else {
    3803         106 :                 assert(ctx->mask->length == spdk_bit_array_capacity(ctx->used_clusters));
    3804         106 :                 spdk_bit_array_store_mask(ctx->used_clusters, ctx->mask->mask);
    3805             :         }
    3806         732 :         lba = bs_page_to_lba(ctx->bs, ctx->super->used_cluster_mask_start);
    3807         732 :         lba_count = bs_page_to_lba(ctx->bs, ctx->super->used_cluster_mask_len);
    3808         732 :         bs_sequence_write_dev(seq, ctx->mask, lba, lba_count, cb_fn, arg);
    3809             : }
    3810             : 
    3811             : static void
    3812         732 : bs_write_used_md(spdk_bs_sequence_t *seq, void *arg, spdk_bs_sequence_cpl cb_fn)
    3813             : {
    3814         732 :         struct spdk_bs_load_ctx *ctx = arg;
    3815             :         uint64_t        mask_size, lba, lba_count;
    3816             : 
    3817         732 :         mask_size = ctx->super->used_page_mask_len * SPDK_BS_PAGE_SIZE;
    3818         732 :         ctx->mask = spdk_zmalloc(mask_size, 0x1000, NULL,
    3819             :                                  SPDK_ENV_SOCKET_ID_ANY, SPDK_MALLOC_DMA);
    3820         732 :         if (!ctx->mask) {
    3821           0 :                 bs_load_ctx_fail(ctx, -ENOMEM);
    3822           0 :                 return;
    3823             :         }
    3824             : 
    3825         732 :         ctx->mask->type = SPDK_MD_MASK_TYPE_USED_PAGES;
    3826         732 :         ctx->mask->length = ctx->super->md_len;
    3827         732 :         assert(ctx->mask->length == spdk_bit_array_capacity(ctx->bs->used_md_pages));
    3828             : 
    3829         732 :         spdk_bit_array_store_mask(ctx->bs->used_md_pages, ctx->mask->mask);
    3830         732 :         lba = bs_page_to_lba(ctx->bs, ctx->super->used_page_mask_start);
    3831         732 :         lba_count = bs_page_to_lba(ctx->bs, ctx->super->used_page_mask_len);
    3832         732 :         bs_sequence_write_dev(seq, ctx->mask, lba, lba_count, cb_fn, arg);
    3833             : }
    3834             : 
    3835             : static void
    3836         732 : bs_write_used_blobids(spdk_bs_sequence_t *seq, void *arg, spdk_bs_sequence_cpl cb_fn)
    3837             : {
    3838         732 :         struct spdk_bs_load_ctx *ctx = arg;
    3839             :         uint64_t        mask_size, lba, lba_count;
    3840             : 
    3841         732 :         if (ctx->super->used_blobid_mask_len == 0) {
    3842             :                 /*
    3843             :                  * This is a pre-v3 on-disk format where the blobid mask does not get
    3844             :                  *  written to disk.
    3845             :                  */
    3846          24 :                 cb_fn(seq, arg, 0);
    3847          24 :                 return;
    3848             :         }
    3849             : 
    3850         708 :         mask_size = ctx->super->used_blobid_mask_len * SPDK_BS_PAGE_SIZE;
    3851         708 :         ctx->mask = spdk_zmalloc(mask_size, 0x1000, NULL, SPDK_ENV_SOCKET_ID_ANY,
    3852             :                                  SPDK_MALLOC_DMA);
    3853         708 :         if (!ctx->mask) {
    3854           0 :                 bs_load_ctx_fail(ctx, -ENOMEM);
    3855           0 :                 return;
    3856             :         }
    3857             : 
    3858         708 :         ctx->mask->type = SPDK_MD_MASK_TYPE_USED_BLOBIDS;
    3859         708 :         ctx->mask->length = ctx->super->md_len;
    3860         708 :         assert(ctx->mask->length == spdk_bit_array_capacity(ctx->bs->used_blobids));
    3861             : 
    3862         708 :         spdk_bit_array_store_mask(ctx->bs->used_blobids, ctx->mask->mask);
    3863         708 :         lba = bs_page_to_lba(ctx->bs, ctx->super->used_blobid_mask_start);
    3864         708 :         lba_count = bs_page_to_lba(ctx->bs, ctx->super->used_blobid_mask_len);
    3865         708 :         bs_sequence_write_dev(seq, ctx->mask, lba, lba_count, cb_fn, arg);
    3866             : }
    3867             : 
    3868             : static void
    3869         600 : blob_set_thin_provision(struct spdk_blob *blob)
    3870             : {
    3871         600 :         blob_verify_md_op(blob);
    3872         600 :         blob->invalid_flags |= SPDK_BLOB_THIN_PROV;
    3873         600 :         blob->state = SPDK_BLOB_STATE_DIRTY;
    3874         600 : }
    3875             : 
    3876             : static void
    3877        1970 : blob_set_clear_method(struct spdk_blob *blob, enum blob_clear_method clear_method)
    3878             : {
    3879        1970 :         blob_verify_md_op(blob);
    3880        1970 :         blob->clear_method = clear_method;
    3881        1970 :         blob->md_ro_flags |= (clear_method << SPDK_BLOB_CLEAR_METHOD_SHIFT);
    3882        1970 :         blob->state = SPDK_BLOB_STATE_DIRTY;
    3883        1970 : }
    3884             : 
    3885             : static void bs_load_iter(void *arg, struct spdk_blob *blob, int bserrno);
    3886             : 
    3887             : static void
    3888          24 : bs_delete_corrupted_blob_cpl(void *cb_arg, int bserrno)
    3889             : {
    3890          24 :         struct spdk_bs_load_ctx *ctx = cb_arg;
    3891             :         spdk_blob_id id;
    3892             :         int64_t page_num;
    3893             : 
    3894             :         /* Iterate to next blob (we can't use spdk_bs_iter_next function as our
    3895             :          * last blob has been removed */
    3896          24 :         page_num = bs_blobid_to_page(ctx->blobid);
    3897          24 :         page_num++;
    3898          24 :         page_num = spdk_bit_array_find_first_set(ctx->bs->used_blobids, page_num);
    3899          24 :         if (page_num >= spdk_bit_array_capacity(ctx->bs->used_blobids)) {
    3900          24 :                 bs_load_iter(ctx, NULL, -ENOENT);
    3901          24 :                 return;
    3902             :         }
    3903             : 
    3904           0 :         id = bs_page_to_blobid(page_num);
    3905             : 
    3906           0 :         spdk_bs_open_blob(ctx->bs, id, bs_load_iter, ctx);
    3907             : }
    3908             : 
    3909             : static void
    3910          24 : bs_delete_corrupted_close_cb(void *cb_arg, int bserrno)
    3911             : {
    3912          24 :         struct spdk_bs_load_ctx *ctx = cb_arg;
    3913             : 
    3914          24 :         if (bserrno != 0) {
    3915           0 :                 SPDK_ERRLOG("Failed to close corrupted blob\n");
    3916           0 :                 spdk_bs_iter_next(ctx->bs, ctx->blob, bs_load_iter, ctx);
    3917           0 :                 return;
    3918             :         }
    3919             : 
    3920          24 :         spdk_bs_delete_blob(ctx->bs, ctx->blobid, bs_delete_corrupted_blob_cpl, ctx);
    3921             : }
    3922             : 
    3923             : static void
    3924          24 : bs_delete_corrupted_blob(void *cb_arg, int bserrno)
    3925             : {
    3926          24 :         struct spdk_bs_load_ctx *ctx = cb_arg;
    3927             :         uint64_t i;
    3928             : 
    3929          24 :         if (bserrno != 0) {
    3930           0 :                 SPDK_ERRLOG("Failed to close clone of a corrupted blob\n");
    3931           0 :                 spdk_bs_iter_next(ctx->bs, ctx->blob, bs_load_iter, ctx);
    3932           0 :                 return;
    3933             :         }
    3934             : 
    3935             :         /* Snapshot and clone have the same copy of cluster map and extent pages
    3936             :          * at this point. Let's clear both for snapshot now,
    3937             :          * so that it won't be cleared for clone later when we remove snapshot.
    3938             :          * Also set thin provision to pass data corruption check */
    3939         264 :         for (i = 0; i < ctx->blob->active.num_clusters; i++) {
    3940         240 :                 ctx->blob->active.clusters[i] = 0;
    3941             :         }
    3942          36 :         for (i = 0; i < ctx->blob->active.num_extent_pages; i++) {
    3943          12 :                 ctx->blob->active.extent_pages[i] = 0;
    3944             :         }
    3945             : 
    3946          24 :         ctx->blob->md_ro = false;
    3947             : 
    3948          24 :         blob_set_thin_provision(ctx->blob);
    3949             : 
    3950          24 :         ctx->blobid = ctx->blob->id;
    3951             : 
    3952          24 :         spdk_blob_close(ctx->blob, bs_delete_corrupted_close_cb, ctx);
    3953             : }
    3954             : 
    3955             : static void
    3956          12 : bs_update_corrupted_blob(void *cb_arg, int bserrno)
    3957             : {
    3958          12 :         struct spdk_bs_load_ctx *ctx = cb_arg;
    3959             : 
    3960          12 :         if (bserrno != 0) {
    3961           0 :                 SPDK_ERRLOG("Failed to close clone of a corrupted blob\n");
    3962           0 :                 spdk_bs_iter_next(ctx->bs, ctx->blob, bs_load_iter, ctx);
    3963           0 :                 return;
    3964             :         }
    3965             : 
    3966          12 :         ctx->blob->md_ro = false;
    3967          12 :         blob_remove_xattr(ctx->blob, SNAPSHOT_PENDING_REMOVAL, true);
    3968          12 :         blob_remove_xattr(ctx->blob, SNAPSHOT_IN_PROGRESS, true);
    3969          12 :         spdk_blob_set_read_only(ctx->blob);
    3970             : 
    3971          12 :         if (ctx->iter_cb_fn) {
    3972           0 :                 ctx->iter_cb_fn(ctx->iter_cb_arg, ctx->blob, 0);
    3973             :         }
    3974          12 :         bs_blob_list_add(ctx->blob);
    3975             : 
    3976          12 :         spdk_bs_iter_next(ctx->bs, ctx->blob, bs_load_iter, ctx);
    3977             : }
    3978             : 
    3979             : static void
    3980          36 : bs_examine_clone(void *cb_arg, struct spdk_blob *blob, int bserrno)
    3981             : {
    3982          36 :         struct spdk_bs_load_ctx *ctx = cb_arg;
    3983             : 
    3984          36 :         if (bserrno != 0) {
    3985           0 :                 SPDK_ERRLOG("Failed to open clone of a corrupted blob\n");
    3986           0 :                 spdk_bs_iter_next(ctx->bs, ctx->blob, bs_load_iter, ctx);
    3987           0 :                 return;
    3988             :         }
    3989             : 
    3990          36 :         if (blob->parent_id == ctx->blob->id) {
    3991             :                 /* Power failure occurred before updating clone (snapshot delete case)
    3992             :                  * or after updating clone (creating snapshot case) - keep snapshot */
    3993          12 :                 spdk_blob_close(blob, bs_update_corrupted_blob, ctx);
    3994             :         } else {
    3995             :                 /* Power failure occurred after updating clone (snapshot delete case)
    3996             :                  * or before updating clone (creating snapshot case) - remove snapshot */
    3997          24 :                 spdk_blob_close(blob, bs_delete_corrupted_blob, ctx);
    3998             :         }
    3999             : }
    4000             : 
    4001             : static void
    4002         720 : bs_load_iter(void *arg, struct spdk_blob *blob, int bserrno)
    4003             : {
    4004         720 :         struct spdk_bs_load_ctx *ctx = arg;
    4005         720 :         const void *value;
    4006         720 :         size_t len;
    4007         720 :         int rc = 0;
    4008             : 
    4009         720 :         if (bserrno == 0) {
    4010             :                 /* Examine blob if it is corrupted after power failure. Fix
    4011             :                  * the ones that can be fixed and remove any other corrupted
    4012             :                  * ones. If it is not corrupted just process it */
    4013         440 :                 rc = blob_get_xattr_value(blob, SNAPSHOT_PENDING_REMOVAL, &value, &len, true);
    4014         440 :                 if (rc != 0) {
    4015         420 :                         rc = blob_get_xattr_value(blob, SNAPSHOT_IN_PROGRESS, &value, &len, true);
    4016         420 :                         if (rc != 0) {
    4017             :                                 /* Not corrupted - process it and continue with iterating through blobs */
    4018         404 :                                 if (ctx->iter_cb_fn) {
    4019          34 :                                         ctx->iter_cb_fn(ctx->iter_cb_arg, blob, 0);
    4020             :                                 }
    4021         404 :                                 bs_blob_list_add(blob);
    4022         404 :                                 spdk_bs_iter_next(ctx->bs, blob, bs_load_iter, ctx);
    4023         404 :                                 return;
    4024             :                         }
    4025             : 
    4026             :                 }
    4027             : 
    4028          36 :                 assert(len == sizeof(spdk_blob_id));
    4029             : 
    4030          36 :                 ctx->blob = blob;
    4031             : 
    4032             :                 /* Open clone to check if we are able to fix this blob or should we remove it */
    4033          36 :                 spdk_bs_open_blob(ctx->bs, *(spdk_blob_id *)value, bs_examine_clone, ctx);
    4034          36 :                 return;
    4035         280 :         } else if (bserrno == -ENOENT) {
    4036         280 :                 bserrno = 0;
    4037             :         } else {
    4038             :                 /*
    4039             :                  * This case needs to be looked at further.  Same problem
    4040             :                  *  exists with applications that rely on explicit blob
    4041             :                  *  iteration.  We should just skip the blob that failed
    4042             :                  *  to load and continue on to the next one.
    4043             :                  */
    4044           0 :                 SPDK_ERRLOG("Error in iterating blobs\n");
    4045             :         }
    4046             : 
    4047         280 :         ctx->iter_cb_fn = NULL;
    4048             : 
    4049         280 :         spdk_free(ctx->super);
    4050         280 :         spdk_free(ctx->mask);
    4051         280 :         bs_sequence_finish(ctx->seq, bserrno);
    4052         280 :         free(ctx);
    4053             : }
    4054             : 
    4055             : static void bs_dump_read_md_page(spdk_bs_sequence_t *seq, void *cb_arg);
    4056             : 
    4057             : static void
    4058         280 : bs_load_complete(struct spdk_bs_load_ctx *ctx)
    4059             : {
    4060         280 :         ctx->bs->used_clusters = spdk_bit_pool_create_from_array(ctx->used_clusters);
    4061         280 :         if (ctx->dumping) {
    4062           0 :                 bs_dump_read_md_page(ctx->seq, ctx);
    4063           0 :                 return;
    4064             :         }
    4065         280 :         spdk_bs_iter_first(ctx->bs, bs_load_iter, ctx);
    4066             : }
    4067             : 
    4068             : static void
    4069         174 : bs_load_used_blobids_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
    4070             : {
    4071         174 :         struct spdk_bs_load_ctx *ctx = cb_arg;
    4072             :         int rc;
    4073             : 
    4074             :         /* The type must be correct */
    4075         174 :         assert(ctx->mask->type == SPDK_MD_MASK_TYPE_USED_BLOBIDS);
    4076             : 
    4077             :         /* The length of the mask (in bits) must not be greater than
    4078             :          * the length of the buffer (converted to bits) */
    4079         174 :         assert(ctx->mask->length <= (ctx->super->used_blobid_mask_len * SPDK_BS_PAGE_SIZE * 8));
    4080             : 
    4081             :         /* The length of the mask must be exactly equal to the size
    4082             :          * (in pages) of the metadata region */
    4083         174 :         assert(ctx->mask->length == ctx->super->md_len);
    4084             : 
    4085         174 :         rc = spdk_bit_array_resize(&ctx->bs->used_blobids, ctx->mask->length);
    4086         174 :         if (rc < 0) {
    4087           0 :                 spdk_free(ctx->mask);
    4088           0 :                 bs_load_ctx_fail(ctx, rc);
    4089           0 :                 return;
    4090             :         }
    4091             : 
    4092         174 :         spdk_bit_array_load_mask(ctx->bs->used_blobids, ctx->mask->mask);
    4093         174 :         bs_load_complete(ctx);
    4094             : }
    4095             : 
    4096             : static void
    4097         174 : bs_load_used_clusters_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
    4098             : {
    4099         174 :         struct spdk_bs_load_ctx *ctx = cb_arg;
    4100             :         uint64_t                lba, lba_count, mask_size;
    4101             :         int                     rc;
    4102             : 
    4103         174 :         if (bserrno != 0) {
    4104           0 :                 bs_load_ctx_fail(ctx, bserrno);
    4105           0 :                 return;
    4106             :         }
    4107             : 
    4108             :         /* The type must be correct */
    4109         174 :         assert(ctx->mask->type == SPDK_MD_MASK_TYPE_USED_CLUSTERS);
    4110             :         /* The length of the mask (in bits) must not be greater than the length of the buffer (converted to bits) */
    4111         174 :         assert(ctx->mask->length <= (ctx->super->used_cluster_mask_len * sizeof(
    4112             :                                              struct spdk_blob_md_page) * 8));
    4113             :         /*
    4114             :          * The length of the mask must be equal to or larger than the total number of clusters. It may be
    4115             :          * larger than the total number of clusters due to a failure spdk_bs_grow.
    4116             :          */
    4117         174 :         assert(ctx->mask->length >= ctx->bs->total_clusters);
    4118         174 :         if (ctx->mask->length > ctx->bs->total_clusters) {
    4119           4 :                 SPDK_WARNLOG("Shrink the used_custers mask length to total_clusters");
    4120           4 :                 ctx->mask->length = ctx->bs->total_clusters;
    4121             :         }
    4122             : 
    4123         174 :         rc = spdk_bit_array_resize(&ctx->used_clusters, ctx->mask->length);
    4124         174 :         if (rc < 0) {
    4125           0 :                 spdk_free(ctx->mask);
    4126           0 :                 bs_load_ctx_fail(ctx, rc);
    4127           0 :                 return;
    4128             :         }
    4129             : 
    4130         174 :         spdk_bit_array_load_mask(ctx->used_clusters, ctx->mask->mask);
    4131         174 :         ctx->bs->num_free_clusters = spdk_bit_array_count_clear(ctx->used_clusters);
    4132         174 :         assert(ctx->bs->num_free_clusters <= ctx->bs->total_clusters);
    4133             : 
    4134         174 :         spdk_free(ctx->mask);
    4135             : 
    4136             :         /* Read the used blobids mask */
    4137         174 :         mask_size = ctx->super->used_blobid_mask_len * SPDK_BS_PAGE_SIZE;
    4138         174 :         ctx->mask = spdk_zmalloc(mask_size, 0x1000, NULL, SPDK_ENV_SOCKET_ID_ANY,
    4139             :                                  SPDK_MALLOC_DMA);
    4140         174 :         if (!ctx->mask) {
    4141           0 :                 bs_load_ctx_fail(ctx, -ENOMEM);
    4142           0 :                 return;
    4143             :         }
    4144         174 :         lba = bs_page_to_lba(ctx->bs, ctx->super->used_blobid_mask_start);
    4145         174 :         lba_count = bs_page_to_lba(ctx->bs, ctx->super->used_blobid_mask_len);
    4146         174 :         bs_sequence_read_dev(seq, ctx->mask, lba, lba_count,
    4147             :                              bs_load_used_blobids_cpl, ctx);
    4148             : }
    4149             : 
    4150             : static void
    4151         174 : bs_load_used_pages_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
    4152             : {
    4153         174 :         struct spdk_bs_load_ctx *ctx = cb_arg;
    4154             :         uint64_t                lba, lba_count, mask_size;
    4155             :         int                     rc;
    4156             : 
    4157         174 :         if (bserrno != 0) {
    4158           0 :                 bs_load_ctx_fail(ctx, bserrno);
    4159           0 :                 return;
    4160             :         }
    4161             : 
    4162             :         /* The type must be correct */
    4163         174 :         assert(ctx->mask->type == SPDK_MD_MASK_TYPE_USED_PAGES);
    4164             :         /* The length of the mask (in bits) must not be greater than the length of the buffer (converted to bits) */
    4165         174 :         assert(ctx->mask->length <= (ctx->super->used_page_mask_len * SPDK_BS_PAGE_SIZE *
    4166             :                                      8));
    4167             :         /* The length of the mask must be exactly equal to the size (in pages) of the metadata region */
    4168         174 :         if (ctx->mask->length != ctx->super->md_len) {
    4169           0 :                 SPDK_ERRLOG("mismatched md_len in used_pages mask: "
    4170             :                             "mask->length=%" PRIu32 " super->md_len=%" PRIu32 "\n",
    4171             :                             ctx->mask->length, ctx->super->md_len);
    4172           0 :                 assert(false);
    4173             :         }
    4174             : 
    4175         174 :         rc = spdk_bit_array_resize(&ctx->bs->used_md_pages, ctx->mask->length);
    4176         174 :         if (rc < 0) {
    4177           0 :                 spdk_free(ctx->mask);
    4178           0 :                 bs_load_ctx_fail(ctx, rc);
    4179           0 :                 return;
    4180             :         }
    4181             : 
    4182         174 :         spdk_bit_array_load_mask(ctx->bs->used_md_pages, ctx->mask->mask);
    4183         174 :         spdk_free(ctx->mask);
    4184             : 
    4185             :         /* Read the used clusters mask */
    4186         174 :         mask_size = ctx->super->used_cluster_mask_len * SPDK_BS_PAGE_SIZE;
    4187         174 :         ctx->mask = spdk_zmalloc(mask_size, 0x1000, NULL, SPDK_ENV_SOCKET_ID_ANY,
    4188             :                                  SPDK_MALLOC_DMA);
    4189         174 :         if (!ctx->mask) {
    4190           0 :                 bs_load_ctx_fail(ctx, -ENOMEM);
    4191           0 :                 return;
    4192             :         }
    4193         174 :         lba = bs_page_to_lba(ctx->bs, ctx->super->used_cluster_mask_start);
    4194         174 :         lba_count = bs_page_to_lba(ctx->bs, ctx->super->used_cluster_mask_len);
    4195         174 :         bs_sequence_read_dev(seq, ctx->mask, lba, lba_count,
    4196             :                              bs_load_used_clusters_cpl, ctx);
    4197             : }
    4198             : 
    4199             : static void
    4200         174 : bs_load_read_used_pages(struct spdk_bs_load_ctx *ctx)
    4201             : {
    4202             :         uint64_t lba, lba_count, mask_size;
    4203             : 
    4204             :         /* Read the used pages mask */
    4205         174 :         mask_size = ctx->super->used_page_mask_len * SPDK_BS_PAGE_SIZE;
    4206         174 :         ctx->mask = spdk_zmalloc(mask_size, 0x1000, NULL,
    4207             :                                  SPDK_ENV_SOCKET_ID_ANY, SPDK_MALLOC_DMA);
    4208         174 :         if (!ctx->mask) {
    4209           0 :                 bs_load_ctx_fail(ctx, -ENOMEM);
    4210           0 :                 return;
    4211             :         }
    4212             : 
    4213         174 :         lba = bs_page_to_lba(ctx->bs, ctx->super->used_page_mask_start);
    4214         174 :         lba_count = bs_page_to_lba(ctx->bs, ctx->super->used_page_mask_len);
    4215         174 :         bs_sequence_read_dev(ctx->seq, ctx->mask, lba, lba_count,
    4216             :                              bs_load_used_pages_cpl, ctx);
    4217             : }
    4218             : 
    4219             : static int
    4220         246 : bs_load_replay_md_parse_page(struct spdk_bs_load_ctx *ctx, struct spdk_blob_md_page *page)
    4221             : {
    4222         246 :         struct spdk_blob_store *bs = ctx->bs;
    4223             :         struct spdk_blob_md_descriptor *desc;
    4224         246 :         size_t  cur_desc = 0;
    4225             : 
    4226         246 :         desc = (struct spdk_blob_md_descriptor *)page->descriptors;
    4227         718 :         while (cur_desc < sizeof(page->descriptors)) {
    4228         718 :                 if (desc->type == SPDK_MD_DESCRIPTOR_TYPE_PADDING) {
    4229         226 :                         if (desc->length == 0) {
    4230             :                                 /* If padding and length are 0, this terminates the page */
    4231         226 :                                 break;
    4232             :                         }
    4233         492 :                 } else if (desc->type == SPDK_MD_DESCRIPTOR_TYPE_EXTENT_RLE) {
    4234             :                         struct spdk_blob_md_descriptor_extent_rle       *desc_extent_rle;
    4235             :                         unsigned int                            i, j;
    4236          68 :                         unsigned int                            cluster_count = 0;
    4237             :                         uint32_t                                cluster_idx;
    4238             : 
    4239          68 :                         desc_extent_rle = (struct spdk_blob_md_descriptor_extent_rle *)desc;
    4240             : 
    4241         136 :                         for (i = 0; i < desc_extent_rle->length / sizeof(desc_extent_rle->extents[0]); i++) {
    4242         828 :                                 for (j = 0; j < desc_extent_rle->extents[i].length; j++) {
    4243         760 :                                         cluster_idx = desc_extent_rle->extents[i].cluster_idx;
    4244             :                                         /*
    4245             :                                          * cluster_idx = 0 means an unallocated cluster - don't mark that
    4246             :                                          * in the used cluster map.
    4247             :                                          */
    4248         760 :                                         if (cluster_idx != 0) {
    4249         540 :                                                 SPDK_NOTICELOG("Recover: cluster %" PRIu32 "\n", cluster_idx + j);
    4250         540 :                                                 spdk_bit_array_set(ctx->used_clusters, cluster_idx + j);
    4251         540 :                                                 if (bs->num_free_clusters == 0) {
    4252           0 :                                                         return -ENOSPC;
    4253             :                                                 }
    4254         540 :                                                 bs->num_free_clusters--;
    4255             :                                         }
    4256         760 :                                         cluster_count++;
    4257             :                                 }
    4258             :                         }
    4259          68 :                         if (cluster_count == 0) {
    4260           0 :                                 return -EINVAL;
    4261             :                         }
    4262         424 :                 } else if (desc->type == SPDK_MD_DESCRIPTOR_TYPE_EXTENT_PAGE) {
    4263             :                         struct spdk_blob_md_descriptor_extent_page      *desc_extent;
    4264             :                         uint32_t                                        i;
    4265          52 :                         uint32_t                                        cluster_count = 0;
    4266             :                         uint32_t                                        cluster_idx;
    4267             :                         size_t                                          cluster_idx_length;
    4268             : 
    4269          52 :                         desc_extent = (struct spdk_blob_md_descriptor_extent_page *)desc;
    4270          52 :                         cluster_idx_length = desc_extent->length - sizeof(desc_extent->start_cluster_idx);
    4271             : 
    4272          52 :                         if (desc_extent->length <= sizeof(desc_extent->start_cluster_idx) ||
    4273          52 :                             (cluster_idx_length % sizeof(desc_extent->cluster_idx[0]) != 0)) {
    4274           0 :                                 return -EINVAL;
    4275             :                         }
    4276             : 
    4277         652 :                         for (i = 0; i < cluster_idx_length / sizeof(desc_extent->cluster_idx[0]); i++) {
    4278         600 :                                 cluster_idx = desc_extent->cluster_idx[i];
    4279             :                                 /*
    4280             :                                  * cluster_idx = 0 means an unallocated cluster - don't mark that
    4281             :                                  * in the used cluster map.
    4282             :                                  */
    4283         600 :                                 if (cluster_idx != 0) {
    4284         600 :                                         if (cluster_idx < desc_extent->start_cluster_idx &&
    4285           0 :                                             cluster_idx >= desc_extent->start_cluster_idx + cluster_count) {
    4286           0 :                                                 return -EINVAL;
    4287             :                                         }
    4288         600 :                                         spdk_bit_array_set(ctx->used_clusters, cluster_idx);
    4289         600 :                                         if (bs->num_free_clusters == 0) {
    4290           0 :                                                 return -ENOSPC;
    4291             :                                         }
    4292         600 :                                         bs->num_free_clusters--;
    4293             :                                 }
    4294         600 :                                 cluster_count++;
    4295             :                         }
    4296             : 
    4297          52 :                         if (cluster_count == 0) {
    4298           0 :                                 return -EINVAL;
    4299             :                         }
    4300         372 :                 } else if (desc->type == SPDK_MD_DESCRIPTOR_TYPE_XATTR) {
    4301             :                         /* Skip this item */
    4302         296 :                 } else if (desc->type == SPDK_MD_DESCRIPTOR_TYPE_XATTR_INTERNAL) {
    4303             :                         /* Skip this item */
    4304         236 :                 } else if (desc->type == SPDK_MD_DESCRIPTOR_TYPE_FLAGS) {
    4305             :                         /* Skip this item */
    4306          82 :                 } else if (desc->type == SPDK_MD_DESCRIPTOR_TYPE_EXTENT_TABLE) {
    4307             :                         struct spdk_blob_md_descriptor_extent_table *desc_extent_table;
    4308          82 :                         uint32_t num_extent_pages = ctx->num_extent_pages;
    4309             :                         uint32_t i;
    4310             :                         size_t extent_pages_length;
    4311             :                         void *tmp;
    4312             : 
    4313          82 :                         desc_extent_table = (struct spdk_blob_md_descriptor_extent_table *)desc;
    4314          82 :                         extent_pages_length = desc_extent_table->length - sizeof(desc_extent_table->num_clusters);
    4315             : 
    4316          82 :                         if (desc_extent_table->length == 0 ||
    4317          82 :                             (extent_pages_length % sizeof(desc_extent_table->extent_page[0]) != 0)) {
    4318           0 :                                 return -EINVAL;
    4319             :                         }
    4320             : 
    4321         160 :                         for (i = 0; i < extent_pages_length / sizeof(desc_extent_table->extent_page[0]); i++) {
    4322          78 :                                 if (desc_extent_table->extent_page[i].page_idx != 0) {
    4323          52 :                                         if (desc_extent_table->extent_page[i].num_pages != 1) {
    4324           0 :                                                 return -EINVAL;
    4325             :                                         }
    4326          52 :                                         num_extent_pages += 1;
    4327             :                                 }
    4328             :                         }
    4329             : 
    4330          82 :                         if (num_extent_pages > 0) {
    4331          52 :                                 tmp = realloc(ctx->extent_page_num, num_extent_pages * sizeof(uint32_t));
    4332          52 :                                 if (tmp == NULL) {
    4333           0 :                                         return -ENOMEM;
    4334             :                                 }
    4335          52 :                                 ctx->extent_page_num = tmp;
    4336             : 
    4337             :                                 /* Extent table entries contain md page numbers for extent pages.
    4338             :                                  * Zeroes represent unallocated extent pages, those are run-length-encoded.
    4339             :                                  */
    4340         104 :                                 for (i = 0; i < extent_pages_length / sizeof(desc_extent_table->extent_page[0]); i++) {
    4341          52 :                                         if (desc_extent_table->extent_page[i].page_idx != 0) {
    4342          52 :                                                 ctx->extent_page_num[ctx->num_extent_pages] = desc_extent_table->extent_page[i].page_idx;
    4343          52 :                                                 ctx->num_extent_pages += 1;
    4344             :                                         }
    4345             :                                 }
    4346             :                         }
    4347             :                 } else {
    4348             :                         /* Error */
    4349           0 :                         return -EINVAL;
    4350             :                 }
    4351             :                 /* Advance to the next descriptor */
    4352         492 :                 cur_desc += sizeof(*desc) + desc->length;
    4353         492 :                 if (cur_desc + sizeof(*desc) > sizeof(page->descriptors)) {
    4354          20 :                         break;
    4355             :                 }
    4356         472 :                 desc = (struct spdk_blob_md_descriptor *)((uintptr_t)page->descriptors + cur_desc);
    4357             :         }
    4358         246 :         return 0;
    4359             : }
    4360             : 
    4361             : static bool
    4362        1284 : bs_load_cur_extent_page_valid(struct spdk_blob_md_page *page)
    4363             : {
    4364             :         uint32_t crc;
    4365        1284 :         struct spdk_blob_md_descriptor *desc = (struct spdk_blob_md_descriptor *)page->descriptors;
    4366             :         size_t desc_len;
    4367             : 
    4368        1284 :         crc = blob_md_page_calc_crc(page);
    4369        1284 :         if (crc != page->crc) {
    4370           0 :                 return false;
    4371             :         }
    4372             : 
    4373             :         /* Extent page should always be of sequence num 0. */
    4374        1284 :         if (page->sequence_num != 0) {
    4375          44 :                 return false;
    4376             :         }
    4377             : 
    4378             :         /* Descriptor type must be EXTENT_PAGE. */
    4379        1240 :         if (desc->type != SPDK_MD_DESCRIPTOR_TYPE_EXTENT_PAGE) {
    4380         154 :                 return false;
    4381             :         }
    4382             : 
    4383             :         /* Descriptor length cannot exceed the page. */
    4384        1086 :         desc_len = sizeof(*desc) + desc->length;
    4385        1086 :         if (desc_len > sizeof(page->descriptors)) {
    4386           0 :                 return false;
    4387             :         }
    4388             : 
    4389             :         /* It has to be the only descriptor in the page. */
    4390        1086 :         if (desc_len + sizeof(*desc) <= sizeof(page->descriptors)) {
    4391        1086 :                 desc = (struct spdk_blob_md_descriptor *)((uintptr_t)page->descriptors + desc_len);
    4392        1086 :                 if (desc->length != 0) {
    4393           0 :                         return false;
    4394             :                 }
    4395             :         }
    4396             : 
    4397        1086 :         return true;
    4398             : }
    4399             : 
    4400             : static bool
    4401        6754 : bs_load_cur_md_page_valid(struct spdk_bs_load_ctx *ctx)
    4402             : {
    4403             :         uint32_t crc;
    4404        6754 :         struct spdk_blob_md_page *page = ctx->page;
    4405             : 
    4406        6754 :         crc = blob_md_page_calc_crc(page);
    4407        6754 :         if (crc != page->crc) {
    4408        6538 :                 return false;
    4409             :         }
    4410             : 
    4411             :         /* First page of a sequence should match the blobid. */
    4412         216 :         if (page->sequence_num == 0 &&
    4413         172 :             bs_page_to_blobid(ctx->cur_page) != page->id) {
    4414          18 :                 return false;
    4415             :         }
    4416         198 :         assert(bs_load_cur_extent_page_valid(page) == false);
    4417             : 
    4418         198 :         return true;
    4419             : }
    4420             : 
    4421             : static void bs_load_replay_cur_md_page(struct spdk_bs_load_ctx *ctx);
    4422             : 
    4423             : static void
    4424         106 : bs_load_write_used_clusters_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
    4425             : {
    4426         106 :         struct spdk_bs_load_ctx *ctx = cb_arg;
    4427             : 
    4428         106 :         if (bserrno != 0) {
    4429           0 :                 bs_load_ctx_fail(ctx, bserrno);
    4430           0 :                 return;
    4431             :         }
    4432             : 
    4433         106 :         bs_load_complete(ctx);
    4434             : }
    4435             : 
    4436             : static void
    4437         106 : bs_load_write_used_blobids_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
    4438             : {
    4439         106 :         struct spdk_bs_load_ctx *ctx = cb_arg;
    4440             : 
    4441         106 :         spdk_free(ctx->mask);
    4442         106 :         ctx->mask = NULL;
    4443             : 
    4444         106 :         if (bserrno != 0) {
    4445           0 :                 bs_load_ctx_fail(ctx, bserrno);
    4446           0 :                 return;
    4447             :         }
    4448             : 
    4449         106 :         bs_write_used_clusters(seq, ctx, bs_load_write_used_clusters_cpl);
    4450             : }
    4451             : 
    4452             : static void
    4453         106 : bs_load_write_used_pages_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
    4454             : {
    4455         106 :         struct spdk_bs_load_ctx *ctx = cb_arg;
    4456             : 
    4457         106 :         spdk_free(ctx->mask);
    4458         106 :         ctx->mask = NULL;
    4459             : 
    4460         106 :         if (bserrno != 0) {
    4461           0 :                 bs_load_ctx_fail(ctx, bserrno);
    4462           0 :                 return;
    4463             :         }
    4464             : 
    4465         106 :         bs_write_used_blobids(seq, ctx, bs_load_write_used_blobids_cpl);
    4466             : }
    4467             : 
    4468             : static void
    4469         106 : bs_load_write_used_md(struct spdk_bs_load_ctx *ctx)
    4470             : {
    4471         106 :         bs_write_used_md(ctx->seq, ctx, bs_load_write_used_pages_cpl);
    4472         106 : }
    4473             : 
    4474             : static void
    4475        6714 : bs_load_replay_md_chain_cpl(struct spdk_bs_load_ctx *ctx)
    4476             : {
    4477             :         uint64_t num_md_clusters;
    4478             :         uint64_t i;
    4479             : 
    4480        6714 :         ctx->in_page_chain = false;
    4481             : 
    4482             :         do {
    4483        6784 :                 ctx->page_index++;
    4484        6784 :         } while (spdk_bit_array_get(ctx->bs->used_md_pages, ctx->page_index) == true);
    4485             : 
    4486        6714 :         if (ctx->page_index < ctx->super->md_len) {
    4487        6608 :                 ctx->cur_page = ctx->page_index;
    4488        6608 :                 bs_load_replay_cur_md_page(ctx);
    4489             :         } else {
    4490             :                 /* Claim all of the clusters used by the metadata */
    4491         106 :                 num_md_clusters = spdk_divide_round_up(
    4492         106 :                                           ctx->super->md_start + ctx->super->md_len, ctx->bs->pages_per_cluster);
    4493         480 :                 for (i = 0; i < num_md_clusters; i++) {
    4494         374 :                         spdk_bit_array_set(ctx->used_clusters, i);
    4495             :                 }
    4496         106 :                 ctx->bs->num_free_clusters -= num_md_clusters;
    4497         106 :                 spdk_free(ctx->page);
    4498         106 :                 bs_load_write_used_md(ctx);
    4499             :         }
    4500        6714 : }
    4501             : 
    4502             : static void
    4503          52 : bs_load_replay_extent_page_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
    4504             : {
    4505          52 :         struct spdk_bs_load_ctx *ctx = cb_arg;
    4506             :         uint32_t page_num;
    4507             :         uint64_t i;
    4508             : 
    4509          52 :         if (bserrno != 0) {
    4510           0 :                 spdk_free(ctx->extent_pages);
    4511           0 :                 bs_load_ctx_fail(ctx, bserrno);
    4512           0 :                 return;
    4513             :         }
    4514             : 
    4515         104 :         for (i = 0; i < ctx->num_extent_pages; i++) {
    4516             :                 /* Extent pages are only read when present within in chain md.
    4517             :                  * Integrity of md is not right if that page was not a valid extent page. */
    4518          52 :                 if (bs_load_cur_extent_page_valid(&ctx->extent_pages[i]) != true) {
    4519           0 :                         spdk_free(ctx->extent_pages);
    4520           0 :                         bs_load_ctx_fail(ctx, -EILSEQ);
    4521           0 :                         return;
    4522             :                 }
    4523             : 
    4524          52 :                 page_num = ctx->extent_page_num[i];
    4525          52 :                 spdk_bit_array_set(ctx->bs->used_md_pages, page_num);
    4526          52 :                 if (bs_load_replay_md_parse_page(ctx, &ctx->extent_pages[i])) {
    4527           0 :                         spdk_free(ctx->extent_pages);
    4528           0 :                         bs_load_ctx_fail(ctx, -EILSEQ);
    4529           0 :                         return;
    4530             :                 }
    4531             :         }
    4532             : 
    4533          52 :         spdk_free(ctx->extent_pages);
    4534          52 :         free(ctx->extent_page_num);
    4535          52 :         ctx->extent_page_num = NULL;
    4536          52 :         ctx->num_extent_pages = 0;
    4537             : 
    4538          52 :         bs_load_replay_md_chain_cpl(ctx);
    4539             : }
    4540             : 
    4541             : static void
    4542          52 : bs_load_replay_extent_pages(struct spdk_bs_load_ctx *ctx)
    4543             : {
    4544             :         spdk_bs_batch_t *batch;
    4545             :         uint32_t page;
    4546             :         uint64_t lba;
    4547             :         uint64_t i;
    4548             : 
    4549          52 :         ctx->extent_pages = spdk_zmalloc(SPDK_BS_PAGE_SIZE * ctx->num_extent_pages, 0,
    4550             :                                          NULL, SPDK_ENV_SOCKET_ID_ANY, SPDK_MALLOC_DMA);
    4551          52 :         if (!ctx->extent_pages) {
    4552           0 :                 bs_load_ctx_fail(ctx, -ENOMEM);
    4553           0 :                 return;
    4554             :         }
    4555             : 
    4556          52 :         batch = bs_sequence_to_batch(ctx->seq, bs_load_replay_extent_page_cpl, ctx);
    4557             : 
    4558         104 :         for (i = 0; i < ctx->num_extent_pages; i++) {
    4559          52 :                 page = ctx->extent_page_num[i];
    4560          52 :                 assert(page < ctx->super->md_len);
    4561          52 :                 lba = bs_md_page_to_lba(ctx->bs, page);
    4562          52 :                 bs_batch_read_dev(batch, &ctx->extent_pages[i], lba,
    4563          52 :                                   bs_byte_to_lba(ctx->bs, SPDK_BS_PAGE_SIZE));
    4564             :         }
    4565             : 
    4566          52 :         bs_batch_close(batch);
    4567             : }
    4568             : 
    4569             : static void
    4570        6754 : bs_load_replay_md_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
    4571             : {
    4572        6754 :         struct spdk_bs_load_ctx *ctx = cb_arg;
    4573             :         uint32_t page_num;
    4574             :         struct spdk_blob_md_page *page;
    4575             : 
    4576        6754 :         if (bserrno != 0) {
    4577           0 :                 bs_load_ctx_fail(ctx, bserrno);
    4578           0 :                 return;
    4579             :         }
    4580             : 
    4581        6754 :         page_num = ctx->cur_page;
    4582        6754 :         page = ctx->page;
    4583        6754 :         if (bs_load_cur_md_page_valid(ctx) == true) {
    4584         198 :                 if (page->sequence_num == 0 || ctx->in_page_chain == true) {
    4585         194 :                         spdk_spin_lock(&ctx->bs->used_lock);
    4586         194 :                         bs_claim_md_page(ctx->bs, page_num);
    4587         194 :                         spdk_spin_unlock(&ctx->bs->used_lock);
    4588         194 :                         if (page->sequence_num == 0) {
    4589         154 :                                 SPDK_NOTICELOG("Recover: blob 0x%" PRIx32 "\n", page_num);
    4590         154 :                                 spdk_bit_array_set(ctx->bs->used_blobids, page_num);
    4591             :                         }
    4592         194 :                         if (bs_load_replay_md_parse_page(ctx, page)) {
    4593           0 :                                 bs_load_ctx_fail(ctx, -EILSEQ);
    4594           0 :                                 return;
    4595             :                         }
    4596         194 :                         if (page->next != SPDK_INVALID_MD_PAGE) {
    4597          40 :                                 ctx->in_page_chain = true;
    4598          40 :                                 ctx->cur_page = page->next;
    4599          40 :                                 bs_load_replay_cur_md_page(ctx);
    4600          40 :                                 return;
    4601             :                         }
    4602         154 :                         if (ctx->num_extent_pages != 0) {
    4603          52 :                                 bs_load_replay_extent_pages(ctx);
    4604          52 :                                 return;
    4605             :                         }
    4606             :                 }
    4607             :         }
    4608        6662 :         bs_load_replay_md_chain_cpl(ctx);
    4609             : }
    4610             : 
    4611             : static void
    4612        6754 : bs_load_replay_cur_md_page(struct spdk_bs_load_ctx *ctx)
    4613             : {
    4614             :         uint64_t lba;
    4615             : 
    4616        6754 :         assert(ctx->cur_page < ctx->super->md_len);
    4617        6754 :         lba = bs_md_page_to_lba(ctx->bs, ctx->cur_page);
    4618        6754 :         bs_sequence_read_dev(ctx->seq, ctx->page, lba,
    4619        6754 :                              bs_byte_to_lba(ctx->bs, SPDK_BS_PAGE_SIZE),
    4620             :                              bs_load_replay_md_cpl, ctx);
    4621        6754 : }
    4622             : 
    4623             : static void
    4624         106 : bs_load_replay_md(struct spdk_bs_load_ctx *ctx)
    4625             : {
    4626         106 :         ctx->page_index = 0;
    4627         106 :         ctx->cur_page = 0;
    4628         106 :         ctx->page = spdk_zmalloc(SPDK_BS_PAGE_SIZE, 0,
    4629             :                                  NULL, SPDK_ENV_SOCKET_ID_ANY, SPDK_MALLOC_DMA);
    4630         106 :         if (!ctx->page) {
    4631           0 :                 bs_load_ctx_fail(ctx, -ENOMEM);
    4632           0 :                 return;
    4633             :         }
    4634         106 :         bs_load_replay_cur_md_page(ctx);
    4635             : }
    4636             : 
    4637             : static void
    4638         106 : bs_recover(struct spdk_bs_load_ctx *ctx)
    4639             : {
    4640             :         int             rc;
    4641             : 
    4642         106 :         SPDK_NOTICELOG("Performing recovery on blobstore\n");
    4643         106 :         rc = spdk_bit_array_resize(&ctx->bs->used_md_pages, ctx->super->md_len);
    4644         106 :         if (rc < 0) {
    4645           0 :                 bs_load_ctx_fail(ctx, -ENOMEM);
    4646           0 :                 return;
    4647             :         }
    4648             : 
    4649         106 :         rc = spdk_bit_array_resize(&ctx->bs->used_blobids, ctx->super->md_len);
    4650         106 :         if (rc < 0) {
    4651           0 :                 bs_load_ctx_fail(ctx, -ENOMEM);
    4652           0 :                 return;
    4653             :         }
    4654             : 
    4655         106 :         rc = spdk_bit_array_resize(&ctx->used_clusters, ctx->bs->total_clusters);
    4656         106 :         if (rc < 0) {
    4657           0 :                 bs_load_ctx_fail(ctx, -ENOMEM);
    4658           0 :                 return;
    4659             :         }
    4660             : 
    4661         106 :         rc = spdk_bit_array_resize(&ctx->bs->open_blobids, ctx->super->md_len);
    4662         106 :         if (rc < 0) {
    4663           0 :                 bs_load_ctx_fail(ctx, -ENOMEM);
    4664           0 :                 return;
    4665             :         }
    4666             : 
    4667         106 :         ctx->bs->num_free_clusters = ctx->bs->total_clusters;
    4668         106 :         bs_load_replay_md(ctx);
    4669             : }
    4670             : 
    4671             : static int
    4672         276 : bs_parse_super(struct spdk_bs_load_ctx *ctx)
    4673             : {
    4674             :         int rc;
    4675             : 
    4676         276 :         if (ctx->super->size == 0) {
    4677           8 :                 ctx->super->size = ctx->bs->dev->blockcnt * ctx->bs->dev->blocklen;
    4678             :         }
    4679             : 
    4680         276 :         if (ctx->super->io_unit_size == 0) {
    4681           8 :                 ctx->super->io_unit_size = SPDK_BS_PAGE_SIZE;
    4682             :         }
    4683             : 
    4684         276 :         ctx->bs->clean = 1;
    4685         276 :         ctx->bs->cluster_sz = ctx->super->cluster_size;
    4686         276 :         ctx->bs->total_clusters = ctx->super->size / ctx->super->cluster_size;
    4687         276 :         ctx->bs->pages_per_cluster = ctx->bs->cluster_sz / SPDK_BS_PAGE_SIZE;
    4688         276 :         if (spdk_u32_is_pow2(ctx->bs->pages_per_cluster)) {
    4689         276 :                 ctx->bs->pages_per_cluster_shift = spdk_u32log2(ctx->bs->pages_per_cluster);
    4690             :         }
    4691         276 :         ctx->bs->io_unit_size = ctx->super->io_unit_size;
    4692         276 :         rc = spdk_bit_array_resize(&ctx->used_clusters, ctx->bs->total_clusters);
    4693         276 :         if (rc < 0) {
    4694           0 :                 return -ENOMEM;
    4695             :         }
    4696         276 :         ctx->bs->md_start = ctx->super->md_start;
    4697         276 :         ctx->bs->md_len = ctx->super->md_len;
    4698         276 :         rc = spdk_bit_array_resize(&ctx->bs->open_blobids, ctx->bs->md_len);
    4699         276 :         if (rc < 0) {
    4700           0 :                 return -ENOMEM;
    4701             :         }
    4702             : 
    4703         552 :         ctx->bs->total_data_clusters = ctx->bs->total_clusters - spdk_divide_round_up(
    4704         276 :                                                ctx->bs->md_start + ctx->bs->md_len, ctx->bs->pages_per_cluster);
    4705         276 :         ctx->bs->super_blob = ctx->super->super_blob;
    4706         276 :         memcpy(&ctx->bs->bstype, &ctx->super->bstype, sizeof(ctx->super->bstype));
    4707             : 
    4708         276 :         return 0;
    4709             : }
    4710             : 
    4711             : static void
    4712         300 : bs_load_super_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
    4713             : {
    4714         300 :         struct spdk_bs_load_ctx *ctx = cb_arg;
    4715             :         int rc;
    4716             : 
    4717         300 :         rc = bs_super_validate(ctx->super, ctx->bs);
    4718         300 :         if (rc != 0) {
    4719          24 :                 bs_load_ctx_fail(ctx, rc);
    4720          24 :                 return;
    4721             :         }
    4722             : 
    4723         276 :         rc = bs_parse_super(ctx);
    4724         276 :         if (rc < 0) {
    4725           0 :                 bs_load_ctx_fail(ctx, rc);
    4726           0 :                 return;
    4727             :         }
    4728             : 
    4729         276 :         if (ctx->super->used_blobid_mask_len == 0 || ctx->super->clean == 0 || ctx->force_recover) {
    4730         106 :                 bs_recover(ctx);
    4731             :         } else {
    4732         170 :                 bs_load_read_used_pages(ctx);
    4733             :         }
    4734             : }
    4735             : 
    4736             : static inline int
    4737         292 : bs_opts_copy(struct spdk_bs_opts *src, struct spdk_bs_opts *dst)
    4738             : {
    4739             : 
    4740         292 :         if (!src->opts_size) {
    4741           0 :                 SPDK_ERRLOG("opts_size should not be zero value\n");
    4742           0 :                 return -1;
    4743             :         }
    4744             : 
    4745             : #define FIELD_OK(field) \
    4746             :         offsetof(struct spdk_bs_opts, field) + sizeof(src->field) <= src->opts_size
    4747             : 
    4748             : #define SET_FIELD(field) \
    4749             :         if (FIELD_OK(field)) { \
    4750             :                 dst->field = src->field; \
    4751             :         } \
    4752             : 
    4753         292 :         SET_FIELD(cluster_sz);
    4754         292 :         SET_FIELD(num_md_pages);
    4755         292 :         SET_FIELD(max_md_ops);
    4756         292 :         SET_FIELD(max_channel_ops);
    4757         292 :         SET_FIELD(clear_method);
    4758             : 
    4759         292 :         if (FIELD_OK(bstype)) {
    4760         292 :                 memcpy(&dst->bstype, &src->bstype, sizeof(dst->bstype));
    4761             :         }
    4762         292 :         SET_FIELD(iter_cb_fn);
    4763         292 :         SET_FIELD(iter_cb_arg);
    4764         292 :         SET_FIELD(force_recover);
    4765         292 :         SET_FIELD(esnap_bs_dev_create);
    4766         292 :         SET_FIELD(esnap_ctx);
    4767             : 
    4768         292 :         dst->opts_size = src->opts_size;
    4769             : 
    4770             :         /* You should not remove this statement, but need to update the assert statement
    4771             :          * if you add a new field, and also add a corresponding SET_FIELD statement */
    4772             :         SPDK_STATIC_ASSERT(sizeof(struct spdk_bs_opts) == 88, "Incorrect size");
    4773             : 
    4774             : #undef FIELD_OK
    4775             : #undef SET_FIELD
    4776             : 
    4777         292 :         return 0;
    4778             : }
    4779             : 
    4780             : void
    4781         312 : spdk_bs_load(struct spdk_bs_dev *dev, struct spdk_bs_opts *o,
    4782             :              spdk_bs_op_with_handle_complete cb_fn, void *cb_arg)
    4783             : {
    4784         312 :         struct spdk_blob_store  *bs;
    4785         312 :         struct spdk_bs_cpl      cpl;
    4786         312 :         struct spdk_bs_load_ctx *ctx;
    4787         312 :         struct spdk_bs_opts     opts = {};
    4788             :         int err;
    4789             : 
    4790         312 :         SPDK_DEBUGLOG(blob, "Loading blobstore from dev %p\n", dev);
    4791             : 
    4792         312 :         if ((SPDK_BS_PAGE_SIZE % dev->blocklen) != 0) {
    4793           4 :                 SPDK_DEBUGLOG(blob, "unsupported dev block length of %d\n", dev->blocklen);
    4794           4 :                 dev->destroy(dev);
    4795           4 :                 cb_fn(cb_arg, NULL, -EINVAL);
    4796           4 :                 return;
    4797             :         }
    4798             : 
    4799         308 :         spdk_bs_opts_init(&opts, sizeof(opts));
    4800         308 :         if (o) {
    4801         122 :                 if (bs_opts_copy(o, &opts)) {
    4802           0 :                         return;
    4803             :                 }
    4804             :         }
    4805             : 
    4806         308 :         if (opts.max_md_ops == 0 || opts.max_channel_ops == 0) {
    4807           8 :                 dev->destroy(dev);
    4808           8 :                 cb_fn(cb_arg, NULL, -EINVAL);
    4809           8 :                 return;
    4810             :         }
    4811             : 
    4812         300 :         err = bs_alloc(dev, &opts, &bs, &ctx);
    4813         300 :         if (err) {
    4814           0 :                 dev->destroy(dev);
    4815           0 :                 cb_fn(cb_arg, NULL, err);
    4816           0 :                 return;
    4817             :         }
    4818             : 
    4819         300 :         cpl.type = SPDK_BS_CPL_TYPE_BS_HANDLE;
    4820         300 :         cpl.u.bs_handle.cb_fn = cb_fn;
    4821         300 :         cpl.u.bs_handle.cb_arg = cb_arg;
    4822         300 :         cpl.u.bs_handle.bs = bs;
    4823             : 
    4824         300 :         ctx->seq = bs_sequence_start_bs(bs->md_channel, &cpl);
    4825         300 :         if (!ctx->seq) {
    4826           0 :                 spdk_free(ctx->super);
    4827           0 :                 free(ctx);
    4828           0 :                 bs_free(bs);
    4829           0 :                 cb_fn(cb_arg, NULL, -ENOMEM);
    4830           0 :                 return;
    4831             :         }
    4832             : 
    4833             :         /* Read the super block */
    4834         300 :         bs_sequence_read_dev(ctx->seq, ctx->super, bs_page_to_lba(bs, 0),
    4835         300 :                              bs_byte_to_lba(bs, sizeof(*ctx->super)),
    4836             :                              bs_load_super_cpl, ctx);
    4837             : }
    4838             : 
    4839             : /* END spdk_bs_load */
    4840             : 
    4841             : /* START spdk_bs_dump */
    4842             : 
    4843             : static void
    4844           0 : bs_dump_finish(spdk_bs_sequence_t *seq, struct spdk_bs_load_ctx *ctx, int bserrno)
    4845             : {
    4846           0 :         spdk_free(ctx->super);
    4847             : 
    4848             :         /*
    4849             :          * We need to defer calling bs_call_cpl() until after
    4850             :          * dev destruction, so tuck these away for later use.
    4851             :          */
    4852           0 :         ctx->bs->unload_err = bserrno;
    4853           0 :         memcpy(&ctx->bs->unload_cpl, &seq->cpl, sizeof(struct spdk_bs_cpl));
    4854           0 :         seq->cpl.type = SPDK_BS_CPL_TYPE_NONE;
    4855             : 
    4856           0 :         bs_sequence_finish(seq, 0);
    4857           0 :         bs_free(ctx->bs);
    4858           0 :         free(ctx);
    4859           0 : }
    4860             : 
    4861             : static void
    4862           0 : bs_dump_print_xattr(struct spdk_bs_load_ctx *ctx, struct spdk_blob_md_descriptor *desc)
    4863             : {
    4864             :         struct spdk_blob_md_descriptor_xattr *desc_xattr;
    4865             :         uint32_t i;
    4866             :         const char *type;
    4867             : 
    4868           0 :         desc_xattr = (struct spdk_blob_md_descriptor_xattr *)desc;
    4869             : 
    4870           0 :         if (desc_xattr->length !=
    4871             :             sizeof(desc_xattr->name_length) + sizeof(desc_xattr->value_length) +
    4872           0 :             desc_xattr->name_length + desc_xattr->value_length) {
    4873             :         }
    4874             : 
    4875           0 :         memcpy(ctx->xattr_name, desc_xattr->name, desc_xattr->name_length);
    4876           0 :         ctx->xattr_name[desc_xattr->name_length] = '\0';
    4877           0 :         if (desc->type == SPDK_MD_DESCRIPTOR_TYPE_XATTR) {
    4878           0 :                 type = "XATTR";
    4879           0 :         } else if (desc->type == SPDK_MD_DESCRIPTOR_TYPE_XATTR_INTERNAL) {
    4880           0 :                 type = "XATTR_INTERNAL";
    4881             :         } else {
    4882           0 :                 assert(false);
    4883             :                 type = "XATTR_?";
    4884             :         }
    4885           0 :         fprintf(ctx->fp, "%s: name = \"%s\"\n", type, ctx->xattr_name);
    4886           0 :         fprintf(ctx->fp, "       value = \"");
    4887           0 :         ctx->print_xattr_fn(ctx->fp, ctx->super->bstype.bstype, ctx->xattr_name,
    4888           0 :                             (void *)((uintptr_t)desc_xattr->name + desc_xattr->name_length),
    4889           0 :                             desc_xattr->value_length);
    4890           0 :         fprintf(ctx->fp, "\"\n");
    4891           0 :         for (i = 0; i < desc_xattr->value_length; i++) {
    4892           0 :                 if (i % 16 == 0) {
    4893           0 :                         fprintf(ctx->fp, "               ");
    4894             :                 }
    4895           0 :                 fprintf(ctx->fp, "%02" PRIx8 " ", *((uint8_t *)desc_xattr->name + desc_xattr->name_length + i));
    4896           0 :                 if ((i + 1) % 16 == 0) {
    4897           0 :                         fprintf(ctx->fp, "\n");
    4898             :                 }
    4899             :         }
    4900           0 :         if (i % 16 != 0) {
    4901           0 :                 fprintf(ctx->fp, "\n");
    4902             :         }
    4903           0 : }
    4904             : 
    4905             : struct type_flag_desc {
    4906             :         uint64_t mask;
    4907             :         uint64_t val;
    4908             :         const char *name;
    4909             : };
    4910             : 
    4911             : static void
    4912           0 : bs_dump_print_type_bits(struct spdk_bs_load_ctx *ctx, uint64_t flags,
    4913             :                         struct type_flag_desc *desc, size_t numflags)
    4914             : {
    4915           0 :         uint64_t covered = 0;
    4916             :         size_t i;
    4917             : 
    4918           0 :         for (i = 0; i < numflags; i++) {
    4919           0 :                 if ((desc[i].mask & flags) != desc[i].val) {
    4920           0 :                         continue;
    4921             :                 }
    4922           0 :                 fprintf(ctx->fp, "\t\t 0x%016" PRIx64 " %s", desc[i].val, desc[i].name);
    4923           0 :                 if (desc[i].mask != desc[i].val) {
    4924           0 :                         fprintf(ctx->fp, " (mask 0x%" PRIx64 " value 0x%" PRIx64 ")",
    4925           0 :                                 desc[i].mask, desc[i].val);
    4926             :                 }
    4927           0 :                 fprintf(ctx->fp, "\n");
    4928           0 :                 covered |= desc[i].mask;
    4929             :         }
    4930           0 :         if ((flags & ~covered) != 0) {
    4931           0 :                 fprintf(ctx->fp, "\t\t 0x%016" PRIx64 " Unknown\n", flags & ~covered);
    4932             :         }
    4933           0 : }
    4934             : 
    4935             : static void
    4936           0 : bs_dump_print_type_flags(struct spdk_bs_load_ctx *ctx, struct spdk_blob_md_descriptor *desc)
    4937             : {
    4938             :         struct spdk_blob_md_descriptor_flags *type_desc;
    4939             : #define ADD_FLAG(f) { f, f, #f }
    4940             : #define ADD_MASK_VAL(m, v) { m, v, #v }
    4941             :         static struct type_flag_desc invalid[] = {
    4942             :                 ADD_FLAG(SPDK_BLOB_THIN_PROV),
    4943             :                 ADD_FLAG(SPDK_BLOB_INTERNAL_XATTR),
    4944             :                 ADD_FLAG(SPDK_BLOB_EXTENT_TABLE),
    4945             :         };
    4946             :         static struct type_flag_desc data_ro[] = {
    4947             :                 ADD_FLAG(SPDK_BLOB_READ_ONLY),
    4948             :         };
    4949             :         static struct type_flag_desc md_ro[] = {
    4950             :                 ADD_MASK_VAL(SPDK_BLOB_MD_RO_FLAGS_MASK, BLOB_CLEAR_WITH_DEFAULT),
    4951             :                 ADD_MASK_VAL(SPDK_BLOB_MD_RO_FLAGS_MASK, BLOB_CLEAR_WITH_NONE),
    4952             :                 ADD_MASK_VAL(SPDK_BLOB_MD_RO_FLAGS_MASK, BLOB_CLEAR_WITH_UNMAP),
    4953             :                 ADD_MASK_VAL(SPDK_BLOB_MD_RO_FLAGS_MASK, BLOB_CLEAR_WITH_WRITE_ZEROES),
    4954             :         };
    4955             : #undef ADD_FLAG
    4956             : #undef ADD_MASK_VAL
    4957             : 
    4958           0 :         type_desc = (struct spdk_blob_md_descriptor_flags *)desc;
    4959           0 :         fprintf(ctx->fp, "Flags:\n");
    4960           0 :         fprintf(ctx->fp, "\tinvalid: 0x%016" PRIx64 "\n", type_desc->invalid_flags);
    4961           0 :         bs_dump_print_type_bits(ctx, type_desc->invalid_flags, invalid,
    4962             :                                 SPDK_COUNTOF(invalid));
    4963           0 :         fprintf(ctx->fp, "\tdata_ro: 0x%016" PRIx64 "\n", type_desc->data_ro_flags);
    4964           0 :         bs_dump_print_type_bits(ctx, type_desc->data_ro_flags, data_ro,
    4965             :                                 SPDK_COUNTOF(data_ro));
    4966           0 :         fprintf(ctx->fp, "\t  md_ro: 0x%016" PRIx64 "\n", type_desc->md_ro_flags);
    4967           0 :         bs_dump_print_type_bits(ctx, type_desc->md_ro_flags, md_ro,
    4968             :                                 SPDK_COUNTOF(md_ro));
    4969           0 : }
    4970             : 
    4971             : static void
    4972           0 : bs_dump_print_extent_table(struct spdk_bs_load_ctx *ctx, struct spdk_blob_md_descriptor *desc)
    4973             : {
    4974             :         struct spdk_blob_md_descriptor_extent_table *et_desc;
    4975             :         uint64_t num_extent_pages;
    4976             :         uint32_t et_idx;
    4977             : 
    4978           0 :         et_desc = (struct spdk_blob_md_descriptor_extent_table *)desc;
    4979           0 :         num_extent_pages = (et_desc->length - sizeof(et_desc->num_clusters)) /
    4980             :                            sizeof(et_desc->extent_page[0]);
    4981             : 
    4982           0 :         fprintf(ctx->fp, "Extent table:\n");
    4983           0 :         for (et_idx = 0; et_idx < num_extent_pages; et_idx++) {
    4984           0 :                 if (et_desc->extent_page[et_idx].page_idx == 0) {
    4985             :                         /* Zeroes represent unallocated extent pages. */
    4986           0 :                         continue;
    4987             :                 }
    4988           0 :                 fprintf(ctx->fp, "\tExtent page: %5" PRIu32 " length %3" PRIu32
    4989             :                         " at LBA %" PRIu64 "\n", et_desc->extent_page[et_idx].page_idx,
    4990             :                         et_desc->extent_page[et_idx].num_pages,
    4991             :                         bs_md_page_to_lba(ctx->bs, et_desc->extent_page[et_idx].page_idx));
    4992             :         }
    4993           0 : }
    4994             : 
    4995             : static void
    4996           0 : bs_dump_print_md_page(struct spdk_bs_load_ctx *ctx)
    4997             : {
    4998           0 :         uint32_t page_idx = ctx->cur_page;
    4999           0 :         struct spdk_blob_md_page *page = ctx->page;
    5000             :         struct spdk_blob_md_descriptor *desc;
    5001           0 :         size_t cur_desc = 0;
    5002             :         uint32_t crc;
    5003             : 
    5004           0 :         fprintf(ctx->fp, "=========\n");
    5005           0 :         fprintf(ctx->fp, "Metadata Page Index: %" PRIu32 " (0x%" PRIx32 ")\n", page_idx, page_idx);
    5006           0 :         fprintf(ctx->fp, "Start LBA: %" PRIu64 "\n", bs_md_page_to_lba(ctx->bs, page_idx));
    5007           0 :         fprintf(ctx->fp, "Blob ID: 0x%" PRIx64 "\n", page->id);
    5008           0 :         fprintf(ctx->fp, "Sequence: %" PRIu32 "\n", page->sequence_num);
    5009           0 :         if (page->next == SPDK_INVALID_MD_PAGE) {
    5010           0 :                 fprintf(ctx->fp, "Next: None\n");
    5011             :         } else {
    5012           0 :                 fprintf(ctx->fp, "Next: %" PRIu32 "\n", page->next);
    5013             :         }
    5014           0 :         fprintf(ctx->fp, "In used bit array%s:", ctx->super->clean ? "" : " (not clean: dubious)");
    5015           0 :         if (spdk_bit_array_get(ctx->bs->used_md_pages, page_idx)) {
    5016           0 :                 fprintf(ctx->fp, " md");
    5017             :         }
    5018           0 :         if (spdk_bit_array_get(ctx->bs->used_blobids, page_idx)) {
    5019           0 :                 fprintf(ctx->fp, " blob");
    5020             :         }
    5021           0 :         fprintf(ctx->fp, "\n");
    5022             : 
    5023           0 :         crc = blob_md_page_calc_crc(page);
    5024           0 :         fprintf(ctx->fp, "CRC: 0x%" PRIx32 " (%s)\n", page->crc, crc == page->crc ? "OK" : "Mismatch");
    5025             : 
    5026           0 :         desc = (struct spdk_blob_md_descriptor *)page->descriptors;
    5027           0 :         while (cur_desc < sizeof(page->descriptors)) {
    5028           0 :                 if (desc->type == SPDK_MD_DESCRIPTOR_TYPE_PADDING) {
    5029           0 :                         if (desc->length == 0) {
    5030             :                                 /* If padding and length are 0, this terminates the page */
    5031           0 :                                 break;
    5032             :                         }
    5033           0 :                 } else if (desc->type == SPDK_MD_DESCRIPTOR_TYPE_EXTENT_RLE) {
    5034             :                         struct spdk_blob_md_descriptor_extent_rle       *desc_extent_rle;
    5035             :                         unsigned int                            i;
    5036             : 
    5037           0 :                         desc_extent_rle = (struct spdk_blob_md_descriptor_extent_rle *)desc;
    5038             : 
    5039           0 :                         for (i = 0; i < desc_extent_rle->length / sizeof(desc_extent_rle->extents[0]); i++) {
    5040           0 :                                 if (desc_extent_rle->extents[i].cluster_idx != 0) {
    5041           0 :                                         fprintf(ctx->fp, "Allocated Extent - Start: %" PRIu32,
    5042             :                                                 desc_extent_rle->extents[i].cluster_idx);
    5043             :                                 } else {
    5044           0 :                                         fprintf(ctx->fp, "Unallocated Extent - ");
    5045             :                                 }
    5046           0 :                                 fprintf(ctx->fp, " Length: %" PRIu32, desc_extent_rle->extents[i].length);
    5047           0 :                                 fprintf(ctx->fp, "\n");
    5048             :                         }
    5049           0 :                 } else if (desc->type == SPDK_MD_DESCRIPTOR_TYPE_EXTENT_PAGE) {
    5050             :                         struct spdk_blob_md_descriptor_extent_page      *desc_extent;
    5051             :                         unsigned int                                    i;
    5052             : 
    5053           0 :                         desc_extent = (struct spdk_blob_md_descriptor_extent_page *)desc;
    5054             : 
    5055           0 :                         for (i = 0; i < desc_extent->length / sizeof(desc_extent->cluster_idx[0]); i++) {
    5056           0 :                                 if (desc_extent->cluster_idx[i] != 0) {
    5057           0 :                                         fprintf(ctx->fp, "Allocated Extent - Start: %" PRIu32,
    5058             :                                                 desc_extent->cluster_idx[i]);
    5059             :                                 } else {
    5060           0 :                                         fprintf(ctx->fp, "Unallocated Extent");
    5061             :                                 }
    5062           0 :                                 fprintf(ctx->fp, "\n");
    5063             :                         }
    5064           0 :                 } else if (desc->type == SPDK_MD_DESCRIPTOR_TYPE_XATTR) {
    5065           0 :                         bs_dump_print_xattr(ctx, desc);
    5066           0 :                 } else if (desc->type == SPDK_MD_DESCRIPTOR_TYPE_XATTR_INTERNAL) {
    5067           0 :                         bs_dump_print_xattr(ctx, desc);
    5068           0 :                 } else if (desc->type == SPDK_MD_DESCRIPTOR_TYPE_FLAGS) {
    5069           0 :                         bs_dump_print_type_flags(ctx, desc);
    5070           0 :                 } else if (desc->type == SPDK_MD_DESCRIPTOR_TYPE_EXTENT_TABLE) {
    5071           0 :                         bs_dump_print_extent_table(ctx, desc);
    5072             :                 } else {
    5073             :                         /* Error */
    5074           0 :                         fprintf(ctx->fp, "Unknown descriptor type %" PRIu8 "\n", desc->type);
    5075             :                 }
    5076             :                 /* Advance to the next descriptor */
    5077           0 :                 cur_desc += sizeof(*desc) + desc->length;
    5078           0 :                 if (cur_desc + sizeof(*desc) > sizeof(page->descriptors)) {
    5079           0 :                         break;
    5080             :                 }
    5081           0 :                 desc = (struct spdk_blob_md_descriptor *)((uintptr_t)page->descriptors + cur_desc);
    5082             :         }
    5083           0 : }
    5084             : 
    5085             : static void
    5086           0 : bs_dump_read_md_page_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
    5087             : {
    5088           0 :         struct spdk_bs_load_ctx *ctx = cb_arg;
    5089             : 
    5090           0 :         if (bserrno != 0) {
    5091           0 :                 bs_dump_finish(seq, ctx, bserrno);
    5092           0 :                 return;
    5093             :         }
    5094             : 
    5095           0 :         if (ctx->page->id != 0) {
    5096           0 :                 bs_dump_print_md_page(ctx);
    5097             :         }
    5098             : 
    5099           0 :         ctx->cur_page++;
    5100             : 
    5101           0 :         if (ctx->cur_page < ctx->super->md_len) {
    5102           0 :                 bs_dump_read_md_page(seq, ctx);
    5103             :         } else {
    5104           0 :                 spdk_free(ctx->page);
    5105           0 :                 bs_dump_finish(seq, ctx, 0);
    5106             :         }
    5107             : }
    5108             : 
    5109             : static void
    5110           0 : bs_dump_read_md_page(spdk_bs_sequence_t *seq, void *cb_arg)
    5111             : {
    5112           0 :         struct spdk_bs_load_ctx *ctx = cb_arg;
    5113             :         uint64_t lba;
    5114             : 
    5115           0 :         assert(ctx->cur_page < ctx->super->md_len);
    5116           0 :         lba = bs_page_to_lba(ctx->bs, ctx->super->md_start + ctx->cur_page);
    5117           0 :         bs_sequence_read_dev(seq, ctx->page, lba,
    5118           0 :                              bs_byte_to_lba(ctx->bs, SPDK_BS_PAGE_SIZE),
    5119             :                              bs_dump_read_md_page_cpl, ctx);
    5120           0 : }
    5121             : 
    5122             : static void
    5123           0 : bs_dump_super_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
    5124             : {
    5125           0 :         struct spdk_bs_load_ctx *ctx = cb_arg;
    5126             :         int rc;
    5127             : 
    5128           0 :         fprintf(ctx->fp, "Signature: \"%.8s\" ", ctx->super->signature);
    5129           0 :         if (memcmp(ctx->super->signature, SPDK_BS_SUPER_BLOCK_SIG,
    5130             :                    sizeof(ctx->super->signature)) != 0) {
    5131           0 :                 fprintf(ctx->fp, "(Mismatch)\n");
    5132           0 :                 bs_dump_finish(seq, ctx, bserrno);
    5133           0 :                 return;
    5134             :         } else {
    5135           0 :                 fprintf(ctx->fp, "(OK)\n");
    5136             :         }
    5137           0 :         fprintf(ctx->fp, "Version: %" PRIu32 "\n", ctx->super->version);
    5138           0 :         fprintf(ctx->fp, "CRC: 0x%x (%s)\n", ctx->super->crc,
    5139           0 :                 (ctx->super->crc == blob_md_page_calc_crc(ctx->super)) ? "OK" : "Mismatch");
    5140           0 :         fprintf(ctx->fp, "Blobstore Type: %.*s\n", SPDK_BLOBSTORE_TYPE_LENGTH, ctx->super->bstype.bstype);
    5141           0 :         fprintf(ctx->fp, "Cluster Size: %" PRIu32 "\n", ctx->super->cluster_size);
    5142           0 :         fprintf(ctx->fp, "Super Blob ID: ");
    5143           0 :         if (ctx->super->super_blob == SPDK_BLOBID_INVALID) {
    5144           0 :                 fprintf(ctx->fp, "(None)\n");
    5145             :         } else {
    5146           0 :                 fprintf(ctx->fp, "0x%" PRIx64 "\n", ctx->super->super_blob);
    5147             :         }
    5148           0 :         fprintf(ctx->fp, "Clean: %" PRIu32 "\n", ctx->super->clean);
    5149           0 :         fprintf(ctx->fp, "Used Metadata Page Mask Start: %" PRIu32 "\n", ctx->super->used_page_mask_start);
    5150           0 :         fprintf(ctx->fp, "Used Metadata Page Mask Length: %" PRIu32 "\n", ctx->super->used_page_mask_len);
    5151           0 :         fprintf(ctx->fp, "Used Cluster Mask Start: %" PRIu32 "\n", ctx->super->used_cluster_mask_start);
    5152           0 :         fprintf(ctx->fp, "Used Cluster Mask Length: %" PRIu32 "\n", ctx->super->used_cluster_mask_len);
    5153           0 :         fprintf(ctx->fp, "Used Blob ID Mask Start: %" PRIu32 "\n", ctx->super->used_blobid_mask_start);
    5154           0 :         fprintf(ctx->fp, "Used Blob ID Mask Length: %" PRIu32 "\n", ctx->super->used_blobid_mask_len);
    5155           0 :         fprintf(ctx->fp, "Metadata Start: %" PRIu32 "\n", ctx->super->md_start);
    5156           0 :         fprintf(ctx->fp, "Metadata Length: %" PRIu32 "\n", ctx->super->md_len);
    5157             : 
    5158           0 :         ctx->cur_page = 0;
    5159           0 :         ctx->page = spdk_zmalloc(SPDK_BS_PAGE_SIZE, 0,
    5160             :                                  NULL, SPDK_ENV_SOCKET_ID_ANY, SPDK_MALLOC_DMA);
    5161           0 :         if (!ctx->page) {
    5162           0 :                 bs_dump_finish(seq, ctx, -ENOMEM);
    5163           0 :                 return;
    5164             :         }
    5165             : 
    5166           0 :         rc = bs_parse_super(ctx);
    5167           0 :         if (rc < 0) {
    5168           0 :                 bs_load_ctx_fail(ctx, rc);
    5169           0 :                 return;
    5170             :         }
    5171             : 
    5172           0 :         bs_load_read_used_pages(ctx);
    5173             : }
    5174             : 
    5175             : void
    5176           0 : spdk_bs_dump(struct spdk_bs_dev *dev, FILE *fp, spdk_bs_dump_print_xattr print_xattr_fn,
    5177             :              spdk_bs_op_complete cb_fn, void *cb_arg)
    5178             : {
    5179           0 :         struct spdk_blob_store  *bs;
    5180           0 :         struct spdk_bs_cpl      cpl;
    5181           0 :         struct spdk_bs_load_ctx *ctx;
    5182           0 :         struct spdk_bs_opts     opts = {};
    5183             :         int err;
    5184             : 
    5185           0 :         SPDK_DEBUGLOG(blob, "Dumping blobstore from dev %p\n", dev);
    5186             : 
    5187           0 :         spdk_bs_opts_init(&opts, sizeof(opts));
    5188             : 
    5189           0 :         err = bs_alloc(dev, &opts, &bs, &ctx);
    5190           0 :         if (err) {
    5191           0 :                 dev->destroy(dev);
    5192           0 :                 cb_fn(cb_arg, err);
    5193           0 :                 return;
    5194             :         }
    5195             : 
    5196           0 :         ctx->dumping = true;
    5197           0 :         ctx->fp = fp;
    5198           0 :         ctx->print_xattr_fn = print_xattr_fn;
    5199             : 
    5200           0 :         cpl.type = SPDK_BS_CPL_TYPE_BS_BASIC;
    5201           0 :         cpl.u.bs_basic.cb_fn = cb_fn;
    5202           0 :         cpl.u.bs_basic.cb_arg = cb_arg;
    5203             : 
    5204           0 :         ctx->seq = bs_sequence_start_bs(bs->md_channel, &cpl);
    5205           0 :         if (!ctx->seq) {
    5206           0 :                 spdk_free(ctx->super);
    5207           0 :                 free(ctx);
    5208           0 :                 bs_free(bs);
    5209           0 :                 cb_fn(cb_arg, -ENOMEM);
    5210           0 :                 return;
    5211             :         }
    5212             : 
    5213             :         /* Read the super block */
    5214           0 :         bs_sequence_read_dev(ctx->seq, ctx->super, bs_page_to_lba(bs, 0),
    5215           0 :                              bs_byte_to_lba(bs, sizeof(*ctx->super)),
    5216             :                              bs_dump_super_cpl, ctx);
    5217             : }
    5218             : 
    5219             : /* END spdk_bs_dump */
    5220             : 
    5221             : /* START spdk_bs_init */
    5222             : 
    5223             : static void
    5224         444 : bs_init_persist_super_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
    5225             : {
    5226         444 :         struct spdk_bs_load_ctx *ctx = cb_arg;
    5227             : 
    5228         444 :         ctx->bs->used_clusters = spdk_bit_pool_create_from_array(ctx->used_clusters);
    5229         444 :         spdk_free(ctx->super);
    5230         444 :         free(ctx);
    5231             : 
    5232         444 :         bs_sequence_finish(seq, bserrno);
    5233         444 : }
    5234             : 
    5235             : static void
    5236         444 : bs_init_trim_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
    5237             : {
    5238         444 :         struct spdk_bs_load_ctx *ctx = cb_arg;
    5239             : 
    5240             :         /* Write super block */
    5241         444 :         bs_sequence_write_dev(seq, ctx->super, bs_page_to_lba(ctx->bs, 0),
    5242         444 :                               bs_byte_to_lba(ctx->bs, sizeof(*ctx->super)),
    5243             :                               bs_init_persist_super_cpl, ctx);
    5244         444 : }
    5245             : 
    5246             : void
    5247         460 : spdk_bs_init(struct spdk_bs_dev *dev, struct spdk_bs_opts *o,
    5248             :              spdk_bs_op_with_handle_complete cb_fn, void *cb_arg)
    5249             : {
    5250         460 :         struct spdk_bs_load_ctx *ctx;
    5251         460 :         struct spdk_blob_store  *bs;
    5252         460 :         struct spdk_bs_cpl      cpl;
    5253             :         spdk_bs_sequence_t      *seq;
    5254             :         spdk_bs_batch_t         *batch;
    5255             :         uint64_t                num_md_lba;
    5256             :         uint64_t                num_md_pages;
    5257             :         uint64_t                num_md_clusters;
    5258             :         uint64_t                max_used_cluster_mask_len;
    5259             :         uint32_t                i;
    5260         460 :         struct spdk_bs_opts     opts = {};
    5261             :         int                     rc;
    5262             :         uint64_t                lba, lba_count;
    5263             : 
    5264         460 :         SPDK_DEBUGLOG(blob, "Initializing blobstore on dev %p\n", dev);
    5265             : 
    5266         460 :         if ((SPDK_BS_PAGE_SIZE % dev->blocklen) != 0) {
    5267           4 :                 SPDK_ERRLOG("unsupported dev block length of %d\n",
    5268             :                             dev->blocklen);
    5269           4 :                 dev->destroy(dev);
    5270           4 :                 cb_fn(cb_arg, NULL, -EINVAL);
    5271           4 :                 return;
    5272             :         }
    5273             : 
    5274         456 :         spdk_bs_opts_init(&opts, sizeof(opts));
    5275         456 :         if (o) {
    5276         166 :                 if (bs_opts_copy(o, &opts)) {
    5277           0 :                         return;
    5278             :                 }
    5279             :         }
    5280             : 
    5281         456 :         if (bs_opts_verify(&opts) != 0) {
    5282           4 :                 dev->destroy(dev);
    5283           4 :                 cb_fn(cb_arg, NULL, -EINVAL);
    5284           4 :                 return;
    5285             :         }
    5286             : 
    5287         452 :         rc = bs_alloc(dev, &opts, &bs, &ctx);
    5288         452 :         if (rc) {
    5289           4 :                 dev->destroy(dev);
    5290           4 :                 cb_fn(cb_arg, NULL, rc);
    5291           4 :                 return;
    5292             :         }
    5293             : 
    5294         448 :         if (opts.num_md_pages == SPDK_BLOB_OPTS_NUM_MD_PAGES) {
    5295             :                 /* By default, allocate 1 page per cluster.
    5296             :                  * Technically, this over-allocates metadata
    5297             :                  * because more metadata will reduce the number
    5298             :                  * of usable clusters. This can be addressed with
    5299             :                  * more complex math in the future.
    5300             :                  */
    5301         440 :                 bs->md_len = bs->total_clusters;
    5302             :         } else {
    5303           8 :                 bs->md_len = opts.num_md_pages;
    5304             :         }
    5305         448 :         rc = spdk_bit_array_resize(&bs->used_md_pages, bs->md_len);
    5306         448 :         if (rc < 0) {
    5307           0 :                 spdk_free(ctx->super);
    5308           0 :                 free(ctx);
    5309           0 :                 bs_free(bs);
    5310           0 :                 cb_fn(cb_arg, NULL, -ENOMEM);
    5311           0 :                 return;
    5312             :         }
    5313             : 
    5314         448 :         rc = spdk_bit_array_resize(&bs->used_blobids, bs->md_len);
    5315         448 :         if (rc < 0) {
    5316           0 :                 spdk_free(ctx->super);
    5317           0 :                 free(ctx);
    5318           0 :                 bs_free(bs);
    5319           0 :                 cb_fn(cb_arg, NULL, -ENOMEM);
    5320           0 :                 return;
    5321             :         }
    5322             : 
    5323         448 :         rc = spdk_bit_array_resize(&bs->open_blobids, bs->md_len);
    5324         448 :         if (rc < 0) {
    5325           0 :                 spdk_free(ctx->super);
    5326           0 :                 free(ctx);
    5327           0 :                 bs_free(bs);
    5328           0 :                 cb_fn(cb_arg, NULL, -ENOMEM);
    5329           0 :                 return;
    5330             :         }
    5331             : 
    5332         448 :         memcpy(ctx->super->signature, SPDK_BS_SUPER_BLOCK_SIG,
    5333             :                sizeof(ctx->super->signature));
    5334         448 :         ctx->super->version = SPDK_BS_VERSION;
    5335         448 :         ctx->super->length = sizeof(*ctx->super);
    5336         448 :         ctx->super->super_blob = bs->super_blob;
    5337         448 :         ctx->super->clean = 0;
    5338         448 :         ctx->super->cluster_size = bs->cluster_sz;
    5339         448 :         ctx->super->io_unit_size = bs->io_unit_size;
    5340         448 :         memcpy(&ctx->super->bstype, &bs->bstype, sizeof(bs->bstype));
    5341             : 
    5342             :         /* Calculate how many pages the metadata consumes at the front
    5343             :          * of the disk.
    5344             :          */
    5345             : 
    5346             :         /* The super block uses 1 page */
    5347         448 :         num_md_pages = 1;
    5348             : 
    5349             :         /* The used_md_pages mask requires 1 bit per metadata page, rounded
    5350             :          * up to the nearest page, plus a header.
    5351             :          */
    5352         448 :         ctx->super->used_page_mask_start = num_md_pages;
    5353         448 :         ctx->super->used_page_mask_len = spdk_divide_round_up(sizeof(struct spdk_bs_md_mask) +
    5354         448 :                                          spdk_divide_round_up(bs->md_len, 8),
    5355             :                                          SPDK_BS_PAGE_SIZE);
    5356         448 :         num_md_pages += ctx->super->used_page_mask_len;
    5357             : 
    5358             :         /* The used_clusters mask requires 1 bit per cluster, rounded
    5359             :          * up to the nearest page, plus a header.
    5360             :          */
    5361         448 :         ctx->super->used_cluster_mask_start = num_md_pages;
    5362         448 :         ctx->super->used_cluster_mask_len = spdk_divide_round_up(sizeof(struct spdk_bs_md_mask) +
    5363         448 :                                             spdk_divide_round_up(bs->total_clusters, 8),
    5364             :                                             SPDK_BS_PAGE_SIZE);
    5365             :         /* The blobstore might be extended, then the used_cluster bitmap will need more space.
    5366             :          * Here we calculate the max clusters we can support according to the
    5367             :          * num_md_pages (bs->md_len).
    5368             :          */
    5369         448 :         max_used_cluster_mask_len = spdk_divide_round_up(sizeof(struct spdk_bs_md_mask) +
    5370         448 :                                     spdk_divide_round_up(bs->md_len, 8),
    5371             :                                     SPDK_BS_PAGE_SIZE);
    5372         448 :         max_used_cluster_mask_len = spdk_max(max_used_cluster_mask_len,
    5373             :                                              ctx->super->used_cluster_mask_len);
    5374         448 :         num_md_pages += max_used_cluster_mask_len;
    5375             : 
    5376             :         /* The used_blobids mask requires 1 bit per metadata page, rounded
    5377             :          * up to the nearest page, plus a header.
    5378             :          */
    5379         448 :         ctx->super->used_blobid_mask_start = num_md_pages;
    5380         448 :         ctx->super->used_blobid_mask_len = spdk_divide_round_up(sizeof(struct spdk_bs_md_mask) +
    5381         448 :                                            spdk_divide_round_up(bs->md_len, 8),
    5382             :                                            SPDK_BS_PAGE_SIZE);
    5383         448 :         num_md_pages += ctx->super->used_blobid_mask_len;
    5384             : 
    5385             :         /* The metadata region size was chosen above */
    5386         448 :         ctx->super->md_start = bs->md_start = num_md_pages;
    5387         448 :         ctx->super->md_len = bs->md_len;
    5388         448 :         num_md_pages += bs->md_len;
    5389             : 
    5390         448 :         num_md_lba = bs_page_to_lba(bs, num_md_pages);
    5391             : 
    5392         448 :         ctx->super->size = dev->blockcnt * dev->blocklen;
    5393             : 
    5394         448 :         ctx->super->crc = blob_md_page_calc_crc(ctx->super);
    5395             : 
    5396         448 :         num_md_clusters = spdk_divide_round_up(num_md_pages, bs->pages_per_cluster);
    5397         448 :         if (num_md_clusters > bs->total_clusters) {
    5398           4 :                 SPDK_ERRLOG("Blobstore metadata cannot use more clusters than is available, "
    5399             :                             "please decrease number of pages reserved for metadata "
    5400             :                             "or increase cluster size.\n");
    5401           4 :                 spdk_free(ctx->super);
    5402           4 :                 spdk_bit_array_free(&ctx->used_clusters);
    5403           4 :                 free(ctx);
    5404           4 :                 bs_free(bs);
    5405           4 :                 cb_fn(cb_arg, NULL, -ENOMEM);
    5406           4 :                 return;
    5407             :         }
    5408             :         /* Claim all of the clusters used by the metadata */
    5409       63356 :         for (i = 0; i < num_md_clusters; i++) {
    5410       62912 :                 spdk_bit_array_set(ctx->used_clusters, i);
    5411             :         }
    5412             : 
    5413         444 :         bs->num_free_clusters -= num_md_clusters;
    5414         444 :         bs->total_data_clusters = bs->num_free_clusters;
    5415             : 
    5416         444 :         cpl.type = SPDK_BS_CPL_TYPE_BS_HANDLE;
    5417         444 :         cpl.u.bs_handle.cb_fn = cb_fn;
    5418         444 :         cpl.u.bs_handle.cb_arg = cb_arg;
    5419         444 :         cpl.u.bs_handle.bs = bs;
    5420             : 
    5421         444 :         seq = bs_sequence_start_bs(bs->md_channel, &cpl);
    5422         444 :         if (!seq) {
    5423           0 :                 spdk_free(ctx->super);
    5424           0 :                 free(ctx);
    5425           0 :                 bs_free(bs);
    5426           0 :                 cb_fn(cb_arg, NULL, -ENOMEM);
    5427           0 :                 return;
    5428             :         }
    5429             : 
    5430         444 :         batch = bs_sequence_to_batch(seq, bs_init_trim_cpl, ctx);
    5431             : 
    5432             :         /* Clear metadata space */
    5433         444 :         bs_batch_write_zeroes_dev(batch, 0, num_md_lba);
    5434             : 
    5435         444 :         lba = num_md_lba;
    5436         444 :         lba_count = ctx->bs->dev->blockcnt - lba;
    5437         444 :         switch (opts.clear_method) {
    5438         428 :         case BS_CLEAR_WITH_UNMAP:
    5439             :                 /* Trim data clusters */
    5440         428 :                 bs_batch_unmap_dev(batch, lba, lba_count);
    5441         428 :                 break;
    5442           0 :         case BS_CLEAR_WITH_WRITE_ZEROES:
    5443             :                 /* Write_zeroes to data clusters */
    5444           0 :                 bs_batch_write_zeroes_dev(batch, lba, lba_count);
    5445           0 :                 break;
    5446          16 :         case BS_CLEAR_WITH_NONE:
    5447             :         default:
    5448          16 :                 break;
    5449             :         }
    5450             : 
    5451         444 :         bs_batch_close(batch);
    5452             : }
    5453             : 
    5454             : /* END spdk_bs_init */
    5455             : 
    5456             : /* START spdk_bs_destroy */
    5457             : 
    5458             : static void
    5459           4 : bs_destroy_trim_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
    5460             : {
    5461           4 :         struct spdk_bs_load_ctx *ctx = cb_arg;
    5462           4 :         struct spdk_blob_store *bs = ctx->bs;
    5463             : 
    5464             :         /*
    5465             :          * We need to defer calling bs_call_cpl() until after
    5466             :          * dev destruction, so tuck these away for later use.
    5467             :          */
    5468           4 :         bs->unload_err = bserrno;
    5469           4 :         memcpy(&bs->unload_cpl, &seq->cpl, sizeof(struct spdk_bs_cpl));
    5470           4 :         seq->cpl.type = SPDK_BS_CPL_TYPE_NONE;
    5471             : 
    5472           4 :         bs_sequence_finish(seq, bserrno);
    5473             : 
    5474           4 :         bs_free(bs);
    5475           4 :         free(ctx);
    5476           4 : }
    5477             : 
    5478             : void
    5479           4 : spdk_bs_destroy(struct spdk_blob_store *bs, spdk_bs_op_complete cb_fn,
    5480             :                 void *cb_arg)
    5481             : {
    5482           4 :         struct spdk_bs_cpl      cpl;
    5483             :         spdk_bs_sequence_t      *seq;
    5484             :         struct spdk_bs_load_ctx *ctx;
    5485             : 
    5486           4 :         SPDK_DEBUGLOG(blob, "Destroying blobstore\n");
    5487             : 
    5488           4 :         if (!RB_EMPTY(&bs->open_blobs)) {
    5489           0 :                 SPDK_ERRLOG("Blobstore still has open blobs\n");
    5490           0 :                 cb_fn(cb_arg, -EBUSY);
    5491           0 :                 return;
    5492             :         }
    5493             : 
    5494           4 :         cpl.type = SPDK_BS_CPL_TYPE_BS_BASIC;
    5495           4 :         cpl.u.bs_basic.cb_fn = cb_fn;
    5496           4 :         cpl.u.bs_basic.cb_arg = cb_arg;
    5497             : 
    5498           4 :         ctx = calloc(1, sizeof(*ctx));
    5499           4 :         if (!ctx) {
    5500           0 :                 cb_fn(cb_arg, -ENOMEM);
    5501           0 :                 return;
    5502             :         }
    5503             : 
    5504           4 :         ctx->bs = bs;
    5505             : 
    5506           4 :         seq = bs_sequence_start_bs(bs->md_channel, &cpl);
    5507           4 :         if (!seq) {
    5508           0 :                 free(ctx);
    5509           0 :                 cb_fn(cb_arg, -ENOMEM);
    5510           0 :                 return;
    5511             :         }
    5512             : 
    5513             :         /* Write zeroes to the super block */
    5514           4 :         bs_sequence_write_zeroes_dev(seq,
    5515             :                                      bs_page_to_lba(bs, 0),
    5516             :                                      bs_byte_to_lba(bs, sizeof(struct spdk_bs_super_block)),
    5517             :                                      bs_destroy_trim_cpl, ctx);
    5518             : }
    5519             : 
    5520             : /* END spdk_bs_destroy */
    5521             : 
    5522             : /* START spdk_bs_unload */
    5523             : 
    5524             : static void
    5525         626 : bs_unload_finish(struct spdk_bs_load_ctx *ctx, int bserrno)
    5526             : {
    5527         626 :         spdk_bs_sequence_t *seq = ctx->seq;
    5528             : 
    5529         626 :         spdk_free(ctx->super);
    5530             : 
    5531             :         /*
    5532             :          * We need to defer calling bs_call_cpl() until after
    5533             :          * dev destruction, so tuck these away for later use.
    5534             :          */
    5535         626 :         ctx->bs->unload_err = bserrno;
    5536         626 :         memcpy(&ctx->bs->unload_cpl, &seq->cpl, sizeof(struct spdk_bs_cpl));
    5537         626 :         seq->cpl.type = SPDK_BS_CPL_TYPE_NONE;
    5538             : 
    5539         626 :         bs_sequence_finish(seq, bserrno);
    5540             : 
    5541         626 :         bs_free(ctx->bs);
    5542         626 :         free(ctx);
    5543         626 : }
    5544             : 
    5545             : static void
    5546         626 : bs_unload_write_super_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
    5547             : {
    5548         626 :         struct spdk_bs_load_ctx *ctx = cb_arg;
    5549             : 
    5550         626 :         bs_unload_finish(ctx, bserrno);
    5551         626 : }
    5552             : 
    5553             : static void
    5554         626 : bs_unload_write_used_clusters_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
    5555             : {
    5556         626 :         struct spdk_bs_load_ctx *ctx = cb_arg;
    5557             : 
    5558         626 :         spdk_free(ctx->mask);
    5559             : 
    5560         626 :         if (bserrno != 0) {
    5561           0 :                 bs_unload_finish(ctx, bserrno);
    5562           0 :                 return;
    5563             :         }
    5564             : 
    5565         626 :         ctx->super->clean = 1;
    5566             : 
    5567         626 :         bs_write_super(seq, ctx->bs, ctx->super, bs_unload_write_super_cpl, ctx);
    5568             : }
    5569             : 
    5570             : static void
    5571         626 : bs_unload_write_used_blobids_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
    5572             : {
    5573         626 :         struct spdk_bs_load_ctx *ctx = cb_arg;
    5574             : 
    5575         626 :         spdk_free(ctx->mask);
    5576         626 :         ctx->mask = NULL;
    5577             : 
    5578         626 :         if (bserrno != 0) {
    5579           0 :                 bs_unload_finish(ctx, bserrno);
    5580           0 :                 return;
    5581             :         }
    5582             : 
    5583         626 :         bs_write_used_clusters(seq, ctx, bs_unload_write_used_clusters_cpl);
    5584             : }
    5585             : 
    5586             : static void
    5587         626 : bs_unload_write_used_pages_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
    5588             : {
    5589         626 :         struct spdk_bs_load_ctx *ctx = cb_arg;
    5590             : 
    5591         626 :         spdk_free(ctx->mask);
    5592         626 :         ctx->mask = NULL;
    5593             : 
    5594         626 :         if (bserrno != 0) {
    5595           0 :                 bs_unload_finish(ctx, bserrno);
    5596           0 :                 return;
    5597             :         }
    5598             : 
    5599         626 :         bs_write_used_blobids(seq, ctx, bs_unload_write_used_blobids_cpl);
    5600             : }
    5601             : 
    5602             : static void
    5603         626 : bs_unload_read_super_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
    5604             : {
    5605         626 :         struct spdk_bs_load_ctx *ctx = cb_arg;
    5606             :         int rc;
    5607             : 
    5608         626 :         if (bserrno != 0) {
    5609           0 :                 bs_unload_finish(ctx, bserrno);
    5610           0 :                 return;
    5611             :         }
    5612             : 
    5613         626 :         rc = bs_super_validate(ctx->super, ctx->bs);
    5614         626 :         if (rc != 0) {
    5615           0 :                 bs_unload_finish(ctx, rc);
    5616           0 :                 return;
    5617             :         }
    5618             : 
    5619         626 :         bs_write_used_md(seq, cb_arg, bs_unload_write_used_pages_cpl);
    5620             : }
    5621             : 
    5622             : void
    5623         634 : spdk_bs_unload(struct spdk_blob_store *bs, spdk_bs_op_complete cb_fn, void *cb_arg)
    5624             : {
    5625         634 :         struct spdk_bs_cpl      cpl;
    5626             :         struct spdk_bs_load_ctx *ctx;
    5627             : 
    5628         634 :         SPDK_DEBUGLOG(blob, "Syncing blobstore\n");
    5629             : 
    5630             :         /*
    5631             :          * If external snapshot channels are being destroyed while the blobstore is unloaded, the
    5632             :          * unload is deferred until after the channel destruction completes.
    5633             :          */
    5634         634 :         if (bs->esnap_channels_unloading != 0) {
    5635           4 :                 if (bs->esnap_unload_cb_fn != NULL) {
    5636           0 :                         SPDK_ERRLOG("Blobstore unload in progress\n");
    5637           0 :                         cb_fn(cb_arg, -EBUSY);
    5638           0 :                         return;
    5639             :                 }
    5640           4 :                 SPDK_DEBUGLOG(blob_esnap, "Blobstore unload deferred: %" PRIu32
    5641             :                               " esnap clones are unloading\n", bs->esnap_channels_unloading);
    5642           4 :                 bs->esnap_unload_cb_fn = cb_fn;
    5643           4 :                 bs->esnap_unload_cb_arg = cb_arg;
    5644           4 :                 return;
    5645             :         }
    5646         630 :         if (bs->esnap_unload_cb_fn != NULL) {
    5647           4 :                 SPDK_DEBUGLOG(blob_esnap, "Blobstore deferred unload progressing\n");
    5648           4 :                 assert(bs->esnap_unload_cb_fn == cb_fn);
    5649           4 :                 assert(bs->esnap_unload_cb_arg == cb_arg);
    5650           4 :                 bs->esnap_unload_cb_fn = NULL;
    5651           4 :                 bs->esnap_unload_cb_arg = NULL;
    5652             :         }
    5653             : 
    5654         630 :         if (!RB_EMPTY(&bs->open_blobs)) {
    5655           4 :                 SPDK_ERRLOG("Blobstore still has open blobs\n");
    5656           4 :                 cb_fn(cb_arg, -EBUSY);
    5657           4 :                 return;
    5658             :         }
    5659             : 
    5660         626 :         ctx = calloc(1, sizeof(*ctx));
    5661         626 :         if (!ctx) {
    5662           0 :                 cb_fn(cb_arg, -ENOMEM);
    5663           0 :                 return;
    5664             :         }
    5665             : 
    5666         626 :         ctx->bs = bs;
    5667             : 
    5668         626 :         ctx->super = spdk_zmalloc(sizeof(*ctx->super), 0x1000, NULL,
    5669             :                                   SPDK_ENV_SOCKET_ID_ANY, SPDK_MALLOC_DMA);
    5670         626 :         if (!ctx->super) {
    5671           0 :                 free(ctx);
    5672           0 :                 cb_fn(cb_arg, -ENOMEM);
    5673           0 :                 return;
    5674             :         }
    5675             : 
    5676         626 :         cpl.type = SPDK_BS_CPL_TYPE_BS_BASIC;
    5677         626 :         cpl.u.bs_basic.cb_fn = cb_fn;
    5678         626 :         cpl.u.bs_basic.cb_arg = cb_arg;
    5679             : 
    5680         626 :         ctx->seq = bs_sequence_start_bs(bs->md_channel, &cpl);
    5681         626 :         if (!ctx->seq) {
    5682           0 :                 spdk_free(ctx->super);
    5683           0 :                 free(ctx);
    5684           0 :                 cb_fn(cb_arg, -ENOMEM);
    5685           0 :                 return;
    5686             :         }
    5687             : 
    5688             :         /* Read super block */
    5689         626 :         bs_sequence_read_dev(ctx->seq, ctx->super, bs_page_to_lba(bs, 0),
    5690         626 :                              bs_byte_to_lba(bs, sizeof(*ctx->super)),
    5691             :                              bs_unload_read_super_cpl, ctx);
    5692             : }
    5693             : 
    5694             : /* END spdk_bs_unload */
    5695             : 
    5696             : /* START spdk_bs_set_super */
    5697             : 
    5698             : struct spdk_bs_set_super_ctx {
    5699             :         struct spdk_blob_store          *bs;
    5700             :         struct spdk_bs_super_block      *super;
    5701             : };
    5702             : 
    5703             : static void
    5704           8 : bs_set_super_write_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
    5705             : {
    5706           8 :         struct spdk_bs_set_super_ctx    *ctx = cb_arg;
    5707             : 
    5708           8 :         if (bserrno != 0) {
    5709           0 :                 SPDK_ERRLOG("Unable to write to super block of blobstore\n");
    5710             :         }
    5711             : 
    5712           8 :         spdk_free(ctx->super);
    5713             : 
    5714           8 :         bs_sequence_finish(seq, bserrno);
    5715             : 
    5716           8 :         free(ctx);
    5717           8 : }
    5718             : 
    5719             : static void
    5720           8 : bs_set_super_read_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
    5721             : {
    5722           8 :         struct spdk_bs_set_super_ctx    *ctx = cb_arg;
    5723             :         int rc;
    5724             : 
    5725           8 :         if (bserrno != 0) {
    5726           0 :                 SPDK_ERRLOG("Unable to read super block of blobstore\n");
    5727           0 :                 spdk_free(ctx->super);
    5728           0 :                 bs_sequence_finish(seq, bserrno);
    5729           0 :                 free(ctx);
    5730           0 :                 return;
    5731             :         }
    5732             : 
    5733           8 :         rc = bs_super_validate(ctx->super, ctx->bs);
    5734           8 :         if (rc != 0) {
    5735           0 :                 SPDK_ERRLOG("Not a valid super block\n");
    5736           0 :                 spdk_free(ctx->super);
    5737           0 :                 bs_sequence_finish(seq, rc);
    5738           0 :                 free(ctx);
    5739           0 :                 return;
    5740             :         }
    5741             : 
    5742           8 :         bs_write_super(seq, ctx->bs, ctx->super, bs_set_super_write_cpl, ctx);
    5743             : }
    5744             : 
    5745             : void
    5746           8 : spdk_bs_set_super(struct spdk_blob_store *bs, spdk_blob_id blobid,
    5747             :                   spdk_bs_op_complete cb_fn, void *cb_arg)
    5748             : {
    5749           8 :         struct spdk_bs_cpl              cpl;
    5750             :         spdk_bs_sequence_t              *seq;
    5751             :         struct spdk_bs_set_super_ctx    *ctx;
    5752             : 
    5753           8 :         SPDK_DEBUGLOG(blob, "Setting super blob id on blobstore\n");
    5754             : 
    5755           8 :         ctx = calloc(1, sizeof(*ctx));
    5756           8 :         if (!ctx) {
    5757           0 :                 cb_fn(cb_arg, -ENOMEM);
    5758           0 :                 return;
    5759             :         }
    5760             : 
    5761           8 :         ctx->bs = bs;
    5762             : 
    5763           8 :         ctx->super = spdk_zmalloc(sizeof(*ctx->super), 0x1000, NULL,
    5764             :                                   SPDK_ENV_SOCKET_ID_ANY, SPDK_MALLOC_DMA);
    5765           8 :         if (!ctx->super) {
    5766           0 :                 free(ctx);
    5767           0 :                 cb_fn(cb_arg, -ENOMEM);
    5768           0 :                 return;
    5769             :         }
    5770             : 
    5771           8 :         cpl.type = SPDK_BS_CPL_TYPE_BS_BASIC;
    5772           8 :         cpl.u.bs_basic.cb_fn = cb_fn;
    5773           8 :         cpl.u.bs_basic.cb_arg = cb_arg;
    5774             : 
    5775           8 :         seq = bs_sequence_start_bs(bs->md_channel, &cpl);
    5776           8 :         if (!seq) {
    5777           0 :                 spdk_free(ctx->super);
    5778           0 :                 free(ctx);
    5779           0 :                 cb_fn(cb_arg, -ENOMEM);
    5780           0 :                 return;
    5781             :         }
    5782             : 
    5783           8 :         bs->super_blob = blobid;
    5784             : 
    5785             :         /* Read super block */
    5786           8 :         bs_sequence_read_dev(seq, ctx->super, bs_page_to_lba(bs, 0),
    5787           8 :                              bs_byte_to_lba(bs, sizeof(*ctx->super)),
    5788             :                              bs_set_super_read_cpl, ctx);
    5789             : }
    5790             : 
    5791             : /* END spdk_bs_set_super */
    5792             : 
    5793             : void
    5794          12 : spdk_bs_get_super(struct spdk_blob_store *bs,
    5795             :                   spdk_blob_op_with_id_complete cb_fn, void *cb_arg)
    5796             : {
    5797          12 :         if (bs->super_blob == SPDK_BLOBID_INVALID) {
    5798           4 :                 cb_fn(cb_arg, SPDK_BLOBID_INVALID, -ENOENT);
    5799             :         } else {
    5800           8 :                 cb_fn(cb_arg, bs->super_blob, 0);
    5801             :         }
    5802          12 : }
    5803             : 
    5804             : uint64_t
    5805          88 : spdk_bs_get_cluster_size(struct spdk_blob_store *bs)
    5806             : {
    5807          88 :         return bs->cluster_sz;
    5808             : }
    5809             : 
    5810             : uint64_t
    5811          60 : spdk_bs_get_page_size(struct spdk_blob_store *bs)
    5812             : {
    5813          60 :         return SPDK_BS_PAGE_SIZE;
    5814             : }
    5815             : 
    5816             : uint64_t
    5817         642 : spdk_bs_get_io_unit_size(struct spdk_blob_store *bs)
    5818             : {
    5819         642 :         return bs->io_unit_size;
    5820             : }
    5821             : 
    5822             : uint64_t
    5823         380 : spdk_bs_free_cluster_count(struct spdk_blob_store *bs)
    5824             : {
    5825         380 :         return bs->num_free_clusters;
    5826             : }
    5827             : 
    5828             : uint64_t
    5829          92 : spdk_bs_total_data_cluster_count(struct spdk_blob_store *bs)
    5830             : {
    5831          92 :         return bs->total_data_clusters;
    5832             : }
    5833             : 
    5834             : static int
    5835         752 : bs_register_md_thread(struct spdk_blob_store *bs)
    5836             : {
    5837         752 :         bs->md_channel = spdk_get_io_channel(bs);
    5838         752 :         if (!bs->md_channel) {
    5839           0 :                 SPDK_ERRLOG("Failed to get IO channel.\n");
    5840           0 :                 return -1;
    5841             :         }
    5842             : 
    5843         752 :         return 0;
    5844             : }
    5845             : 
    5846             : static int
    5847         752 : bs_unregister_md_thread(struct spdk_blob_store *bs)
    5848             : {
    5849         752 :         spdk_put_io_channel(bs->md_channel);
    5850             : 
    5851         752 :         return 0;
    5852             : }
    5853             : 
    5854             : spdk_blob_id
    5855         466 : spdk_blob_get_id(struct spdk_blob *blob)
    5856             : {
    5857         466 :         assert(blob != NULL);
    5858             : 
    5859         466 :         return blob->id;
    5860             : }
    5861             : 
    5862             : uint64_t
    5863          24 : spdk_blob_get_num_pages(struct spdk_blob *blob)
    5864             : {
    5865          24 :         assert(blob != NULL);
    5866             : 
    5867          24 :         return bs_cluster_to_page(blob->bs, blob->active.num_clusters);
    5868             : }
    5869             : 
    5870             : uint64_t
    5871          24 : spdk_blob_get_num_io_units(struct spdk_blob *blob)
    5872             : {
    5873          24 :         assert(blob != NULL);
    5874             : 
    5875          24 :         return spdk_blob_get_num_pages(blob) * bs_io_unit_per_page(blob->bs);
    5876             : }
    5877             : 
    5878             : uint64_t
    5879         485 : spdk_blob_get_num_clusters(struct spdk_blob *blob)
    5880             : {
    5881         485 :         assert(blob != NULL);
    5882             : 
    5883         485 :         return blob->active.num_clusters;
    5884             : }
    5885             : 
    5886             : static uint64_t
    5887          24 : blob_find_io_unit(struct spdk_blob *blob, uint64_t offset, bool is_allocated)
    5888             : {
    5889          24 :         uint64_t blob_io_unit_num = spdk_blob_get_num_io_units(blob);
    5890             : 
    5891          44 :         while (offset < blob_io_unit_num) {
    5892          40 :                 if (bs_io_unit_is_allocated(blob, offset) == is_allocated) {
    5893          20 :                         return offset;
    5894             :                 }
    5895             : 
    5896          20 :                 offset += bs_num_io_units_to_cluster_boundary(blob, offset);
    5897             :         }
    5898             : 
    5899           4 :         return UINT64_MAX;
    5900             : }
    5901             : 
    5902             : uint64_t
    5903          12 : spdk_blob_get_next_allocated_io_unit(struct spdk_blob *blob, uint64_t offset)
    5904             : {
    5905          12 :         return blob_find_io_unit(blob, offset, true);
    5906             : }
    5907             : 
    5908             : uint64_t
    5909          12 : spdk_blob_get_next_unallocated_io_unit(struct spdk_blob *blob, uint64_t offset)
    5910             : {
    5911          12 :         return blob_find_io_unit(blob, offset, false);
    5912             : }
    5913             : 
    5914             : /* START spdk_bs_create_blob */
    5915             : 
    5916             : static void
    5917        1786 : bs_create_blob_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
    5918             : {
    5919        1786 :         struct spdk_blob *blob = cb_arg;
    5920        1786 :         uint32_t page_idx = bs_blobid_to_page(blob->id);
    5921             : 
    5922        1786 :         if (bserrno != 0) {
    5923           0 :                 spdk_spin_lock(&blob->bs->used_lock);
    5924           0 :                 spdk_bit_array_clear(blob->bs->used_blobids, page_idx);
    5925           0 :                 bs_release_md_page(blob->bs, page_idx);
    5926           0 :                 spdk_spin_unlock(&blob->bs->used_lock);
    5927             :         }
    5928             : 
    5929        1786 :         blob_free(blob);
    5930             : 
    5931        1786 :         bs_sequence_finish(seq, bserrno);
    5932        1786 : }
    5933             : 
    5934             : static int
    5935        3592 : blob_set_xattrs(struct spdk_blob *blob, const struct spdk_blob_xattr_opts *xattrs,
    5936             :                 bool internal)
    5937             : {
    5938             :         uint64_t i;
    5939        3592 :         size_t value_len = 0;
    5940             :         int rc;
    5941        3592 :         const void *value = NULL;
    5942        3592 :         if (xattrs->count > 0 && xattrs->get_value == NULL) {
    5943           8 :                 return -EINVAL;
    5944             :         }
    5945        3864 :         for (i = 0; i < xattrs->count; i++) {
    5946         284 :                 xattrs->get_value(xattrs->ctx, xattrs->names[i], &value, &value_len);
    5947         284 :                 if (value == NULL || value_len == 0) {
    5948           4 :                         return -EINVAL;
    5949             :                 }
    5950         280 :                 rc = blob_set_xattr(blob, xattrs->names[i], value, value_len, internal);
    5951         280 :                 if (rc < 0) {
    5952           0 :                         return rc;
    5953             :                 }
    5954             :         }
    5955        3580 :         return 0;
    5956             : }
    5957             : 
    5958             : static void
    5959        1770 : blob_opts_copy(const struct spdk_blob_opts *src, struct spdk_blob_opts *dst)
    5960             : {
    5961             : #define FIELD_OK(field) \
    5962             :         offsetof(struct spdk_blob_opts, field) + sizeof(src->field) <= src->opts_size
    5963             : 
    5964             : #define SET_FIELD(field) \
    5965             :         if (FIELD_OK(field)) { \
    5966             :                 dst->field = src->field; \
    5967             :         } \
    5968             : 
    5969        1770 :         SET_FIELD(num_clusters);
    5970        1770 :         SET_FIELD(thin_provision);
    5971        1770 :         SET_FIELD(clear_method);
    5972             : 
    5973        1770 :         if (FIELD_OK(xattrs)) {
    5974        1770 :                 memcpy(&dst->xattrs, &src->xattrs, sizeof(src->xattrs));
    5975             :         }
    5976             : 
    5977        1770 :         SET_FIELD(use_extent_table);
    5978        1770 :         SET_FIELD(esnap_id);
    5979        1770 :         SET_FIELD(esnap_id_len);
    5980             : 
    5981        1770 :         dst->opts_size = src->opts_size;
    5982             : 
    5983             :         /* You should not remove this statement, but need to update the assert statement
    5984             :          * if you add a new field, and also add a corresponding SET_FIELD statement */
    5985             :         SPDK_STATIC_ASSERT(sizeof(struct spdk_blob_opts) == 80, "Incorrect size");
    5986             : 
    5987             : #undef FIELD_OK
    5988             : #undef SET_FIELD
    5989        1770 : }
    5990             : 
    5991             : static void
    5992        1802 : bs_create_blob(struct spdk_blob_store *bs,
    5993             :                const struct spdk_blob_opts *opts,
    5994             :                const struct spdk_blob_xattr_opts *internal_xattrs,
    5995             :                spdk_blob_op_with_id_complete cb_fn, void *cb_arg)
    5996             : {
    5997             :         struct spdk_blob        *blob;
    5998             :         uint32_t                page_idx;
    5999        1802 :         struct spdk_bs_cpl      cpl;
    6000        1802 :         struct spdk_blob_opts   opts_local;
    6001        1802 :         struct spdk_blob_xattr_opts internal_xattrs_default;
    6002             :         spdk_bs_sequence_t      *seq;
    6003             :         spdk_blob_id            id;
    6004             :         int rc;
    6005             : 
    6006        1802 :         assert(spdk_get_thread() == bs->md_thread);
    6007             : 
    6008        1802 :         spdk_spin_lock(&bs->used_lock);
    6009        1802 :         page_idx = spdk_bit_array_find_first_clear(bs->used_md_pages, 0);
    6010        1802 :         if (page_idx == UINT32_MAX) {
    6011           0 :                 spdk_spin_unlock(&bs->used_lock);
    6012           0 :                 cb_fn(cb_arg, 0, -ENOMEM);
    6013           0 :                 return;
    6014             :         }
    6015        1802 :         spdk_bit_array_set(bs->used_blobids, page_idx);
    6016        1802 :         bs_claim_md_page(bs, page_idx);
    6017        1802 :         spdk_spin_unlock(&bs->used_lock);
    6018             : 
    6019        1802 :         id = bs_page_to_blobid(page_idx);
    6020             : 
    6021        1802 :         SPDK_DEBUGLOG(blob, "Creating blob with id 0x%" PRIx64 " at page %u\n", id, page_idx);
    6022             : 
    6023        1802 :         spdk_blob_opts_init(&opts_local, sizeof(opts_local));
    6024        1802 :         if (opts) {
    6025        1770 :                 blob_opts_copy(opts, &opts_local);
    6026             :         }
    6027             : 
    6028        1802 :         blob = blob_alloc(bs, id);
    6029        1802 :         if (!blob) {
    6030           0 :                 rc = -ENOMEM;
    6031           0 :                 goto error;
    6032             :         }
    6033             : 
    6034        1802 :         blob->use_extent_table = opts_local.use_extent_table;
    6035        1802 :         if (blob->use_extent_table) {
    6036         922 :                 blob->invalid_flags |= SPDK_BLOB_EXTENT_TABLE;
    6037             :         }
    6038             : 
    6039        1802 :         if (!internal_xattrs) {
    6040        1566 :                 blob_xattrs_init(&internal_xattrs_default);
    6041        1566 :                 internal_xattrs = &internal_xattrs_default;
    6042             :         }
    6043             : 
    6044        1802 :         rc = blob_set_xattrs(blob, &opts_local.xattrs, false);
    6045        1802 :         if (rc < 0) {
    6046          12 :                 goto error;
    6047             :         }
    6048             : 
    6049        1790 :         rc = blob_set_xattrs(blob, internal_xattrs, true);
    6050        1790 :         if (rc < 0) {
    6051           0 :                 goto error;
    6052             :         }
    6053             : 
    6054        1790 :         if (opts_local.thin_provision) {
    6055         296 :                 blob_set_thin_provision(blob);
    6056             :         }
    6057             : 
    6058        1790 :         blob_set_clear_method(blob, opts_local.clear_method);
    6059             : 
    6060        1790 :         if (opts_local.esnap_id != NULL) {
    6061          48 :                 if (opts_local.esnap_id_len > UINT16_MAX) {
    6062           0 :                         SPDK_ERRLOG("esnap id length %" PRIu64 "is too long\n",
    6063             :                                     opts_local.esnap_id_len);
    6064           0 :                         rc = -EINVAL;
    6065           0 :                         goto error;
    6066             : 
    6067             :                 }
    6068          48 :                 blob_set_thin_provision(blob);
    6069          48 :                 blob->invalid_flags |= SPDK_BLOB_EXTERNAL_SNAPSHOT;
    6070          48 :                 rc = blob_set_xattr(blob, BLOB_EXTERNAL_SNAPSHOT_ID,
    6071          48 :                                     opts_local.esnap_id, opts_local.esnap_id_len, true);
    6072          48 :                 if (rc != 0) {
    6073           0 :                         goto error;
    6074             :                 }
    6075             :         }
    6076             : 
    6077        1790 :         rc = blob_resize(blob, opts_local.num_clusters);
    6078        1790 :         if (rc < 0) {
    6079           4 :                 goto error;
    6080             :         }
    6081        1786 :         cpl.type = SPDK_BS_CPL_TYPE_BLOBID;
    6082        1786 :         cpl.u.blobid.cb_fn = cb_fn;
    6083        1786 :         cpl.u.blobid.cb_arg = cb_arg;
    6084        1786 :         cpl.u.blobid.blobid = blob->id;
    6085             : 
    6086        1786 :         seq = bs_sequence_start_bs(bs->md_channel, &cpl);
    6087        1786 :         if (!seq) {
    6088           0 :                 rc = -ENOMEM;
    6089           0 :                 goto error;
    6090             :         }
    6091             : 
    6092        1786 :         blob_persist(seq, blob, bs_create_blob_cpl, blob);
    6093        1786 :         return;
    6094             : 
    6095          16 : error:
    6096          16 :         SPDK_ERRLOG("Failed to create blob: %s, size in clusters/size: %lu (clusters)\n",
    6097             :                     spdk_strerror(rc), opts_local.num_clusters);
    6098          16 :         if (blob != NULL) {
    6099          16 :                 blob_free(blob);
    6100             :         }
    6101          16 :         spdk_spin_lock(&bs->used_lock);
    6102          16 :         spdk_bit_array_clear(bs->used_blobids, page_idx);
    6103          16 :         bs_release_md_page(bs, page_idx);
    6104          16 :         spdk_spin_unlock(&bs->used_lock);
    6105          16 :         cb_fn(cb_arg, 0, rc);
    6106             : }
    6107             : 
    6108             : void
    6109          16 : spdk_bs_create_blob(struct spdk_blob_store *bs,
    6110             :                     spdk_blob_op_with_id_complete cb_fn, void *cb_arg)
    6111             : {
    6112          16 :         bs_create_blob(bs, NULL, NULL, cb_fn, cb_arg);
    6113          16 : }
    6114             : 
    6115             : void
    6116        1542 : spdk_bs_create_blob_ext(struct spdk_blob_store *bs, const struct spdk_blob_opts *opts,
    6117             :                         spdk_blob_op_with_id_complete cb_fn, void *cb_arg)
    6118             : {
    6119        1542 :         bs_create_blob(bs, opts, NULL, cb_fn, cb_arg);
    6120        1542 : }
    6121             : 
    6122             : /* END spdk_bs_create_blob */
    6123             : 
    6124             : /* START blob_cleanup */
    6125             : 
    6126             : struct spdk_clone_snapshot_ctx {
    6127             :         struct spdk_bs_cpl      cpl;
    6128             :         int bserrno;
    6129             :         bool frozen;
    6130             : 
    6131             :         struct spdk_io_channel *channel;
    6132             : 
    6133             :         /* Current cluster for inflate operation */
    6134             :         uint64_t cluster;
    6135             : 
    6136             :         /* For inflation force allocation of all unallocated clusters and remove
    6137             :          * thin-provisioning. Otherwise only decouple parent and keep clone thin. */
    6138             :         bool allocate_all;
    6139             : 
    6140             :         struct {
    6141             :                 spdk_blob_id id;
    6142             :                 struct spdk_blob *blob;
    6143             :                 bool md_ro;
    6144             :         } original;
    6145             :         struct {
    6146             :                 spdk_blob_id id;
    6147             :                 struct spdk_blob *blob;
    6148             :         } new;
    6149             : 
    6150             :         /* xattrs specified for snapshot/clones only. They have no impact on
    6151             :          * the original blobs xattrs. */
    6152             :         const struct spdk_blob_xattr_opts *xattrs;
    6153             : };
    6154             : 
    6155             : static void
    6156         302 : bs_clone_snapshot_cleanup_finish(void *cb_arg, int bserrno)
    6157             : {
    6158         302 :         struct spdk_clone_snapshot_ctx *ctx = cb_arg;
    6159         302 :         struct spdk_bs_cpl *cpl = &ctx->cpl;
    6160             : 
    6161         302 :         if (bserrno != 0) {
    6162           6 :                 if (ctx->bserrno != 0) {
    6163           0 :                         SPDK_ERRLOG("Cleanup error %d\n", bserrno);
    6164             :                 } else {
    6165           6 :                         ctx->bserrno = bserrno;
    6166             :                 }
    6167             :         }
    6168             : 
    6169         302 :         switch (cpl->type) {
    6170         246 :         case SPDK_BS_CPL_TYPE_BLOBID:
    6171         246 :                 cpl->u.blobid.cb_fn(cpl->u.blobid.cb_arg, cpl->u.blobid.blobid, ctx->bserrno);
    6172         246 :                 break;
    6173          56 :         case SPDK_BS_CPL_TYPE_BLOB_BASIC:
    6174          56 :                 cpl->u.blob_basic.cb_fn(cpl->u.blob_basic.cb_arg, ctx->bserrno);
    6175          56 :                 break;
    6176           0 :         default:
    6177           0 :                 SPDK_UNREACHABLE();
    6178             :                 break;
    6179             :         }
    6180             : 
    6181         302 :         free(ctx);
    6182         302 : }
    6183             : 
    6184             : static void
    6185         288 : bs_snapshot_unfreeze_cpl(void *cb_arg, int bserrno)
    6186             : {
    6187         288 :         struct spdk_clone_snapshot_ctx *ctx = (struct spdk_clone_snapshot_ctx *)cb_arg;
    6188         288 :         struct spdk_blob *origblob = ctx->original.blob;
    6189             : 
    6190         288 :         if (bserrno != 0) {
    6191           0 :                 if (ctx->bserrno != 0) {
    6192           0 :                         SPDK_ERRLOG("Unfreeze error %d\n", bserrno);
    6193             :                 } else {
    6194           0 :                         ctx->bserrno = bserrno;
    6195             :                 }
    6196             :         }
    6197             : 
    6198         288 :         ctx->original.id = origblob->id;
    6199         288 :         origblob->locked_operation_in_progress = false;
    6200             : 
    6201             :         /* Revert md_ro to original state */
    6202         288 :         origblob->md_ro = ctx->original.md_ro;
    6203             : 
    6204         288 :         spdk_blob_close(origblob, bs_clone_snapshot_cleanup_finish, ctx);
    6205         288 : }
    6206             : 
    6207             : static void
    6208         288 : bs_clone_snapshot_origblob_cleanup(void *cb_arg, int bserrno)
    6209             : {
    6210         288 :         struct spdk_clone_snapshot_ctx *ctx = (struct spdk_clone_snapshot_ctx *)cb_arg;
    6211         288 :         struct spdk_blob *origblob = ctx->original.blob;
    6212             : 
    6213         288 :         if (bserrno != 0) {
    6214          24 :                 if (ctx->bserrno != 0) {
    6215           4 :                         SPDK_ERRLOG("Cleanup error %d\n", bserrno);
    6216             :                 } else {
    6217          20 :                         ctx->bserrno = bserrno;
    6218             :                 }
    6219             :         }
    6220             : 
    6221         288 :         if (ctx->frozen) {
    6222             :                 /* Unfreeze any outstanding I/O */
    6223         180 :                 blob_unfreeze_io(origblob, bs_snapshot_unfreeze_cpl, ctx);
    6224             :         } else {
    6225         108 :                 bs_snapshot_unfreeze_cpl(ctx, 0);
    6226             :         }
    6227             : 
    6228         288 : }
    6229             : 
    6230             : static void
    6231           4 : bs_clone_snapshot_newblob_cleanup(struct spdk_clone_snapshot_ctx *ctx, int bserrno)
    6232             : {
    6233           4 :         struct spdk_blob *newblob = ctx->new.blob;
    6234             : 
    6235           4 :         if (bserrno != 0) {
    6236           4 :                 if (ctx->bserrno != 0) {
    6237           0 :                         SPDK_ERRLOG("Cleanup error %d\n", bserrno);
    6238             :                 } else {
    6239           4 :                         ctx->bserrno = bserrno;
    6240             :                 }
    6241             :         }
    6242             : 
    6243           4 :         ctx->new.id = newblob->id;
    6244           4 :         spdk_blob_close(newblob, bs_clone_snapshot_origblob_cleanup, ctx);
    6245           4 : }
    6246             : 
    6247             : /* END blob_cleanup */
    6248             : 
    6249             : /* START spdk_bs_create_snapshot */
    6250             : 
    6251             : static void
    6252         188 : bs_snapshot_swap_cluster_maps(struct spdk_blob *blob1, struct spdk_blob *blob2)
    6253             : {
    6254             :         uint64_t *cluster_temp;
    6255             :         uint32_t *extent_page_temp;
    6256             : 
    6257         188 :         cluster_temp = blob1->active.clusters;
    6258         188 :         blob1->active.clusters = blob2->active.clusters;
    6259         188 :         blob2->active.clusters = cluster_temp;
    6260             : 
    6261         188 :         extent_page_temp = blob1->active.extent_pages;
    6262         188 :         blob1->active.extent_pages = blob2->active.extent_pages;
    6263         188 :         blob2->active.extent_pages = extent_page_temp;
    6264         188 : }
    6265             : 
    6266             : /* Copies an internal xattr */
    6267             : static int
    6268          20 : bs_snapshot_copy_xattr(struct spdk_blob *toblob, struct spdk_blob *fromblob, const char *name)
    6269             : {
    6270          20 :         const void      *val = NULL;
    6271          20 :         size_t          len;
    6272             :         int             bserrno;
    6273             : 
    6274          20 :         bserrno = blob_get_xattr_value(fromblob, name, &val, &len, true);
    6275          20 :         if (bserrno != 0) {
    6276           0 :                 SPDK_ERRLOG("blob 0x%" PRIx64 " missing %s XATTR\n", fromblob->id, name);
    6277           0 :                 return bserrno;
    6278             :         }
    6279             : 
    6280          20 :         bserrno = blob_set_xattr(toblob, name, val, len, true);
    6281          20 :         if (bserrno != 0) {
    6282           0 :                 SPDK_ERRLOG("could not set %s XATTR on blob 0x%" PRIx64 "\n",
    6283             :                             name, toblob->id);
    6284           0 :                 return bserrno;
    6285             :         }
    6286          20 :         return 0;
    6287             : }
    6288             : 
    6289             : static void
    6290         176 : bs_snapshot_origblob_sync_cpl(void *cb_arg, int bserrno)
    6291             : {
    6292         176 :         struct spdk_clone_snapshot_ctx *ctx = (struct spdk_clone_snapshot_ctx *)cb_arg;
    6293         176 :         struct spdk_blob *origblob = ctx->original.blob;
    6294         176 :         struct spdk_blob *newblob = ctx->new.blob;
    6295             : 
    6296         176 :         if (bserrno != 0) {
    6297           4 :                 bs_snapshot_swap_cluster_maps(newblob, origblob);
    6298           4 :                 if (blob_is_esnap_clone(newblob)) {
    6299           0 :                         bs_snapshot_copy_xattr(origblob, newblob, BLOB_EXTERNAL_SNAPSHOT_ID);
    6300           0 :                         origblob->invalid_flags |= SPDK_BLOB_EXTERNAL_SNAPSHOT;
    6301             :                 }
    6302           4 :                 bs_clone_snapshot_origblob_cleanup(ctx, bserrno);
    6303           4 :                 return;
    6304             :         }
    6305             : 
    6306             :         /* Remove metadata descriptor SNAPSHOT_IN_PROGRESS */
    6307         172 :         bserrno = blob_remove_xattr(newblob, SNAPSHOT_IN_PROGRESS, true);
    6308         172 :         if (bserrno != 0) {
    6309           0 :                 bs_clone_snapshot_origblob_cleanup(ctx, bserrno);
    6310           0 :                 return;
    6311             :         }
    6312             : 
    6313         172 :         bs_blob_list_add(ctx->original.blob);
    6314             : 
    6315         172 :         spdk_blob_set_read_only(newblob);
    6316             : 
    6317             :         /* sync snapshot metadata */
    6318         172 :         spdk_blob_sync_md(newblob, bs_clone_snapshot_origblob_cleanup, ctx);
    6319             : }
    6320             : 
    6321             : static void
    6322         180 : bs_snapshot_newblob_sync_cpl(void *cb_arg, int bserrno)
    6323             : {
    6324         180 :         struct spdk_clone_snapshot_ctx *ctx = (struct spdk_clone_snapshot_ctx *)cb_arg;
    6325         180 :         struct spdk_blob *origblob = ctx->original.blob;
    6326         180 :         struct spdk_blob *newblob = ctx->new.blob;
    6327             : 
    6328         180 :         if (bserrno != 0) {
    6329             :                 /* return cluster map back to original */
    6330           4 :                 bs_snapshot_swap_cluster_maps(newblob, origblob);
    6331             : 
    6332             :                 /* Newblob md sync failed. Valid clusters are only present in origblob.
    6333             :                  * Since I/O is frozen on origblob, not changes to zeroed out cluster map should have occurred.
    6334             :                  * Newblob needs to be reverted to thin_provisioned state at creation to properly close. */
    6335           4 :                 blob_set_thin_provision(newblob);
    6336           4 :                 assert(spdk_mem_all_zero(newblob->active.clusters,
    6337             :                                          newblob->active.num_clusters * sizeof(*newblob->active.clusters)));
    6338           4 :                 assert(spdk_mem_all_zero(newblob->active.extent_pages,
    6339             :                                          newblob->active.num_extent_pages * sizeof(*newblob->active.extent_pages)));
    6340             : 
    6341           4 :                 bs_clone_snapshot_newblob_cleanup(ctx, bserrno);
    6342           4 :                 return;
    6343             :         }
    6344             : 
    6345             :         /* Set internal xattr for snapshot id */
    6346         176 :         bserrno = blob_set_xattr(origblob, BLOB_SNAPSHOT, &newblob->id, sizeof(spdk_blob_id), true);
    6347         176 :         if (bserrno != 0) {
    6348             :                 /* return cluster map back to original */
    6349           0 :                 bs_snapshot_swap_cluster_maps(newblob, origblob);
    6350           0 :                 blob_set_thin_provision(newblob);
    6351           0 :                 bs_clone_snapshot_newblob_cleanup(ctx, bserrno);
    6352           0 :                 return;
    6353             :         }
    6354             : 
    6355             :         /* Create new back_bs_dev for snapshot */
    6356         176 :         origblob->back_bs_dev = bs_create_blob_bs_dev(newblob);
    6357         176 :         if (origblob->back_bs_dev == NULL) {
    6358             :                 /* return cluster map back to original */
    6359           0 :                 bs_snapshot_swap_cluster_maps(newblob, origblob);
    6360           0 :                 blob_set_thin_provision(newblob);
    6361           0 :                 bs_clone_snapshot_newblob_cleanup(ctx, -EINVAL);
    6362           0 :                 return;
    6363             :         }
    6364             : 
    6365             :         /* Remove the xattr that references an external snapshot */
    6366         176 :         if (blob_is_esnap_clone(origblob)) {
    6367          12 :                 origblob->invalid_flags &= ~SPDK_BLOB_EXTERNAL_SNAPSHOT;
    6368          12 :                 bserrno = blob_remove_xattr(origblob, BLOB_EXTERNAL_SNAPSHOT_ID, true);
    6369          12 :                 if (bserrno != 0) {
    6370           0 :                         if (bserrno == -ENOENT) {
    6371           0 :                                 SPDK_ERRLOG("blob 0x%" PRIx64 " has no " BLOB_EXTERNAL_SNAPSHOT_ID
    6372             :                                             " xattr to remove\n", origblob->id);
    6373           0 :                                 assert(false);
    6374             :                         } else {
    6375             :                                 /* return cluster map back to original */
    6376           0 :                                 bs_snapshot_swap_cluster_maps(newblob, origblob);
    6377           0 :                                 blob_set_thin_provision(newblob);
    6378           0 :                                 bs_clone_snapshot_newblob_cleanup(ctx, bserrno);
    6379           0 :                                 return;
    6380             :                         }
    6381             :                 }
    6382             :         }
    6383             : 
    6384         176 :         bs_blob_list_remove(origblob);
    6385         176 :         origblob->parent_id = newblob->id;
    6386             :         /* set clone blob as thin provisioned */
    6387         176 :         blob_set_thin_provision(origblob);
    6388             : 
    6389         176 :         bs_blob_list_add(newblob);
    6390             : 
    6391             :         /* sync clone metadata */
    6392         176 :         spdk_blob_sync_md(origblob, bs_snapshot_origblob_sync_cpl, ctx);
    6393             : }
    6394             : 
    6395             : static void
    6396         180 : bs_snapshot_freeze_cpl(void *cb_arg, int rc)
    6397             : {
    6398         180 :         struct spdk_clone_snapshot_ctx *ctx = (struct spdk_clone_snapshot_ctx *)cb_arg;
    6399         180 :         struct spdk_blob *origblob = ctx->original.blob;
    6400         180 :         struct spdk_blob *newblob = ctx->new.blob;
    6401             :         int bserrno;
    6402             : 
    6403         180 :         if (rc != 0) {
    6404           0 :                 bs_clone_snapshot_newblob_cleanup(ctx, rc);
    6405           0 :                 return;
    6406             :         }
    6407             : 
    6408         180 :         ctx->frozen = true;
    6409             : 
    6410         180 :         if (blob_is_esnap_clone(origblob)) {
    6411             :                 /* Clean up any channels associated with the original blob id because future IO will
    6412             :                  * perform IO using the snapshot blob_id.
    6413             :                  */
    6414          12 :                 blob_esnap_destroy_bs_dev_channels(origblob, false, NULL, NULL);
    6415             :         }
    6416         180 :         if (newblob->back_bs_dev) {
    6417         180 :                 blob_back_bs_destroy(newblob);
    6418             :         }
    6419             :         /* set new back_bs_dev for snapshot */
    6420         180 :         newblob->back_bs_dev = origblob->back_bs_dev;
    6421             :         /* Set invalid flags from origblob */
    6422         180 :         newblob->invalid_flags = origblob->invalid_flags;
    6423             : 
    6424             :         /* inherit parent from original blob if set */
    6425         180 :         newblob->parent_id = origblob->parent_id;
    6426         180 :         switch (origblob->parent_id) {
    6427          12 :         case SPDK_BLOBID_EXTERNAL_SNAPSHOT:
    6428          12 :                 bserrno = bs_snapshot_copy_xattr(newblob, origblob, BLOB_EXTERNAL_SNAPSHOT_ID);
    6429          12 :                 if (bserrno != 0) {
    6430           0 :                         bs_clone_snapshot_newblob_cleanup(ctx, bserrno);
    6431           0 :                         return;
    6432             :                 }
    6433          12 :                 break;
    6434         124 :         case SPDK_BLOBID_INVALID:
    6435         124 :                 break;
    6436          44 :         default:
    6437             :                 /* Set internal xattr for snapshot id */
    6438          44 :                 bserrno = blob_set_xattr(newblob, BLOB_SNAPSHOT,
    6439          44 :                                          &origblob->parent_id, sizeof(spdk_blob_id), true);
    6440          44 :                 if (bserrno != 0) {
    6441           0 :                         bs_clone_snapshot_newblob_cleanup(ctx, bserrno);
    6442           0 :                         return;
    6443             :                 }
    6444             :         }
    6445             : 
    6446             :         /* swap cluster maps */
    6447         180 :         bs_snapshot_swap_cluster_maps(newblob, origblob);
    6448             : 
    6449             :         /* Set the clear method on the new blob to match the original. */
    6450         180 :         blob_set_clear_method(newblob, origblob->clear_method);
    6451             : 
    6452             :         /* sync snapshot metadata */
    6453         180 :         spdk_blob_sync_md(newblob, bs_snapshot_newblob_sync_cpl, ctx);
    6454             : }
    6455             : 
    6456             : static void
    6457         184 : bs_snapshot_newblob_open_cpl(void *cb_arg, struct spdk_blob *_blob, int bserrno)
    6458             : {
    6459         184 :         struct spdk_clone_snapshot_ctx *ctx = (struct spdk_clone_snapshot_ctx *)cb_arg;
    6460         184 :         struct spdk_blob *origblob = ctx->original.blob;
    6461         184 :         struct spdk_blob *newblob = _blob;
    6462             : 
    6463         184 :         if (bserrno != 0) {
    6464           4 :                 bs_clone_snapshot_origblob_cleanup(ctx, bserrno);
    6465           4 :                 return;
    6466             :         }
    6467             : 
    6468         180 :         ctx->new.blob = newblob;
    6469         180 :         assert(spdk_blob_is_thin_provisioned(newblob));
    6470         180 :         assert(spdk_mem_all_zero(newblob->active.clusters,
    6471             :                                  newblob->active.num_clusters * sizeof(*newblob->active.clusters)));
    6472         180 :         assert(spdk_mem_all_zero(newblob->active.extent_pages,
    6473             :                                  newblob->active.num_extent_pages * sizeof(*newblob->active.extent_pages)));
    6474             : 
    6475         180 :         blob_freeze_io(origblob, bs_snapshot_freeze_cpl, ctx);
    6476             : }
    6477             : 
    6478             : static void
    6479         188 : bs_snapshot_newblob_create_cpl(void *cb_arg, spdk_blob_id blobid, int bserrno)
    6480             : {
    6481         188 :         struct spdk_clone_snapshot_ctx *ctx = (struct spdk_clone_snapshot_ctx *)cb_arg;
    6482         188 :         struct spdk_blob *origblob = ctx->original.blob;
    6483             : 
    6484         188 :         if (bserrno != 0) {
    6485           4 :                 bs_clone_snapshot_origblob_cleanup(ctx, bserrno);
    6486           4 :                 return;
    6487             :         }
    6488             : 
    6489         184 :         ctx->new.id = blobid;
    6490         184 :         ctx->cpl.u.blobid.blobid = blobid;
    6491             : 
    6492         184 :         spdk_bs_open_blob(origblob->bs, ctx->new.id, bs_snapshot_newblob_open_cpl, ctx);
    6493             : }
    6494             : 
    6495             : 
    6496             : static void
    6497         188 : bs_xattr_snapshot(void *arg, const char *name,
    6498             :                   const void **value, size_t *value_len)
    6499             : {
    6500         188 :         assert(strncmp(name, SNAPSHOT_IN_PROGRESS, sizeof(SNAPSHOT_IN_PROGRESS)) == 0);
    6501             : 
    6502         188 :         struct spdk_blob *blob = (struct spdk_blob *)arg;
    6503         188 :         *value = &blob->id;
    6504         188 :         *value_len = sizeof(blob->id);
    6505         188 : }
    6506             : 
    6507             : static void
    6508         198 : bs_snapshot_origblob_open_cpl(void *cb_arg, struct spdk_blob *_blob, int bserrno)
    6509             : {
    6510         198 :         struct spdk_clone_snapshot_ctx *ctx = (struct spdk_clone_snapshot_ctx *)cb_arg;
    6511         198 :         struct spdk_blob_opts opts;
    6512         198 :         struct spdk_blob_xattr_opts internal_xattrs;
    6513         198 :         char *xattrs_names[] = { SNAPSHOT_IN_PROGRESS };
    6514             : 
    6515         198 :         if (bserrno != 0) {
    6516           6 :                 bs_clone_snapshot_cleanup_finish(ctx, bserrno);
    6517           6 :                 return;
    6518             :         }
    6519             : 
    6520         192 :         ctx->original.blob = _blob;
    6521             : 
    6522         192 :         if (_blob->data_ro || _blob->md_ro) {
    6523           4 :                 SPDK_DEBUGLOG(blob, "Cannot create snapshot from read only blob with id 0x%"
    6524             :                               PRIx64 "\n", _blob->id);
    6525           4 :                 ctx->bserrno = -EINVAL;
    6526           4 :                 spdk_blob_close(_blob, bs_clone_snapshot_cleanup_finish, ctx);
    6527           4 :                 return;
    6528             :         }
    6529             : 
    6530         188 :         if (_blob->locked_operation_in_progress) {
    6531           0 :                 SPDK_DEBUGLOG(blob, "Cannot create snapshot - another operation in progress\n");
    6532           0 :                 ctx->bserrno = -EBUSY;
    6533           0 :                 spdk_blob_close(_blob, bs_clone_snapshot_cleanup_finish, ctx);
    6534           0 :                 return;
    6535             :         }
    6536             : 
    6537         188 :         _blob->locked_operation_in_progress = true;
    6538             : 
    6539         188 :         spdk_blob_opts_init(&opts, sizeof(opts));
    6540         188 :         blob_xattrs_init(&internal_xattrs);
    6541             : 
    6542             :         /* Change the size of new blob to the same as in original blob,
    6543             :          * but do not allocate clusters */
    6544         188 :         opts.thin_provision = true;
    6545         188 :         opts.num_clusters = spdk_blob_get_num_clusters(_blob);
    6546         188 :         opts.use_extent_table = _blob->use_extent_table;
    6547             : 
    6548             :         /* If there are any xattrs specified for snapshot, set them now */
    6549         188 :         if (ctx->xattrs) {
    6550           4 :                 memcpy(&opts.xattrs, ctx->xattrs, sizeof(*ctx->xattrs));
    6551             :         }
    6552             :         /* Set internal xattr SNAPSHOT_IN_PROGRESS */
    6553         188 :         internal_xattrs.count = 1;
    6554         188 :         internal_xattrs.ctx = _blob;
    6555         188 :         internal_xattrs.names = xattrs_names;
    6556         188 :         internal_xattrs.get_value = bs_xattr_snapshot;
    6557             : 
    6558         188 :         bs_create_blob(_blob->bs, &opts, &internal_xattrs,
    6559             :                        bs_snapshot_newblob_create_cpl, ctx);
    6560             : }
    6561             : 
    6562             : void
    6563         198 : spdk_bs_create_snapshot(struct spdk_blob_store *bs, spdk_blob_id blobid,
    6564             :                         const struct spdk_blob_xattr_opts *snapshot_xattrs,
    6565             :                         spdk_blob_op_with_id_complete cb_fn, void *cb_arg)
    6566             : {
    6567         198 :         struct spdk_clone_snapshot_ctx *ctx = calloc(1, sizeof(*ctx));
    6568             : 
    6569         198 :         if (!ctx) {
    6570           0 :                 cb_fn(cb_arg, SPDK_BLOBID_INVALID, -ENOMEM);
    6571           0 :                 return;
    6572             :         }
    6573         198 :         ctx->cpl.type = SPDK_BS_CPL_TYPE_BLOBID;
    6574         198 :         ctx->cpl.u.blobid.cb_fn = cb_fn;
    6575         198 :         ctx->cpl.u.blobid.cb_arg = cb_arg;
    6576         198 :         ctx->cpl.u.blobid.blobid = SPDK_BLOBID_INVALID;
    6577         198 :         ctx->bserrno = 0;
    6578         198 :         ctx->frozen = false;
    6579         198 :         ctx->original.id = blobid;
    6580         198 :         ctx->xattrs = snapshot_xattrs;
    6581             : 
    6582         198 :         spdk_bs_open_blob(bs, ctx->original.id, bs_snapshot_origblob_open_cpl, ctx);
    6583             : }
    6584             : /* END spdk_bs_create_snapshot */
    6585             : 
    6586             : /* START spdk_bs_create_clone */
    6587             : 
    6588             : static void
    6589          44 : bs_xattr_clone(void *arg, const char *name,
    6590             :                const void **value, size_t *value_len)
    6591             : {
    6592          44 :         assert(strncmp(name, BLOB_SNAPSHOT, sizeof(BLOB_SNAPSHOT)) == 0);
    6593             : 
    6594          44 :         struct spdk_blob *blob = (struct spdk_blob *)arg;
    6595          44 :         *value = &blob->id;
    6596          44 :         *value_len = sizeof(blob->id);
    6597          44 : }
    6598             : 
    6599             : static void
    6600          44 : bs_clone_newblob_open_cpl(void *cb_arg, struct spdk_blob *_blob, int bserrno)
    6601             : {
    6602          44 :         struct spdk_clone_snapshot_ctx *ctx = (struct spdk_clone_snapshot_ctx *)cb_arg;
    6603          44 :         struct spdk_blob *clone = _blob;
    6604             : 
    6605          44 :         ctx->new.blob = clone;
    6606          44 :         bs_blob_list_add(clone);
    6607             : 
    6608          44 :         spdk_blob_close(clone, bs_clone_snapshot_origblob_cleanup, ctx);
    6609          44 : }
    6610             : 
    6611             : static void
    6612          44 : bs_clone_newblob_create_cpl(void *cb_arg, spdk_blob_id blobid, int bserrno)
    6613             : {
    6614          44 :         struct spdk_clone_snapshot_ctx *ctx = (struct spdk_clone_snapshot_ctx *)cb_arg;
    6615             : 
    6616          44 :         ctx->cpl.u.blobid.blobid = blobid;
    6617          44 :         spdk_bs_open_blob(ctx->original.blob->bs, blobid, bs_clone_newblob_open_cpl, ctx);
    6618          44 : }
    6619             : 
    6620             : static void
    6621          48 : bs_clone_origblob_open_cpl(void *cb_arg, struct spdk_blob *_blob, int bserrno)
    6622             : {
    6623          48 :         struct spdk_clone_snapshot_ctx  *ctx = (struct spdk_clone_snapshot_ctx *)cb_arg;
    6624          48 :         struct spdk_blob_opts           opts;
    6625          48 :         struct spdk_blob_xattr_opts internal_xattrs;
    6626          48 :         char *xattr_names[] = { BLOB_SNAPSHOT };
    6627             : 
    6628          48 :         if (bserrno != 0) {
    6629           0 :                 bs_clone_snapshot_cleanup_finish(ctx, bserrno);
    6630           0 :                 return;
    6631             :         }
    6632             : 
    6633          48 :         ctx->original.blob = _blob;
    6634          48 :         ctx->original.md_ro = _blob->md_ro;
    6635             : 
    6636          48 :         if (!_blob->data_ro || !_blob->md_ro) {
    6637           4 :                 SPDK_DEBUGLOG(blob, "Clone not from read-only blob\n");
    6638           4 :                 ctx->bserrno = -EINVAL;
    6639           4 :                 spdk_blob_close(_blob, bs_clone_snapshot_cleanup_finish, ctx);
    6640           4 :                 return;
    6641             :         }
    6642             : 
    6643          44 :         if (_blob->locked_operation_in_progress) {
    6644           0 :                 SPDK_DEBUGLOG(blob, "Cannot create clone - another operation in progress\n");
    6645           0 :                 ctx->bserrno = -EBUSY;
    6646           0 :                 spdk_blob_close(_blob, bs_clone_snapshot_cleanup_finish, ctx);
    6647           0 :                 return;
    6648             :         }
    6649             : 
    6650          44 :         _blob->locked_operation_in_progress = true;
    6651             : 
    6652          44 :         spdk_blob_opts_init(&opts, sizeof(opts));
    6653          44 :         blob_xattrs_init(&internal_xattrs);
    6654             : 
    6655          44 :         opts.thin_provision = true;
    6656          44 :         opts.num_clusters = spdk_blob_get_num_clusters(_blob);
    6657          44 :         opts.use_extent_table = _blob->use_extent_table;
    6658          44 :         if (ctx->xattrs) {
    6659           4 :                 memcpy(&opts.xattrs, ctx->xattrs, sizeof(*ctx->xattrs));
    6660             :         }
    6661             : 
    6662             :         /* Set internal xattr BLOB_SNAPSHOT */
    6663          44 :         internal_xattrs.count = 1;
    6664          44 :         internal_xattrs.ctx = _blob;
    6665          44 :         internal_xattrs.names = xattr_names;
    6666          44 :         internal_xattrs.get_value = bs_xattr_clone;
    6667             : 
    6668          44 :         bs_create_blob(_blob->bs, &opts, &internal_xattrs,
    6669             :                        bs_clone_newblob_create_cpl, ctx);
    6670             : }
    6671             : 
    6672             : void
    6673          48 : spdk_bs_create_clone(struct spdk_blob_store *bs, spdk_blob_id blobid,
    6674             :                      const struct spdk_blob_xattr_opts *clone_xattrs,
    6675             :                      spdk_blob_op_with_id_complete cb_fn, void *cb_arg)
    6676             : {
    6677          48 :         struct spdk_clone_snapshot_ctx  *ctx = calloc(1, sizeof(*ctx));
    6678             : 
    6679          48 :         if (!ctx) {
    6680           0 :                 cb_fn(cb_arg, SPDK_BLOBID_INVALID, -ENOMEM);
    6681           0 :                 return;
    6682             :         }
    6683             : 
    6684          48 :         ctx->cpl.type = SPDK_BS_CPL_TYPE_BLOBID;
    6685          48 :         ctx->cpl.u.blobid.cb_fn = cb_fn;
    6686          48 :         ctx->cpl.u.blobid.cb_arg = cb_arg;
    6687          48 :         ctx->cpl.u.blobid.blobid = SPDK_BLOBID_INVALID;
    6688          48 :         ctx->bserrno = 0;
    6689          48 :         ctx->xattrs = clone_xattrs;
    6690          48 :         ctx->original.id = blobid;
    6691             : 
    6692          48 :         spdk_bs_open_blob(bs, ctx->original.id, bs_clone_origblob_open_cpl, ctx);
    6693             : }
    6694             : 
    6695             : /* END spdk_bs_create_clone */
    6696             : 
    6697             : /* START spdk_bs_inflate_blob */
    6698             : 
    6699             : static void
    6700          12 : bs_inflate_blob_set_parent_cpl(void *cb_arg, struct spdk_blob *_parent, int bserrno)
    6701             : {
    6702          12 :         struct spdk_clone_snapshot_ctx *ctx = (struct spdk_clone_snapshot_ctx *)cb_arg;
    6703          12 :         struct spdk_blob *_blob = ctx->original.blob;
    6704             : 
    6705          12 :         if (bserrno != 0) {
    6706           0 :                 bs_clone_snapshot_origblob_cleanup(ctx, bserrno);
    6707           0 :                 return;
    6708             :         }
    6709             : 
    6710             :         /* Temporarily override md_ro flag for MD modification */
    6711          12 :         _blob->md_ro = false;
    6712             : 
    6713          12 :         bserrno = blob_set_xattr(_blob, BLOB_SNAPSHOT, &_parent->id, sizeof(spdk_blob_id), true);
    6714          12 :         if (bserrno != 0) {
    6715           0 :                 bs_clone_snapshot_origblob_cleanup(ctx, bserrno);
    6716           0 :                 return;
    6717             :         }
    6718             : 
    6719          12 :         assert(_parent != NULL);
    6720             : 
    6721          12 :         bs_blob_list_remove(_blob);
    6722          12 :         _blob->parent_id = _parent->id;
    6723             : 
    6724          12 :         blob_back_bs_destroy(_blob);
    6725          12 :         _blob->back_bs_dev = bs_create_blob_bs_dev(_parent);
    6726          12 :         bs_blob_list_add(_blob);
    6727             : 
    6728          12 :         spdk_blob_sync_md(_blob, bs_clone_snapshot_origblob_cleanup, ctx);
    6729             : }
    6730             : 
    6731             : static void
    6732          52 : bs_inflate_blob_done(struct spdk_clone_snapshot_ctx *ctx)
    6733             : {
    6734          52 :         struct spdk_blob *_blob = ctx->original.blob;
    6735             :         struct spdk_blob *_parent;
    6736             : 
    6737          52 :         if (ctx->allocate_all) {
    6738             :                 /* remove thin provisioning */
    6739          28 :                 bs_blob_list_remove(_blob);
    6740          28 :                 if (_blob->parent_id == SPDK_BLOBID_EXTERNAL_SNAPSHOT) {
    6741           8 :                         blob_remove_xattr(_blob, BLOB_EXTERNAL_SNAPSHOT_ID, true);
    6742           8 :                         _blob->invalid_flags &= ~SPDK_BLOB_EXTERNAL_SNAPSHOT;
    6743             :                 } else {
    6744          20 :                         blob_remove_xattr(_blob, BLOB_SNAPSHOT, true);
    6745             :                 }
    6746          28 :                 _blob->invalid_flags = _blob->invalid_flags & ~SPDK_BLOB_THIN_PROV;
    6747          28 :                 blob_back_bs_destroy(_blob);
    6748          28 :                 _blob->parent_id = SPDK_BLOBID_INVALID;
    6749             :         } else {
    6750             :                 /* For now, esnap clones always have allocate_all set. */
    6751          24 :                 assert(!blob_is_esnap_clone(_blob));
    6752             : 
    6753          24 :                 _parent = ((struct spdk_blob_bs_dev *)(_blob->back_bs_dev))->blob;
    6754          24 :                 if (_parent->parent_id != SPDK_BLOBID_INVALID) {
    6755             :                         /* We must change the parent of the inflated blob */
    6756          12 :                         spdk_bs_open_blob(_blob->bs, _parent->parent_id,
    6757             :                                           bs_inflate_blob_set_parent_cpl, ctx);
    6758          12 :                         return;
    6759             :                 }
    6760             : 
    6761          12 :                 bs_blob_list_remove(_blob);
    6762          12 :                 _blob->parent_id = SPDK_BLOBID_INVALID;
    6763          12 :                 blob_back_bs_destroy(_blob);
    6764          12 :                 _blob->back_bs_dev = bs_create_zeroes_dev();
    6765             :         }
    6766             : 
    6767             :         /* Temporarily override md_ro flag for MD modification */
    6768          40 :         _blob->md_ro = false;
    6769          40 :         blob_remove_xattr(_blob, BLOB_SNAPSHOT, true);
    6770          40 :         _blob->state = SPDK_BLOB_STATE_DIRTY;
    6771             : 
    6772          40 :         spdk_blob_sync_md(_blob, bs_clone_snapshot_origblob_cleanup, ctx);
    6773             : }
    6774             : 
    6775             : /* Check if cluster needs allocation */
    6776             : static inline bool
    6777        1040 : bs_cluster_needs_allocation(struct spdk_blob *blob, uint64_t cluster, bool allocate_all)
    6778             : {
    6779             :         struct spdk_blob_bs_dev *b;
    6780             : 
    6781        1040 :         assert(blob != NULL);
    6782             : 
    6783        1040 :         if (blob->active.clusters[cluster] != 0) {
    6784             :                 /* Cluster is already allocated */
    6785          16 :                 return false;
    6786             :         }
    6787             : 
    6788        1024 :         if (blob->parent_id == SPDK_BLOBID_INVALID) {
    6789             :                 /* Blob have no parent blob */
    6790          80 :                 return allocate_all;
    6791             :         }
    6792             : 
    6793         944 :         if (blob->parent_id == SPDK_BLOBID_EXTERNAL_SNAPSHOT) {
    6794          64 :                 return true;
    6795             :         }
    6796             : 
    6797         880 :         b = (struct spdk_blob_bs_dev *)blob->back_bs_dev;
    6798         880 :         return (allocate_all || b->blob->active.clusters[cluster] != 0);
    6799             : }
    6800             : 
    6801             : static void
    6802         432 : bs_inflate_blob_touch_next(void *cb_arg, int bserrno)
    6803             : {
    6804         432 :         struct spdk_clone_snapshot_ctx *ctx = (struct spdk_clone_snapshot_ctx *)cb_arg;
    6805         432 :         struct spdk_blob *_blob = ctx->original.blob;
    6806         432 :         struct spdk_bs_cpl cpl;
    6807             :         spdk_bs_user_op_t *op;
    6808             :         uint64_t offset;
    6809             : 
    6810         432 :         if (bserrno != 0) {
    6811           0 :                 bs_clone_snapshot_origblob_cleanup(ctx, bserrno);
    6812           0 :                 return;
    6813             :         }
    6814             : 
    6815         572 :         for (; ctx->cluster < _blob->active.num_clusters; ctx->cluster++) {
    6816         520 :                 if (bs_cluster_needs_allocation(_blob, ctx->cluster, ctx->allocate_all)) {
    6817         380 :                         break;
    6818             :                 }
    6819             :         }
    6820             : 
    6821         432 :         if (ctx->cluster < _blob->active.num_clusters) {
    6822         380 :                 offset = bs_cluster_to_lba(_blob->bs, ctx->cluster);
    6823             : 
    6824             :                 /* We may safely increment a cluster before copying */
    6825         380 :                 ctx->cluster++;
    6826             : 
    6827             :                 /* Use a dummy 0B read as a context for cluster copy */
    6828         380 :                 cpl.type = SPDK_BS_CPL_TYPE_BLOB_BASIC;
    6829         380 :                 cpl.u.blob_basic.cb_fn = bs_inflate_blob_touch_next;
    6830         380 :                 cpl.u.blob_basic.cb_arg = ctx;
    6831             : 
    6832         380 :                 op = bs_user_op_alloc(ctx->channel, &cpl, SPDK_BLOB_READ, _blob,
    6833             :                                       NULL, 0, offset, 0);
    6834         380 :                 if (!op) {
    6835           0 :                         bs_clone_snapshot_origblob_cleanup(ctx, -ENOMEM);
    6836           0 :                         return;
    6837             :                 }
    6838             : 
    6839         380 :                 bs_allocate_and_copy_cluster(_blob, ctx->channel, offset, op);
    6840             :         } else {
    6841          52 :                 bs_inflate_blob_done(ctx);
    6842             :         }
    6843             : }
    6844             : 
    6845             : static void
    6846          56 : bs_inflate_blob_open_cpl(void *cb_arg, struct spdk_blob *_blob, int bserrno)
    6847             : {
    6848          56 :         struct spdk_clone_snapshot_ctx *ctx = (struct spdk_clone_snapshot_ctx *)cb_arg;
    6849             :         uint64_t clusters_needed;
    6850             :         uint64_t i;
    6851             : 
    6852          56 :         if (bserrno != 0) {
    6853           0 :                 bs_clone_snapshot_cleanup_finish(ctx, bserrno);
    6854           0 :                 return;
    6855             :         }
    6856             : 
    6857          56 :         ctx->original.blob = _blob;
    6858          56 :         ctx->original.md_ro = _blob->md_ro;
    6859             : 
    6860          56 :         if (_blob->locked_operation_in_progress) {
    6861           0 :                 SPDK_DEBUGLOG(blob, "Cannot inflate blob - another operation in progress\n");
    6862           0 :                 ctx->bserrno = -EBUSY;
    6863           0 :                 spdk_blob_close(_blob, bs_clone_snapshot_cleanup_finish, ctx);
    6864           0 :                 return;
    6865             :         }
    6866             : 
    6867          56 :         _blob->locked_operation_in_progress = true;
    6868             : 
    6869          56 :         switch (_blob->parent_id) {
    6870           8 :         case SPDK_BLOBID_INVALID:
    6871           8 :                 if (!ctx->allocate_all) {
    6872             :                         /* This blob has no parent, so we cannot decouple it. */
    6873           4 :                         SPDK_ERRLOG("Cannot decouple parent of blob with no parent.\n");
    6874           4 :                         bs_clone_snapshot_origblob_cleanup(ctx, -EINVAL);
    6875           4 :                         return;
    6876             :                 }
    6877           4 :                 break;
    6878           8 :         case SPDK_BLOBID_EXTERNAL_SNAPSHOT:
    6879             :                 /*
    6880             :                  * It would be better to rely on back_bs_dev->is_zeroes(), to determine which
    6881             :                  * clusters require allocation. Until there is a blobstore consumer that
    6882             :                  * uses esnaps with an spdk_bs_dev that implements a useful is_zeroes() it is not
    6883             :                  * worth the effort.
    6884             :                  */
    6885           8 :                 ctx->allocate_all = true;
    6886           8 :                 break;
    6887          40 :         default:
    6888          40 :                 break;
    6889             :         }
    6890             : 
    6891          52 :         if (spdk_blob_is_thin_provisioned(_blob) == false) {
    6892             :                 /* This is not thin provisioned blob. No need to inflate. */
    6893           0 :                 bs_clone_snapshot_origblob_cleanup(ctx, 0);
    6894           0 :                 return;
    6895             :         }
    6896             : 
    6897             :         /* Do two passes - one to verify that we can obtain enough clusters
    6898             :          * and another to actually claim them.
    6899             :          */
    6900          52 :         clusters_needed = 0;
    6901         572 :         for (i = 0; i < _blob->active.num_clusters; i++) {
    6902         520 :                 if (bs_cluster_needs_allocation(_blob, i, ctx->allocate_all)) {
    6903         380 :                         clusters_needed++;
    6904             :                 }
    6905             :         }
    6906             : 
    6907          52 :         if (clusters_needed > _blob->bs->num_free_clusters) {
    6908             :                 /* Not enough free clusters. Cannot satisfy the request. */
    6909           0 :                 bs_clone_snapshot_origblob_cleanup(ctx, -ENOSPC);
    6910           0 :                 return;
    6911             :         }
    6912             : 
    6913          52 :         ctx->cluster = 0;
    6914          52 :         bs_inflate_blob_touch_next(ctx, 0);
    6915             : }
    6916             : 
    6917             : static void
    6918          56 : bs_inflate_blob(struct spdk_blob_store *bs, struct spdk_io_channel *channel,
    6919             :                 spdk_blob_id blobid, bool allocate_all, spdk_blob_op_complete cb_fn, void *cb_arg)
    6920             : {
    6921          56 :         struct spdk_clone_snapshot_ctx *ctx = calloc(1, sizeof(*ctx));
    6922             : 
    6923          56 :         if (!ctx) {
    6924           0 :                 cb_fn(cb_arg, -ENOMEM);
    6925           0 :                 return;
    6926             :         }
    6927          56 :         ctx->cpl.type = SPDK_BS_CPL_TYPE_BLOB_BASIC;
    6928          56 :         ctx->cpl.u.bs_basic.cb_fn = cb_fn;
    6929          56 :         ctx->cpl.u.bs_basic.cb_arg = cb_arg;
    6930          56 :         ctx->bserrno = 0;
    6931          56 :         ctx->original.id = blobid;
    6932          56 :         ctx->channel = channel;
    6933          56 :         ctx->allocate_all = allocate_all;
    6934             : 
    6935          56 :         spdk_bs_open_blob(bs, ctx->original.id, bs_inflate_blob_open_cpl, ctx);
    6936             : }
    6937             : 
    6938             : void
    6939          24 : spdk_bs_inflate_blob(struct spdk_blob_store *bs, struct spdk_io_channel *channel,
    6940             :                      spdk_blob_id blobid, spdk_blob_op_complete cb_fn, void *cb_arg)
    6941             : {
    6942          24 :         bs_inflate_blob(bs, channel, blobid, true, cb_fn, cb_arg);
    6943          24 : }
    6944             : 
    6945             : void
    6946          32 : spdk_bs_blob_decouple_parent(struct spdk_blob_store *bs, struct spdk_io_channel *channel,
    6947             :                              spdk_blob_id blobid, spdk_blob_op_complete cb_fn, void *cb_arg)
    6948             : {
    6949          32 :         bs_inflate_blob(bs, channel, blobid, false, cb_fn, cb_arg);
    6950          32 : }
    6951             : /* END spdk_bs_inflate_blob */
    6952             : 
    6953             : /* START spdk_blob_resize */
    6954             : struct spdk_bs_resize_ctx {
    6955             :         spdk_blob_op_complete cb_fn;
    6956             :         void *cb_arg;
    6957             :         struct spdk_blob *blob;
    6958             :         uint64_t sz;
    6959             :         int rc;
    6960             : };
    6961             : 
    6962             : static void
    6963         182 : bs_resize_unfreeze_cpl(void *cb_arg, int rc)
    6964             : {
    6965         182 :         struct spdk_bs_resize_ctx *ctx = (struct spdk_bs_resize_ctx *)cb_arg;
    6966             : 
    6967         182 :         if (rc != 0) {
    6968           0 :                 SPDK_ERRLOG("Unfreeze failed, rc=%d\n", rc);
    6969             :         }
    6970             : 
    6971         182 :         if (ctx->rc != 0) {
    6972           4 :                 SPDK_ERRLOG("Unfreeze failed, ctx->rc=%d\n", ctx->rc);
    6973           4 :                 rc = ctx->rc;
    6974             :         }
    6975             : 
    6976         182 :         ctx->blob->locked_operation_in_progress = false;
    6977             : 
    6978         182 :         ctx->cb_fn(ctx->cb_arg, rc);
    6979         182 :         free(ctx);
    6980         182 : }
    6981             : 
    6982             : static void
    6983         182 : bs_resize_freeze_cpl(void *cb_arg, int rc)
    6984             : {
    6985         182 :         struct spdk_bs_resize_ctx *ctx = (struct spdk_bs_resize_ctx *)cb_arg;
    6986             : 
    6987         182 :         if (rc != 0) {
    6988           0 :                 ctx->blob->locked_operation_in_progress = false;
    6989           0 :                 ctx->cb_fn(ctx->cb_arg, rc);
    6990           0 :                 free(ctx);
    6991           0 :                 return;
    6992             :         }
    6993             : 
    6994         182 :         ctx->rc = blob_resize(ctx->blob, ctx->sz);
    6995             : 
    6996         182 :         blob_unfreeze_io(ctx->blob, bs_resize_unfreeze_cpl, ctx);
    6997             : }
    6998             : 
    6999             : void
    7000         196 : spdk_blob_resize(struct spdk_blob *blob, uint64_t sz, spdk_blob_op_complete cb_fn, void *cb_arg)
    7001             : {
    7002             :         struct spdk_bs_resize_ctx *ctx;
    7003             : 
    7004         196 :         blob_verify_md_op(blob);
    7005             : 
    7006         196 :         SPDK_DEBUGLOG(blob, "Resizing blob 0x%" PRIx64 " to %" PRIu64 " clusters\n", blob->id, sz);
    7007             : 
    7008         196 :         if (blob->md_ro) {
    7009           4 :                 cb_fn(cb_arg, -EPERM);
    7010           4 :                 return;
    7011             :         }
    7012             : 
    7013         192 :         if (sz == blob->active.num_clusters) {
    7014          10 :                 cb_fn(cb_arg, 0);
    7015          10 :                 return;
    7016             :         }
    7017             : 
    7018         182 :         if (blob->locked_operation_in_progress) {
    7019           0 :                 cb_fn(cb_arg, -EBUSY);
    7020           0 :                 return;
    7021             :         }
    7022             : 
    7023         182 :         ctx = calloc(1, sizeof(*ctx));
    7024         182 :         if (!ctx) {
    7025           0 :                 cb_fn(cb_arg, -ENOMEM);
    7026           0 :                 return;
    7027             :         }
    7028             : 
    7029         182 :         blob->locked_operation_in_progress = true;
    7030         182 :         ctx->cb_fn = cb_fn;
    7031         182 :         ctx->cb_arg = cb_arg;
    7032         182 :         ctx->blob = blob;
    7033         182 :         ctx->sz = sz;
    7034         182 :         blob_freeze_io(blob, bs_resize_freeze_cpl, ctx);
    7035             : }
    7036             : 
    7037             : /* END spdk_blob_resize */
    7038             : 
    7039             : 
    7040             : /* START spdk_bs_delete_blob */
    7041             : 
    7042             : static void
    7043        1420 : bs_delete_close_cpl(void *cb_arg, int bserrno)
    7044             : {
    7045        1420 :         spdk_bs_sequence_t *seq = cb_arg;
    7046             : 
    7047        1420 :         bs_sequence_finish(seq, bserrno);
    7048        1420 : }
    7049             : 
    7050             : static void
    7051        1420 : bs_delete_persist_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
    7052             : {
    7053        1420 :         struct spdk_blob *blob = cb_arg;
    7054             : 
    7055        1420 :         if (bserrno != 0) {
    7056             :                 /*
    7057             :                  * We already removed this blob from the blobstore tailq, so
    7058             :                  *  we need to free it here since this is the last reference
    7059             :                  *  to it.
    7060             :                  */
    7061           0 :                 blob_free(blob);
    7062           0 :                 bs_delete_close_cpl(seq, bserrno);
    7063           0 :                 return;
    7064             :         }
    7065             : 
    7066             :         /*
    7067             :          * This will immediately decrement the ref_count and call
    7068             :          *  the completion routine since the metadata state is clean.
    7069             :          *  By calling spdk_blob_close, we reduce the number of call
    7070             :          *  points into code that touches the blob->open_ref count
    7071             :          *  and the blobstore's blob list.
    7072             :          */
    7073        1420 :         spdk_blob_close(blob, bs_delete_close_cpl, seq);
    7074             : }
    7075             : 
    7076             : struct delete_snapshot_ctx {
    7077             :         struct spdk_blob_list *parent_snapshot_entry;
    7078             :         struct spdk_blob *snapshot;
    7079             :         struct spdk_blob_md_page *page;
    7080             :         bool snapshot_md_ro;
    7081             :         struct spdk_blob *clone;
    7082             :         bool clone_md_ro;
    7083             :         spdk_blob_op_with_handle_complete cb_fn;
    7084             :         void *cb_arg;
    7085             :         int bserrno;
    7086             :         uint32_t next_extent_page;
    7087             : };
    7088             : 
    7089             : static void
    7090         110 : delete_blob_cleanup_finish(void *cb_arg, int bserrno)
    7091             : {
    7092         110 :         struct delete_snapshot_ctx *ctx = cb_arg;
    7093             : 
    7094         110 :         if (bserrno != 0) {
    7095           0 :                 SPDK_ERRLOG("Snapshot cleanup error %d\n", bserrno);
    7096             :         }
    7097             : 
    7098         110 :         assert(ctx != NULL);
    7099             : 
    7100         110 :         if (bserrno != 0 && ctx->bserrno == 0) {
    7101           0 :                 ctx->bserrno = bserrno;
    7102             :         }
    7103             : 
    7104         110 :         ctx->cb_fn(ctx->cb_arg, ctx->snapshot, ctx->bserrno);
    7105         110 :         spdk_free(ctx->page);
    7106         110 :         free(ctx);
    7107         110 : }
    7108             : 
    7109             : static void
    7110          22 : delete_snapshot_cleanup_snapshot(void *cb_arg, int bserrno)
    7111             : {
    7112          22 :         struct delete_snapshot_ctx *ctx = cb_arg;
    7113             : 
    7114          22 :         if (bserrno != 0) {
    7115           0 :                 ctx->bserrno = bserrno;
    7116           0 :                 SPDK_ERRLOG("Clone cleanup error %d\n", bserrno);
    7117             :         }
    7118             : 
    7119          22 :         if (ctx->bserrno != 0) {
    7120          22 :                 assert(blob_lookup(ctx->snapshot->bs, ctx->snapshot->id) == NULL);
    7121          22 :                 RB_INSERT(spdk_blob_tree, &ctx->snapshot->bs->open_blobs, ctx->snapshot);
    7122          22 :                 spdk_bit_array_set(ctx->snapshot->bs->open_blobids, ctx->snapshot->id);
    7123             :         }
    7124             : 
    7125          22 :         ctx->snapshot->locked_operation_in_progress = false;
    7126          22 :         ctx->snapshot->md_ro = ctx->snapshot_md_ro;
    7127             : 
    7128          22 :         spdk_blob_close(ctx->snapshot, delete_blob_cleanup_finish, ctx);
    7129          22 : }
    7130             : 
    7131             : static void
    7132          12 : delete_snapshot_cleanup_clone(void *cb_arg, int bserrno)
    7133             : {
    7134          12 :         struct delete_snapshot_ctx *ctx = cb_arg;
    7135             : 
    7136          12 :         ctx->clone->locked_operation_in_progress = false;
    7137          12 :         ctx->clone->md_ro = ctx->clone_md_ro;
    7138             : 
    7139          12 :         spdk_blob_close(ctx->clone, delete_snapshot_cleanup_snapshot, ctx);
    7140          12 : }
    7141             : 
    7142             : static void
    7143          48 : delete_snapshot_unfreeze_cpl(void *cb_arg, int bserrno)
    7144             : {
    7145          48 :         struct delete_snapshot_ctx *ctx = cb_arg;
    7146             : 
    7147          48 :         if (bserrno) {
    7148           0 :                 ctx->bserrno = bserrno;
    7149           0 :                 delete_snapshot_cleanup_clone(ctx, 0);
    7150           0 :                 return;
    7151             :         }
    7152             : 
    7153          48 :         ctx->clone->locked_operation_in_progress = false;
    7154          48 :         spdk_blob_close(ctx->clone, delete_blob_cleanup_finish, ctx);
    7155             : }
    7156             : 
    7157             : static void
    7158          52 : delete_snapshot_sync_snapshot_cpl(void *cb_arg, int bserrno)
    7159             : {
    7160          52 :         struct delete_snapshot_ctx *ctx = cb_arg;
    7161          52 :         struct spdk_blob_list *parent_snapshot_entry = NULL;
    7162          52 :         struct spdk_blob_list *snapshot_entry = NULL;
    7163          52 :         struct spdk_blob_list *clone_entry = NULL;
    7164          52 :         struct spdk_blob_list *snapshot_clone_entry = NULL;
    7165             : 
    7166          52 :         if (bserrno) {
    7167           4 :                 SPDK_ERRLOG("Failed to sync MD on blob\n");
    7168           4 :                 ctx->bserrno = bserrno;
    7169           4 :                 delete_snapshot_cleanup_clone(ctx, 0);
    7170           4 :                 return;
    7171             :         }
    7172             : 
    7173             :         /* Get snapshot entry for the snapshot we want to remove */
    7174          48 :         snapshot_entry = bs_get_snapshot_entry(ctx->snapshot->bs, ctx->snapshot->id);
    7175             : 
    7176          48 :         assert(snapshot_entry != NULL);
    7177             : 
    7178             :         /* Remove clone entry in this snapshot (at this point there can be only one clone) */
    7179          48 :         clone_entry = TAILQ_FIRST(&snapshot_entry->clones);
    7180          48 :         assert(clone_entry != NULL);
    7181          48 :         TAILQ_REMOVE(&snapshot_entry->clones, clone_entry, link);
    7182          48 :         snapshot_entry->clone_count--;
    7183          48 :         assert(TAILQ_EMPTY(&snapshot_entry->clones));
    7184             : 
    7185          48 :         switch (ctx->snapshot->parent_id) {
    7186          40 :         case SPDK_BLOBID_INVALID:
    7187             :         case SPDK_BLOBID_EXTERNAL_SNAPSHOT:
    7188             :                 /* No parent snapshot - just remove clone entry */
    7189          40 :                 free(clone_entry);
    7190          40 :                 break;
    7191           8 :         default:
    7192             :                 /* This snapshot is at the same time a clone of another snapshot - we need to
    7193             :                  * update parent snapshot (remove current clone, add new one inherited from
    7194             :                  * the snapshot that is being removed) */
    7195             : 
    7196             :                 /* Get snapshot entry for parent snapshot and clone entry within that snapshot for
    7197             :                  * snapshot that we are removing */
    7198           8 :                 blob_get_snapshot_and_clone_entries(ctx->snapshot, &parent_snapshot_entry,
    7199             :                                                     &snapshot_clone_entry);
    7200             : 
    7201             :                 /* Switch clone entry in parent snapshot */
    7202           8 :                 TAILQ_INSERT_TAIL(&parent_snapshot_entry->clones, clone_entry, link);
    7203           8 :                 TAILQ_REMOVE(&parent_snapshot_entry->clones, snapshot_clone_entry, link);
    7204           8 :                 free(snapshot_clone_entry);
    7205             :         }
    7206             : 
    7207             :         /* Restore md_ro flags */
    7208          48 :         ctx->clone->md_ro = ctx->clone_md_ro;
    7209          48 :         ctx->snapshot->md_ro = ctx->snapshot_md_ro;
    7210             : 
    7211          48 :         blob_unfreeze_io(ctx->clone, delete_snapshot_unfreeze_cpl, ctx);
    7212             : }
    7213             : 
    7214             : static void
    7215          56 : delete_snapshot_sync_clone_cpl(void *cb_arg, int bserrno)
    7216             : {
    7217          56 :         struct delete_snapshot_ctx *ctx = cb_arg;
    7218             :         uint64_t i;
    7219             : 
    7220          56 :         ctx->snapshot->md_ro = false;
    7221             : 
    7222          56 :         if (bserrno) {
    7223           4 :                 SPDK_ERRLOG("Failed to sync MD on clone\n");
    7224           4 :                 ctx->bserrno = bserrno;
    7225             : 
    7226             :                 /* Restore snapshot to previous state */
    7227           4 :                 bserrno = blob_remove_xattr(ctx->snapshot, SNAPSHOT_PENDING_REMOVAL, true);
    7228           4 :                 if (bserrno != 0) {
    7229           0 :                         delete_snapshot_cleanup_clone(ctx, bserrno);
    7230           0 :                         return;
    7231             :                 }
    7232             : 
    7233           4 :                 spdk_blob_sync_md(ctx->snapshot, delete_snapshot_cleanup_clone, ctx);
    7234           4 :                 return;
    7235             :         }
    7236             : 
    7237             :         /* Clear cluster map entries for snapshot */
    7238         552 :         for (i = 0; i < ctx->snapshot->active.num_clusters && i < ctx->clone->active.num_clusters; i++) {
    7239         500 :                 if (ctx->clone->active.clusters[i] == ctx->snapshot->active.clusters[i]) {
    7240         492 :                         ctx->snapshot->active.clusters[i] = 0;
    7241             :                 }
    7242             :         }
    7243          78 :         for (i = 0; i < ctx->snapshot->active.num_extent_pages &&
    7244          52 :              i < ctx->clone->active.num_extent_pages; i++) {
    7245          26 :                 if (ctx->clone->active.extent_pages[i] == ctx->snapshot->active.extent_pages[i]) {
    7246          24 :                         ctx->snapshot->active.extent_pages[i] = 0;
    7247             :                 }
    7248             :         }
    7249             : 
    7250          52 :         blob_set_thin_provision(ctx->snapshot);
    7251          52 :         ctx->snapshot->state = SPDK_BLOB_STATE_DIRTY;
    7252             : 
    7253          52 :         if (ctx->parent_snapshot_entry != NULL) {
    7254           8 :                 ctx->snapshot->back_bs_dev = NULL;
    7255             :         }
    7256             : 
    7257          52 :         spdk_blob_sync_md(ctx->snapshot, delete_snapshot_sync_snapshot_cpl, ctx);
    7258             : }
    7259             : 
    7260             : static void
    7261          56 : delete_snapshot_update_extent_pages_cpl(struct delete_snapshot_ctx *ctx)
    7262             : {
    7263             :         int bserrno;
    7264             : 
    7265             :         /* Delete old backing bs_dev from clone (related to snapshot that will be removed) */
    7266          56 :         blob_back_bs_destroy(ctx->clone);
    7267             : 
    7268             :         /* Set/remove snapshot xattr and switch parent ID and backing bs_dev on clone... */
    7269          56 :         if (ctx->snapshot->parent_id == SPDK_BLOBID_EXTERNAL_SNAPSHOT) {
    7270           8 :                 bserrno = bs_snapshot_copy_xattr(ctx->clone, ctx->snapshot,
    7271             :                                                  BLOB_EXTERNAL_SNAPSHOT_ID);
    7272           8 :                 if (bserrno != 0) {
    7273           0 :                         ctx->bserrno = bserrno;
    7274             : 
    7275             :                         /* Restore snapshot to previous state */
    7276           0 :                         bserrno = blob_remove_xattr(ctx->snapshot, SNAPSHOT_PENDING_REMOVAL, true);
    7277           0 :                         if (bserrno != 0) {
    7278           0 :                                 delete_snapshot_cleanup_clone(ctx, bserrno);
    7279           0 :                                 return;
    7280             :                         }
    7281             : 
    7282           0 :                         spdk_blob_sync_md(ctx->snapshot, delete_snapshot_cleanup_clone, ctx);
    7283           0 :                         return;
    7284             :                 }
    7285           8 :                 ctx->clone->parent_id = SPDK_BLOBID_EXTERNAL_SNAPSHOT;
    7286           8 :                 ctx->clone->back_bs_dev = ctx->snapshot->back_bs_dev;
    7287             :                 /* Do not delete the external snapshot along with this snapshot */
    7288           8 :                 ctx->snapshot->back_bs_dev = NULL;
    7289           8 :                 ctx->clone->invalid_flags |= SPDK_BLOB_EXTERNAL_SNAPSHOT;
    7290          48 :         } else if (ctx->parent_snapshot_entry != NULL) {
    7291             :                 /* ...to parent snapshot */
    7292           8 :                 ctx->clone->parent_id = ctx->parent_snapshot_entry->id;
    7293           8 :                 ctx->clone->back_bs_dev = ctx->snapshot->back_bs_dev;
    7294           8 :                 blob_set_xattr(ctx->clone, BLOB_SNAPSHOT, &ctx->parent_snapshot_entry->id,
    7295             :                                sizeof(spdk_blob_id),
    7296             :                                true);
    7297             :         } else {
    7298             :                 /* ...to blobid invalid and zeroes dev */
    7299          40 :                 ctx->clone->parent_id = SPDK_BLOBID_INVALID;
    7300          40 :                 ctx->clone->back_bs_dev = bs_create_zeroes_dev();
    7301          40 :                 blob_remove_xattr(ctx->clone, BLOB_SNAPSHOT, true);
    7302             :         }
    7303             : 
    7304          56 :         spdk_blob_sync_md(ctx->clone, delete_snapshot_sync_clone_cpl, ctx);
    7305             : }
    7306             : 
    7307             : static void
    7308          58 : delete_snapshot_update_extent_pages(void *cb_arg, int bserrno)
    7309             : {
    7310          58 :         struct delete_snapshot_ctx *ctx = cb_arg;
    7311             :         uint32_t *extent_page;
    7312             :         uint64_t i;
    7313             : 
    7314          84 :         for (i = ctx->next_extent_page; i < ctx->snapshot->active.num_extent_pages &&
    7315          54 :              i < ctx->clone->active.num_extent_pages; i++) {
    7316          28 :                 if (ctx->snapshot->active.extent_pages[i] == 0) {
    7317             :                         /* No extent page to use from snapshot */
    7318           8 :                         continue;
    7319             :                 }
    7320             : 
    7321          20 :                 extent_page = &ctx->clone->active.extent_pages[i];
    7322          20 :                 if (*extent_page == 0) {
    7323             :                         /* Copy extent page from snapshot when clone did not have a matching one */
    7324          18 :                         *extent_page = ctx->snapshot->active.extent_pages[i];
    7325          18 :                         continue;
    7326             :                 }
    7327             : 
    7328             :                 /* Clone and snapshot both contain partially filled matching extent pages.
    7329             :                  * Update the clone extent page in place with cluster map containing the mix of both. */
    7330           2 :                 ctx->next_extent_page = i + 1;
    7331           2 :                 memset(ctx->page, 0, SPDK_BS_PAGE_SIZE);
    7332             : 
    7333           2 :                 blob_write_extent_page(ctx->clone, *extent_page, i * SPDK_EXTENTS_PER_EP, ctx->page,
    7334             :                                        delete_snapshot_update_extent_pages, ctx);
    7335           2 :                 return;
    7336             :         }
    7337          56 :         delete_snapshot_update_extent_pages_cpl(ctx);
    7338             : }
    7339             : 
    7340             : static void
    7341          60 : delete_snapshot_sync_snapshot_xattr_cpl(void *cb_arg, int bserrno)
    7342             : {
    7343          60 :         struct delete_snapshot_ctx *ctx = cb_arg;
    7344             :         uint64_t i;
    7345             : 
    7346             :         /* Temporarily override md_ro flag for clone for MD modification */
    7347          60 :         ctx->clone_md_ro = ctx->clone->md_ro;
    7348          60 :         ctx->clone->md_ro = false;
    7349             : 
    7350          60 :         if (bserrno) {
    7351           4 :                 SPDK_ERRLOG("Failed to sync MD with xattr on blob\n");
    7352           4 :                 ctx->bserrno = bserrno;
    7353           4 :                 delete_snapshot_cleanup_clone(ctx, 0);
    7354           4 :                 return;
    7355             :         }
    7356             : 
    7357             :         /* Copy snapshot map to clone map (only unallocated clusters in clone) */
    7358         596 :         for (i = 0; i < ctx->snapshot->active.num_clusters && i < ctx->clone->active.num_clusters; i++) {
    7359         540 :                 if (ctx->clone->active.clusters[i] == 0) {
    7360         532 :                         ctx->clone->active.clusters[i] = ctx->snapshot->active.clusters[i];
    7361             :                 }
    7362             :         }
    7363          56 :         ctx->next_extent_page = 0;
    7364          56 :         delete_snapshot_update_extent_pages(ctx, 0);
    7365             : }
    7366             : 
    7367             : static void
    7368           8 : delete_snapshot_esnap_channels_destroyed_cb(void *cb_arg, struct spdk_blob *blob, int bserrno)
    7369             : {
    7370           8 :         struct delete_snapshot_ctx *ctx = cb_arg;
    7371             : 
    7372           8 :         if (bserrno != 0) {
    7373           0 :                 SPDK_ERRLOG("blob 0x%" PRIx64 ": failed to destroy esnap channels: %d\n",
    7374             :                             blob->id, bserrno);
    7375             :                 /* That error should not stop us from syncing metadata. */
    7376             :         }
    7377             : 
    7378           8 :         spdk_blob_sync_md(ctx->snapshot, delete_snapshot_sync_snapshot_xattr_cpl, ctx);
    7379           8 : }
    7380             : 
    7381             : static void
    7382          60 : delete_snapshot_freeze_io_cb(void *cb_arg, int bserrno)
    7383             : {
    7384          60 :         struct delete_snapshot_ctx *ctx = cb_arg;
    7385             : 
    7386          60 :         if (bserrno) {
    7387           0 :                 SPDK_ERRLOG("Failed to freeze I/O on clone\n");
    7388           0 :                 ctx->bserrno = bserrno;
    7389           0 :                 delete_snapshot_cleanup_clone(ctx, 0);
    7390           0 :                 return;
    7391             :         }
    7392             : 
    7393             :         /* Temporarily override md_ro flag for snapshot for MD modification */
    7394          60 :         ctx->snapshot_md_ro = ctx->snapshot->md_ro;
    7395          60 :         ctx->snapshot->md_ro = false;
    7396             : 
    7397             :         /* Mark blob as pending for removal for power failure safety, use clone id for recovery */
    7398          60 :         ctx->bserrno = blob_set_xattr(ctx->snapshot, SNAPSHOT_PENDING_REMOVAL, &ctx->clone->id,
    7399             :                                       sizeof(spdk_blob_id), true);
    7400          60 :         if (ctx->bserrno != 0) {
    7401           0 :                 delete_snapshot_cleanup_clone(ctx, 0);
    7402           0 :                 return;
    7403             :         }
    7404             : 
    7405          60 :         if (blob_is_esnap_clone(ctx->snapshot)) {
    7406           8 :                 blob_esnap_destroy_bs_dev_channels(ctx->snapshot, false,
    7407             :                                                    delete_snapshot_esnap_channels_destroyed_cb,
    7408             :                                                    ctx);
    7409           8 :                 return;
    7410             :         }
    7411             : 
    7412          52 :         spdk_blob_sync_md(ctx->snapshot, delete_snapshot_sync_snapshot_xattr_cpl, ctx);
    7413             : }
    7414             : 
    7415             : static void
    7416          70 : delete_snapshot_open_clone_cb(void *cb_arg, struct spdk_blob *clone, int bserrno)
    7417             : {
    7418          70 :         struct delete_snapshot_ctx *ctx = cb_arg;
    7419             : 
    7420          70 :         if (bserrno) {
    7421          10 :                 SPDK_ERRLOG("Failed to open clone\n");
    7422          10 :                 ctx->bserrno = bserrno;
    7423          10 :                 delete_snapshot_cleanup_snapshot(ctx, 0);
    7424          10 :                 return;
    7425             :         }
    7426             : 
    7427          60 :         ctx->clone = clone;
    7428             : 
    7429          60 :         if (clone->locked_operation_in_progress) {
    7430           0 :                 SPDK_DEBUGLOG(blob, "Cannot remove blob - another operation in progress on its clone\n");
    7431           0 :                 ctx->bserrno = -EBUSY;
    7432           0 :                 spdk_blob_close(ctx->clone, delete_snapshot_cleanup_snapshot, ctx);
    7433           0 :                 return;
    7434             :         }
    7435             : 
    7436          60 :         clone->locked_operation_in_progress = true;
    7437             : 
    7438          60 :         blob_freeze_io(clone, delete_snapshot_freeze_io_cb, ctx);
    7439             : }
    7440             : 
    7441             : static void
    7442          70 : update_clone_on_snapshot_deletion(struct spdk_blob *snapshot, struct delete_snapshot_ctx *ctx)
    7443             : {
    7444          70 :         struct spdk_blob_list *snapshot_entry = NULL;
    7445          70 :         struct spdk_blob_list *clone_entry = NULL;
    7446          70 :         struct spdk_blob_list *snapshot_clone_entry = NULL;
    7447             : 
    7448             :         /* Get snapshot entry for the snapshot we want to remove */
    7449          70 :         snapshot_entry = bs_get_snapshot_entry(snapshot->bs, snapshot->id);
    7450             : 
    7451          70 :         assert(snapshot_entry != NULL);
    7452             : 
    7453             :         /* Get clone of the snapshot (at this point there can be only one clone) */
    7454          70 :         clone_entry = TAILQ_FIRST(&snapshot_entry->clones);
    7455          70 :         assert(snapshot_entry->clone_count == 1);
    7456          70 :         assert(clone_entry != NULL);
    7457             : 
    7458             :         /* Get snapshot entry for parent snapshot and clone entry within that snapshot for
    7459             :          * snapshot that we are removing */
    7460          70 :         blob_get_snapshot_and_clone_entries(snapshot, &ctx->parent_snapshot_entry,
    7461             :                                             &snapshot_clone_entry);
    7462             : 
    7463          70 :         spdk_bs_open_blob(snapshot->bs, clone_entry->id, delete_snapshot_open_clone_cb, ctx);
    7464          70 : }
    7465             : 
    7466             : static void
    7467        1482 : bs_delete_blob_finish(void *cb_arg, struct spdk_blob *blob, int bserrno)
    7468             : {
    7469        1482 :         spdk_bs_sequence_t *seq = cb_arg;
    7470        1482 :         struct spdk_blob_list *snapshot_entry = NULL;
    7471             :         uint32_t page_num;
    7472             : 
    7473        1482 :         if (bserrno) {
    7474          62 :                 SPDK_ERRLOG("Failed to remove blob\n");
    7475          62 :                 bs_sequence_finish(seq, bserrno);
    7476          62 :                 return;
    7477             :         }
    7478             : 
    7479             :         /* Remove snapshot from the list */
    7480        1420 :         snapshot_entry = bs_get_snapshot_entry(blob->bs, blob->id);
    7481        1420 :         if (snapshot_entry != NULL) {
    7482         124 :                 TAILQ_REMOVE(&blob->bs->snapshots, snapshot_entry, link);
    7483         124 :                 free(snapshot_entry);
    7484             :         }
    7485             : 
    7486        1420 :         page_num = bs_blobid_to_page(blob->id);
    7487        1420 :         spdk_bit_array_clear(blob->bs->used_blobids, page_num);
    7488        1420 :         blob->state = SPDK_BLOB_STATE_DIRTY;
    7489        1420 :         blob->active.num_pages = 0;
    7490        1420 :         blob_resize(blob, 0);
    7491             : 
    7492        1420 :         blob_persist(seq, blob, bs_delete_persist_cpl, blob);
    7493             : }
    7494             : 
    7495             : static int
    7496        1482 : bs_is_blob_deletable(struct spdk_blob *blob, bool *update_clone)
    7497             : {
    7498        1482 :         struct spdk_blob_list *snapshot_entry = NULL;
    7499        1482 :         struct spdk_blob_list *clone_entry = NULL;
    7500        1482 :         struct spdk_blob *clone = NULL;
    7501        1482 :         bool has_one_clone = false;
    7502             : 
    7503             :         /* Check if this is a snapshot with clones */
    7504        1482 :         snapshot_entry = bs_get_snapshot_entry(blob->bs, blob->id);
    7505        1482 :         if (snapshot_entry != NULL) {
    7506         174 :                 if (snapshot_entry->clone_count > 1) {
    7507          24 :                         SPDK_ERRLOG("Cannot remove snapshot with more than one clone\n");
    7508          24 :                         return -EBUSY;
    7509         150 :                 } else if (snapshot_entry->clone_count == 1) {
    7510          70 :                         has_one_clone = true;
    7511             :                 }
    7512             :         }
    7513             : 
    7514             :         /* Check if someone has this blob open (besides this delete context):
    7515             :          * - open_ref = 1 - only this context opened blob, so it is ok to remove it
    7516             :          * - open_ref <= 2 && has_one_clone = true - clone is holding snapshot
    7517             :          *      and that is ok, because we will update it accordingly */
    7518        1458 :         if (blob->open_ref <= 2 && has_one_clone) {
    7519          70 :                 clone_entry = TAILQ_FIRST(&snapshot_entry->clones);
    7520          70 :                 assert(clone_entry != NULL);
    7521          70 :                 clone = blob_lookup(blob->bs, clone_entry->id);
    7522             : 
    7523          70 :                 if (blob->open_ref == 2 && clone == NULL) {
    7524             :                         /* Clone is closed and someone else opened this blob */
    7525           0 :                         SPDK_ERRLOG("Cannot remove snapshot because it is open\n");
    7526           0 :                         return -EBUSY;
    7527             :                 }
    7528             : 
    7529          70 :                 *update_clone = true;
    7530          70 :                 return 0;
    7531             :         }
    7532             : 
    7533        1388 :         if (blob->open_ref > 1) {
    7534          16 :                 SPDK_ERRLOG("Cannot remove snapshot because it is open\n");
    7535          16 :                 return -EBUSY;
    7536             :         }
    7537             : 
    7538        1372 :         assert(has_one_clone == false);
    7539        1372 :         *update_clone = false;
    7540        1372 :         return 0;
    7541             : }
    7542             : 
    7543             : static void
    7544           0 : bs_delete_enomem_close_cpl(void *cb_arg, int bserrno)
    7545             : {
    7546           0 :         spdk_bs_sequence_t *seq = cb_arg;
    7547             : 
    7548           0 :         bs_sequence_finish(seq, -ENOMEM);
    7549           0 : }
    7550             : 
    7551             : static void
    7552        1492 : bs_delete_open_cpl(void *cb_arg, struct spdk_blob *blob, int bserrno)
    7553             : {
    7554        1492 :         spdk_bs_sequence_t *seq = cb_arg;
    7555             :         struct delete_snapshot_ctx *ctx;
    7556        1492 :         bool update_clone = false;
    7557             : 
    7558        1492 :         if (bserrno != 0) {
    7559          10 :                 bs_sequence_finish(seq, bserrno);
    7560          10 :                 return;
    7561             :         }
    7562             : 
    7563        1482 :         blob_verify_md_op(blob);
    7564             : 
    7565        1482 :         ctx = calloc(1, sizeof(*ctx));
    7566        1482 :         if (ctx == NULL) {
    7567           0 :                 spdk_blob_close(blob, bs_delete_enomem_close_cpl, seq);
    7568           0 :                 return;
    7569             :         }
    7570             : 
    7571        1482 :         ctx->snapshot = blob;
    7572        1482 :         ctx->cb_fn = bs_delete_blob_finish;
    7573        1482 :         ctx->cb_arg = seq;
    7574             : 
    7575             :         /* Check if blob can be removed and if it is a snapshot with clone on top of it */
    7576        1482 :         ctx->bserrno = bs_is_blob_deletable(blob, &update_clone);
    7577        1482 :         if (ctx->bserrno) {
    7578          40 :                 spdk_blob_close(blob, delete_blob_cleanup_finish, ctx);
    7579          40 :                 return;
    7580             :         }
    7581             : 
    7582        1442 :         if (blob->locked_operation_in_progress) {
    7583           0 :                 SPDK_DEBUGLOG(blob, "Cannot remove blob - another operation in progress\n");
    7584           0 :                 ctx->bserrno = -EBUSY;
    7585           0 :                 spdk_blob_close(blob, delete_blob_cleanup_finish, ctx);
    7586           0 :                 return;
    7587             :         }
    7588             : 
    7589        1442 :         blob->locked_operation_in_progress = true;
    7590             : 
    7591             :         /*
    7592             :          * Remove the blob from the blob_store list now, to ensure it does not
    7593             :          *  get returned after this point by blob_lookup().
    7594             :          */
    7595        1442 :         spdk_bit_array_clear(blob->bs->open_blobids, blob->id);
    7596        1442 :         RB_REMOVE(spdk_blob_tree, &blob->bs->open_blobs, blob);
    7597             : 
    7598        1442 :         if (update_clone) {
    7599          70 :                 ctx->page = spdk_zmalloc(SPDK_BS_PAGE_SIZE, 0, NULL, SPDK_ENV_SOCKET_ID_ANY, SPDK_MALLOC_DMA);
    7600          70 :                 if (!ctx->page) {
    7601           0 :                         ctx->bserrno = -ENOMEM;
    7602           0 :                         spdk_blob_close(blob, delete_blob_cleanup_finish, ctx);
    7603           0 :                         return;
    7604             :                 }
    7605             :                 /* This blob is a snapshot with active clone - update clone first */
    7606          70 :                 update_clone_on_snapshot_deletion(blob, ctx);
    7607             :         } else {
    7608             :                 /* This blob does not have any clones - just remove it */
    7609        1372 :                 bs_blob_list_remove(blob);
    7610        1372 :                 bs_delete_blob_finish(seq, blob, 0);
    7611        1372 :                 free(ctx);
    7612             :         }
    7613             : }
    7614             : 
    7615             : void
    7616        1492 : spdk_bs_delete_blob(struct spdk_blob_store *bs, spdk_blob_id blobid,
    7617             :                     spdk_blob_op_complete cb_fn, void *cb_arg)
    7618             : {
    7619        1492 :         struct spdk_bs_cpl      cpl;
    7620             :         spdk_bs_sequence_t      *seq;
    7621             : 
    7622        1492 :         SPDK_DEBUGLOG(blob, "Deleting blob 0x%" PRIx64 "\n", blobid);
    7623             : 
    7624        1492 :         assert(spdk_get_thread() == bs->md_thread);
    7625             : 
    7626        1492 :         cpl.type = SPDK_BS_CPL_TYPE_BLOB_BASIC;
    7627        1492 :         cpl.u.blob_basic.cb_fn = cb_fn;
    7628        1492 :         cpl.u.blob_basic.cb_arg = cb_arg;
    7629             : 
    7630        1492 :         seq = bs_sequence_start_bs(bs->md_channel, &cpl);
    7631        1492 :         if (!seq) {
    7632           0 :                 cb_fn(cb_arg, -ENOMEM);
    7633           0 :                 return;
    7634             :         }
    7635             : 
    7636        1492 :         spdk_bs_open_blob(bs, blobid, bs_delete_open_cpl, seq);
    7637             : }
    7638             : 
    7639             : /* END spdk_bs_delete_blob */
    7640             : 
    7641             : /* START spdk_bs_open_blob */
    7642             : 
    7643             : static void
    7644        3278 : bs_open_blob_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
    7645             : {
    7646        3278 :         struct spdk_blob *blob = cb_arg;
    7647             :         struct spdk_blob *existing;
    7648             : 
    7649        3278 :         if (bserrno != 0) {
    7650          64 :                 blob_free(blob);
    7651          64 :                 seq->cpl.u.blob_handle.blob = NULL;
    7652          64 :                 bs_sequence_finish(seq, bserrno);
    7653          64 :                 return;
    7654             :         }
    7655             : 
    7656        3214 :         existing = blob_lookup(blob->bs, blob->id);
    7657        3214 :         if (existing) {
    7658           4 :                 blob_free(blob);
    7659           4 :                 existing->open_ref++;
    7660           4 :                 seq->cpl.u.blob_handle.blob = existing;
    7661           4 :                 bs_sequence_finish(seq, 0);
    7662           4 :                 return;
    7663             :         }
    7664             : 
    7665        3210 :         blob->open_ref++;
    7666             : 
    7667        3210 :         spdk_bit_array_set(blob->bs->open_blobids, blob->id);
    7668        3210 :         RB_INSERT(spdk_blob_tree, &blob->bs->open_blobs, blob);
    7669             : 
    7670        3210 :         bs_sequence_finish(seq, bserrno);
    7671             : }
    7672             : 
    7673             : static inline void
    7674           4 : blob_open_opts_copy(const struct spdk_blob_open_opts *src, struct spdk_blob_open_opts *dst)
    7675             : {
    7676             : #define FIELD_OK(field) \
    7677             :         offsetof(struct spdk_blob_open_opts, field) + sizeof(src->field) <= src->opts_size
    7678             : 
    7679             : #define SET_FIELD(field) \
    7680             :         if (FIELD_OK(field)) { \
    7681             :                 dst->field = src->field; \
    7682             :         } \
    7683             : 
    7684           4 :         SET_FIELD(clear_method);
    7685           4 :         SET_FIELD(esnap_ctx);
    7686             : 
    7687           4 :         dst->opts_size = src->opts_size;
    7688             : 
    7689             :         /* You should not remove this statement, but need to update the assert statement
    7690             :          * if you add a new field, and also add a corresponding SET_FIELD statement */
    7691             :         SPDK_STATIC_ASSERT(sizeof(struct spdk_blob_open_opts) == 24, "Incorrect size");
    7692             : 
    7693             : #undef FIELD_OK
    7694             : #undef SET_FIELD
    7695           4 : }
    7696             : 
    7697             : static void
    7698        3939 : bs_open_blob(struct spdk_blob_store *bs,
    7699             :              spdk_blob_id blobid,
    7700             :              struct spdk_blob_open_opts *opts,
    7701             :              spdk_blob_op_with_handle_complete cb_fn,
    7702             :              void *cb_arg)
    7703             : {
    7704             :         struct spdk_blob                *blob;
    7705        3939 :         struct spdk_bs_cpl              cpl;
    7706        3939 :         struct spdk_blob_open_opts      opts_local;
    7707             :         spdk_bs_sequence_t              *seq;
    7708             :         uint32_t                        page_num;
    7709             : 
    7710        3939 :         SPDK_DEBUGLOG(blob, "Opening blob 0x%" PRIx64 "\n", blobid);
    7711        3939 :         assert(spdk_get_thread() == bs->md_thread);
    7712             : 
    7713        3939 :         page_num = bs_blobid_to_page(blobid);
    7714        3939 :         if (spdk_bit_array_get(bs->used_blobids, page_num) == false) {
    7715             :                 /* Invalid blobid */
    7716          48 :                 cb_fn(cb_arg, NULL, -ENOENT);
    7717          48 :                 return;
    7718             :         }
    7719             : 
    7720        3891 :         blob = blob_lookup(bs, blobid);
    7721        3891 :         if (blob) {
    7722         613 :                 blob->open_ref++;
    7723         613 :                 cb_fn(cb_arg, blob, 0);
    7724         613 :                 return;
    7725             :         }
    7726             : 
    7727        3278 :         blob = blob_alloc(bs, blobid);
    7728        3278 :         if (!blob) {
    7729           0 :                 cb_fn(cb_arg, NULL, -ENOMEM);
    7730           0 :                 return;
    7731             :         }
    7732             : 
    7733        3278 :         spdk_blob_open_opts_init(&opts_local, sizeof(opts_local));
    7734        3278 :         if (opts) {
    7735           4 :                 blob_open_opts_copy(opts, &opts_local);
    7736             :         }
    7737             : 
    7738        3278 :         blob->clear_method = opts_local.clear_method;
    7739             : 
    7740        3278 :         cpl.type = SPDK_BS_CPL_TYPE_BLOB_HANDLE;
    7741        3278 :         cpl.u.blob_handle.cb_fn = cb_fn;
    7742        3278 :         cpl.u.blob_handle.cb_arg = cb_arg;
    7743        3278 :         cpl.u.blob_handle.blob = blob;
    7744        3278 :         cpl.u.blob_handle.esnap_ctx = opts_local.esnap_ctx;
    7745             : 
    7746        3278 :         seq = bs_sequence_start_bs(bs->md_channel, &cpl);
    7747        3278 :         if (!seq) {
    7748           0 :                 blob_free(blob);
    7749           0 :                 cb_fn(cb_arg, NULL, -ENOMEM);
    7750           0 :                 return;
    7751             :         }
    7752             : 
    7753        3278 :         blob_load(seq, blob, bs_open_blob_cpl, blob);
    7754             : }
    7755             : 
    7756             : void
    7757        3935 : spdk_bs_open_blob(struct spdk_blob_store *bs, spdk_blob_id blobid,
    7758             :                   spdk_blob_op_with_handle_complete cb_fn, void *cb_arg)
    7759             : {
    7760        3935 :         bs_open_blob(bs, blobid, NULL, cb_fn, cb_arg);
    7761        3935 : }
    7762             : 
    7763             : void
    7764           4 : spdk_bs_open_blob_ext(struct spdk_blob_store *bs, spdk_blob_id blobid,
    7765             :                       struct spdk_blob_open_opts *opts, spdk_blob_op_with_handle_complete cb_fn, void *cb_arg)
    7766             : {
    7767           4 :         bs_open_blob(bs, blobid, opts, cb_fn, cb_arg);
    7768           4 : }
    7769             : 
    7770             : /* END spdk_bs_open_blob */
    7771             : 
    7772             : /* START spdk_blob_set_read_only */
    7773             : int
    7774         200 : spdk_blob_set_read_only(struct spdk_blob *blob)
    7775             : {
    7776         200 :         blob_verify_md_op(blob);
    7777             : 
    7778         200 :         blob->data_ro_flags |= SPDK_BLOB_READ_ONLY;
    7779             : 
    7780         200 :         blob->state = SPDK_BLOB_STATE_DIRTY;
    7781         200 :         return 0;
    7782             : }
    7783             : /* END spdk_blob_set_read_only */
    7784             : 
    7785             : /* START spdk_blob_sync_md */
    7786             : 
    7787             : static void
    7788        1331 : blob_sync_md_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
    7789             : {
    7790        1331 :         struct spdk_blob *blob = cb_arg;
    7791             : 
    7792        1331 :         if (bserrno == 0 && (blob->data_ro_flags & SPDK_BLOB_READ_ONLY)) {
    7793         368 :                 blob->data_ro = true;
    7794         368 :                 blob->md_ro = true;
    7795             :         }
    7796             : 
    7797        1331 :         bs_sequence_finish(seq, bserrno);
    7798        1331 : }
    7799             : 
    7800             : static void
    7801        1331 : blob_sync_md(struct spdk_blob *blob, spdk_blob_op_complete cb_fn, void *cb_arg)
    7802             : {
    7803        1331 :         struct spdk_bs_cpl      cpl;
    7804             :         spdk_bs_sequence_t      *seq;
    7805             : 
    7806        1331 :         cpl.type = SPDK_BS_CPL_TYPE_BLOB_BASIC;
    7807        1331 :         cpl.u.blob_basic.cb_fn = cb_fn;
    7808        1331 :         cpl.u.blob_basic.cb_arg = cb_arg;
    7809             : 
    7810        1331 :         seq = bs_sequence_start_bs(blob->bs->md_channel, &cpl);
    7811        1331 :         if (!seq) {
    7812           0 :                 cb_fn(cb_arg, -ENOMEM);
    7813           0 :                 return;
    7814             :         }
    7815             : 
    7816        1331 :         blob_persist(seq, blob, blob_sync_md_cpl, blob);
    7817             : }
    7818             : 
    7819             : void
    7820         961 : spdk_blob_sync_md(struct spdk_blob *blob, spdk_blob_op_complete cb_fn, void *cb_arg)
    7821             : {
    7822         961 :         blob_verify_md_op(blob);
    7823             : 
    7824         961 :         SPDK_DEBUGLOG(blob, "Syncing blob 0x%" PRIx64 "\n", blob->id);
    7825             : 
    7826         961 :         if (blob->md_ro) {
    7827           4 :                 assert(blob->state == SPDK_BLOB_STATE_CLEAN);
    7828           4 :                 cb_fn(cb_arg, 0);
    7829           4 :                 return;
    7830             :         }
    7831             : 
    7832         957 :         blob_sync_md(blob, cb_fn, cb_arg);
    7833             : }
    7834             : 
    7835             : /* END spdk_blob_sync_md */
    7836             : 
    7837             : struct spdk_blob_insert_cluster_ctx {
    7838             :         struct spdk_thread      *thread;
    7839             :         struct spdk_blob        *blob;
    7840             :         uint32_t                cluster_num;    /* cluster index in blob */
    7841             :         uint32_t                cluster;        /* cluster on disk */
    7842             :         uint32_t                extent_page;    /* extent page on disk */
    7843             :         struct spdk_blob_md_page *page; /* preallocated extent page */
    7844             :         int                     rc;
    7845             :         spdk_blob_op_complete   cb_fn;
    7846             :         void                    *cb_arg;
    7847             : };
    7848             : 
    7849             : static void
    7850         612 : blob_insert_cluster_msg_cpl(void *arg)
    7851             : {
    7852         612 :         struct spdk_blob_insert_cluster_ctx *ctx = arg;
    7853             : 
    7854         612 :         ctx->cb_fn(ctx->cb_arg, ctx->rc);
    7855         612 :         free(ctx);
    7856         612 : }
    7857             : 
    7858             : static void
    7859         608 : blob_insert_cluster_msg_cb(void *arg, int bserrno)
    7860             : {
    7861         608 :         struct spdk_blob_insert_cluster_ctx *ctx = arg;
    7862             : 
    7863         608 :         ctx->rc = bserrno;
    7864         608 :         spdk_thread_send_msg(ctx->thread, blob_insert_cluster_msg_cpl, ctx);
    7865         608 : }
    7866             : 
    7867             : static void
    7868          70 : blob_insert_new_ep_cb(void *arg, int bserrno)
    7869             : {
    7870          70 :         struct spdk_blob_insert_cluster_ctx *ctx = arg;
    7871             :         uint32_t *extent_page;
    7872             : 
    7873          70 :         extent_page = bs_cluster_to_extent_page(ctx->blob, ctx->cluster_num);
    7874          70 :         *extent_page = ctx->extent_page;
    7875          70 :         ctx->blob->state = SPDK_BLOB_STATE_DIRTY;
    7876          70 :         blob_sync_md(ctx->blob, blob_insert_cluster_msg_cb, ctx);
    7877          70 : }
    7878             : 
    7879             : struct spdk_blob_write_extent_page_ctx {
    7880             :         struct spdk_blob_store          *bs;
    7881             : 
    7882             :         uint32_t                        extent;
    7883             :         struct spdk_blob_md_page        *page;
    7884             : };
    7885             : 
    7886             : static void
    7887         306 : blob_persist_extent_page_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
    7888             : {
    7889         306 :         struct spdk_blob_write_extent_page_ctx *ctx = cb_arg;
    7890             : 
    7891         306 :         free(ctx);
    7892         306 :         bs_sequence_finish(seq, bserrno);
    7893         306 : }
    7894             : 
    7895             : static void
    7896         306 : blob_write_extent_page_ready(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
    7897             : {
    7898         306 :         struct spdk_blob_write_extent_page_ctx *ctx = cb_arg;
    7899             : 
    7900         306 :         if (bserrno != 0) {
    7901           0 :                 blob_persist_extent_page_cpl(seq, ctx, bserrno);
    7902           0 :                 return;
    7903             :         }
    7904         306 :         bs_sequence_write_dev(seq, ctx->page, bs_md_page_to_lba(ctx->bs, ctx->extent),
    7905         306 :                               bs_byte_to_lba(ctx->bs, SPDK_BS_PAGE_SIZE),
    7906             :                               blob_persist_extent_page_cpl, ctx);
    7907             : }
    7908             : 
    7909             : static void
    7910         306 : blob_write_extent_page(struct spdk_blob *blob, uint32_t extent, uint64_t cluster_num,
    7911             :                        struct spdk_blob_md_page *page, spdk_blob_op_complete cb_fn, void *cb_arg)
    7912             : {
    7913             :         struct spdk_blob_write_extent_page_ctx  *ctx;
    7914             :         spdk_bs_sequence_t                      *seq;
    7915         306 :         struct spdk_bs_cpl                      cpl;
    7916             : 
    7917         306 :         ctx = calloc(1, sizeof(*ctx));
    7918         306 :         if (!ctx) {
    7919           0 :                 cb_fn(cb_arg, -ENOMEM);
    7920           0 :                 return;
    7921             :         }
    7922         306 :         ctx->bs = blob->bs;
    7923         306 :         ctx->extent = extent;
    7924         306 :         ctx->page = page;
    7925             : 
    7926         306 :         cpl.type = SPDK_BS_CPL_TYPE_BLOB_BASIC;
    7927         306 :         cpl.u.blob_basic.cb_fn = cb_fn;
    7928         306 :         cpl.u.blob_basic.cb_arg = cb_arg;
    7929             : 
    7930         306 :         seq = bs_sequence_start_bs(blob->bs->md_channel, &cpl);
    7931         306 :         if (!seq) {
    7932           0 :                 free(ctx);
    7933           0 :                 cb_fn(cb_arg, -ENOMEM);
    7934           0 :                 return;
    7935             :         }
    7936             : 
    7937         306 :         assert(page);
    7938         306 :         page->next = SPDK_INVALID_MD_PAGE;
    7939         306 :         page->id = blob->id;
    7940         306 :         page->sequence_num = 0;
    7941             : 
    7942         306 :         blob_serialize_extent_page(blob, cluster_num, page);
    7943             : 
    7944         306 :         page->crc = blob_md_page_calc_crc(page);
    7945             : 
    7946         306 :         assert(spdk_bit_array_get(blob->bs->used_md_pages, extent) == true);
    7947             : 
    7948         306 :         bs_mark_dirty(seq, blob->bs, blob_write_extent_page_ready, ctx);
    7949             : }
    7950             : 
    7951             : static void
    7952         612 : blob_insert_cluster_msg(void *arg)
    7953             : {
    7954         612 :         struct spdk_blob_insert_cluster_ctx *ctx = arg;
    7955             :         uint32_t *extent_page;
    7956             : 
    7957         612 :         ctx->rc = blob_insert_cluster(ctx->blob, ctx->cluster_num, ctx->cluster);
    7958         612 :         if (ctx->rc != 0) {
    7959           4 :                 spdk_thread_send_msg(ctx->thread, blob_insert_cluster_msg_cpl, ctx);
    7960           4 :                 return;
    7961             :         }
    7962             : 
    7963         608 :         if (ctx->blob->use_extent_table == false) {
    7964             :                 /* Extent table is not used, proceed with sync of md that will only use extents_rle. */
    7965         304 :                 ctx->blob->state = SPDK_BLOB_STATE_DIRTY;
    7966         304 :                 blob_sync_md(ctx->blob, blob_insert_cluster_msg_cb, ctx);
    7967         304 :                 return;
    7968             :         }
    7969             : 
    7970         304 :         extent_page = bs_cluster_to_extent_page(ctx->blob, ctx->cluster_num);
    7971         304 :         if (*extent_page == 0) {
    7972             :                 /* Extent page requires allocation.
    7973             :                  * It was already claimed in the used_md_pages map and placed in ctx. */
    7974          70 :                 assert(ctx->extent_page != 0);
    7975          70 :                 assert(spdk_bit_array_get(ctx->blob->bs->used_md_pages, ctx->extent_page) == true);
    7976          70 :                 blob_write_extent_page(ctx->blob, ctx->extent_page, ctx->cluster_num, ctx->page,
    7977             :                                        blob_insert_new_ep_cb, ctx);
    7978             :         } else {
    7979             :                 /* It is possible for original thread to allocate extent page for
    7980             :                  * different cluster in the same extent page. In such case proceed with
    7981             :                  * updating the existing extent page, but release the additional one. */
    7982         234 :                 if (ctx->extent_page != 0) {
    7983           0 :                         spdk_spin_lock(&ctx->blob->bs->used_lock);
    7984           0 :                         assert(spdk_bit_array_get(ctx->blob->bs->used_md_pages, ctx->extent_page) == true);
    7985           0 :                         bs_release_md_page(ctx->blob->bs, ctx->extent_page);
    7986           0 :                         spdk_spin_unlock(&ctx->blob->bs->used_lock);
    7987           0 :                         ctx->extent_page = 0;
    7988             :                 }
    7989             :                 /* Extent page already allocated.
    7990             :                  * Every cluster allocation, requires just an update of single extent page. */
    7991         234 :                 blob_write_extent_page(ctx->blob, *extent_page, ctx->cluster_num, ctx->page,
    7992             :                                        blob_insert_cluster_msg_cb, ctx);
    7993             :         }
    7994             : }
    7995             : 
    7996             : static void
    7997         612 : blob_insert_cluster_on_md_thread(struct spdk_blob *blob, uint32_t cluster_num,
    7998             :                                  uint64_t cluster, uint32_t extent_page, struct spdk_blob_md_page *page,
    7999             :                                  spdk_blob_op_complete cb_fn, void *cb_arg)
    8000             : {
    8001             :         struct spdk_blob_insert_cluster_ctx *ctx;
    8002             : 
    8003         612 :         ctx = calloc(1, sizeof(*ctx));
    8004         612 :         if (ctx == NULL) {
    8005           0 :                 cb_fn(cb_arg, -ENOMEM);
    8006           0 :                 return;
    8007             :         }
    8008             : 
    8009         612 :         ctx->thread = spdk_get_thread();
    8010         612 :         ctx->blob = blob;
    8011         612 :         ctx->cluster_num = cluster_num;
    8012         612 :         ctx->cluster = cluster;
    8013         612 :         ctx->extent_page = extent_page;
    8014         612 :         ctx->page = page;
    8015         612 :         ctx->cb_fn = cb_fn;
    8016         612 :         ctx->cb_arg = cb_arg;
    8017             : 
    8018         612 :         spdk_thread_send_msg(blob->bs->md_thread, blob_insert_cluster_msg, ctx);
    8019             : }
    8020             : 
    8021             : /* START spdk_blob_close */
    8022             : 
    8023             : static void
    8024        3827 : blob_close_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
    8025             : {
    8026        3827 :         struct spdk_blob *blob = cb_arg;
    8027             : 
    8028        3827 :         if (bserrno == 0) {
    8029        3827 :                 blob->open_ref--;
    8030        3827 :                 if (blob->open_ref == 0) {
    8031             :                         /*
    8032             :                          * Blobs with active.num_pages == 0 are deleted blobs.
    8033             :                          *  these blobs are removed from the blob_store list
    8034             :                          *  when the deletion process starts - so don't try to
    8035             :                          *  remove them again.
    8036             :                          */
    8037        3210 :                         if (blob->active.num_pages > 0) {
    8038        1790 :                                 spdk_bit_array_clear(blob->bs->open_blobids, blob->id);
    8039        1790 :                                 RB_REMOVE(spdk_blob_tree, &blob->bs->open_blobs, blob);
    8040             :                         }
    8041        3210 :                         blob_free(blob);
    8042             :                 }
    8043             :         }
    8044             : 
    8045        3827 :         bs_sequence_finish(seq, bserrno);
    8046        3827 : }
    8047             : 
    8048             : static void
    8049          84 : blob_close_esnap_done(void *cb_arg, struct spdk_blob *blob, int bserrno)
    8050             : {
    8051          84 :         spdk_bs_sequence_t      *seq = cb_arg;
    8052             : 
    8053          84 :         if (bserrno != 0) {
    8054           0 :                 SPDK_DEBUGLOG(blob_esnap, "blob 0x%" PRIx64 ": close failed with error %d\n",
    8055             :                               blob->id, bserrno);
    8056           0 :                 bs_sequence_finish(seq, bserrno);
    8057           0 :                 return;
    8058             :         }
    8059             : 
    8060          84 :         SPDK_DEBUGLOG(blob_esnap, "blob 0x%" PRIx64 ": closed, syncing metadata on thread %s\n",
    8061             :                       blob->id, spdk_thread_get_name(spdk_get_thread()));
    8062             : 
    8063             :         /* Sync metadata */
    8064          84 :         blob_persist(seq, blob, blob_close_cpl, blob);
    8065             : }
    8066             : 
    8067             : void
    8068        3827 : spdk_blob_close(struct spdk_blob *blob, spdk_blob_op_complete cb_fn, void *cb_arg)
    8069             : {
    8070        3827 :         struct spdk_bs_cpl      cpl;
    8071             :         spdk_bs_sequence_t      *seq;
    8072             : 
    8073        3827 :         blob_verify_md_op(blob);
    8074             : 
    8075        3827 :         SPDK_DEBUGLOG(blob, "Closing blob 0x%" PRIx64 "\n", blob->id);
    8076             : 
    8077        3827 :         if (blob->open_ref == 0) {
    8078           0 :                 cb_fn(cb_arg, -EBADF);
    8079           0 :                 return;
    8080             :         }
    8081             : 
    8082        3827 :         cpl.type = SPDK_BS_CPL_TYPE_BLOB_BASIC;
    8083        3827 :         cpl.u.blob_basic.cb_fn = cb_fn;
    8084        3827 :         cpl.u.blob_basic.cb_arg = cb_arg;
    8085             : 
    8086        3827 :         seq = bs_sequence_start_bs(blob->bs->md_channel, &cpl);
    8087        3827 :         if (!seq) {
    8088           0 :                 cb_fn(cb_arg, -ENOMEM);
    8089           0 :                 return;
    8090             :         }
    8091             : 
    8092        3827 :         if (blob->open_ref == 1 && blob_is_esnap_clone(blob)) {
    8093          84 :                 blob_esnap_destroy_bs_dev_channels(blob, false, blob_close_esnap_done, seq);
    8094          84 :                 return;
    8095             :         }
    8096             : 
    8097             :         /* Sync metadata */
    8098        3743 :         blob_persist(seq, blob, blob_close_cpl, blob);
    8099             : }
    8100             : 
    8101             : /* END spdk_blob_close */
    8102             : 
    8103         209 : struct spdk_io_channel *spdk_bs_alloc_io_channel(struct spdk_blob_store *bs)
    8104             : {
    8105         209 :         return spdk_get_io_channel(bs);
    8106             : }
    8107             : 
    8108             : void
    8109         209 : spdk_bs_free_io_channel(struct spdk_io_channel *channel)
    8110             : {
    8111         209 :         blob_esnap_destroy_bs_channel(spdk_io_channel_get_ctx(channel));
    8112         209 :         spdk_put_io_channel(channel);
    8113         209 : }
    8114             : 
    8115             : void
    8116          36 : spdk_blob_io_unmap(struct spdk_blob *blob, struct spdk_io_channel *channel,
    8117             :                    uint64_t offset, uint64_t length, spdk_blob_op_complete cb_fn, void *cb_arg)
    8118             : {
    8119          36 :         blob_request_submit_op(blob, channel, NULL, offset, length, cb_fn, cb_arg,
    8120             :                                SPDK_BLOB_UNMAP);
    8121          36 : }
    8122             : 
    8123             : void
    8124          48 : spdk_blob_io_write_zeroes(struct spdk_blob *blob, struct spdk_io_channel *channel,
    8125             :                           uint64_t offset, uint64_t length, spdk_blob_op_complete cb_fn, void *cb_arg)
    8126             : {
    8127          48 :         blob_request_submit_op(blob, channel, NULL, offset, length, cb_fn, cb_arg,
    8128             :                                SPDK_BLOB_WRITE_ZEROES);
    8129          48 : }
    8130             : 
    8131             : void
    8132       11452 : spdk_blob_io_write(struct spdk_blob *blob, struct spdk_io_channel *channel,
    8133             :                    void *payload, uint64_t offset, uint64_t length,
    8134             :                    spdk_blob_op_complete cb_fn, void *cb_arg)
    8135             : {
    8136       11452 :         blob_request_submit_op(blob, channel, payload, offset, length, cb_fn, cb_arg,
    8137             :                                SPDK_BLOB_WRITE);
    8138       11452 : }
    8139             : 
    8140             : void
    8141       14216 : spdk_blob_io_read(struct spdk_blob *blob, struct spdk_io_channel *channel,
    8142             :                   void *payload, uint64_t offset, uint64_t length,
    8143             :                   spdk_blob_op_complete cb_fn, void *cb_arg)
    8144             : {
    8145       14216 :         blob_request_submit_op(blob, channel, payload, offset, length, cb_fn, cb_arg,
    8146             :                                SPDK_BLOB_READ);
    8147       14216 : }
    8148             : 
    8149             : void
    8150         140 : spdk_blob_io_writev(struct spdk_blob *blob, struct spdk_io_channel *channel,
    8151             :                     struct iovec *iov, int iovcnt, uint64_t offset, uint64_t length,
    8152             :                     spdk_blob_op_complete cb_fn, void *cb_arg)
    8153             : {
    8154         140 :         blob_request_submit_rw_iov(blob, channel, iov, iovcnt, offset, length, cb_fn, cb_arg, false, NULL);
    8155         140 : }
    8156             : 
    8157             : void
    8158         940 : spdk_blob_io_readv(struct spdk_blob *blob, struct spdk_io_channel *channel,
    8159             :                    struct iovec *iov, int iovcnt, uint64_t offset, uint64_t length,
    8160             :                    spdk_blob_op_complete cb_fn, void *cb_arg)
    8161             : {
    8162         940 :         blob_request_submit_rw_iov(blob, channel, iov, iovcnt, offset, length, cb_fn, cb_arg, true, NULL);
    8163         940 : }
    8164             : 
    8165             : void
    8166         208 : spdk_blob_io_writev_ext(struct spdk_blob *blob, struct spdk_io_channel *channel,
    8167             :                         struct iovec *iov, int iovcnt, uint64_t offset, uint64_t length,
    8168             :                         spdk_blob_op_complete cb_fn, void *cb_arg, struct spdk_blob_ext_io_opts *io_opts)
    8169             : {
    8170         208 :         blob_request_submit_rw_iov(blob, channel, iov, iovcnt, offset, length, cb_fn, cb_arg, false,
    8171             :                                    io_opts);
    8172         208 : }
    8173             : 
    8174             : void
    8175        1300 : spdk_blob_io_readv_ext(struct spdk_blob *blob, struct spdk_io_channel *channel,
    8176             :                        struct iovec *iov, int iovcnt, uint64_t offset, uint64_t length,
    8177             :                        spdk_blob_op_complete cb_fn, void *cb_arg, struct spdk_blob_ext_io_opts *io_opts)
    8178             : {
    8179        1300 :         blob_request_submit_rw_iov(blob, channel, iov, iovcnt, offset, length, cb_fn, cb_arg, true,
    8180             :                                    io_opts);
    8181        1300 : }
    8182             : 
    8183             : struct spdk_bs_iter_ctx {
    8184             :         int64_t page_num;
    8185             :         struct spdk_blob_store *bs;
    8186             : 
    8187             :         spdk_blob_op_with_handle_complete cb_fn;
    8188             :         void *cb_arg;
    8189             : };
    8190             : 
    8191             : static void
    8192        1164 : bs_iter_cpl(void *cb_arg, struct spdk_blob *_blob, int bserrno)
    8193             : {
    8194        1164 :         struct spdk_bs_iter_ctx *ctx = cb_arg;
    8195        1164 :         struct spdk_blob_store *bs = ctx->bs;
    8196             :         spdk_blob_id id;
    8197             : 
    8198        1164 :         if (bserrno == 0) {
    8199         444 :                 ctx->cb_fn(ctx->cb_arg, _blob, bserrno);
    8200         444 :                 free(ctx);
    8201         444 :                 return;
    8202             :         }
    8203             : 
    8204         720 :         ctx->page_num++;
    8205         720 :         ctx->page_num = spdk_bit_array_find_first_set(bs->used_blobids, ctx->page_num);
    8206         720 :         if (ctx->page_num >= spdk_bit_array_capacity(bs->used_blobids)) {
    8207         268 :                 ctx->cb_fn(ctx->cb_arg, NULL, -ENOENT);
    8208         268 :                 free(ctx);
    8209         268 :                 return;
    8210             :         }
    8211             : 
    8212         452 :         id = bs_page_to_blobid(ctx->page_num);
    8213             : 
    8214         452 :         spdk_bs_open_blob(bs, id, bs_iter_cpl, ctx);
    8215             : }
    8216             : 
    8217             : void
    8218         292 : spdk_bs_iter_first(struct spdk_blob_store *bs,
    8219             :                    spdk_blob_op_with_handle_complete cb_fn, void *cb_arg)
    8220             : {
    8221             :         struct spdk_bs_iter_ctx *ctx;
    8222             : 
    8223         292 :         ctx = calloc(1, sizeof(*ctx));
    8224         292 :         if (!ctx) {
    8225           0 :                 cb_fn(cb_arg, NULL, -ENOMEM);
    8226           0 :                 return;
    8227             :         }
    8228             : 
    8229         292 :         ctx->page_num = -1;
    8230         292 :         ctx->bs = bs;
    8231         292 :         ctx->cb_fn = cb_fn;
    8232         292 :         ctx->cb_arg = cb_arg;
    8233             : 
    8234         292 :         bs_iter_cpl(ctx, NULL, -1);
    8235             : }
    8236             : 
    8237             : static void
    8238         420 : bs_iter_close_cpl(void *cb_arg, int bserrno)
    8239             : {
    8240         420 :         struct spdk_bs_iter_ctx *ctx = cb_arg;
    8241             : 
    8242         420 :         bs_iter_cpl(ctx, NULL, -1);
    8243         420 : }
    8244             : 
    8245             : void
    8246         420 : spdk_bs_iter_next(struct spdk_blob_store *bs, struct spdk_blob *blob,
    8247             :                   spdk_blob_op_with_handle_complete cb_fn, void *cb_arg)
    8248             : {
    8249             :         struct spdk_bs_iter_ctx *ctx;
    8250             : 
    8251         420 :         assert(blob != NULL);
    8252             : 
    8253         420 :         ctx = calloc(1, sizeof(*ctx));
    8254         420 :         if (!ctx) {
    8255           0 :                 cb_fn(cb_arg, NULL, -ENOMEM);
    8256           0 :                 return;
    8257             :         }
    8258             : 
    8259         420 :         ctx->page_num = bs_blobid_to_page(blob->id);
    8260         420 :         ctx->bs = bs;
    8261         420 :         ctx->cb_fn = cb_fn;
    8262         420 :         ctx->cb_arg = cb_arg;
    8263             : 
    8264             :         /* Close the existing blob */
    8265         420 :         spdk_blob_close(blob, bs_iter_close_cpl, ctx);
    8266             : }
    8267             : 
    8268             : static int
    8269         843 : blob_set_xattr(struct spdk_blob *blob, const char *name, const void *value,
    8270             :                uint16_t value_len, bool internal)
    8271             : {
    8272             :         struct spdk_xattr_tailq *xattrs;
    8273             :         struct spdk_xattr       *xattr;
    8274             :         size_t                  desc_size;
    8275             :         void                    *tmp;
    8276             : 
    8277         843 :         blob_verify_md_op(blob);
    8278             : 
    8279         843 :         if (blob->md_ro) {
    8280           4 :                 return -EPERM;
    8281             :         }
    8282             : 
    8283         839 :         desc_size = sizeof(struct spdk_blob_md_descriptor_xattr) + strlen(name) + value_len;
    8284         839 :         if (desc_size > SPDK_BS_MAX_DESC_SIZE) {
    8285           4 :                 SPDK_DEBUGLOG(blob, "Xattr '%s' of size %zu does not fix into single page %zu\n", name,
    8286             :                               desc_size, SPDK_BS_MAX_DESC_SIZE);
    8287           4 :                 return -ENOMEM;
    8288             :         }
    8289             : 
    8290         835 :         if (internal) {
    8291         624 :                 xattrs = &blob->xattrs_internal;
    8292         624 :                 blob->invalid_flags |= SPDK_BLOB_INTERNAL_XATTR;
    8293             :         } else {
    8294         211 :                 xattrs = &blob->xattrs;
    8295             :         }
    8296             : 
    8297        1042 :         TAILQ_FOREACH(xattr, xattrs, link) {
    8298         304 :                 if (!strcmp(name, xattr->name)) {
    8299          97 :                         tmp = malloc(value_len);
    8300          97 :                         if (!tmp) {
    8301           0 :                                 return -ENOMEM;
    8302             :                         }
    8303             : 
    8304          97 :                         free(xattr->value);
    8305          97 :                         xattr->value_len = value_len;
    8306          97 :                         xattr->value = tmp;
    8307          97 :                         memcpy(xattr->value, value, value_len);
    8308             : 
    8309          97 :                         blob->state = SPDK_BLOB_STATE_DIRTY;
    8310             : 
    8311          97 :                         return 0;
    8312             :                 }
    8313             :         }
    8314             : 
    8315         738 :         xattr = calloc(1, sizeof(*xattr));
    8316         738 :         if (!xattr) {
    8317           0 :                 return -ENOMEM;
    8318             :         }
    8319             : 
    8320         738 :         xattr->name = strdup(name);
    8321         738 :         if (!xattr->name) {
    8322           0 :                 free(xattr);
    8323           0 :                 return -ENOMEM;
    8324             :         }
    8325             : 
    8326         738 :         xattr->value_len = value_len;
    8327         738 :         xattr->value = malloc(value_len);
    8328         738 :         if (!xattr->value) {
    8329           0 :                 free(xattr->name);
    8330           0 :                 free(xattr);
    8331           0 :                 return -ENOMEM;
    8332             :         }
    8333         738 :         memcpy(xattr->value, value, value_len);
    8334         738 :         TAILQ_INSERT_TAIL(xattrs, xattr, link);
    8335             : 
    8336         738 :         blob->state = SPDK_BLOB_STATE_DIRTY;
    8337             : 
    8338         738 :         return 0;
    8339             : }
    8340             : 
    8341             : int
    8342         183 : spdk_blob_set_xattr(struct spdk_blob *blob, const char *name, const void *value,
    8343             :                     uint16_t value_len)
    8344             : {
    8345         183 :         return blob_set_xattr(blob, name, value, value_len, false);
    8346             : }
    8347             : 
    8348             : static int
    8349         364 : blob_remove_xattr(struct spdk_blob *blob, const char *name, bool internal)
    8350             : {
    8351             :         struct spdk_xattr_tailq *xattrs;
    8352             :         struct spdk_xattr       *xattr;
    8353             : 
    8354         364 :         blob_verify_md_op(blob);
    8355             : 
    8356         364 :         if (blob->md_ro) {
    8357           4 :                 return -EPERM;
    8358             :         }
    8359         360 :         xattrs = internal ? &blob->xattrs_internal : &blob->xattrs;
    8360             : 
    8361         372 :         TAILQ_FOREACH(xattr, xattrs, link) {
    8362         324 :                 if (!strcmp(name, xattr->name)) {
    8363         312 :                         TAILQ_REMOVE(xattrs, xattr, link);
    8364         312 :                         free(xattr->value);
    8365         312 :                         free(xattr->name);
    8366         312 :                         free(xattr);
    8367             : 
    8368         312 :                         if (internal && TAILQ_EMPTY(&blob->xattrs_internal)) {
    8369         216 :                                 blob->invalid_flags &= ~SPDK_BLOB_INTERNAL_XATTR;
    8370             :                         }
    8371         312 :                         blob->state = SPDK_BLOB_STATE_DIRTY;
    8372             : 
    8373         312 :                         return 0;
    8374             :                 }
    8375             :         }
    8376             : 
    8377          48 :         return -ENOENT;
    8378             : }
    8379             : 
    8380             : int
    8381          36 : spdk_blob_remove_xattr(struct spdk_blob *blob, const char *name)
    8382             : {
    8383          36 :         return blob_remove_xattr(blob, name, false);
    8384             : }
    8385             : 
    8386             : static int
    8387        2116 : blob_get_xattr_value(struct spdk_blob *blob, const char *name,
    8388             :                      const void **value, size_t *value_len, bool internal)
    8389             : {
    8390             :         struct spdk_xattr       *xattr;
    8391             :         struct spdk_xattr_tailq *xattrs;
    8392             : 
    8393        2116 :         xattrs = internal ? &blob->xattrs_internal : &blob->xattrs;
    8394             : 
    8395        2702 :         TAILQ_FOREACH(xattr, xattrs, link) {
    8396        1256 :                 if (!strcmp(name, xattr->name)) {
    8397         670 :                         *value = xattr->value;
    8398         670 :                         *value_len = xattr->value_len;
    8399         670 :                         return 0;
    8400             :                 }
    8401             :         }
    8402        1446 :         return -ENOENT;
    8403             : }
    8404             : 
    8405             : int
    8406         154 : spdk_blob_get_xattr_value(struct spdk_blob *blob, const char *name,
    8407             :                           const void **value, size_t *value_len)
    8408             : {
    8409         154 :         blob_verify_md_op(blob);
    8410             : 
    8411         154 :         return blob_get_xattr_value(blob, name, value, value_len, false);
    8412             : }
    8413             : 
    8414             : struct spdk_xattr_names {
    8415             :         uint32_t        count;
    8416             :         const char      *names[0];
    8417             : };
    8418             : 
    8419             : static int
    8420           4 : blob_get_xattr_names(struct spdk_xattr_tailq *xattrs, struct spdk_xattr_names **names)
    8421             : {
    8422             :         struct spdk_xattr       *xattr;
    8423           4 :         int                     count = 0;
    8424             : 
    8425          12 :         TAILQ_FOREACH(xattr, xattrs, link) {
    8426           8 :                 count++;
    8427             :         }
    8428             : 
    8429           4 :         *names = calloc(1, sizeof(struct spdk_xattr_names) + count * sizeof(char *));
    8430           4 :         if (*names == NULL) {
    8431           0 :                 return -ENOMEM;
    8432             :         }
    8433             : 
    8434          12 :         TAILQ_FOREACH(xattr, xattrs, link) {
    8435           8 :                 (*names)->names[(*names)->count++] = xattr->name;
    8436             :         }
    8437             : 
    8438           4 :         return 0;
    8439             : }
    8440             : 
    8441             : int
    8442           4 : spdk_blob_get_xattr_names(struct spdk_blob *blob, struct spdk_xattr_names **names)
    8443             : {
    8444           4 :         blob_verify_md_op(blob);
    8445             : 
    8446           4 :         return blob_get_xattr_names(&blob->xattrs, names);
    8447             : }
    8448             : 
    8449             : uint32_t
    8450           4 : spdk_xattr_names_get_count(struct spdk_xattr_names *names)
    8451             : {
    8452           4 :         assert(names != NULL);
    8453             : 
    8454           4 :         return names->count;
    8455             : }
    8456             : 
    8457             : const char *
    8458           8 : spdk_xattr_names_get_name(struct spdk_xattr_names *names, uint32_t index)
    8459             : {
    8460           8 :         if (index >= names->count) {
    8461           0 :                 return NULL;
    8462             :         }
    8463             : 
    8464           8 :         return names->names[index];
    8465             : }
    8466             : 
    8467             : void
    8468           4 : spdk_xattr_names_free(struct spdk_xattr_names *names)
    8469             : {
    8470           4 :         free(names);
    8471           4 : }
    8472             : 
    8473             : struct spdk_bs_type
    8474           2 : spdk_bs_get_bstype(struct spdk_blob_store *bs)
    8475             : {
    8476           2 :         return bs->bstype;
    8477             : }
    8478             : 
    8479             : void
    8480           0 : spdk_bs_set_bstype(struct spdk_blob_store *bs, struct spdk_bs_type bstype)
    8481             : {
    8482           0 :         memcpy(&bs->bstype, &bstype, sizeof(bstype));
    8483           0 : }
    8484             : 
    8485             : bool
    8486          32 : spdk_blob_is_read_only(struct spdk_blob *blob)
    8487             : {
    8488          32 :         assert(blob != NULL);
    8489          32 :         return (blob->data_ro || blob->md_ro);
    8490             : }
    8491             : 
    8492             : bool
    8493          32 : spdk_blob_is_snapshot(struct spdk_blob *blob)
    8494             : {
    8495             :         struct spdk_blob_list *snapshot_entry;
    8496             : 
    8497          32 :         assert(blob != NULL);
    8498             : 
    8499          32 :         snapshot_entry = bs_get_snapshot_entry(blob->bs, blob->id);
    8500          32 :         if (snapshot_entry == NULL) {
    8501          24 :                 return false;
    8502             :         }
    8503             : 
    8504           8 :         return true;
    8505             : }
    8506             : 
    8507             : bool
    8508          40 : spdk_blob_is_clone(struct spdk_blob *blob)
    8509             : {
    8510          40 :         assert(blob != NULL);
    8511             : 
    8512          40 :         if (blob->parent_id != SPDK_BLOBID_INVALID &&
    8513          32 :             blob->parent_id != SPDK_BLOBID_EXTERNAL_SNAPSHOT) {
    8514          28 :                 assert(spdk_blob_is_thin_provisioned(blob));
    8515          28 :                 return true;
    8516             :         }
    8517             : 
    8518          12 :         return false;
    8519             : }
    8520             : 
    8521             : bool
    8522       35274 : spdk_blob_is_thin_provisioned(struct spdk_blob *blob)
    8523             : {
    8524       35274 :         assert(blob != NULL);
    8525       35274 :         return !!(blob->invalid_flags & SPDK_BLOB_THIN_PROV);
    8526             : }
    8527             : 
    8528             : bool
    8529       28124 : spdk_blob_is_esnap_clone(const struct spdk_blob *blob)
    8530             : {
    8531       28124 :         return blob_is_esnap_clone(blob);
    8532             : }
    8533             : 
    8534             : static void
    8535        3238 : blob_update_clear_method(struct spdk_blob *blob)
    8536             : {
    8537             :         enum blob_clear_method stored_cm;
    8538             : 
    8539        3238 :         assert(blob != NULL);
    8540             : 
    8541             :         /* If BLOB_CLEAR_WITH_DEFAULT was passed in, use the setting stored
    8542             :          * in metadata previously.  If something other than the default was
    8543             :          * specified, ignore stored value and used what was passed in.
    8544             :          */
    8545        3238 :         stored_cm = ((blob->md_ro_flags & SPDK_BLOB_CLEAR_METHOD) >> SPDK_BLOB_CLEAR_METHOD_SHIFT);
    8546             : 
    8547        3238 :         if (blob->clear_method == BLOB_CLEAR_WITH_DEFAULT) {
    8548        3238 :                 blob->clear_method = stored_cm;
    8549           0 :         } else if (blob->clear_method != stored_cm) {
    8550           0 :                 SPDK_WARNLOG("Using passed in clear method 0x%x instead of stored value of 0x%x\n",
    8551             :                              blob->clear_method, stored_cm);
    8552             :         }
    8553        3238 : }
    8554             : 
    8555             : spdk_blob_id
    8556         218 : spdk_blob_get_parent_snapshot(struct spdk_blob_store *bs, spdk_blob_id blob_id)
    8557             : {
    8558         218 :         struct spdk_blob_list *snapshot_entry = NULL;
    8559         218 :         struct spdk_blob_list *clone_entry = NULL;
    8560             : 
    8561         382 :         TAILQ_FOREACH(snapshot_entry, &bs->snapshots, link) {
    8562         548 :                 TAILQ_FOREACH(clone_entry, &snapshot_entry->clones, link) {
    8563         384 :                         if (clone_entry->id == blob_id) {
    8564         148 :                                 return snapshot_entry->id;
    8565             :                         }
    8566             :                 }
    8567             :         }
    8568             : 
    8569          70 :         return SPDK_BLOBID_INVALID;
    8570             : }
    8571             : 
    8572             : int
    8573         184 : spdk_blob_get_clones(struct spdk_blob_store *bs, spdk_blob_id blobid, spdk_blob_id *ids,
    8574             :                      size_t *count)
    8575             : {
    8576             :         struct spdk_blob_list *snapshot_entry, *clone_entry;
    8577             :         size_t n;
    8578             : 
    8579         184 :         snapshot_entry = bs_get_snapshot_entry(bs, blobid);
    8580         184 :         if (snapshot_entry == NULL) {
    8581          28 :                 *count = 0;
    8582          28 :                 return 0;
    8583             :         }
    8584             : 
    8585         156 :         if (ids == NULL || *count < snapshot_entry->clone_count) {
    8586           8 :                 *count = snapshot_entry->clone_count;
    8587           8 :                 return -ENOMEM;
    8588             :         }
    8589         148 :         *count = snapshot_entry->clone_count;
    8590             : 
    8591         148 :         n = 0;
    8592         308 :         TAILQ_FOREACH(clone_entry, &snapshot_entry->clones, link) {
    8593         160 :                 ids[n++] = clone_entry->id;
    8594             :         }
    8595             : 
    8596         148 :         return 0;
    8597             : }
    8598             : 
    8599             : static void
    8600           4 : bs_load_grow_continue(struct spdk_bs_load_ctx *ctx)
    8601             : {
    8602             :         int rc;
    8603             : 
    8604           4 :         if (ctx->super->size == 0) {
    8605           0 :                 ctx->super->size = ctx->bs->dev->blockcnt * ctx->bs->dev->blocklen;
    8606             :         }
    8607             : 
    8608           4 :         if (ctx->super->io_unit_size == 0) {
    8609           0 :                 ctx->super->io_unit_size = SPDK_BS_PAGE_SIZE;
    8610             :         }
    8611             : 
    8612             :         /* Parse the super block */
    8613           4 :         ctx->bs->clean = 1;
    8614           4 :         ctx->bs->cluster_sz = ctx->super->cluster_size;
    8615           4 :         ctx->bs->total_clusters = ctx->super->size / ctx->super->cluster_size;
    8616           4 :         ctx->bs->pages_per_cluster = ctx->bs->cluster_sz / SPDK_BS_PAGE_SIZE;
    8617           4 :         if (spdk_u32_is_pow2(ctx->bs->pages_per_cluster)) {
    8618           4 :                 ctx->bs->pages_per_cluster_shift = spdk_u32log2(ctx->bs->pages_per_cluster);
    8619             :         }
    8620           4 :         ctx->bs->io_unit_size = ctx->super->io_unit_size;
    8621           4 :         rc = spdk_bit_array_resize(&ctx->used_clusters, ctx->bs->total_clusters);
    8622           4 :         if (rc < 0) {
    8623           0 :                 bs_load_ctx_fail(ctx, -ENOMEM);
    8624           0 :                 return;
    8625             :         }
    8626           4 :         ctx->bs->md_start = ctx->super->md_start;
    8627           4 :         ctx->bs->md_len = ctx->super->md_len;
    8628           4 :         rc = spdk_bit_array_resize(&ctx->bs->open_blobids, ctx->bs->md_len);
    8629           4 :         if (rc < 0) {
    8630           0 :                 bs_load_ctx_fail(ctx, -ENOMEM);
    8631           0 :                 return;
    8632             :         }
    8633             : 
    8634           8 :         ctx->bs->total_data_clusters = ctx->bs->total_clusters - spdk_divide_round_up(
    8635           4 :                                                ctx->bs->md_start + ctx->bs->md_len, ctx->bs->pages_per_cluster);
    8636           4 :         ctx->bs->super_blob = ctx->super->super_blob;
    8637           4 :         memcpy(&ctx->bs->bstype, &ctx->super->bstype, sizeof(ctx->super->bstype));
    8638             : 
    8639           4 :         if (ctx->super->used_blobid_mask_len == 0 || ctx->super->clean == 0) {
    8640           0 :                 SPDK_ERRLOG("Can not grow an unclean blobstore, please load it normally to clean it.\n");
    8641           0 :                 bs_load_ctx_fail(ctx, -EIO);
    8642           0 :                 return;
    8643             :         } else {
    8644           4 :                 bs_load_read_used_pages(ctx);
    8645             :         }
    8646             : }
    8647             : 
    8648             : static void
    8649           4 : bs_load_grow_super_write_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
    8650             : {
    8651           4 :         struct spdk_bs_load_ctx *ctx = cb_arg;
    8652             : 
    8653           4 :         if (bserrno != 0) {
    8654           0 :                 bs_load_ctx_fail(ctx, bserrno);
    8655           0 :                 return;
    8656             :         }
    8657           4 :         bs_load_grow_continue(ctx);
    8658             : }
    8659             : 
    8660             : static void
    8661           4 : bs_load_grow_used_clusters_write_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
    8662             : {
    8663           4 :         struct spdk_bs_load_ctx *ctx = cb_arg;
    8664             : 
    8665           4 :         if (bserrno != 0) {
    8666           0 :                 bs_load_ctx_fail(ctx, bserrno);
    8667           0 :                 return;
    8668             :         }
    8669             : 
    8670           4 :         spdk_free(ctx->mask);
    8671             : 
    8672           4 :         bs_sequence_write_dev(ctx->seq, ctx->super, bs_page_to_lba(ctx->bs, 0),
    8673           4 :                               bs_byte_to_lba(ctx->bs, sizeof(*ctx->super)),
    8674             :                               bs_load_grow_super_write_cpl, ctx);
    8675             : }
    8676             : 
    8677             : static void
    8678           4 : bs_load_grow_used_clusters_read_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
    8679             : {
    8680           4 :         struct spdk_bs_load_ctx *ctx = cb_arg;
    8681             :         uint64_t                lba, lba_count;
    8682             :         uint64_t                dev_size;
    8683             :         uint64_t                total_clusters;
    8684             : 
    8685           4 :         if (bserrno != 0) {
    8686           0 :                 bs_load_ctx_fail(ctx, bserrno);
    8687           0 :                 return;
    8688             :         }
    8689             : 
    8690             :         /* The type must be correct */
    8691           4 :         assert(ctx->mask->type == SPDK_MD_MASK_TYPE_USED_CLUSTERS);
    8692             :         /* The length of the mask (in bits) must not be greater than the length of the buffer (converted to bits) */
    8693           4 :         assert(ctx->mask->length <= (ctx->super->used_cluster_mask_len * sizeof(
    8694             :                                              struct spdk_blob_md_page) * 8));
    8695           4 :         dev_size = ctx->bs->dev->blockcnt * ctx->bs->dev->blocklen;
    8696           4 :         total_clusters = dev_size / ctx->super->cluster_size;
    8697           4 :         ctx->mask->length = total_clusters;
    8698             : 
    8699           4 :         lba = bs_page_to_lba(ctx->bs, ctx->super->used_cluster_mask_start);
    8700           4 :         lba_count = bs_page_to_lba(ctx->bs, ctx->super->used_cluster_mask_len);
    8701           4 :         bs_sequence_write_dev(ctx->seq, ctx->mask, lba, lba_count,
    8702             :                               bs_load_grow_used_clusters_write_cpl, ctx);
    8703             : }
    8704             : 
    8705             : static void
    8706           4 : bs_load_try_to_grow(struct spdk_bs_load_ctx *ctx)
    8707             : {
    8708             :         uint64_t dev_size, total_clusters, used_cluster_mask_len, max_used_cluster_mask;
    8709             :         uint64_t lba, lba_count, mask_size;
    8710             : 
    8711           4 :         dev_size = ctx->bs->dev->blockcnt * ctx->bs->dev->blocklen;
    8712           4 :         total_clusters = dev_size / ctx->super->cluster_size;
    8713           4 :         used_cluster_mask_len = spdk_divide_round_up(sizeof(struct spdk_bs_md_mask) +
    8714           4 :                                 spdk_divide_round_up(total_clusters, 8),
    8715             :                                 SPDK_BS_PAGE_SIZE);
    8716           4 :         max_used_cluster_mask = ctx->super->used_blobid_mask_start - ctx->super->used_cluster_mask_start;
    8717             :         /* No necessary to grow or no space to grow */
    8718           4 :         if (ctx->super->size >= dev_size || used_cluster_mask_len > max_used_cluster_mask) {
    8719           0 :                 SPDK_DEBUGLOG(blob, "No grow\n");
    8720           0 :                 bs_load_grow_continue(ctx);
    8721           0 :                 return;
    8722             :         }
    8723             : 
    8724           4 :         SPDK_DEBUGLOG(blob, "Resize blobstore\n");
    8725             : 
    8726           4 :         ctx->super->size = dev_size;
    8727           4 :         ctx->super->used_cluster_mask_len = used_cluster_mask_len;
    8728           4 :         ctx->super->crc = blob_md_page_calc_crc(ctx->super);
    8729             : 
    8730           4 :         mask_size = used_cluster_mask_len * SPDK_BS_PAGE_SIZE;
    8731           4 :         ctx->mask = spdk_zmalloc(mask_size, 0x1000, NULL, SPDK_ENV_SOCKET_ID_ANY,
    8732             :                                  SPDK_MALLOC_DMA);
    8733           4 :         if (!ctx->mask) {
    8734           0 :                 bs_load_ctx_fail(ctx, -ENOMEM);
    8735           0 :                 return;
    8736             :         }
    8737           4 :         lba = bs_page_to_lba(ctx->bs, ctx->super->used_cluster_mask_start);
    8738           4 :         lba_count = bs_page_to_lba(ctx->bs, ctx->super->used_cluster_mask_len);
    8739           4 :         bs_sequence_read_dev(ctx->seq, ctx->mask, lba, lba_count,
    8740             :                              bs_load_grow_used_clusters_read_cpl, ctx);
    8741             : }
    8742             : 
    8743             : static void
    8744           4 : bs_grow_load_super_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
    8745             : {
    8746           4 :         struct spdk_bs_load_ctx *ctx = cb_arg;
    8747             :         int rc;
    8748             : 
    8749           4 :         rc = bs_super_validate(ctx->super, ctx->bs);
    8750           4 :         if (rc != 0) {
    8751           0 :                 bs_load_ctx_fail(ctx, rc);
    8752           0 :                 return;
    8753             :         }
    8754             : 
    8755           4 :         bs_load_try_to_grow(ctx);
    8756             : }
    8757             : 
    8758             : struct spdk_bs_grow_ctx {
    8759             :         struct spdk_blob_store          *bs;
    8760             :         struct spdk_bs_super_block      *super;
    8761             : 
    8762             :         struct spdk_bit_pool            *new_used_clusters;
    8763             :         struct spdk_bs_md_mask          *new_used_clusters_mask;
    8764             : 
    8765             :         spdk_bs_sequence_t              *seq;
    8766             : };
    8767             : 
    8768             : static void
    8769          32 : bs_grow_live_done(struct spdk_bs_grow_ctx *ctx, int bserrno)
    8770             : {
    8771          32 :         if (bserrno != 0) {
    8772           8 :                 spdk_bit_pool_free(&ctx->new_used_clusters);
    8773             :         }
    8774             : 
    8775          32 :         bs_sequence_finish(ctx->seq, bserrno);
    8776          32 :         free(ctx->new_used_clusters_mask);
    8777          32 :         spdk_free(ctx->super);
    8778          32 :         free(ctx);
    8779          32 : }
    8780             : 
    8781             : static void
    8782           8 : bs_grow_live_super_write_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
    8783             : {
    8784           8 :         struct spdk_bs_grow_ctx *ctx = cb_arg;
    8785           8 :         struct spdk_blob_store *bs = ctx->bs;
    8786             :         uint64_t total_clusters;
    8787             : 
    8788           8 :         if (bserrno != 0) {
    8789           0 :                 bs_grow_live_done(ctx, bserrno);
    8790           0 :                 return;
    8791             :         }
    8792             : 
    8793             :         /*
    8794             :          * Blobstore is not clean until unload, for now only the super block is up to date.
    8795             :          * This is similar to state right after blobstore init, when bs_write_used_md() didn't
    8796             :          * yet execute.
    8797             :          * When cleanly unloaded, the used md pages will be written out.
    8798             :          * In case of unclean shutdown, loading blobstore will go through recovery path correctly
    8799             :          * filling out the used_clusters with new size and writing it out.
    8800             :          */
    8801           8 :         bs->clean = 0;
    8802             : 
    8803             :         /* Reverting the super->size past this point is complex, avoid any error paths
    8804             :          * that require to do so. */
    8805           8 :         spdk_spin_lock(&bs->used_lock);
    8806             : 
    8807           8 :         total_clusters = ctx->super->size / ctx->super->cluster_size;
    8808             : 
    8809           8 :         assert(total_clusters >= spdk_bit_pool_capacity(bs->used_clusters));
    8810           8 :         spdk_bit_pool_store_mask(bs->used_clusters, ctx->new_used_clusters_mask);
    8811             : 
    8812           8 :         assert(total_clusters == spdk_bit_pool_capacity(ctx->new_used_clusters));
    8813           8 :         spdk_bit_pool_load_mask(ctx->new_used_clusters, ctx->new_used_clusters_mask);
    8814             : 
    8815           8 :         spdk_bit_pool_free(&bs->used_clusters);
    8816           8 :         bs->used_clusters = ctx->new_used_clusters;
    8817             : 
    8818           8 :         bs->total_clusters = total_clusters;
    8819          16 :         bs->total_data_clusters = bs->total_clusters - spdk_divide_round_up(
    8820           8 :                                           bs->md_start + bs->md_len, bs->pages_per_cluster);
    8821             : 
    8822           8 :         bs->num_free_clusters = spdk_bit_pool_count_free(bs->used_clusters);
    8823           8 :         assert(ctx->bs->num_free_clusters <= ctx->bs->total_clusters);
    8824           8 :         spdk_spin_unlock(&bs->used_lock);
    8825             : 
    8826           8 :         bs_grow_live_done(ctx, 0);
    8827             : }
    8828             : 
    8829             : static void
    8830          32 : bs_grow_live_load_super_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
    8831             : {
    8832          32 :         struct spdk_bs_grow_ctx *ctx = cb_arg;
    8833             :         uint64_t dev_size, total_clusters, used_cluster_mask_len, max_used_cluster_mask;
    8834             :         int rc;
    8835             : 
    8836          32 :         if (bserrno != 0) {
    8837           0 :                 bs_grow_live_done(ctx, bserrno);
    8838           0 :                 return;
    8839             :         }
    8840             : 
    8841          32 :         rc = bs_super_validate(ctx->super, ctx->bs);
    8842          32 :         if (rc != 0) {
    8843           4 :                 bs_grow_live_done(ctx, rc);
    8844           4 :                 return;
    8845             :         }
    8846             : 
    8847          28 :         dev_size = ctx->bs->dev->blockcnt * ctx->bs->dev->blocklen;
    8848          28 :         total_clusters = dev_size / ctx->super->cluster_size;
    8849          28 :         used_cluster_mask_len = spdk_divide_round_up(sizeof(struct spdk_bs_md_mask) +
    8850          28 :                                 spdk_divide_round_up(total_clusters, 8),
    8851             :                                 SPDK_BS_PAGE_SIZE);
    8852          28 :         max_used_cluster_mask = ctx->super->used_blobid_mask_start - ctx->super->used_cluster_mask_start;
    8853             :         /* Only checking dev_size. Since it can change, but total_clusters remain the same. */
    8854          28 :         if (dev_size == ctx->super->size) {
    8855          16 :                 SPDK_DEBUGLOG(blob, "No need to grow blobstore\n");
    8856          16 :                 bs_grow_live_done(ctx, 0);
    8857          16 :                 return;
    8858             :         }
    8859             :         /*
    8860             :          * Blobstore cannot be shrunk, so check before if:
    8861             :          * - new size of the device is smaller than size in super_block
    8862             :          * - new total number of clusters is smaller than used_clusters bit_pool
    8863             :          * - there is enough space in metadata for used_cluster_mask to be written out
    8864             :          */
    8865          12 :         if (dev_size < ctx->super->size ||
    8866          12 :             total_clusters < spdk_bit_pool_capacity(ctx->bs->used_clusters) ||
    8867             :             used_cluster_mask_len > max_used_cluster_mask) {
    8868           4 :                 SPDK_DEBUGLOG(blob, "No space to grow blobstore\n");
    8869           4 :                 bs_grow_live_done(ctx, -ENOSPC);
    8870           4 :                 return;
    8871             :         }
    8872             : 
    8873           8 :         SPDK_DEBUGLOG(blob, "Resizing blobstore\n");
    8874             : 
    8875           8 :         ctx->new_used_clusters_mask = calloc(1, total_clusters);
    8876           8 :         if (!ctx->new_used_clusters_mask) {
    8877           0 :                 bs_grow_live_done(ctx, -ENOMEM);
    8878           0 :                 return;
    8879             :         }
    8880           8 :         ctx->new_used_clusters = spdk_bit_pool_create(total_clusters);
    8881           8 :         if (!ctx->new_used_clusters) {
    8882           0 :                 bs_grow_live_done(ctx, -ENOMEM);
    8883           0 :                 return;
    8884             :         }
    8885             : 
    8886           8 :         ctx->super->clean = 0;
    8887           8 :         ctx->super->size = dev_size;
    8888           8 :         ctx->super->used_cluster_mask_len = used_cluster_mask_len;
    8889           8 :         bs_write_super(seq, ctx->bs, ctx->super, bs_grow_live_super_write_cpl, ctx);
    8890             : }
    8891             : 
    8892             : void
    8893          32 : spdk_bs_grow_live(struct spdk_blob_store *bs,
    8894             :                   spdk_bs_op_complete cb_fn, void *cb_arg)
    8895             : {
    8896          32 :         struct spdk_bs_cpl      cpl;
    8897             :         struct spdk_bs_grow_ctx *ctx;
    8898             : 
    8899          32 :         assert(spdk_get_thread() == bs->md_thread);
    8900             : 
    8901          32 :         SPDK_DEBUGLOG(blob, "Growing blobstore on dev %p\n", bs->dev);
    8902             : 
    8903          32 :         cpl.type = SPDK_BS_CPL_TYPE_BS_BASIC;
    8904          32 :         cpl.u.bs_basic.cb_fn = cb_fn;
    8905          32 :         cpl.u.bs_basic.cb_arg = cb_arg;
    8906             : 
    8907          32 :         ctx = calloc(1, sizeof(struct spdk_bs_grow_ctx));
    8908          32 :         if (!ctx) {
    8909           0 :                 cb_fn(cb_arg, -ENOMEM);
    8910           0 :                 return;
    8911             :         }
    8912          32 :         ctx->bs = bs;
    8913             : 
    8914          32 :         ctx->super = spdk_zmalloc(sizeof(*ctx->super), 0x1000, NULL,
    8915             :                                   SPDK_ENV_SOCKET_ID_ANY, SPDK_MALLOC_DMA);
    8916          32 :         if (!ctx->super) {
    8917           0 :                 free(ctx);
    8918           0 :                 cb_fn(cb_arg, -ENOMEM);
    8919           0 :                 return;
    8920             :         }
    8921             : 
    8922          32 :         ctx->seq = bs_sequence_start_bs(bs->md_channel, &cpl);
    8923          32 :         if (!ctx->seq) {
    8924           0 :                 spdk_free(ctx->super);
    8925           0 :                 free(ctx);
    8926           0 :                 cb_fn(cb_arg, -ENOMEM);
    8927           0 :                 return;
    8928             :         }
    8929             : 
    8930             :         /* Read the super block */
    8931          32 :         bs_sequence_read_dev(ctx->seq, ctx->super, bs_page_to_lba(bs, 0),
    8932          32 :                              bs_byte_to_lba(bs, sizeof(*ctx->super)),
    8933             :                              bs_grow_live_load_super_cpl, ctx);
    8934             : }
    8935             : 
    8936             : void
    8937           4 : spdk_bs_grow(struct spdk_bs_dev *dev, struct spdk_bs_opts *o,
    8938             :              spdk_bs_op_with_handle_complete cb_fn, void *cb_arg)
    8939             : {
    8940           4 :         struct spdk_blob_store  *bs;
    8941           4 :         struct spdk_bs_cpl      cpl;
    8942           4 :         struct spdk_bs_load_ctx *ctx;
    8943           4 :         struct spdk_bs_opts     opts = {};
    8944             :         int err;
    8945             : 
    8946           4 :         SPDK_DEBUGLOG(blob, "Loading blobstore from dev %p\n", dev);
    8947             : 
    8948           4 :         if ((SPDK_BS_PAGE_SIZE % dev->blocklen) != 0) {
    8949           0 :                 SPDK_DEBUGLOG(blob, "unsupported dev block length of %d\n", dev->blocklen);
    8950           0 :                 dev->destroy(dev);
    8951           0 :                 cb_fn(cb_arg, NULL, -EINVAL);
    8952           0 :                 return;
    8953             :         }
    8954             : 
    8955           4 :         spdk_bs_opts_init(&opts, sizeof(opts));
    8956           4 :         if (o) {
    8957           4 :                 if (bs_opts_copy(o, &opts)) {
    8958           0 :                         return;
    8959             :                 }
    8960             :         }
    8961             : 
    8962           4 :         if (opts.max_md_ops == 0 || opts.max_channel_ops == 0) {
    8963           0 :                 dev->destroy(dev);
    8964           0 :                 cb_fn(cb_arg, NULL, -EINVAL);
    8965           0 :                 return;
    8966             :         }
    8967             : 
    8968           4 :         err = bs_alloc(dev, &opts, &bs, &ctx);
    8969           4 :         if (err) {
    8970           0 :                 dev->destroy(dev);
    8971           0 :                 cb_fn(cb_arg, NULL, err);
    8972           0 :                 return;
    8973             :         }
    8974             : 
    8975           4 :         cpl.type = SPDK_BS_CPL_TYPE_BS_HANDLE;
    8976           4 :         cpl.u.bs_handle.cb_fn = cb_fn;
    8977           4 :         cpl.u.bs_handle.cb_arg = cb_arg;
    8978           4 :         cpl.u.bs_handle.bs = bs;
    8979             : 
    8980           4 :         ctx->seq = bs_sequence_start_bs(bs->md_channel, &cpl);
    8981           4 :         if (!ctx->seq) {
    8982           0 :                 spdk_free(ctx->super);
    8983           0 :                 free(ctx);
    8984           0 :                 bs_free(bs);
    8985           0 :                 cb_fn(cb_arg, NULL, -ENOMEM);
    8986           0 :                 return;
    8987             :         }
    8988             : 
    8989             :         /* Read the super block */
    8990           4 :         bs_sequence_read_dev(ctx->seq, ctx->super, bs_page_to_lba(bs, 0),
    8991           4 :                              bs_byte_to_lba(bs, sizeof(*ctx->super)),
    8992             :                              bs_grow_load_super_cpl, ctx);
    8993             : }
    8994             : 
    8995             : int
    8996           0 : spdk_blob_get_esnap_id(struct spdk_blob *blob, const void **id, size_t *len)
    8997             : {
    8998           0 :         if (!blob_is_esnap_clone(blob)) {
    8999           0 :                 return -EINVAL;
    9000             :         }
    9001             : 
    9002           0 :         return blob_get_xattr_value(blob, BLOB_EXTERNAL_SNAPSHOT_ID, id, len, true);
    9003             : }
    9004             : 
    9005             : struct spdk_io_channel *
    9006        4720 : blob_esnap_get_io_channel(struct spdk_io_channel *ch, struct spdk_blob *blob)
    9007             : {
    9008        4720 :         struct spdk_bs_channel          *bs_channel = spdk_io_channel_get_ctx(ch);
    9009        4720 :         struct spdk_bs_dev              *bs_dev = blob->back_bs_dev;
    9010        4720 :         struct blob_esnap_channel       find = {};
    9011             :         struct blob_esnap_channel       *esnap_channel, *existing;
    9012             : 
    9013        4720 :         find.blob_id = blob->id;
    9014        4720 :         esnap_channel = RB_FIND(blob_esnap_channel_tree, &bs_channel->esnap_channels, &find);
    9015        4720 :         if (spdk_likely(esnap_channel != NULL)) {
    9016        4680 :                 SPDK_DEBUGLOG(blob_esnap, "blob 0x%" PRIx64 ": using cached channel on thread %s\n",
    9017             :                               blob->id, spdk_thread_get_name(spdk_get_thread()));
    9018        4680 :                 return esnap_channel->channel;
    9019             :         }
    9020             : 
    9021          40 :         SPDK_DEBUGLOG(blob_esnap, "blob 0x%" PRIx64 ": allocating channel on thread %s\n",
    9022             :                       blob->id, spdk_thread_get_name(spdk_get_thread()));
    9023             : 
    9024          40 :         esnap_channel = calloc(1, sizeof(*esnap_channel));
    9025          40 :         if (esnap_channel == NULL) {
    9026           0 :                 SPDK_NOTICELOG("blob 0x%" PRIx64 " channel allocation failed: no memory\n",
    9027             :                                find.blob_id);
    9028           0 :                 return NULL;
    9029             :         }
    9030          40 :         esnap_channel->channel = bs_dev->create_channel(bs_dev);
    9031          40 :         if (esnap_channel->channel == NULL) {
    9032           0 :                 SPDK_NOTICELOG("blob 0x%" PRIx64 " back channel allocation failed\n", blob->id);
    9033           0 :                 free(esnap_channel);
    9034           0 :                 return NULL;
    9035             :         }
    9036          40 :         esnap_channel->blob_id = find.blob_id;
    9037          40 :         existing = RB_INSERT(blob_esnap_channel_tree, &bs_channel->esnap_channels, esnap_channel);
    9038          40 :         if (spdk_unlikely(existing != NULL)) {
    9039             :                 /*
    9040             :                  * This should be unreachable: all modifications to this tree happen on this thread.
    9041             :                  */
    9042           0 :                 SPDK_ERRLOG("blob 0x%" PRIx64 "lost race to allocate a channel\n", find.blob_id);
    9043           0 :                 assert(false);
    9044             : 
    9045             :                 bs_dev->destroy_channel(bs_dev, esnap_channel->channel);
    9046             :                 free(esnap_channel);
    9047             : 
    9048             :                 return existing->channel;
    9049             :         }
    9050             : 
    9051          40 :         return esnap_channel->channel;
    9052             : }
    9053             : 
    9054             : static int
    9055        4700 : blob_esnap_channel_compare(struct blob_esnap_channel *c1, struct blob_esnap_channel *c2)
    9056             : {
    9057        4700 :         return (c1->blob_id < c2->blob_id ? -1 : c1->blob_id > c2->blob_id);
    9058             : }
    9059             : 
    9060             : struct blob_esnap_destroy_ctx {
    9061             :         spdk_blob_op_with_handle_complete       cb_fn;
    9062             :         void                                    *cb_arg;
    9063             :         struct spdk_blob                        *blob;
    9064             :         struct spdk_bs_dev                      *back_bs_dev;
    9065             :         bool                                    abort_io;
    9066             : };
    9067             : 
    9068             : static void
    9069         104 : blob_esnap_destroy_channels_done(struct spdk_io_channel_iter *i, int status)
    9070             : {
    9071         104 :         struct blob_esnap_destroy_ctx   *ctx = spdk_io_channel_iter_get_ctx(i);
    9072         104 :         struct spdk_blob                *blob = ctx->blob;
    9073         104 :         struct spdk_blob_store          *bs = blob->bs;
    9074             : 
    9075         104 :         SPDK_DEBUGLOG(blob_esnap, "blob 0x%" PRIx64 ": done destroying channels for this blob\n",
    9076             :                       blob->id);
    9077             : 
    9078         104 :         if (ctx->cb_fn != NULL) {
    9079          92 :                 ctx->cb_fn(ctx->cb_arg, blob, status);
    9080             :         }
    9081         104 :         free(ctx);
    9082             : 
    9083         104 :         bs->esnap_channels_unloading--;
    9084         104 :         if (bs->esnap_channels_unloading == 0 && bs->esnap_unload_cb_fn != NULL) {
    9085           4 :                 spdk_bs_unload(bs, bs->esnap_unload_cb_fn, bs->esnap_unload_cb_arg);
    9086             :         }
    9087         104 : }
    9088             : 
    9089             : static void
    9090         112 : blob_esnap_destroy_one_channel(struct spdk_io_channel_iter *i)
    9091             : {
    9092         112 :         struct blob_esnap_destroy_ctx   *ctx = spdk_io_channel_iter_get_ctx(i);
    9093         112 :         struct spdk_blob                *blob = ctx->blob;
    9094         112 :         struct spdk_bs_dev              *bs_dev = ctx->back_bs_dev;
    9095         112 :         struct spdk_io_channel          *channel = spdk_io_channel_iter_get_channel(i);
    9096         112 :         struct spdk_bs_channel          *bs_channel = spdk_io_channel_get_ctx(channel);
    9097             :         struct blob_esnap_channel       *esnap_channel;
    9098         112 :         struct blob_esnap_channel       find = {};
    9099             : 
    9100         112 :         assert(spdk_get_thread() == spdk_io_channel_get_thread(channel));
    9101             : 
    9102         112 :         find.blob_id = blob->id;
    9103         112 :         esnap_channel = RB_FIND(blob_esnap_channel_tree, &bs_channel->esnap_channels, &find);
    9104         112 :         if (esnap_channel != NULL) {
    9105          12 :                 SPDK_DEBUGLOG(blob_esnap, "blob 0x%" PRIx64 ": destroying channel on thread %s\n",
    9106             :                               blob->id, spdk_thread_get_name(spdk_get_thread()));
    9107          12 :                 RB_REMOVE(blob_esnap_channel_tree, &bs_channel->esnap_channels, esnap_channel);
    9108             : 
    9109          12 :                 if (ctx->abort_io) {
    9110             :                         spdk_bs_user_op_t *op, *tmp;
    9111             : 
    9112           8 :                         TAILQ_FOREACH_SAFE(op, &bs_channel->queued_io, link, tmp) {
    9113           0 :                                 if (op->back_channel == esnap_channel->channel) {
    9114           0 :                                         TAILQ_REMOVE(&bs_channel->queued_io, op, link);
    9115           0 :                                         bs_user_op_abort(op, -EIO);
    9116             :                                 }
    9117             :                         }
    9118             :                 }
    9119             : 
    9120          12 :                 bs_dev->destroy_channel(bs_dev, esnap_channel->channel);
    9121          12 :                 free(esnap_channel);
    9122             :         }
    9123             : 
    9124         112 :         spdk_for_each_channel_continue(i, 0);
    9125         112 : }
    9126             : 
    9127             : /*
    9128             :  * Destroy the channels for a specific blob on each thread with a blobstore channel. This should be
    9129             :  * used when closing an esnap clone blob and after decoupling from the parent.
    9130             :  */
    9131             : static void
    9132         400 : blob_esnap_destroy_bs_dev_channels(struct spdk_blob *blob, bool abort_io,
    9133             :                                    spdk_blob_op_with_handle_complete cb_fn, void *cb_arg)
    9134             : {
    9135             :         struct blob_esnap_destroy_ctx   *ctx;
    9136             : 
    9137         400 :         if (!blob_is_esnap_clone(blob) || blob->back_bs_dev == NULL) {
    9138         296 :                 if (cb_fn != NULL) {
    9139         296 :                         cb_fn(cb_arg, blob, 0);
    9140             :                 }
    9141         296 :                 return;
    9142             :         }
    9143             : 
    9144         104 :         ctx = calloc(1, sizeof(*ctx));
    9145         104 :         if (ctx == NULL) {
    9146           0 :                 if (cb_fn != NULL) {
    9147           0 :                         cb_fn(cb_arg, blob, -ENOMEM);
    9148             :                 }
    9149           0 :                 return;
    9150             :         }
    9151         104 :         ctx->cb_fn = cb_fn;
    9152         104 :         ctx->cb_arg = cb_arg;
    9153         104 :         ctx->blob = blob;
    9154         104 :         ctx->back_bs_dev = blob->back_bs_dev;
    9155         104 :         ctx->abort_io = abort_io;
    9156             : 
    9157         104 :         SPDK_DEBUGLOG(blob_esnap, "blob 0x%" PRIx64 ": destroying channels for this blob\n",
    9158             :                       blob->id);
    9159             : 
    9160         104 :         blob->bs->esnap_channels_unloading++;
    9161         104 :         spdk_for_each_channel(blob->bs, blob_esnap_destroy_one_channel, ctx,
    9162             :                               blob_esnap_destroy_channels_done);
    9163             : }
    9164             : 
    9165             : /*
    9166             :  * Destroy all bs_dev channels on a specific blobstore channel. This should be used when a
    9167             :  * bs_channel is destroyed.
    9168             :  */
    9169             : static void
    9170         977 : blob_esnap_destroy_bs_channel(struct spdk_bs_channel *ch)
    9171             : {
    9172             :         struct blob_esnap_channel *esnap_channel, *esnap_channel_tmp;
    9173             : 
    9174         977 :         assert(spdk_get_thread() == spdk_io_channel_get_thread(spdk_io_channel_from_ctx(ch)));
    9175             : 
    9176         977 :         SPDK_DEBUGLOG(blob_esnap, "destroying channels on thread %s\n",
    9177             :                       spdk_thread_get_name(spdk_get_thread()));
    9178        1005 :         RB_FOREACH_SAFE(esnap_channel, blob_esnap_channel_tree, &ch->esnap_channels,
    9179             :                         esnap_channel_tmp) {
    9180          28 :                 SPDK_DEBUGLOG(blob_esnap, "blob 0x%" PRIx64
    9181             :                               ": destroying one channel in thread %s\n",
    9182             :                               esnap_channel->blob_id, spdk_thread_get_name(spdk_get_thread()));
    9183          28 :                 RB_REMOVE(blob_esnap_channel_tree, &ch->esnap_channels, esnap_channel);
    9184          28 :                 spdk_put_io_channel(esnap_channel->channel);
    9185          28 :                 free(esnap_channel);
    9186             :         }
    9187         977 :         SPDK_DEBUGLOG(blob_esnap, "done destroying channels on thread %s\n",
    9188             :                       spdk_thread_get_name(spdk_get_thread()));
    9189         977 : }
    9190             : 
    9191             : struct set_bs_dev_ctx {
    9192             :         struct spdk_blob        *blob;
    9193             :         struct spdk_bs_dev      *back_bs_dev;
    9194             :         spdk_blob_op_complete   cb_fn;
    9195             :         void                    *cb_arg;
    9196             :         int                     bserrno;
    9197             : };
    9198             : 
    9199             : static void
    9200           8 : blob_set_back_bs_dev_done(void *_ctx, int bserrno)
    9201             : {
    9202           8 :         struct set_bs_dev_ctx   *ctx = _ctx;
    9203             : 
    9204           8 :         if (bserrno != 0) {
    9205             :                 /* Even though the unfreeze failed, the update may have succeed. */
    9206           0 :                 SPDK_ERRLOG("blob 0x%" PRIx64 ": unfreeze failed with error %d\n", ctx->blob->id,
    9207             :                             bserrno);
    9208             :         }
    9209           8 :         ctx->cb_fn(ctx->cb_arg, ctx->bserrno);
    9210           8 :         free(ctx);
    9211           8 : }
    9212             : 
    9213             : static void
    9214           8 : blob_frozen_set_back_bs_dev(void *_ctx, struct spdk_blob *blob, int bserrno)
    9215             : {
    9216           8 :         struct set_bs_dev_ctx   *ctx = _ctx;
    9217             : 
    9218           8 :         if (bserrno != 0) {
    9219           0 :                 SPDK_ERRLOG("blob 0x%" PRIx64 ": failed to release old back_bs_dev with error %d\n",
    9220             :                             blob->id, bserrno);
    9221           0 :                 ctx->bserrno = bserrno;
    9222           0 :                 blob_unfreeze_io(blob, blob_set_back_bs_dev_done, ctx);
    9223           0 :                 return;
    9224             :         }
    9225             : 
    9226           8 :         if (blob->back_bs_dev != NULL) {
    9227           8 :                 blob->back_bs_dev->destroy(blob->back_bs_dev);
    9228             :         }
    9229             : 
    9230           8 :         SPDK_NOTICELOG("blob 0x%" PRIx64 ": hotplugged back_bs_dev\n", blob->id);
    9231           8 :         blob->back_bs_dev = ctx->back_bs_dev;
    9232           8 :         ctx->bserrno = 0;
    9233             : 
    9234           8 :         blob_unfreeze_io(blob, blob_set_back_bs_dev_done, ctx);
    9235             : }
    9236             : 
    9237             : static void
    9238           8 : blob_frozen_destroy_esnap_channels(void *_ctx, int bserrno)
    9239             : {
    9240           8 :         struct set_bs_dev_ctx   *ctx = _ctx;
    9241           8 :         struct spdk_blob        *blob = ctx->blob;
    9242             : 
    9243           8 :         if (bserrno != 0) {
    9244           0 :                 SPDK_ERRLOG("blob 0x%" PRIx64 ": failed to freeze with error %d\n", blob->id,
    9245             :                             bserrno);
    9246           0 :                 ctx->cb_fn(ctx->cb_arg, bserrno);
    9247           0 :                 free(ctx);
    9248           0 :                 return;
    9249             :         }
    9250             : 
    9251             :         /*
    9252             :          * This does not prevent future reads from the esnap device because any future IO will
    9253             :          * lazily create a new esnap IO channel.
    9254             :          */
    9255           8 :         blob_esnap_destroy_bs_dev_channels(blob, true, blob_frozen_set_back_bs_dev, ctx);
    9256             : }
    9257             : 
    9258             : void
    9259           8 : spdk_blob_set_esnap_bs_dev(struct spdk_blob *blob, struct spdk_bs_dev *back_bs_dev,
    9260             :                            spdk_blob_op_complete cb_fn, void *cb_arg)
    9261             : {
    9262             :         struct set_bs_dev_ctx   *ctx;
    9263             : 
    9264           8 :         if (!blob_is_esnap_clone(blob)) {
    9265           0 :                 SPDK_ERRLOG("blob 0x%" PRIx64 ": not an esnap clone\n", blob->id);
    9266           0 :                 cb_fn(cb_arg, -EINVAL);
    9267           0 :                 return;
    9268             :         }
    9269             : 
    9270           8 :         ctx = calloc(1, sizeof(*ctx));
    9271           8 :         if (ctx == NULL) {
    9272           0 :                 SPDK_ERRLOG("blob 0x%" PRIx64 ": out of memory while setting back_bs_dev\n",
    9273             :                             blob->id);
    9274           0 :                 cb_fn(cb_arg, -ENOMEM);
    9275           0 :                 return;
    9276             :         }
    9277           8 :         ctx->cb_fn = cb_fn;
    9278           8 :         ctx->cb_arg = cb_arg;
    9279           8 :         ctx->back_bs_dev = back_bs_dev;
    9280           8 :         ctx->blob = blob;
    9281           8 :         blob_freeze_io(blob, blob_frozen_destroy_esnap_channels, ctx);
    9282             : }
    9283             : 
    9284             : struct spdk_bs_dev *
    9285           4 : spdk_blob_get_esnap_bs_dev(const struct spdk_blob *blob)
    9286             : {
    9287           4 :         if (!blob_is_esnap_clone(blob)) {
    9288           0 :                 SPDK_ERRLOG("blob 0x%" PRIx64 ": not an esnap clone\n", blob->id);
    9289           0 :                 return NULL;
    9290             :         }
    9291             : 
    9292           4 :         return blob->back_bs_dev;
    9293             : }
    9294             : 
    9295             : bool
    9296          28 : spdk_blob_is_degraded(const struct spdk_blob *blob)
    9297             : {
    9298          28 :         if (blob->bs->dev->is_degraded != NULL && blob->bs->dev->is_degraded(blob->bs->dev)) {
    9299           4 :                 return true;
    9300             :         }
    9301          24 :         if (blob->back_bs_dev == NULL || blob->back_bs_dev->is_degraded == NULL) {
    9302          12 :                 return false;
    9303             :         }
    9304             : 
    9305          12 :         return blob->back_bs_dev->is_degraded(blob->back_bs_dev);
    9306             : }
    9307             : 
    9308           3 : SPDK_LOG_REGISTER_COMPONENT(blob)
    9309           3 : SPDK_LOG_REGISTER_COMPONENT(blob_esnap)

Generated by: LCOV version 1.15