LCOV - code coverage report
Current view: top level - module/bdev/nvme - bdev_nvme.c (source / functions) Hit Total Coverage
Test: ut_cov_unit.info Lines: 2318 4384 52.9 %
Date: 2024-11-18 15:39:13 Functions: 227 331 68.6 %

          Line data    Source code
       1             : /*   SPDX-License-Identifier: BSD-3-Clause
       2             :  *   Copyright (C) 2016 Intel Corporation. All rights reserved.
       3             :  *   Copyright (c) 2019 Mellanox Technologies LTD. All rights reserved.
       4             :  *   Copyright (c) 2021-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
       5             :  *   Copyright (c) 2022 Dell Inc, or its subsidiaries. All rights reserved.
       6             :  */
       7             : 
       8             : #include "spdk/stdinc.h"
       9             : 
      10             : #include "bdev_nvme.h"
      11             : 
      12             : #include "spdk/accel.h"
      13             : #include "spdk/config.h"
      14             : #include "spdk/endian.h"
      15             : #include "spdk/bdev.h"
      16             : #include "spdk/json.h"
      17             : #include "spdk/keyring.h"
      18             : #include "spdk/likely.h"
      19             : #include "spdk/nvme.h"
      20             : #include "spdk/nvme_ocssd.h"
      21             : #include "spdk/nvme_zns.h"
      22             : #include "spdk/opal.h"
      23             : #include "spdk/thread.h"
      24             : #include "spdk/trace.h"
      25             : #include "spdk/string.h"
      26             : #include "spdk/util.h"
      27             : #include "spdk/uuid.h"
      28             : 
      29             : #include "spdk/bdev_module.h"
      30             : #include "spdk/log.h"
      31             : 
      32             : #include "spdk_internal/usdt.h"
      33             : #include "spdk_internal/trace_defs.h"
      34             : 
      35             : #define SPDK_BDEV_NVME_DEFAULT_DELAY_CMD_SUBMIT true
      36             : #define SPDK_BDEV_NVME_DEFAULT_KEEP_ALIVE_TIMEOUT_IN_MS (10000)
      37             : 
      38             : #define NSID_STR_LEN 10
      39             : 
      40             : #define SPDK_CONTROLLER_NAME_MAX 512
      41             : 
      42             : static int bdev_nvme_config_json(struct spdk_json_write_ctx *w);
      43             : 
      44             : struct nvme_bdev_io {
      45             :         /** array of iovecs to transfer. */
      46             :         struct iovec *iovs;
      47             : 
      48             :         /** Number of iovecs in iovs array. */
      49             :         int iovcnt;
      50             : 
      51             :         /** Current iovec position. */
      52             :         int iovpos;
      53             : 
      54             :         /** Offset in current iovec. */
      55             :         uint32_t iov_offset;
      56             : 
      57             :         /** Offset in current iovec. */
      58             :         uint32_t fused_iov_offset;
      59             : 
      60             :         /** array of iovecs to transfer. */
      61             :         struct iovec *fused_iovs;
      62             : 
      63             :         /** Number of iovecs in iovs array. */
      64             :         int fused_iovcnt;
      65             : 
      66             :         /** Current iovec position. */
      67             :         int fused_iovpos;
      68             : 
      69             :         /** I/O path the current I/O or admin passthrough is submitted on, or the I/O path
      70             :          *  being reset in a reset I/O.
      71             :          */
      72             :         struct nvme_io_path *io_path;
      73             : 
      74             :         /** Saved status for admin passthru completion event, PI error verification, or intermediate compare-and-write status */
      75             :         struct spdk_nvme_cpl cpl;
      76             : 
      77             :         /** Extended IO opts passed by the user to bdev layer and mapped to NVME format */
      78             :         struct spdk_nvme_ns_cmd_ext_io_opts ext_opts;
      79             : 
      80             :         /** Keeps track if first of fused commands was submitted */
      81             :         bool first_fused_submitted;
      82             : 
      83             :         /** Keeps track if first of fused commands was completed */
      84             :         bool first_fused_completed;
      85             : 
      86             :         /* How many times the current I/O was retried. */
      87             :         int32_t retry_count;
      88             : 
      89             :         /** Expiration value in ticks to retry the current I/O. */
      90             :         uint64_t retry_ticks;
      91             : 
      92             :         /** Temporary pointer to zone report buffer */
      93             :         struct spdk_nvme_zns_zone_report *zone_report_buf;
      94             : 
      95             :         /** Keep track of how many zones that have been copied to the spdk_bdev_zone_info struct */
      96             :         uint64_t handled_zones;
      97             : 
      98             :         /* Current tsc at submit time. */
      99             :         uint64_t submit_tsc;
     100             : 
     101             :         /* Used to put nvme_bdev_io into the list */
     102             :         TAILQ_ENTRY(nvme_bdev_io) retry_link;
     103             : };
     104             : 
     105             : struct nvme_probe_skip_entry {
     106             :         struct spdk_nvme_transport_id           trid;
     107             :         TAILQ_ENTRY(nvme_probe_skip_entry)      tailq;
     108             : };
     109             : /* All the controllers deleted by users via RPC are skipped by hotplug monitor */
     110             : static TAILQ_HEAD(, nvme_probe_skip_entry) g_skipped_nvme_ctrlrs = TAILQ_HEAD_INITIALIZER(
     111             :                         g_skipped_nvme_ctrlrs);
     112             : 
     113             : #define BDEV_NVME_DEFAULT_DIGESTS (SPDK_BIT(SPDK_NVMF_DHCHAP_HASH_SHA256) | \
     114             :                                    SPDK_BIT(SPDK_NVMF_DHCHAP_HASH_SHA384) | \
     115             :                                    SPDK_BIT(SPDK_NVMF_DHCHAP_HASH_SHA512))
     116             : 
     117             : #define BDEV_NVME_DEFAULT_DHGROUPS (SPDK_BIT(SPDK_NVMF_DHCHAP_DHGROUP_NULL) | \
     118             :                                     SPDK_BIT(SPDK_NVMF_DHCHAP_DHGROUP_2048) | \
     119             :                                     SPDK_BIT(SPDK_NVMF_DHCHAP_DHGROUP_3072) | \
     120             :                                     SPDK_BIT(SPDK_NVMF_DHCHAP_DHGROUP_4096) | \
     121             :                                     SPDK_BIT(SPDK_NVMF_DHCHAP_DHGROUP_6144) | \
     122             :                                     SPDK_BIT(SPDK_NVMF_DHCHAP_DHGROUP_8192))
     123             : 
     124             : static struct spdk_bdev_nvme_opts g_opts = {
     125             :         .action_on_timeout = SPDK_BDEV_NVME_TIMEOUT_ACTION_NONE,
     126             :         .timeout_us = 0,
     127             :         .timeout_admin_us = 0,
     128             :         .keep_alive_timeout_ms = SPDK_BDEV_NVME_DEFAULT_KEEP_ALIVE_TIMEOUT_IN_MS,
     129             :         .transport_retry_count = 4,
     130             :         .arbitration_burst = 0,
     131             :         .low_priority_weight = 0,
     132             :         .medium_priority_weight = 0,
     133             :         .high_priority_weight = 0,
     134             :         .nvme_adminq_poll_period_us = 10000ULL,
     135             :         .nvme_ioq_poll_period_us = 0,
     136             :         .io_queue_requests = 0,
     137             :         .delay_cmd_submit = SPDK_BDEV_NVME_DEFAULT_DELAY_CMD_SUBMIT,
     138             :         .bdev_retry_count = 3,
     139             :         .transport_ack_timeout = 0,
     140             :         .ctrlr_loss_timeout_sec = 0,
     141             :         .reconnect_delay_sec = 0,
     142             :         .fast_io_fail_timeout_sec = 0,
     143             :         .disable_auto_failback = false,
     144             :         .generate_uuids = false,
     145             :         .transport_tos = 0,
     146             :         .nvme_error_stat = false,
     147             :         .io_path_stat = false,
     148             :         .allow_accel_sequence = false,
     149             :         .dhchap_digests = BDEV_NVME_DEFAULT_DIGESTS,
     150             :         .dhchap_dhgroups = BDEV_NVME_DEFAULT_DHGROUPS,
     151             : };
     152             : 
     153             : #define NVME_HOTPLUG_POLL_PERIOD_MAX                    10000000ULL
     154             : #define NVME_HOTPLUG_POLL_PERIOD_DEFAULT                100000ULL
     155             : 
     156             : static int g_hot_insert_nvme_controller_index = 0;
     157             : static uint64_t g_nvme_hotplug_poll_period_us = NVME_HOTPLUG_POLL_PERIOD_DEFAULT;
     158             : static bool g_nvme_hotplug_enabled = false;
     159             : struct spdk_thread *g_bdev_nvme_init_thread;
     160             : static struct spdk_poller *g_hotplug_poller;
     161             : static struct spdk_poller *g_hotplug_probe_poller;
     162             : static struct spdk_nvme_probe_ctx *g_hotplug_probe_ctx;
     163             : 
     164             : static void nvme_ctrlr_populate_namespaces(struct nvme_ctrlr *nvme_ctrlr,
     165             :                 struct nvme_async_probe_ctx *ctx);
     166             : static void nvme_ctrlr_populate_namespaces_done(struct nvme_ctrlr *nvme_ctrlr,
     167             :                 struct nvme_async_probe_ctx *ctx);
     168             : static int bdev_nvme_library_init(void);
     169             : static void bdev_nvme_library_fini(void);
     170             : static void _bdev_nvme_submit_request(struct nvme_bdev_channel *nbdev_ch,
     171             :                                       struct spdk_bdev_io *bdev_io);
     172             : static void bdev_nvme_submit_request(struct spdk_io_channel *ch,
     173             :                                      struct spdk_bdev_io *bdev_io);
     174             : static int bdev_nvme_readv(struct nvme_bdev_io *bio, struct iovec *iov, int iovcnt,
     175             :                            void *md, uint64_t lba_count, uint64_t lba,
     176             :                            uint32_t flags, struct spdk_memory_domain *domain, void *domain_ctx,
     177             :                            struct spdk_accel_sequence *seq);
     178             : static int bdev_nvme_no_pi_readv(struct nvme_bdev_io *bio, struct iovec *iov, int iovcnt,
     179             :                                  void *md, uint64_t lba_count, uint64_t lba);
     180             : static int bdev_nvme_writev(struct nvme_bdev_io *bio, struct iovec *iov, int iovcnt,
     181             :                             void *md, uint64_t lba_count, uint64_t lba,
     182             :                             uint32_t flags, struct spdk_memory_domain *domain, void *domain_ctx,
     183             :                             struct spdk_accel_sequence *seq,
     184             :                             union spdk_bdev_nvme_cdw12 cdw12, union spdk_bdev_nvme_cdw13 cdw13);
     185             : static int bdev_nvme_zone_appendv(struct nvme_bdev_io *bio, struct iovec *iov, int iovcnt,
     186             :                                   void *md, uint64_t lba_count,
     187             :                                   uint64_t zslba, uint32_t flags);
     188             : static int bdev_nvme_comparev(struct nvme_bdev_io *bio, struct iovec *iov, int iovcnt,
     189             :                               void *md, uint64_t lba_count, uint64_t lba,
     190             :                               uint32_t flags);
     191             : static int bdev_nvme_comparev_and_writev(struct nvme_bdev_io *bio,
     192             :                 struct iovec *cmp_iov, int cmp_iovcnt, struct iovec *write_iov,
     193             :                 int write_iovcnt, void *md, uint64_t lba_count, uint64_t lba,
     194             :                 uint32_t flags);
     195             : static int bdev_nvme_get_zone_info(struct nvme_bdev_io *bio, uint64_t zone_id,
     196             :                                    uint32_t num_zones, struct spdk_bdev_zone_info *info);
     197             : static int bdev_nvme_zone_management(struct nvme_bdev_io *bio, uint64_t zone_id,
     198             :                                      enum spdk_bdev_zone_action action);
     199             : static void bdev_nvme_admin_passthru(struct nvme_bdev_channel *nbdev_ch,
     200             :                                      struct nvme_bdev_io *bio,
     201             :                                      struct spdk_nvme_cmd *cmd, void *buf, size_t nbytes);
     202             : static int bdev_nvme_io_passthru(struct nvme_bdev_io *bio, struct spdk_nvme_cmd *cmd,
     203             :                                  void *buf, size_t nbytes);
     204             : static int bdev_nvme_io_passthru_md(struct nvme_bdev_io *bio, struct spdk_nvme_cmd *cmd,
     205             :                                     void *buf, size_t nbytes, void *md_buf, size_t md_len);
     206             : static int bdev_nvme_iov_passthru_md(struct nvme_bdev_io *bio, struct spdk_nvme_cmd *cmd,
     207             :                                      struct iovec *iov, int iovcnt, size_t nbytes,
     208             :                                      void *md_buf, size_t md_len);
     209             : static void bdev_nvme_abort(struct nvme_bdev_channel *nbdev_ch,
     210             :                             struct nvme_bdev_io *bio, struct nvme_bdev_io *bio_to_abort);
     211             : static void bdev_nvme_reset_io(struct nvme_bdev *nbdev, struct nvme_bdev_io *bio);
     212             : static int bdev_nvme_reset_ctrlr(struct nvme_ctrlr *nvme_ctrlr);
     213             : static int bdev_nvme_failover_ctrlr(struct nvme_ctrlr *nvme_ctrlr);
     214             : static void remove_cb(void *cb_ctx, struct spdk_nvme_ctrlr *ctrlr);
     215             : static int nvme_ctrlr_read_ana_log_page(struct nvme_ctrlr *nvme_ctrlr);
     216             : 
     217             : static struct nvme_ns *nvme_ns_alloc(void);
     218             : static void nvme_ns_free(struct nvme_ns *ns);
     219             : 
     220             : static int
     221         175 : nvme_ns_cmp(struct nvme_ns *ns1, struct nvme_ns *ns2)
     222             : {
     223         175 :         return ns1->id < ns2->id ? -1 : ns1->id > ns2->id;
     224             : }
     225             : 
     226         914 : RB_GENERATE_STATIC(nvme_ns_tree, nvme_ns, node, nvme_ns_cmp);
     227             : 
     228             : struct spdk_nvme_qpair *
     229           1 : bdev_nvme_get_io_qpair(struct spdk_io_channel *ctrlr_io_ch)
     230             : {
     231             :         struct nvme_ctrlr_channel *ctrlr_ch;
     232             : 
     233           1 :         assert(ctrlr_io_ch != NULL);
     234             : 
     235           1 :         ctrlr_ch = spdk_io_channel_get_ctx(ctrlr_io_ch);
     236             : 
     237           1 :         return ctrlr_ch->qpair->qpair;
     238             : }
     239             : 
     240             : static int
     241           0 : bdev_nvme_get_ctx_size(void)
     242             : {
     243           0 :         return sizeof(struct nvme_bdev_io);
     244             : }
     245             : 
     246             : static struct spdk_bdev_module nvme_if = {
     247             :         .name = "nvme",
     248             :         .async_fini = true,
     249             :         .module_init = bdev_nvme_library_init,
     250             :         .module_fini = bdev_nvme_library_fini,
     251             :         .config_json = bdev_nvme_config_json,
     252             :         .get_ctx_size = bdev_nvme_get_ctx_size,
     253             : 
     254             : };
     255           1 : SPDK_BDEV_MODULE_REGISTER(nvme, &nvme_if)
     256             : 
     257             : struct nvme_bdev_ctrlrs g_nvme_bdev_ctrlrs = TAILQ_HEAD_INITIALIZER(g_nvme_bdev_ctrlrs);
     258             : pthread_mutex_t g_bdev_nvme_mutex = PTHREAD_MUTEX_INITIALIZER;
     259             : bool g_bdev_nvme_module_finish;
     260             : 
     261             : struct nvme_bdev_ctrlr *
     262         275 : nvme_bdev_ctrlr_get_by_name(const char *name)
     263             : {
     264             :         struct nvme_bdev_ctrlr *nbdev_ctrlr;
     265             : 
     266         275 :         TAILQ_FOREACH(nbdev_ctrlr, &g_nvme_bdev_ctrlrs, tailq) {
     267         150 :                 if (strcmp(name, nbdev_ctrlr->name) == 0) {
     268         150 :                         break;
     269             :                 }
     270             :         }
     271             : 
     272         275 :         return nbdev_ctrlr;
     273             : }
     274             : 
     275             : static struct nvme_ctrlr *
     276          58 : nvme_bdev_ctrlr_get_ctrlr(struct nvme_bdev_ctrlr *nbdev_ctrlr,
     277             :                           const struct spdk_nvme_transport_id *trid, const char *hostnqn)
     278             : {
     279             :         const struct spdk_nvme_ctrlr_opts *opts;
     280             :         struct nvme_ctrlr *nvme_ctrlr;
     281             : 
     282          99 :         TAILQ_FOREACH(nvme_ctrlr, &nbdev_ctrlr->ctrlrs, tailq) {
     283          74 :                 opts = spdk_nvme_ctrlr_get_opts(nvme_ctrlr->ctrlr);
     284          74 :                 if (spdk_nvme_transport_id_compare(trid, &nvme_ctrlr->active_path_id->trid) == 0 &&
     285          33 :                     strcmp(hostnqn, opts->hostnqn) == 0) {
     286          33 :                         break;
     287             :                 }
     288             :         }
     289             : 
     290          58 :         return nvme_ctrlr;
     291             : }
     292             : 
     293             : struct nvme_ctrlr *
     294           0 : nvme_bdev_ctrlr_get_ctrlr_by_id(struct nvme_bdev_ctrlr *nbdev_ctrlr,
     295             :                                 uint16_t cntlid)
     296             : {
     297             :         struct nvme_ctrlr *nvme_ctrlr;
     298             :         const struct spdk_nvme_ctrlr_data *cdata;
     299             : 
     300           0 :         TAILQ_FOREACH(nvme_ctrlr, &nbdev_ctrlr->ctrlrs, tailq) {
     301           0 :                 cdata = spdk_nvme_ctrlr_get_data(nvme_ctrlr->ctrlr);
     302           0 :                 if (cdata->cntlid == cntlid) {
     303           0 :                         break;
     304             :                 }
     305             :         }
     306             : 
     307           0 :         return nvme_ctrlr;
     308             : }
     309             : 
     310             : static struct nvme_bdev *
     311          73 : nvme_bdev_ctrlr_get_bdev(struct nvme_bdev_ctrlr *nbdev_ctrlr, uint32_t nsid)
     312             : {
     313             :         struct nvme_bdev *bdev;
     314             : 
     315          73 :         pthread_mutex_lock(&g_bdev_nvme_mutex);
     316         107 :         TAILQ_FOREACH(bdev, &nbdev_ctrlr->bdevs, tailq) {
     317          68 :                 if (bdev->nsid == nsid) {
     318          34 :                         break;
     319             :                 }
     320             :         }
     321          73 :         pthread_mutex_unlock(&g_bdev_nvme_mutex);
     322             : 
     323          73 :         return bdev;
     324             : }
     325             : 
     326             : struct nvme_ns *
     327         143 : nvme_ctrlr_get_ns(struct nvme_ctrlr *nvme_ctrlr, uint32_t nsid)
     328             : {
     329         143 :         struct nvme_ns ns;
     330             : 
     331         143 :         assert(nsid > 0);
     332             : 
     333         143 :         ns.id = nsid;
     334         143 :         return RB_FIND(nvme_ns_tree, &nvme_ctrlr->namespaces, &ns);
     335             : }
     336             : 
     337             : struct nvme_ns *
     338         155 : nvme_ctrlr_get_first_active_ns(struct nvme_ctrlr *nvme_ctrlr)
     339             : {
     340         155 :         return RB_MIN(nvme_ns_tree, &nvme_ctrlr->namespaces);
     341             : }
     342             : 
     343             : struct nvme_ns *
     344          65 : nvme_ctrlr_get_next_active_ns(struct nvme_ctrlr *nvme_ctrlr, struct nvme_ns *ns)
     345             : {
     346          65 :         if (ns == NULL) {
     347           0 :                 return NULL;
     348             :         }
     349             : 
     350          65 :         return RB_NEXT(nvme_ns_tree, &nvme_ctrlr->namespaces, ns);
     351             : }
     352             : 
     353             : static struct nvme_ctrlr *
     354          52 : nvme_ctrlr_get(const struct spdk_nvme_transport_id *trid, const char *hostnqn)
     355             : {
     356             :         struct nvme_bdev_ctrlr  *nbdev_ctrlr;
     357          52 :         struct nvme_ctrlr       *nvme_ctrlr = NULL;
     358             : 
     359          52 :         pthread_mutex_lock(&g_bdev_nvme_mutex);
     360          71 :         TAILQ_FOREACH(nbdev_ctrlr, &g_nvme_bdev_ctrlrs, tailq) {
     361          19 :                 nvme_ctrlr = nvme_bdev_ctrlr_get_ctrlr(nbdev_ctrlr, trid, hostnqn);
     362          19 :                 if (nvme_ctrlr != NULL) {
     363           0 :                         break;
     364             :                 }
     365             :         }
     366          52 :         pthread_mutex_unlock(&g_bdev_nvme_mutex);
     367             : 
     368          52 :         return nvme_ctrlr;
     369             : }
     370             : 
     371             : struct nvme_ctrlr *
     372          73 : nvme_ctrlr_get_by_name(const char *name)
     373             : {
     374             :         struct nvme_bdev_ctrlr *nbdev_ctrlr;
     375          73 :         struct nvme_ctrlr *nvme_ctrlr = NULL;
     376             : 
     377          73 :         if (name == NULL) {
     378           0 :                 return NULL;
     379             :         }
     380             : 
     381          73 :         pthread_mutex_lock(&g_bdev_nvme_mutex);
     382          73 :         nbdev_ctrlr = nvme_bdev_ctrlr_get_by_name(name);
     383          73 :         if (nbdev_ctrlr != NULL) {
     384          41 :                 nvme_ctrlr = TAILQ_FIRST(&nbdev_ctrlr->ctrlrs);
     385             :         }
     386          73 :         pthread_mutex_unlock(&g_bdev_nvme_mutex);
     387             : 
     388          73 :         return nvme_ctrlr;
     389             : }
     390             : 
     391             : void
     392           0 : nvme_bdev_ctrlr_for_each(nvme_bdev_ctrlr_for_each_fn fn, void *ctx)
     393             : {
     394             :         struct nvme_bdev_ctrlr *nbdev_ctrlr;
     395             : 
     396           0 :         pthread_mutex_lock(&g_bdev_nvme_mutex);
     397           0 :         TAILQ_FOREACH(nbdev_ctrlr, &g_nvme_bdev_ctrlrs, tailq) {
     398           0 :                 fn(nbdev_ctrlr, ctx);
     399             :         }
     400           0 :         pthread_mutex_unlock(&g_bdev_nvme_mutex);
     401           0 : }
     402             : 
     403             : struct nvme_ctrlr_channel_iter {
     404             :         nvme_ctrlr_for_each_channel_msg fn;
     405             :         nvme_ctrlr_for_each_channel_done cpl;
     406             :         struct spdk_io_channel_iter *i;
     407             :         void *ctx;
     408             : };
     409             : 
     410             : void
     411         194 : nvme_ctrlr_for_each_channel_continue(struct nvme_ctrlr_channel_iter *iter, int status)
     412             : {
     413         194 :         spdk_for_each_channel_continue(iter->i, status);
     414         194 : }
     415             : 
     416             : static void
     417         194 : nvme_ctrlr_each_channel_msg(struct spdk_io_channel_iter *i)
     418             : {
     419         194 :         struct nvme_ctrlr_channel_iter *iter = spdk_io_channel_iter_get_ctx(i);
     420         194 :         struct nvme_ctrlr *nvme_ctrlr = spdk_io_channel_iter_get_io_device(i);
     421         194 :         struct spdk_io_channel *ch = spdk_io_channel_iter_get_channel(i);
     422         194 :         struct nvme_ctrlr_channel *ctrlr_ch = spdk_io_channel_get_ctx(ch);
     423             : 
     424         194 :         iter->i = i;
     425         194 :         iter->fn(iter, nvme_ctrlr, ctrlr_ch, iter->ctx);
     426         194 : }
     427             : 
     428             : static void
     429         120 : nvme_ctrlr_each_channel_cpl(struct spdk_io_channel_iter *i, int status)
     430             : {
     431         120 :         struct nvme_ctrlr_channel_iter *iter = spdk_io_channel_iter_get_ctx(i);
     432         120 :         struct nvme_ctrlr *nvme_ctrlr = spdk_io_channel_iter_get_io_device(i);
     433             : 
     434         120 :         iter->i = i;
     435         120 :         iter->cpl(nvme_ctrlr, iter->ctx, status);
     436             : 
     437         120 :         free(iter);
     438         120 : }
     439             : 
     440             : void
     441         120 : nvme_ctrlr_for_each_channel(struct nvme_ctrlr *nvme_ctrlr,
     442             :                             nvme_ctrlr_for_each_channel_msg fn, void *ctx,
     443             :                             nvme_ctrlr_for_each_channel_done cpl)
     444             : {
     445             :         struct nvme_ctrlr_channel_iter *iter;
     446             : 
     447         120 :         assert(nvme_ctrlr != NULL && fn != NULL);
     448             : 
     449         120 :         iter = calloc(1, sizeof(struct nvme_ctrlr_channel_iter));
     450         120 :         if (iter == NULL) {
     451           0 :                 SPDK_ERRLOG("Unable to allocate iterator\n");
     452           0 :                 assert(false);
     453             :                 return;
     454             :         }
     455             : 
     456         120 :         iter->fn = fn;
     457         120 :         iter->cpl = cpl;
     458         120 :         iter->ctx = ctx;
     459             : 
     460         120 :         spdk_for_each_channel(nvme_ctrlr, nvme_ctrlr_each_channel_msg,
     461             :                               iter, nvme_ctrlr_each_channel_cpl);
     462             : }
     463             : 
     464             : struct nvme_bdev_channel_iter {
     465             :         nvme_bdev_for_each_channel_msg fn;
     466             :         nvme_bdev_for_each_channel_done cpl;
     467             :         struct spdk_io_channel_iter *i;
     468             :         void *ctx;
     469             : };
     470             : 
     471             : void
     472          39 : nvme_bdev_for_each_channel_continue(struct nvme_bdev_channel_iter *iter, int status)
     473             : {
     474          39 :         spdk_for_each_channel_continue(iter->i, status);
     475          39 : }
     476             : 
     477             : static void
     478          39 : nvme_bdev_each_channel_msg(struct spdk_io_channel_iter *i)
     479             : {
     480          39 :         struct nvme_bdev_channel_iter *iter = spdk_io_channel_iter_get_ctx(i);
     481          39 :         struct nvme_bdev *nbdev = spdk_io_channel_iter_get_io_device(i);
     482          39 :         struct spdk_io_channel *ch = spdk_io_channel_iter_get_channel(i);
     483          39 :         struct nvme_bdev_channel *nbdev_ch = spdk_io_channel_get_ctx(ch);
     484             : 
     485          39 :         iter->i = i;
     486          39 :         iter->fn(iter, nbdev, nbdev_ch, iter->ctx);
     487          39 : }
     488             : 
     489             : static void
     490          45 : nvme_bdev_each_channel_cpl(struct spdk_io_channel_iter *i, int status)
     491             : {
     492          45 :         struct nvme_bdev_channel_iter *iter = spdk_io_channel_iter_get_ctx(i);
     493          45 :         struct nvme_bdev *nbdev = spdk_io_channel_iter_get_io_device(i);
     494             : 
     495          45 :         iter->i = i;
     496          45 :         iter->cpl(nbdev, iter->ctx, status);
     497             : 
     498          45 :         free(iter);
     499          45 : }
     500             : 
     501             : void
     502          45 : nvme_bdev_for_each_channel(struct nvme_bdev *nbdev,
     503             :                            nvme_bdev_for_each_channel_msg fn, void *ctx,
     504             :                            nvme_bdev_for_each_channel_done cpl)
     505             : {
     506             :         struct nvme_bdev_channel_iter *iter;
     507             : 
     508          45 :         assert(nbdev != NULL && fn != NULL);
     509             : 
     510          45 :         iter = calloc(1, sizeof(struct nvme_bdev_channel_iter));
     511          45 :         if (iter == NULL) {
     512           0 :                 SPDK_ERRLOG("Unable to allocate iterator\n");
     513           0 :                 assert(false);
     514             :                 return;
     515             :         }
     516             : 
     517          45 :         iter->fn = fn;
     518          45 :         iter->cpl = cpl;
     519          45 :         iter->ctx = ctx;
     520             : 
     521          45 :         spdk_for_each_channel(nbdev, nvme_bdev_each_channel_msg, iter,
     522             :                               nvme_bdev_each_channel_cpl);
     523             : }
     524             : 
     525             : void
     526           0 : nvme_bdev_dump_trid_json(const struct spdk_nvme_transport_id *trid, struct spdk_json_write_ctx *w)
     527             : {
     528             :         const char *trtype_str;
     529             :         const char *adrfam_str;
     530             : 
     531           0 :         trtype_str = spdk_nvme_transport_id_trtype_str(trid->trtype);
     532           0 :         if (trtype_str) {
     533           0 :                 spdk_json_write_named_string(w, "trtype", trtype_str);
     534             :         }
     535             : 
     536           0 :         adrfam_str = spdk_nvme_transport_id_adrfam_str(trid->adrfam);
     537           0 :         if (adrfam_str) {
     538           0 :                 spdk_json_write_named_string(w, "adrfam", adrfam_str);
     539             :         }
     540             : 
     541           0 :         if (trid->traddr[0] != '\0') {
     542           0 :                 spdk_json_write_named_string(w, "traddr", trid->traddr);
     543             :         }
     544             : 
     545           0 :         if (trid->trsvcid[0] != '\0') {
     546           0 :                 spdk_json_write_named_string(w, "trsvcid", trid->trsvcid);
     547             :         }
     548             : 
     549           0 :         if (trid->subnqn[0] != '\0') {
     550           0 :                 spdk_json_write_named_string(w, "subnqn", trid->subnqn);
     551             :         }
     552           0 : }
     553             : 
     554             : static void
     555          60 : nvme_bdev_ctrlr_delete(struct nvme_bdev_ctrlr *nbdev_ctrlr,
     556             :                        struct nvme_ctrlr *nvme_ctrlr)
     557             : {
     558             :         SPDK_DTRACE_PROBE1(bdev_nvme_ctrlr_delete, nvme_ctrlr->nbdev_ctrlr->name);
     559          60 :         pthread_mutex_lock(&g_bdev_nvme_mutex);
     560             : 
     561          60 :         TAILQ_REMOVE(&nbdev_ctrlr->ctrlrs, nvme_ctrlr, tailq);
     562          60 :         if (!TAILQ_EMPTY(&nbdev_ctrlr->ctrlrs)) {
     563          15 :                 pthread_mutex_unlock(&g_bdev_nvme_mutex);
     564             : 
     565          15 :                 return;
     566             :         }
     567          45 :         TAILQ_REMOVE(&g_nvme_bdev_ctrlrs, nbdev_ctrlr, tailq);
     568             : 
     569          45 :         pthread_mutex_unlock(&g_bdev_nvme_mutex);
     570             : 
     571          45 :         assert(TAILQ_EMPTY(&nbdev_ctrlr->bdevs));
     572             : 
     573          45 :         free(nbdev_ctrlr->name);
     574          45 :         free(nbdev_ctrlr);
     575             : }
     576             : 
     577             : static void
     578          61 : _nvme_ctrlr_delete(struct nvme_ctrlr *nvme_ctrlr)
     579             : {
     580             :         struct nvme_path_id *path_id, *tmp_path;
     581             :         struct nvme_ns *ns, *tmp_ns;
     582             : 
     583          61 :         free(nvme_ctrlr->copied_ana_desc);
     584          61 :         spdk_free(nvme_ctrlr->ana_log_page);
     585             : 
     586          61 :         if (nvme_ctrlr->opal_dev) {
     587           0 :                 spdk_opal_dev_destruct(nvme_ctrlr->opal_dev);
     588           0 :                 nvme_ctrlr->opal_dev = NULL;
     589             :         }
     590             : 
     591          61 :         if (nvme_ctrlr->nbdev_ctrlr) {
     592          60 :                 nvme_bdev_ctrlr_delete(nvme_ctrlr->nbdev_ctrlr, nvme_ctrlr);
     593             :         }
     594             : 
     595          61 :         RB_FOREACH_SAFE(ns, nvme_ns_tree, &nvme_ctrlr->namespaces, tmp_ns) {
     596           0 :                 RB_REMOVE(nvme_ns_tree, &nvme_ctrlr->namespaces, ns);
     597           0 :                 nvme_ns_free(ns);
     598             :         }
     599             : 
     600         122 :         TAILQ_FOREACH_SAFE(path_id, &nvme_ctrlr->trids, link, tmp_path) {
     601          61 :                 TAILQ_REMOVE(&nvme_ctrlr->trids, path_id, link);
     602          61 :                 free(path_id);
     603             :         }
     604             : 
     605          61 :         pthread_mutex_destroy(&nvme_ctrlr->mutex);
     606          61 :         spdk_keyring_put_key(nvme_ctrlr->psk);
     607          61 :         spdk_keyring_put_key(nvme_ctrlr->dhchap_key);
     608          61 :         spdk_keyring_put_key(nvme_ctrlr->dhchap_ctrlr_key);
     609          61 :         free(nvme_ctrlr);
     610             : 
     611          61 :         pthread_mutex_lock(&g_bdev_nvme_mutex);
     612          61 :         if (g_bdev_nvme_module_finish && TAILQ_EMPTY(&g_nvme_bdev_ctrlrs)) {
     613           0 :                 pthread_mutex_unlock(&g_bdev_nvme_mutex);
     614           0 :                 spdk_io_device_unregister(&g_nvme_bdev_ctrlrs, NULL);
     615           0 :                 spdk_bdev_module_fini_done();
     616           0 :                 return;
     617             :         }
     618          61 :         pthread_mutex_unlock(&g_bdev_nvme_mutex);
     619             : }
     620             : 
     621             : static int
     622          61 : nvme_detach_poller(void *arg)
     623             : {
     624          61 :         struct nvme_ctrlr *nvme_ctrlr = arg;
     625             :         int rc;
     626             : 
     627          61 :         rc = spdk_nvme_detach_poll_async(nvme_ctrlr->detach_ctx);
     628          61 :         if (rc != -EAGAIN) {
     629          61 :                 spdk_poller_unregister(&nvme_ctrlr->reset_detach_poller);
     630          61 :                 _nvme_ctrlr_delete(nvme_ctrlr);
     631             :         }
     632             : 
     633          61 :         return SPDK_POLLER_BUSY;
     634             : }
     635             : 
     636             : static void
     637          61 : nvme_ctrlr_delete(struct nvme_ctrlr *nvme_ctrlr)
     638             : {
     639             :         int rc;
     640             : 
     641          61 :         spdk_poller_unregister(&nvme_ctrlr->reconnect_delay_timer);
     642             : 
     643             :         /* First, unregister the adminq poller, as the driver will poll adminq if necessary */
     644          61 :         spdk_poller_unregister(&nvme_ctrlr->adminq_timer_poller);
     645             : 
     646             :         /* If we got here, the reset/detach poller cannot be active */
     647          61 :         assert(nvme_ctrlr->reset_detach_poller == NULL);
     648          61 :         nvme_ctrlr->reset_detach_poller = SPDK_POLLER_REGISTER(nvme_detach_poller,
     649             :                                           nvme_ctrlr, 1000);
     650          61 :         if (nvme_ctrlr->reset_detach_poller == NULL) {
     651           0 :                 SPDK_ERRLOG("Failed to register detach poller\n");
     652           0 :                 goto error;
     653             :         }
     654             : 
     655          61 :         rc = spdk_nvme_detach_async(nvme_ctrlr->ctrlr, &nvme_ctrlr->detach_ctx);
     656          61 :         if (rc != 0) {
     657           0 :                 SPDK_ERRLOG("Failed to detach the NVMe controller\n");
     658           0 :                 goto error;
     659             :         }
     660             : 
     661          61 :         return;
     662           0 : error:
     663             :         /* We don't have a good way to handle errors here, so just do what we can and delete the
     664             :          * controller without detaching the underlying NVMe device.
     665             :          */
     666           0 :         spdk_poller_unregister(&nvme_ctrlr->reset_detach_poller);
     667           0 :         _nvme_ctrlr_delete(nvme_ctrlr);
     668             : }
     669             : 
     670             : static void
     671          60 : nvme_ctrlr_unregister_cb(void *io_device)
     672             : {
     673          60 :         struct nvme_ctrlr *nvme_ctrlr = io_device;
     674             : 
     675          60 :         nvme_ctrlr_delete(nvme_ctrlr);
     676          60 : }
     677             : 
     678             : static void
     679          60 : nvme_ctrlr_unregister(void *ctx)
     680             : {
     681          60 :         struct nvme_ctrlr *nvme_ctrlr = ctx;
     682             : 
     683          60 :         spdk_io_device_unregister(nvme_ctrlr, nvme_ctrlr_unregister_cb);
     684          60 : }
     685             : 
     686             : static bool
     687         225 : nvme_ctrlr_can_be_unregistered(struct nvme_ctrlr *nvme_ctrlr)
     688             : {
     689         225 :         if (!nvme_ctrlr->destruct) {
     690         109 :                 return false;
     691             :         }
     692             : 
     693         116 :         if (nvme_ctrlr->ref > 0) {
     694          56 :                 return false;
     695             :         }
     696             : 
     697          60 :         if (nvme_ctrlr->resetting) {
     698           0 :                 return false;
     699             :         }
     700             : 
     701          60 :         if (nvme_ctrlr->ana_log_page_updating) {
     702           0 :                 return false;
     703             :         }
     704             : 
     705          60 :         if (nvme_ctrlr->io_path_cache_clearing) {
     706           0 :                 return false;
     707             :         }
     708             : 
     709          60 :         return true;
     710             : }
     711             : 
     712             : static void
     713         168 : nvme_ctrlr_release(struct nvme_ctrlr *nvme_ctrlr)
     714             : {
     715         168 :         pthread_mutex_lock(&nvme_ctrlr->mutex);
     716             :         SPDK_DTRACE_PROBE2(bdev_nvme_ctrlr_release, nvme_ctrlr->nbdev_ctrlr->name, nvme_ctrlr->ref);
     717             : 
     718         168 :         assert(nvme_ctrlr->ref > 0);
     719         168 :         nvme_ctrlr->ref--;
     720             : 
     721         168 :         if (!nvme_ctrlr_can_be_unregistered(nvme_ctrlr)) {
     722         108 :                 pthread_mutex_unlock(&nvme_ctrlr->mutex);
     723         108 :                 return;
     724             :         }
     725             : 
     726          60 :         pthread_mutex_unlock(&nvme_ctrlr->mutex);
     727             : 
     728          60 :         spdk_thread_exec_msg(nvme_ctrlr->thread, nvme_ctrlr_unregister, nvme_ctrlr);
     729             : }
     730             : 
     731             : static void
     732         171 : bdev_nvme_clear_current_io_path(struct nvme_bdev_channel *nbdev_ch)
     733             : {
     734         171 :         nbdev_ch->current_io_path = NULL;
     735         171 :         nbdev_ch->rr_counter = 0;
     736         171 : }
     737             : 
     738             : static struct nvme_io_path *
     739           8 : _bdev_nvme_get_io_path(struct nvme_bdev_channel *nbdev_ch, struct nvme_ns *nvme_ns)
     740             : {
     741             :         struct nvme_io_path *io_path;
     742             : 
     743          16 :         STAILQ_FOREACH(io_path, &nbdev_ch->io_path_list, stailq) {
     744          15 :                 if (io_path->nvme_ns == nvme_ns) {
     745           7 :                         break;
     746             :                 }
     747             :         }
     748             : 
     749           8 :         return io_path;
     750             : }
     751             : 
     752             : static struct nvme_io_path *
     753          37 : nvme_io_path_alloc(void)
     754             : {
     755             :         struct nvme_io_path *io_path;
     756             : 
     757          37 :         io_path = calloc(1, sizeof(*io_path));
     758          37 :         if (io_path == NULL) {
     759           0 :                 SPDK_ERRLOG("Failed to alloc io_path.\n");
     760           0 :                 return NULL;
     761             :         }
     762             : 
     763          37 :         if (g_opts.io_path_stat) {
     764           0 :                 io_path->stat = calloc(1, sizeof(struct spdk_bdev_io_stat));
     765           0 :                 if (io_path->stat == NULL) {
     766           0 :                         free(io_path);
     767           0 :                         SPDK_ERRLOG("Failed to alloc io_path stat.\n");
     768           0 :                         return NULL;
     769             :                 }
     770           0 :                 spdk_bdev_reset_io_stat(io_path->stat, SPDK_BDEV_RESET_STAT_MAXMIN);
     771             :         }
     772             : 
     773          37 :         return io_path;
     774             : }
     775             : 
     776             : static void
     777          37 : nvme_io_path_free(struct nvme_io_path *io_path)
     778             : {
     779          37 :         free(io_path->stat);
     780          37 :         free(io_path);
     781          37 : }
     782             : 
     783             : static int
     784          37 : _bdev_nvme_add_io_path(struct nvme_bdev_channel *nbdev_ch, struct nvme_ns *nvme_ns)
     785             : {
     786             :         struct nvme_io_path *io_path;
     787             :         struct spdk_io_channel *ch;
     788             :         struct nvme_ctrlr_channel *ctrlr_ch;
     789             :         struct nvme_qpair *nvme_qpair;
     790             : 
     791          37 :         io_path = nvme_io_path_alloc();
     792          37 :         if (io_path == NULL) {
     793           0 :                 return -ENOMEM;
     794             :         }
     795             : 
     796          37 :         io_path->nvme_ns = nvme_ns;
     797             : 
     798          37 :         ch = spdk_get_io_channel(nvme_ns->ctrlr);
     799          37 :         if (ch == NULL) {
     800           0 :                 nvme_io_path_free(io_path);
     801           0 :                 SPDK_ERRLOG("Failed to alloc io_channel.\n");
     802           0 :                 return -ENOMEM;
     803             :         }
     804             : 
     805          37 :         ctrlr_ch = spdk_io_channel_get_ctx(ch);
     806             : 
     807          37 :         nvme_qpair = ctrlr_ch->qpair;
     808          37 :         assert(nvme_qpair != NULL);
     809             : 
     810          37 :         io_path->qpair = nvme_qpair;
     811          37 :         TAILQ_INSERT_TAIL(&nvme_qpair->io_path_list, io_path, tailq);
     812             : 
     813          37 :         io_path->nbdev_ch = nbdev_ch;
     814          37 :         STAILQ_INSERT_TAIL(&nbdev_ch->io_path_list, io_path, stailq);
     815             : 
     816          37 :         bdev_nvme_clear_current_io_path(nbdev_ch);
     817             : 
     818          37 :         return 0;
     819             : }
     820             : 
     821             : static void
     822          37 : bdev_nvme_clear_retry_io_path(struct nvme_bdev_channel *nbdev_ch,
     823             :                               struct nvme_io_path *io_path)
     824             : {
     825             :         struct nvme_bdev_io *bio;
     826             : 
     827          38 :         TAILQ_FOREACH(bio, &nbdev_ch->retry_io_list, retry_link) {
     828           1 :                 if (bio->io_path == io_path) {
     829           1 :                         bio->io_path = NULL;
     830             :                 }
     831             :         }
     832          37 : }
     833             : 
     834             : static void
     835          37 : _bdev_nvme_delete_io_path(struct nvme_bdev_channel *nbdev_ch, struct nvme_io_path *io_path)
     836             : {
     837             :         struct spdk_io_channel *ch;
     838             :         struct nvme_qpair *nvme_qpair;
     839             :         struct nvme_ctrlr_channel *ctrlr_ch;
     840             :         struct nvme_bdev *nbdev;
     841             : 
     842          37 :         nbdev = spdk_io_channel_get_io_device(spdk_io_channel_from_ctx(nbdev_ch));
     843             : 
     844             :         /* Add the statistics to nvme_ns before this path is destroyed. */
     845          37 :         pthread_mutex_lock(&nbdev->mutex);
     846          37 :         if (nbdev->ref != 0 && io_path->nvme_ns->stat != NULL && io_path->stat != NULL) {
     847           0 :                 spdk_bdev_add_io_stat(io_path->nvme_ns->stat, io_path->stat);
     848             :         }
     849          37 :         pthread_mutex_unlock(&nbdev->mutex);
     850             : 
     851          37 :         bdev_nvme_clear_current_io_path(nbdev_ch);
     852          37 :         bdev_nvme_clear_retry_io_path(nbdev_ch, io_path);
     853             : 
     854          37 :         STAILQ_REMOVE(&nbdev_ch->io_path_list, io_path, nvme_io_path, stailq);
     855          37 :         io_path->nbdev_ch = NULL;
     856             : 
     857          37 :         nvme_qpair = io_path->qpair;
     858          37 :         assert(nvme_qpair != NULL);
     859             : 
     860          37 :         ctrlr_ch = nvme_qpair->ctrlr_ch;
     861          37 :         assert(ctrlr_ch != NULL);
     862             : 
     863          37 :         ch = spdk_io_channel_from_ctx(ctrlr_ch);
     864          37 :         spdk_put_io_channel(ch);
     865             : 
     866             :         /* After an io_path is removed, I/Os submitted to it may complete and update statistics
     867             :          * of the io_path. To avoid heap-use-after-free error from this case, do not free the
     868             :          * io_path here but free the io_path when the associated qpair is freed. It is ensured
     869             :          * that all I/Os submitted to the io_path are completed when the associated qpair is freed.
     870             :          */
     871          37 : }
     872             : 
     873             : static void
     874          24 : _bdev_nvme_delete_io_paths(struct nvme_bdev_channel *nbdev_ch)
     875             : {
     876             :         struct nvme_io_path *io_path, *tmp_io_path;
     877             : 
     878          59 :         STAILQ_FOREACH_SAFE(io_path, &nbdev_ch->io_path_list, stailq, tmp_io_path) {
     879          35 :                 _bdev_nvme_delete_io_path(nbdev_ch, io_path);
     880             :         }
     881          24 : }
     882             : 
     883             : static int
     884          24 : bdev_nvme_create_bdev_channel_cb(void *io_device, void *ctx_buf)
     885             : {
     886          24 :         struct nvme_bdev_channel *nbdev_ch = ctx_buf;
     887          24 :         struct nvme_bdev *nbdev = io_device;
     888             :         struct nvme_ns *nvme_ns;
     889             :         int rc;
     890             : 
     891          24 :         STAILQ_INIT(&nbdev_ch->io_path_list);
     892          24 :         TAILQ_INIT(&nbdev_ch->retry_io_list);
     893             : 
     894          24 :         pthread_mutex_lock(&nbdev->mutex);
     895             : 
     896          24 :         nbdev_ch->mp_policy = nbdev->mp_policy;
     897          24 :         nbdev_ch->mp_selector = nbdev->mp_selector;
     898          24 :         nbdev_ch->rr_min_io = nbdev->rr_min_io;
     899             : 
     900          59 :         TAILQ_FOREACH(nvme_ns, &nbdev->nvme_ns_list, tailq) {
     901          35 :                 rc = _bdev_nvme_add_io_path(nbdev_ch, nvme_ns);
     902          35 :                 if (rc != 0) {
     903           0 :                         pthread_mutex_unlock(&nbdev->mutex);
     904             : 
     905           0 :                         _bdev_nvme_delete_io_paths(nbdev_ch);
     906           0 :                         return rc;
     907             :                 }
     908             :         }
     909          24 :         pthread_mutex_unlock(&nbdev->mutex);
     910             : 
     911          24 :         return 0;
     912             : }
     913             : 
     914             : /* If cpl != NULL, complete the bdev_io with nvme status based on 'cpl'.
     915             :  * If cpl == NULL, complete the bdev_io with bdev status based on 'status'.
     916             :  */
     917             : static inline void
     918          50 : __bdev_nvme_io_complete(struct spdk_bdev_io *bdev_io, enum spdk_bdev_io_status status,
     919             :                         const struct spdk_nvme_cpl *cpl)
     920             : {
     921          50 :         spdk_trace_record(TRACE_BDEV_NVME_IO_DONE, 0, 0, (uintptr_t)bdev_io->driver_ctx,
     922             :                           (uintptr_t)bdev_io);
     923          50 :         if (cpl) {
     924          29 :                 spdk_bdev_io_complete_nvme_status(bdev_io, cpl->cdw0, cpl->status.sct, cpl->status.sc);
     925             :         } else {
     926          21 :                 spdk_bdev_io_complete(bdev_io, status);
     927             :         }
     928          50 : }
     929             : 
     930             : static void bdev_nvme_abort_retry_ios(struct nvme_bdev_channel *nbdev_ch);
     931             : 
     932             : static void
     933          24 : bdev_nvme_destroy_bdev_channel_cb(void *io_device, void *ctx_buf)
     934             : {
     935          24 :         struct nvme_bdev_channel *nbdev_ch = ctx_buf;
     936             : 
     937          24 :         bdev_nvme_abort_retry_ios(nbdev_ch);
     938          24 :         _bdev_nvme_delete_io_paths(nbdev_ch);
     939          24 : }
     940             : 
     941             : static inline bool
     942          62 : bdev_nvme_io_type_is_admin(enum spdk_bdev_io_type io_type)
     943             : {
     944          62 :         switch (io_type) {
     945           5 :         case SPDK_BDEV_IO_TYPE_RESET:
     946             :         case SPDK_BDEV_IO_TYPE_NVME_ADMIN:
     947             :         case SPDK_BDEV_IO_TYPE_ABORT:
     948           5 :                 return true;
     949          57 :         default:
     950          57 :                 break;
     951             :         }
     952             : 
     953          57 :         return false;
     954             : }
     955             : 
     956             : static inline bool
     957          91 : nvme_ns_is_active(struct nvme_ns *nvme_ns)
     958             : {
     959          91 :         if (spdk_unlikely(nvme_ns->ana_state_updating)) {
     960           1 :                 return false;
     961             :         }
     962             : 
     963          90 :         if (spdk_unlikely(nvme_ns->ns == NULL)) {
     964           0 :                 return false;
     965             :         }
     966             : 
     967          90 :         return true;
     968             : }
     969             : 
     970             : static inline bool
     971          79 : nvme_ns_is_accessible(struct nvme_ns *nvme_ns)
     972             : {
     973          79 :         if (spdk_unlikely(!nvme_ns_is_active(nvme_ns))) {
     974           1 :                 return false;
     975             :         }
     976             : 
     977          78 :         switch (nvme_ns->ana_state) {
     978          69 :         case SPDK_NVME_ANA_OPTIMIZED_STATE:
     979             :         case SPDK_NVME_ANA_NON_OPTIMIZED_STATE:
     980          69 :                 return true;
     981           9 :         default:
     982           9 :                 break;
     983             :         }
     984             : 
     985           9 :         return false;
     986             : }
     987             : 
     988             : static inline bool
     989         121 : nvme_qpair_is_connected(struct nvme_qpair *nvme_qpair)
     990             : {
     991         121 :         if (spdk_unlikely(nvme_qpair->qpair == NULL)) {
     992          23 :                 return false;
     993             :         }
     994             : 
     995          98 :         if (spdk_unlikely(spdk_nvme_qpair_get_failure_reason(nvme_qpair->qpair) !=
     996             :                           SPDK_NVME_QPAIR_FAILURE_NONE)) {
     997           2 :                 return false;
     998             :         }
     999             : 
    1000          96 :         if (spdk_unlikely(nvme_qpair->ctrlr_ch->reset_iter != NULL)) {
    1001           0 :                 return false;
    1002             :         }
    1003             : 
    1004          96 :         return true;
    1005             : }
    1006             : 
    1007             : static inline bool
    1008          95 : nvme_io_path_is_available(struct nvme_io_path *io_path)
    1009             : {
    1010          95 :         if (spdk_unlikely(!nvme_qpair_is_connected(io_path->qpair))) {
    1011          16 :                 return false;
    1012             :         }
    1013             : 
    1014          79 :         if (spdk_unlikely(!nvme_ns_is_accessible(io_path->nvme_ns))) {
    1015          10 :                 return false;
    1016             :         }
    1017             : 
    1018          69 :         return true;
    1019             : }
    1020             : 
    1021             : static inline bool
    1022           9 : nvme_ctrlr_is_failed(struct nvme_ctrlr *nvme_ctrlr)
    1023             : {
    1024           9 :         if (nvme_ctrlr->destruct) {
    1025           0 :                 return true;
    1026             :         }
    1027             : 
    1028           9 :         if (nvme_ctrlr->fast_io_fail_timedout) {
    1029           2 :                 return true;
    1030             :         }
    1031             : 
    1032           7 :         if (nvme_ctrlr->resetting) {
    1033           5 :                 if (nvme_ctrlr->opts.reconnect_delay_sec != 0) {
    1034           5 :                         return false;
    1035             :                 } else {
    1036           0 :                         return true;
    1037             :                 }
    1038             :         }
    1039             : 
    1040           2 :         if (nvme_ctrlr->reconnect_is_delayed) {
    1041           2 :                 return false;
    1042             :         }
    1043             : 
    1044           0 :         if (nvme_ctrlr->disabled) {
    1045           0 :                 return true;
    1046             :         }
    1047             : 
    1048           0 :         if (spdk_nvme_ctrlr_is_failed(nvme_ctrlr->ctrlr)) {
    1049           0 :                 return true;
    1050             :         } else {
    1051           0 :                 return false;
    1052             :         }
    1053             : }
    1054             : 
    1055             : static bool
    1056          20 : nvme_ctrlr_is_available(struct nvme_ctrlr *nvme_ctrlr)
    1057             : {
    1058          20 :         if (nvme_ctrlr->destruct) {
    1059           0 :                 return false;
    1060             :         }
    1061             : 
    1062          20 :         if (spdk_nvme_ctrlr_is_failed(nvme_ctrlr->ctrlr)) {
    1063           3 :                 return false;
    1064             :         }
    1065             : 
    1066          17 :         if (nvme_ctrlr->resetting || nvme_ctrlr->reconnect_is_delayed) {
    1067           1 :                 return false;
    1068             :         }
    1069             : 
    1070          16 :         if (nvme_ctrlr->disabled) {
    1071           0 :                 return false;
    1072             :         }
    1073             : 
    1074          16 :         return true;
    1075             : }
    1076             : 
    1077             : /* Simulate circular linked list. */
    1078             : static inline struct nvme_io_path *
    1079          92 : nvme_io_path_get_next(struct nvme_bdev_channel *nbdev_ch, struct nvme_io_path *prev_path)
    1080             : {
    1081             :         struct nvme_io_path *next_path;
    1082             : 
    1083          92 :         if (prev_path != NULL) {
    1084          39 :                 next_path = STAILQ_NEXT(prev_path, stailq);
    1085          39 :                 if (next_path != NULL) {
    1086          14 :                         return next_path;
    1087             :                 }
    1088             :         }
    1089             : 
    1090          78 :         return STAILQ_FIRST(&nbdev_ch->io_path_list);
    1091             : }
    1092             : 
    1093             : static struct nvme_io_path *
    1094          60 : _bdev_nvme_find_io_path(struct nvme_bdev_channel *nbdev_ch)
    1095             : {
    1096          60 :         struct nvme_io_path *io_path, *start, *non_optimized = NULL;
    1097             : 
    1098          60 :         start = nvme_io_path_get_next(nbdev_ch, nbdev_ch->current_io_path);
    1099             : 
    1100          60 :         io_path = start;
    1101             :         do {
    1102          72 :                 if (spdk_likely(nvme_io_path_is_available(io_path))) {
    1103          50 :                         switch (io_path->nvme_ns->ana_state) {
    1104          40 :                         case SPDK_NVME_ANA_OPTIMIZED_STATE:
    1105          40 :                                 nbdev_ch->current_io_path = io_path;
    1106          40 :                                 return io_path;
    1107          10 :                         case SPDK_NVME_ANA_NON_OPTIMIZED_STATE:
    1108          10 :                                 if (non_optimized == NULL) {
    1109           7 :                                         non_optimized = io_path;
    1110             :                                 }
    1111          10 :                                 break;
    1112           0 :                         default:
    1113           0 :                                 assert(false);
    1114             :                                 break;
    1115             :                         }
    1116             :                 }
    1117          32 :                 io_path = nvme_io_path_get_next(nbdev_ch, io_path);
    1118          32 :         } while (io_path != start);
    1119             : 
    1120          20 :         if (nbdev_ch->mp_policy == BDEV_NVME_MP_POLICY_ACTIVE_ACTIVE) {
    1121             :                 /* We come here only if there is no optimized path. Cache even non_optimized
    1122             :                  * path for load balance across multiple non_optimized paths.
    1123             :                  */
    1124           1 :                 nbdev_ch->current_io_path = non_optimized;
    1125             :         }
    1126             : 
    1127          20 :         return non_optimized;
    1128             : }
    1129             : 
    1130             : static struct nvme_io_path *
    1131           4 : _bdev_nvme_find_io_path_min_qd(struct nvme_bdev_channel *nbdev_ch)
    1132             : {
    1133             :         struct nvme_io_path *io_path;
    1134           4 :         struct nvme_io_path *optimized = NULL, *non_optimized = NULL;
    1135           4 :         uint32_t opt_min_qd = UINT32_MAX, non_opt_min_qd = UINT32_MAX;
    1136             :         uint32_t num_outstanding_reqs;
    1137             : 
    1138          16 :         STAILQ_FOREACH(io_path, &nbdev_ch->io_path_list, stailq) {
    1139          12 :                 if (spdk_unlikely(!nvme_qpair_is_connected(io_path->qpair))) {
    1140             :                         /* The device is currently resetting. */
    1141           0 :                         continue;
    1142             :                 }
    1143             : 
    1144          12 :                 if (spdk_unlikely(!nvme_ns_is_active(io_path->nvme_ns))) {
    1145           0 :                         continue;
    1146             :                 }
    1147             : 
    1148          12 :                 num_outstanding_reqs = spdk_nvme_qpair_get_num_outstanding_reqs(io_path->qpair->qpair);
    1149          12 :                 switch (io_path->nvme_ns->ana_state) {
    1150           6 :                 case SPDK_NVME_ANA_OPTIMIZED_STATE:
    1151           6 :                         if (num_outstanding_reqs < opt_min_qd) {
    1152           5 :                                 opt_min_qd = num_outstanding_reqs;
    1153           5 :                                 optimized = io_path;
    1154             :                         }
    1155           6 :                         break;
    1156           3 :                 case SPDK_NVME_ANA_NON_OPTIMIZED_STATE:
    1157           3 :                         if (num_outstanding_reqs < non_opt_min_qd) {
    1158           3 :                                 non_opt_min_qd = num_outstanding_reqs;
    1159           3 :                                 non_optimized = io_path;
    1160             :                         }
    1161           3 :                         break;
    1162           3 :                 default:
    1163           3 :                         break;
    1164             :                 }
    1165             :         }
    1166             : 
    1167             :         /* don't cache io path for BDEV_NVME_MP_SELECTOR_QUEUE_DEPTH selector */
    1168           4 :         if (optimized != NULL) {
    1169           3 :                 return optimized;
    1170             :         }
    1171             : 
    1172           1 :         return non_optimized;
    1173             : }
    1174             : 
    1175             : static inline struct nvme_io_path *
    1176          98 : bdev_nvme_find_io_path(struct nvme_bdev_channel *nbdev_ch)
    1177             : {
    1178          98 :         if (spdk_likely(nbdev_ch->current_io_path != NULL)) {
    1179          41 :                 if (nbdev_ch->mp_policy == BDEV_NVME_MP_POLICY_ACTIVE_PASSIVE) {
    1180          31 :                         return nbdev_ch->current_io_path;
    1181          10 :                 } else if (nbdev_ch->mp_selector == BDEV_NVME_MP_SELECTOR_ROUND_ROBIN) {
    1182          10 :                         if (++nbdev_ch->rr_counter < nbdev_ch->rr_min_io) {
    1183           3 :                                 return nbdev_ch->current_io_path;
    1184             :                         }
    1185           7 :                         nbdev_ch->rr_counter = 0;
    1186             :                 }
    1187             :         }
    1188             : 
    1189          64 :         if (nbdev_ch->mp_policy == BDEV_NVME_MP_POLICY_ACTIVE_PASSIVE ||
    1190          14 :             nbdev_ch->mp_selector == BDEV_NVME_MP_SELECTOR_ROUND_ROBIN) {
    1191          60 :                 return _bdev_nvme_find_io_path(nbdev_ch);
    1192             :         } else {
    1193           4 :                 return _bdev_nvme_find_io_path_min_qd(nbdev_ch);
    1194             :         }
    1195             : }
    1196             : 
    1197             : /* Return true if there is any io_path whose qpair is active or ctrlr is not failed,
    1198             :  * or false otherwise.
    1199             :  *
    1200             :  * If any io_path has an active qpair but find_io_path() returned NULL, its namespace
    1201             :  * is likely to be non-accessible now but may become accessible.
    1202             :  *
    1203             :  * If any io_path has an unfailed ctrlr but find_io_path() returned NULL, the ctrlr
    1204             :  * is likely to be resetting now but the reset may succeed. A ctrlr is set to unfailed
    1205             :  * when starting to reset it but it is set to failed when the reset failed. Hence, if
    1206             :  * a ctrlr is unfailed, it is likely that it works fine or is resetting.
    1207             :  */
    1208             : static bool
    1209          15 : any_io_path_may_become_available(struct nvme_bdev_channel *nbdev_ch)
    1210             : {
    1211             :         struct nvme_io_path *io_path;
    1212             : 
    1213          15 :         if (nbdev_ch->resetting) {
    1214           1 :                 return false;
    1215             :         }
    1216             : 
    1217          16 :         STAILQ_FOREACH(io_path, &nbdev_ch->io_path_list, stailq) {
    1218          14 :                 if (io_path->nvme_ns->ana_transition_timedout) {
    1219           0 :                         continue;
    1220             :                 }
    1221             : 
    1222          14 :                 if (nvme_qpair_is_connected(io_path->qpair) ||
    1223           9 :                     !nvme_ctrlr_is_failed(io_path->qpair->ctrlr)) {
    1224          12 :                         return true;
    1225             :                 }
    1226             :         }
    1227             : 
    1228           2 :         return false;
    1229             : }
    1230             : 
    1231             : static void
    1232          14 : bdev_nvme_retry_io(struct nvme_bdev_channel *nbdev_ch, struct spdk_bdev_io *bdev_io)
    1233             : {
    1234          14 :         struct nvme_bdev_io *nbdev_io = (struct nvme_bdev_io *)bdev_io->driver_ctx;
    1235             :         struct spdk_io_channel *ch;
    1236             : 
    1237          14 :         if (nbdev_io->io_path != NULL && nvme_io_path_is_available(nbdev_io->io_path)) {
    1238           3 :                 _bdev_nvme_submit_request(nbdev_ch, bdev_io);
    1239             :         } else {
    1240          11 :                 ch = spdk_io_channel_from_ctx(nbdev_ch);
    1241          11 :                 bdev_nvme_submit_request(ch, bdev_io);
    1242             :         }
    1243          14 : }
    1244             : 
    1245             : static int
    1246          14 : bdev_nvme_retry_ios(void *arg)
    1247             : {
    1248          14 :         struct nvme_bdev_channel *nbdev_ch = arg;
    1249             :         struct nvme_bdev_io *bio, *tmp_bio;
    1250             :         uint64_t now, delay_us;
    1251             : 
    1252          14 :         now = spdk_get_ticks();
    1253             : 
    1254          28 :         TAILQ_FOREACH_SAFE(bio, &nbdev_ch->retry_io_list, retry_link, tmp_bio) {
    1255          15 :                 if (bio->retry_ticks > now) {
    1256           1 :                         break;
    1257             :                 }
    1258             : 
    1259          14 :                 TAILQ_REMOVE(&nbdev_ch->retry_io_list, bio, retry_link);
    1260             : 
    1261          14 :                 bdev_nvme_retry_io(nbdev_ch, spdk_bdev_io_from_ctx(bio));
    1262             :         }
    1263             : 
    1264          14 :         spdk_poller_unregister(&nbdev_ch->retry_io_poller);
    1265             : 
    1266          14 :         bio = TAILQ_FIRST(&nbdev_ch->retry_io_list);
    1267          14 :         if (bio != NULL) {
    1268           4 :                 delay_us = (bio->retry_ticks - now) * SPDK_SEC_TO_USEC / spdk_get_ticks_hz();
    1269             : 
    1270           4 :                 nbdev_ch->retry_io_poller = SPDK_POLLER_REGISTER(bdev_nvme_retry_ios, nbdev_ch,
    1271             :                                             delay_us);
    1272             :         }
    1273             : 
    1274          14 :         return SPDK_POLLER_BUSY;
    1275             : }
    1276             : 
    1277             : static void
    1278          16 : bdev_nvme_queue_retry_io(struct nvme_bdev_channel *nbdev_ch,
    1279             :                          struct nvme_bdev_io *bio, uint64_t delay_ms)
    1280             : {
    1281             :         struct nvme_bdev_io *tmp_bio;
    1282             : 
    1283          16 :         bio->retry_ticks = spdk_get_ticks() + delay_ms * spdk_get_ticks_hz() / 1000ULL;
    1284             : 
    1285          16 :         TAILQ_FOREACH_REVERSE(tmp_bio, &nbdev_ch->retry_io_list, retry_io_head, retry_link) {
    1286           1 :                 if (tmp_bio->retry_ticks <= bio->retry_ticks) {
    1287           1 :                         TAILQ_INSERT_AFTER(&nbdev_ch->retry_io_list, tmp_bio, bio,
    1288             :                                            retry_link);
    1289           1 :                         return;
    1290             :                 }
    1291             :         }
    1292             : 
    1293             :         /* No earlier I/Os were found. This I/O must be the new head. */
    1294          15 :         TAILQ_INSERT_HEAD(&nbdev_ch->retry_io_list, bio, retry_link);
    1295             : 
    1296          15 :         spdk_poller_unregister(&nbdev_ch->retry_io_poller);
    1297             : 
    1298          15 :         nbdev_ch->retry_io_poller = SPDK_POLLER_REGISTER(bdev_nvme_retry_ios, nbdev_ch,
    1299             :                                     delay_ms * 1000ULL);
    1300             : }
    1301             : 
    1302             : static void
    1303          40 : bdev_nvme_abort_retry_ios(struct nvme_bdev_channel *nbdev_ch)
    1304             : {
    1305             :         struct nvme_bdev_io *bio, *tmp_bio;
    1306             : 
    1307          41 :         TAILQ_FOREACH_SAFE(bio, &nbdev_ch->retry_io_list, retry_link, tmp_bio) {
    1308           1 :                 TAILQ_REMOVE(&nbdev_ch->retry_io_list, bio, retry_link);
    1309           1 :                 __bdev_nvme_io_complete(spdk_bdev_io_from_ctx(bio), SPDK_BDEV_IO_STATUS_ABORTED, NULL);
    1310             :         }
    1311             : 
    1312          40 :         spdk_poller_unregister(&nbdev_ch->retry_io_poller);
    1313          40 : }
    1314             : 
    1315             : static int
    1316           6 : bdev_nvme_abort_retry_io(struct nvme_bdev_channel *nbdev_ch,
    1317             :                          struct nvme_bdev_io *bio_to_abort)
    1318             : {
    1319             :         struct nvme_bdev_io *bio;
    1320             : 
    1321           6 :         TAILQ_FOREACH(bio, &nbdev_ch->retry_io_list, retry_link) {
    1322           1 :                 if (bio == bio_to_abort) {
    1323           1 :                         TAILQ_REMOVE(&nbdev_ch->retry_io_list, bio, retry_link);
    1324           1 :                         __bdev_nvme_io_complete(spdk_bdev_io_from_ctx(bio), SPDK_BDEV_IO_STATUS_ABORTED, NULL);
    1325           1 :                         return 0;
    1326             :                 }
    1327             :         }
    1328             : 
    1329           5 :         return -ENOENT;
    1330             : }
    1331             : 
    1332             : static void
    1333          12 : bdev_nvme_update_nvme_error_stat(struct spdk_bdev_io *bdev_io, const struct spdk_nvme_cpl *cpl)
    1334             : {
    1335             :         struct nvme_bdev *nbdev;
    1336             :         uint16_t sct, sc;
    1337             : 
    1338          12 :         assert(spdk_nvme_cpl_is_error(cpl));
    1339             : 
    1340          12 :         nbdev = bdev_io->bdev->ctxt;
    1341             : 
    1342          12 :         if (nbdev->err_stat == NULL) {
    1343          12 :                 return;
    1344             :         }
    1345             : 
    1346           0 :         sct = cpl->status.sct;
    1347           0 :         sc = cpl->status.sc;
    1348             : 
    1349           0 :         pthread_mutex_lock(&nbdev->mutex);
    1350             : 
    1351           0 :         nbdev->err_stat->status_type[sct]++;
    1352           0 :         switch (sct) {
    1353           0 :         case SPDK_NVME_SCT_GENERIC:
    1354             :         case SPDK_NVME_SCT_COMMAND_SPECIFIC:
    1355             :         case SPDK_NVME_SCT_MEDIA_ERROR:
    1356             :         case SPDK_NVME_SCT_PATH:
    1357           0 :                 nbdev->err_stat->status[sct][sc]++;
    1358           0 :                 break;
    1359           0 :         default:
    1360           0 :                 break;
    1361             :         }
    1362             : 
    1363           0 :         pthread_mutex_unlock(&nbdev->mutex);
    1364             : }
    1365             : 
    1366             : static inline void
    1367          20 : bdev_nvme_update_io_path_stat(struct nvme_bdev_io *bio)
    1368             : {
    1369          20 :         struct spdk_bdev_io *bdev_io = spdk_bdev_io_from_ctx(bio);
    1370          20 :         uint64_t num_blocks = bdev_io->u.bdev.num_blocks;
    1371          20 :         uint32_t blocklen = bdev_io->bdev->blocklen;
    1372             :         struct spdk_bdev_io_stat *stat;
    1373             :         uint64_t tsc_diff;
    1374             : 
    1375          20 :         if (bio->io_path->stat == NULL) {
    1376          20 :                 return;
    1377             :         }
    1378             : 
    1379           0 :         tsc_diff = spdk_get_ticks() - bio->submit_tsc;
    1380           0 :         stat = bio->io_path->stat;
    1381             : 
    1382           0 :         switch (bdev_io->type) {
    1383           0 :         case SPDK_BDEV_IO_TYPE_READ:
    1384           0 :                 stat->bytes_read += num_blocks * blocklen;
    1385           0 :                 stat->num_read_ops++;
    1386           0 :                 stat->read_latency_ticks += tsc_diff;
    1387           0 :                 if (stat->max_read_latency_ticks < tsc_diff) {
    1388           0 :                         stat->max_read_latency_ticks = tsc_diff;
    1389             :                 }
    1390           0 :                 if (stat->min_read_latency_ticks > tsc_diff) {
    1391           0 :                         stat->min_read_latency_ticks = tsc_diff;
    1392             :                 }
    1393           0 :                 break;
    1394           0 :         case SPDK_BDEV_IO_TYPE_WRITE:
    1395           0 :                 stat->bytes_written += num_blocks * blocklen;
    1396           0 :                 stat->num_write_ops++;
    1397           0 :                 stat->write_latency_ticks += tsc_diff;
    1398           0 :                 if (stat->max_write_latency_ticks < tsc_diff) {
    1399           0 :                         stat->max_write_latency_ticks = tsc_diff;
    1400             :                 }
    1401           0 :                 if (stat->min_write_latency_ticks > tsc_diff) {
    1402           0 :                         stat->min_write_latency_ticks = tsc_diff;
    1403             :                 }
    1404           0 :                 break;
    1405           0 :         case SPDK_BDEV_IO_TYPE_UNMAP:
    1406           0 :                 stat->bytes_unmapped += num_blocks * blocklen;
    1407           0 :                 stat->num_unmap_ops++;
    1408           0 :                 stat->unmap_latency_ticks += tsc_diff;
    1409           0 :                 if (stat->max_unmap_latency_ticks < tsc_diff) {
    1410           0 :                         stat->max_unmap_latency_ticks = tsc_diff;
    1411             :                 }
    1412           0 :                 if (stat->min_unmap_latency_ticks > tsc_diff) {
    1413           0 :                         stat->min_unmap_latency_ticks = tsc_diff;
    1414             :                 }
    1415           0 :                 break;
    1416           0 :         case SPDK_BDEV_IO_TYPE_ZCOPY:
    1417             :                 /* Track the data in the start phase only */
    1418           0 :                 if (!bdev_io->u.bdev.zcopy.start) {
    1419           0 :                         break;
    1420             :                 }
    1421           0 :                 if (bdev_io->u.bdev.zcopy.populate) {
    1422           0 :                         stat->bytes_read += num_blocks * blocklen;
    1423           0 :                         stat->num_read_ops++;
    1424           0 :                         stat->read_latency_ticks += tsc_diff;
    1425           0 :                         if (stat->max_read_latency_ticks < tsc_diff) {
    1426           0 :                                 stat->max_read_latency_ticks = tsc_diff;
    1427             :                         }
    1428           0 :                         if (stat->min_read_latency_ticks > tsc_diff) {
    1429           0 :                                 stat->min_read_latency_ticks = tsc_diff;
    1430             :                         }
    1431             :                 } else {
    1432           0 :                         stat->bytes_written += num_blocks * blocklen;
    1433           0 :                         stat->num_write_ops++;
    1434           0 :                         stat->write_latency_ticks += tsc_diff;
    1435           0 :                         if (stat->max_write_latency_ticks < tsc_diff) {
    1436           0 :                                 stat->max_write_latency_ticks = tsc_diff;
    1437             :                         }
    1438           0 :                         if (stat->min_write_latency_ticks > tsc_diff) {
    1439           0 :                                 stat->min_write_latency_ticks = tsc_diff;
    1440             :                         }
    1441             :                 }
    1442           0 :                 break;
    1443           0 :         case SPDK_BDEV_IO_TYPE_COPY:
    1444           0 :                 stat->bytes_copied += num_blocks * blocklen;
    1445           0 :                 stat->num_copy_ops++;
    1446           0 :                 stat->copy_latency_ticks += tsc_diff;
    1447           0 :                 if (stat->max_copy_latency_ticks < tsc_diff) {
    1448           0 :                         stat->max_copy_latency_ticks = tsc_diff;
    1449             :                 }
    1450           0 :                 if (stat->min_copy_latency_ticks > tsc_diff) {
    1451           0 :                         stat->min_copy_latency_ticks = tsc_diff;
    1452             :                 }
    1453           0 :                 break;
    1454           0 :         default:
    1455           0 :                 break;
    1456             :         }
    1457             : }
    1458             : 
    1459             : static bool
    1460           7 : bdev_nvme_check_retry_io(struct nvme_bdev_io *bio,
    1461             :                          const struct spdk_nvme_cpl *cpl,
    1462             :                          struct nvme_bdev_channel *nbdev_ch,
    1463             :                          uint64_t *_delay_ms)
    1464             : {
    1465           7 :         struct nvme_io_path *io_path = bio->io_path;
    1466           7 :         struct nvme_ctrlr *nvme_ctrlr = io_path->qpair->ctrlr;
    1467             :         const struct spdk_nvme_ctrlr_data *cdata;
    1468             : 
    1469           7 :         if (spdk_nvme_cpl_is_path_error(cpl) ||
    1470           5 :             spdk_nvme_cpl_is_aborted_sq_deletion(cpl) ||
    1471           4 :             !nvme_io_path_is_available(io_path) ||
    1472           4 :             !nvme_ctrlr_is_available(nvme_ctrlr)) {
    1473           3 :                 bdev_nvme_clear_current_io_path(nbdev_ch);
    1474           3 :                 bio->io_path = NULL;
    1475           3 :                 if (spdk_nvme_cpl_is_ana_error(cpl)) {
    1476           1 :                         if (nvme_ctrlr_read_ana_log_page(nvme_ctrlr) == 0) {
    1477           1 :                                 io_path->nvme_ns->ana_state_updating = true;
    1478             :                         }
    1479             :                 }
    1480           3 :                 if (!any_io_path_may_become_available(nbdev_ch)) {
    1481           0 :                         return false;
    1482             :                 }
    1483           3 :                 *_delay_ms = 0;
    1484             :         } else {
    1485           4 :                 bio->retry_count++;
    1486             : 
    1487           4 :                 cdata = spdk_nvme_ctrlr_get_data(nvme_ctrlr->ctrlr);
    1488             : 
    1489           4 :                 if (cpl->status.crd != 0) {
    1490           1 :                         *_delay_ms = cdata->crdt[cpl->status.crd] * 100;
    1491             :                 } else {
    1492           3 :                         *_delay_ms = 0;
    1493             :                 }
    1494             :         }
    1495             : 
    1496           7 :         return true;
    1497             : }
    1498             : 
    1499             : static inline void
    1500          32 : bdev_nvme_io_complete_nvme_status(struct nvme_bdev_io *bio,
    1501             :                                   const struct spdk_nvme_cpl *cpl)
    1502             : {
    1503          32 :         struct spdk_bdev_io *bdev_io = spdk_bdev_io_from_ctx(bio);
    1504             :         struct nvme_bdev_channel *nbdev_ch;
    1505          32 :         uint64_t delay_ms;
    1506             : 
    1507          32 :         assert(!bdev_nvme_io_type_is_admin(bdev_io->type));
    1508             : 
    1509          32 :         if (spdk_likely(spdk_nvme_cpl_is_success(cpl))) {
    1510          20 :                 bdev_nvme_update_io_path_stat(bio);
    1511          20 :                 goto complete;
    1512             :         }
    1513             : 
    1514             :         /* Update error counts before deciding if retry is needed.
    1515             :          * Hence, error counts may be more than the number of I/O errors.
    1516             :          */
    1517          12 :         bdev_nvme_update_nvme_error_stat(bdev_io, cpl);
    1518             : 
    1519          12 :         if (cpl->status.dnr != 0 || spdk_nvme_cpl_is_aborted_by_request(cpl) ||
    1520           8 :             (g_opts.bdev_retry_count != -1 && bio->retry_count >= g_opts.bdev_retry_count)) {
    1521           5 :                 goto complete;
    1522             :         }
    1523             : 
    1524             :         /* At this point we don't know whether the sequence was successfully executed or not, so we
    1525             :          * cannot retry the IO */
    1526           7 :         if (bdev_io->u.bdev.accel_sequence != NULL) {
    1527           0 :                 goto complete;
    1528             :         }
    1529             : 
    1530           7 :         nbdev_ch = spdk_io_channel_get_ctx(spdk_bdev_io_get_io_channel(bdev_io));
    1531             : 
    1532           7 :         if (bdev_nvme_check_retry_io(bio, cpl, nbdev_ch, &delay_ms)) {
    1533           7 :                 bdev_nvme_queue_retry_io(nbdev_ch, bio, delay_ms);
    1534           7 :                 return;
    1535             :         }
    1536             : 
    1537          25 : complete:
    1538          25 :         bio->retry_count = 0;
    1539          25 :         bio->submit_tsc = 0;
    1540          25 :         bdev_io->u.bdev.accel_sequence = NULL;
    1541          25 :         __bdev_nvme_io_complete(bdev_io, 0, cpl);
    1542             : }
    1543             : 
    1544             : static inline void
    1545          13 : bdev_nvme_io_complete(struct nvme_bdev_io *bio, int rc)
    1546             : {
    1547          13 :         struct spdk_bdev_io *bdev_io = spdk_bdev_io_from_ctx(bio);
    1548             :         struct nvme_bdev_channel *nbdev_ch;
    1549             :         enum spdk_bdev_io_status io_status;
    1550             : 
    1551          13 :         assert(!bdev_nvme_io_type_is_admin(bdev_io->type));
    1552             : 
    1553          13 :         switch (rc) {
    1554           1 :         case 0:
    1555           1 :                 io_status = SPDK_BDEV_IO_STATUS_SUCCESS;
    1556           1 :                 break;
    1557           0 :         case -ENOMEM:
    1558           0 :                 io_status = SPDK_BDEV_IO_STATUS_NOMEM;
    1559           0 :                 break;
    1560          12 :         case -ENXIO:
    1561          12 :                 if (g_opts.bdev_retry_count == -1 || bio->retry_count < g_opts.bdev_retry_count) {
    1562          12 :                         nbdev_ch = spdk_io_channel_get_ctx(spdk_bdev_io_get_io_channel(bdev_io));
    1563             : 
    1564          12 :                         bdev_nvme_clear_current_io_path(nbdev_ch);
    1565          12 :                         bio->io_path = NULL;
    1566             : 
    1567          12 :                         if (any_io_path_may_become_available(nbdev_ch)) {
    1568           9 :                                 bdev_nvme_queue_retry_io(nbdev_ch, bio, 1000ULL);
    1569           9 :                                 return;
    1570             :                         }
    1571             :                 }
    1572             : 
    1573             :         /* fallthrough */
    1574             :         default:
    1575           3 :                 spdk_accel_sequence_abort(bdev_io->u.bdev.accel_sequence);
    1576           3 :                 bdev_io->u.bdev.accel_sequence = NULL;
    1577           3 :                 io_status = SPDK_BDEV_IO_STATUS_FAILED;
    1578           3 :                 break;
    1579             :         }
    1580             : 
    1581           4 :         bio->retry_count = 0;
    1582           4 :         bio->submit_tsc = 0;
    1583           4 :         __bdev_nvme_io_complete(bdev_io, io_status, NULL);
    1584             : }
    1585             : 
    1586             : static inline void
    1587           4 : bdev_nvme_admin_complete(struct nvme_bdev_io *bio, int rc)
    1588             : {
    1589           4 :         struct spdk_bdev_io *bdev_io = spdk_bdev_io_from_ctx(bio);
    1590             :         enum spdk_bdev_io_status io_status;
    1591             : 
    1592           4 :         switch (rc) {
    1593           1 :         case 0:
    1594           1 :                 io_status = SPDK_BDEV_IO_STATUS_SUCCESS;
    1595           1 :                 break;
    1596           0 :         case -ENOMEM:
    1597           0 :                 io_status = SPDK_BDEV_IO_STATUS_NOMEM;
    1598           0 :                 break;
    1599           3 :         case -ENXIO:
    1600             :         /* fallthrough */
    1601             :         default:
    1602           3 :                 io_status = SPDK_BDEV_IO_STATUS_FAILED;
    1603           3 :                 break;
    1604             :         }
    1605             : 
    1606           4 :         __bdev_nvme_io_complete(bdev_io, io_status, NULL);
    1607           4 : }
    1608             : 
    1609             : static void
    1610           3 : bdev_nvme_clear_io_path_caches_done(struct nvme_ctrlr *nvme_ctrlr,
    1611             :                                     void *ctx, int status)
    1612             : {
    1613           3 :         pthread_mutex_lock(&nvme_ctrlr->mutex);
    1614             : 
    1615           3 :         assert(nvme_ctrlr->io_path_cache_clearing == true);
    1616           3 :         nvme_ctrlr->io_path_cache_clearing = false;
    1617             : 
    1618           3 :         if (!nvme_ctrlr_can_be_unregistered(nvme_ctrlr)) {
    1619           3 :                 pthread_mutex_unlock(&nvme_ctrlr->mutex);
    1620           3 :                 return;
    1621             :         }
    1622             : 
    1623           0 :         pthread_mutex_unlock(&nvme_ctrlr->mutex);
    1624             : 
    1625           0 :         nvme_ctrlr_unregister(nvme_ctrlr);
    1626             : }
    1627             : 
    1628             : static void
    1629         328 : _bdev_nvme_clear_io_path_cache(struct nvme_qpair *nvme_qpair)
    1630             : {
    1631             :         struct nvme_io_path *io_path;
    1632             : 
    1633         475 :         TAILQ_FOREACH(io_path, &nvme_qpair->io_path_list, tailq) {
    1634         147 :                 if (io_path->nbdev_ch == NULL) {
    1635          68 :                         continue;
    1636             :                 }
    1637          79 :                 bdev_nvme_clear_current_io_path(io_path->nbdev_ch);
    1638             :         }
    1639         328 : }
    1640             : 
    1641             : static void
    1642           1 : bdev_nvme_clear_io_path_cache(struct nvme_ctrlr_channel_iter *i,
    1643             :                               struct nvme_ctrlr *nvme_ctrlr,
    1644             :                               struct nvme_ctrlr_channel *ctrlr_ch,
    1645             :                               void *ctx)
    1646             : {
    1647           1 :         assert(ctrlr_ch->qpair != NULL);
    1648             : 
    1649           1 :         _bdev_nvme_clear_io_path_cache(ctrlr_ch->qpair);
    1650             : 
    1651           1 :         nvme_ctrlr_for_each_channel_continue(i, 0);
    1652           1 : }
    1653             : 
    1654             : static void
    1655           3 : bdev_nvme_clear_io_path_caches(struct nvme_ctrlr *nvme_ctrlr)
    1656             : {
    1657           3 :         pthread_mutex_lock(&nvme_ctrlr->mutex);
    1658           3 :         if (!nvme_ctrlr_is_available(nvme_ctrlr) ||
    1659             :             nvme_ctrlr->io_path_cache_clearing) {
    1660           0 :                 pthread_mutex_unlock(&nvme_ctrlr->mutex);
    1661           0 :                 return;
    1662             :         }
    1663             : 
    1664           3 :         nvme_ctrlr->io_path_cache_clearing = true;
    1665           3 :         pthread_mutex_unlock(&nvme_ctrlr->mutex);
    1666             : 
    1667           3 :         nvme_ctrlr_for_each_channel(nvme_ctrlr,
    1668             :                                     bdev_nvme_clear_io_path_cache,
    1669             :                                     NULL,
    1670             :                                     bdev_nvme_clear_io_path_caches_done);
    1671             : }
    1672             : 
    1673             : static struct nvme_qpair *
    1674         103 : nvme_poll_group_get_qpair(struct nvme_poll_group *group, struct spdk_nvme_qpair *qpair)
    1675             : {
    1676             :         struct nvme_qpair *nvme_qpair;
    1677             : 
    1678         112 :         TAILQ_FOREACH(nvme_qpair, &group->qpair_list, tailq) {
    1679         112 :                 if (nvme_qpair->qpair == qpair) {
    1680         103 :                         break;
    1681             :                 }
    1682             :         }
    1683             : 
    1684         103 :         return nvme_qpair;
    1685             : }
    1686             : 
    1687             : static void nvme_qpair_delete(struct nvme_qpair *nvme_qpair);
    1688             : 
    1689             : static void
    1690         103 : bdev_nvme_disconnected_qpair_cb(struct spdk_nvme_qpair *qpair, void *poll_group_ctx)
    1691             : {
    1692         103 :         struct nvme_poll_group *group = poll_group_ctx;
    1693             :         struct nvme_qpair *nvme_qpair;
    1694             :         struct nvme_ctrlr_channel *ctrlr_ch;
    1695             :         int status;
    1696             : 
    1697         103 :         nvme_qpair = nvme_poll_group_get_qpair(group, qpair);
    1698         103 :         if (nvme_qpair == NULL) {
    1699           0 :                 return;
    1700             :         }
    1701             : 
    1702         103 :         if (nvme_qpair->qpair != NULL) {
    1703         103 :                 spdk_nvme_ctrlr_free_io_qpair(nvme_qpair->qpair);
    1704         103 :                 nvme_qpair->qpair = NULL;
    1705             :         }
    1706             : 
    1707         103 :         _bdev_nvme_clear_io_path_cache(nvme_qpair);
    1708             : 
    1709         103 :         ctrlr_ch = nvme_qpair->ctrlr_ch;
    1710             : 
    1711         103 :         if (ctrlr_ch != NULL) {
    1712          58 :                 if (ctrlr_ch->reset_iter != NULL) {
    1713             :                         /* We are in a full reset sequence. */
    1714          53 :                         if (ctrlr_ch->connect_poller != NULL) {
    1715             :                                 /* qpair was failed to connect. Abort the reset sequence. */
    1716           0 :                                 SPDK_DEBUGLOG(bdev_nvme, "qpair %p was failed to connect. abort the reset ctrlr sequence.\n",
    1717             :                                               qpair);
    1718           0 :                                 spdk_poller_unregister(&ctrlr_ch->connect_poller);
    1719           0 :                                 status = -1;
    1720             :                         } else {
    1721             :                                 /* qpair was completed to disconnect. Just move to the next ctrlr_channel. */
    1722          53 :                                 SPDK_DEBUGLOG(bdev_nvme, "qpair %p was disconnected and freed in a reset ctrlr sequence.\n",
    1723             :                                               qpair);
    1724          53 :                                 status = 0;
    1725             :                         }
    1726          53 :                         nvme_ctrlr_for_each_channel_continue(ctrlr_ch->reset_iter, status);
    1727          53 :                         ctrlr_ch->reset_iter = NULL;
    1728             :                 } else {
    1729             :                         /* qpair was disconnected unexpectedly. Reset controller for recovery. */
    1730           5 :                         SPDK_NOTICELOG("qpair %p was disconnected and freed. reset controller.\n", qpair);
    1731           5 :                         bdev_nvme_failover_ctrlr(nvme_qpair->ctrlr);
    1732             :                 }
    1733             :         } else {
    1734             :                 /* In this case, ctrlr_channel is already deleted. */
    1735          45 :                 SPDK_DEBUGLOG(bdev_nvme, "qpair %p was disconnected and freed. delete nvme_qpair.\n", qpair);
    1736          45 :                 nvme_qpair_delete(nvme_qpair);
    1737             :         }
    1738             : }
    1739             : 
    1740             : static void
    1741           0 : bdev_nvme_check_io_qpairs(struct nvme_poll_group *group)
    1742             : {
    1743             :         struct nvme_qpair *nvme_qpair;
    1744             : 
    1745           0 :         TAILQ_FOREACH(nvme_qpair, &group->qpair_list, tailq) {
    1746           0 :                 if (nvme_qpair->qpair == NULL || nvme_qpair->ctrlr_ch == NULL) {
    1747           0 :                         continue;
    1748             :                 }
    1749             : 
    1750           0 :                 if (spdk_nvme_qpair_get_failure_reason(nvme_qpair->qpair) !=
    1751             :                     SPDK_NVME_QPAIR_FAILURE_NONE) {
    1752           0 :                         _bdev_nvme_clear_io_path_cache(nvme_qpair);
    1753             :                 }
    1754             :         }
    1755           0 : }
    1756             : 
    1757             : static int
    1758        1089 : bdev_nvme_poll(void *arg)
    1759             : {
    1760        1089 :         struct nvme_poll_group *group = arg;
    1761             :         int64_t num_completions;
    1762             : 
    1763        1089 :         if (group->collect_spin_stat && group->start_ticks == 0) {
    1764           0 :                 group->start_ticks = spdk_get_ticks();
    1765             :         }
    1766             : 
    1767        1089 :         num_completions = spdk_nvme_poll_group_process_completions(group->group, 0,
    1768             :                           bdev_nvme_disconnected_qpair_cb);
    1769        1089 :         if (group->collect_spin_stat) {
    1770           0 :                 if (num_completions > 0) {
    1771           0 :                         if (group->end_ticks != 0) {
    1772           0 :                                 group->spin_ticks += (group->end_ticks - group->start_ticks);
    1773           0 :                                 group->end_ticks = 0;
    1774             :                         }
    1775           0 :                         group->start_ticks = 0;
    1776             :                 } else {
    1777           0 :                         group->end_ticks = spdk_get_ticks();
    1778             :                 }
    1779             :         }
    1780             : 
    1781        1089 :         if (spdk_unlikely(num_completions < 0)) {
    1782           0 :                 bdev_nvme_check_io_qpairs(group);
    1783             :         }
    1784             : 
    1785        1089 :         return num_completions > 0 ? SPDK_POLLER_BUSY : SPDK_POLLER_IDLE;
    1786             : }
    1787             : 
    1788             : static int bdev_nvme_poll_adminq(void *arg);
    1789             : 
    1790             : static void
    1791         102 : bdev_nvme_change_adminq_poll_period(struct nvme_ctrlr *nvme_ctrlr, uint64_t new_period_us)
    1792             : {
    1793         102 :         spdk_poller_unregister(&nvme_ctrlr->adminq_timer_poller);
    1794             : 
    1795         102 :         nvme_ctrlr->adminq_timer_poller = SPDK_POLLER_REGISTER(bdev_nvme_poll_adminq,
    1796             :                                           nvme_ctrlr, new_period_us);
    1797         102 : }
    1798             : 
    1799             : static int
    1800         148 : bdev_nvme_poll_adminq(void *arg)
    1801             : {
    1802             :         int32_t rc;
    1803         148 :         struct nvme_ctrlr *nvme_ctrlr = arg;
    1804             :         nvme_ctrlr_disconnected_cb disconnected_cb;
    1805             : 
    1806         148 :         assert(nvme_ctrlr != NULL);
    1807             : 
    1808         148 :         rc = spdk_nvme_ctrlr_process_admin_completions(nvme_ctrlr->ctrlr);
    1809         148 :         if (rc < 0) {
    1810          54 :                 disconnected_cb = nvme_ctrlr->disconnected_cb;
    1811          54 :                 nvme_ctrlr->disconnected_cb = NULL;
    1812             : 
    1813          54 :                 if (disconnected_cb != NULL) {
    1814          51 :                         bdev_nvme_change_adminq_poll_period(nvme_ctrlr,
    1815             :                                                             g_opts.nvme_adminq_poll_period_us);
    1816          51 :                         disconnected_cb(nvme_ctrlr);
    1817             :                 } else {
    1818           3 :                         bdev_nvme_failover_ctrlr(nvme_ctrlr);
    1819             :                 }
    1820          94 :         } else if (spdk_nvme_ctrlr_get_admin_qp_failure_reason(nvme_ctrlr->ctrlr) !=
    1821             :                    SPDK_NVME_QPAIR_FAILURE_NONE) {
    1822           0 :                 bdev_nvme_clear_io_path_caches(nvme_ctrlr);
    1823             :         }
    1824             : 
    1825         148 :         return rc == 0 ? SPDK_POLLER_IDLE : SPDK_POLLER_BUSY;
    1826             : }
    1827             : 
    1828             : static void
    1829          38 : nvme_bdev_free(void *io_device)
    1830             : {
    1831          38 :         struct nvme_bdev *nvme_disk = io_device;
    1832             : 
    1833          38 :         pthread_mutex_destroy(&nvme_disk->mutex);
    1834          38 :         free(nvme_disk->disk.name);
    1835          38 :         free(nvme_disk->err_stat);
    1836          38 :         free(nvme_disk);
    1837          38 : }
    1838             : 
    1839             : static int
    1840          37 : bdev_nvme_destruct(void *ctx)
    1841             : {
    1842          37 :         struct nvme_bdev *nvme_disk = ctx;
    1843             :         struct nvme_ns *nvme_ns, *tmp_nvme_ns;
    1844             : 
    1845             :         SPDK_DTRACE_PROBE2(bdev_nvme_destruct, nvme_disk->nbdev_ctrlr->name, nvme_disk->nsid);
    1846             : 
    1847          75 :         TAILQ_FOREACH_SAFE(nvme_ns, &nvme_disk->nvme_ns_list, tailq, tmp_nvme_ns) {
    1848          38 :                 pthread_mutex_lock(&nvme_ns->ctrlr->mutex);
    1849             : 
    1850          38 :                 nvme_ns->bdev = NULL;
    1851             : 
    1852          38 :                 assert(nvme_ns->id > 0);
    1853             : 
    1854          38 :                 if (nvme_ctrlr_get_ns(nvme_ns->ctrlr, nvme_ns->id) == NULL) {
    1855           0 :                         pthread_mutex_unlock(&nvme_ns->ctrlr->mutex);
    1856             : 
    1857           0 :                         nvme_ctrlr_release(nvme_ns->ctrlr);
    1858           0 :                         nvme_ns_free(nvme_ns);
    1859             :                 } else {
    1860          38 :                         pthread_mutex_unlock(&nvme_ns->ctrlr->mutex);
    1861             :                 }
    1862             :         }
    1863             : 
    1864          37 :         pthread_mutex_lock(&g_bdev_nvme_mutex);
    1865          37 :         TAILQ_REMOVE(&nvme_disk->nbdev_ctrlr->bdevs, nvme_disk, tailq);
    1866          37 :         pthread_mutex_unlock(&g_bdev_nvme_mutex);
    1867             : 
    1868          37 :         spdk_io_device_unregister(nvme_disk, nvme_bdev_free);
    1869             : 
    1870          37 :         return 0;
    1871             : }
    1872             : 
    1873             : static int
    1874         104 : bdev_nvme_create_qpair(struct nvme_qpair *nvme_qpair)
    1875             : {
    1876             :         struct nvme_ctrlr *nvme_ctrlr;
    1877         104 :         struct spdk_nvme_io_qpair_opts opts;
    1878             :         struct spdk_nvme_qpair *qpair;
    1879             :         int rc;
    1880             : 
    1881         104 :         nvme_ctrlr = nvme_qpair->ctrlr;
    1882             : 
    1883         104 :         spdk_nvme_ctrlr_get_default_io_qpair_opts(nvme_ctrlr->ctrlr, &opts, sizeof(opts));
    1884         104 :         opts.delay_cmd_submit = g_opts.delay_cmd_submit;
    1885         104 :         opts.create_only = true;
    1886         104 :         opts.async_mode = true;
    1887         104 :         opts.io_queue_requests = spdk_max(g_opts.io_queue_requests, opts.io_queue_requests);
    1888         104 :         g_opts.io_queue_requests = opts.io_queue_requests;
    1889             : 
    1890         104 :         qpair = spdk_nvme_ctrlr_alloc_io_qpair(nvme_ctrlr->ctrlr, &opts, sizeof(opts));
    1891         104 :         if (qpair == NULL) {
    1892           0 :                 return -1;
    1893             :         }
    1894             : 
    1895             :         SPDK_DTRACE_PROBE3(bdev_nvme_create_qpair, nvme_ctrlr->nbdev_ctrlr->name,
    1896             :                            spdk_nvme_qpair_get_id(qpair), spdk_thread_get_id(nvme_ctrlr->thread));
    1897             : 
    1898         104 :         assert(nvme_qpair->group != NULL);
    1899             : 
    1900         104 :         rc = spdk_nvme_poll_group_add(nvme_qpair->group->group, qpair);
    1901         104 :         if (rc != 0) {
    1902           0 :                 SPDK_ERRLOG("Unable to begin polling on NVMe Channel.\n");
    1903           0 :                 goto err;
    1904             :         }
    1905             : 
    1906         104 :         rc = spdk_nvme_ctrlr_connect_io_qpair(nvme_ctrlr->ctrlr, qpair);
    1907         104 :         if (rc != 0) {
    1908           0 :                 SPDK_ERRLOG("Unable to connect I/O qpair.\n");
    1909           0 :                 goto err;
    1910             :         }
    1911             : 
    1912         104 :         nvme_qpair->qpair = qpair;
    1913             : 
    1914         104 :         if (!g_opts.disable_auto_failback) {
    1915          71 :                 _bdev_nvme_clear_io_path_cache(nvme_qpair);
    1916             :         }
    1917             : 
    1918         104 :         return 0;
    1919             : 
    1920           0 : err:
    1921           0 :         spdk_nvme_ctrlr_free_io_qpair(qpair);
    1922             : 
    1923           0 :         return rc;
    1924             : }
    1925             : 
    1926             : static void bdev_nvme_reset_io_continue(void *cb_arg, int rc);
    1927             : 
    1928             : static void
    1929          84 : bdev_nvme_complete_pending_resets(struct nvme_ctrlr_channel_iter *i,
    1930             :                                   struct nvme_ctrlr *nvme_ctrlr,
    1931             :                                   struct nvme_ctrlr_channel *ctrlr_ch,
    1932             :                                   void *ctx)
    1933             : {
    1934          84 :         int rc = 0;
    1935             :         struct nvme_bdev_io *bio;
    1936             : 
    1937          84 :         if (ctx != NULL) {
    1938          35 :                 rc = -1;
    1939             :         }
    1940             : 
    1941          89 :         while (!TAILQ_EMPTY(&ctrlr_ch->pending_resets)) {
    1942           5 :                 bio = TAILQ_FIRST(&ctrlr_ch->pending_resets);
    1943           5 :                 TAILQ_REMOVE(&ctrlr_ch->pending_resets, bio, retry_link);
    1944             : 
    1945           5 :                 bdev_nvme_reset_io_continue(bio, rc);
    1946             :         }
    1947             : 
    1948          84 :         nvme_ctrlr_for_each_channel_continue(i, 0);
    1949          84 : }
    1950             : 
    1951             : /* This function marks the current trid as failed by storing the current ticks
    1952             :  * and then sets the next trid to the active trid within a controller if exists.
    1953             :  *
    1954             :  * The purpose of the boolean return value is to request the caller to disconnect
    1955             :  * the current trid now to try connecting the next trid.
    1956             :  */
    1957             : static bool
    1958          37 : bdev_nvme_failover_trid(struct nvme_ctrlr *nvme_ctrlr, bool remove, bool start)
    1959             : {
    1960             :         struct nvme_path_id *path_id, *next_path;
    1961             :         int rc __attribute__((unused));
    1962             : 
    1963          37 :         path_id = TAILQ_FIRST(&nvme_ctrlr->trids);
    1964          37 :         assert(path_id);
    1965          37 :         assert(path_id == nvme_ctrlr->active_path_id);
    1966          37 :         next_path = TAILQ_NEXT(path_id, link);
    1967             : 
    1968             :         /* Update the last failed time. It means the trid is failed if its last
    1969             :          * failed time is non-zero.
    1970             :          */
    1971          37 :         path_id->last_failed_tsc = spdk_get_ticks();
    1972             : 
    1973          37 :         if (next_path == NULL) {
    1974             :                 /* There is no alternate trid within a controller. */
    1975          26 :                 return false;
    1976             :         }
    1977             : 
    1978          11 :         if (!start && nvme_ctrlr->opts.reconnect_delay_sec == 0) {
    1979             :                 /* Connect is not retried in a controller reset sequence. Connecting
    1980             :                  * the next trid will be done by the next bdev_nvme_failover_ctrlr() call.
    1981             :                  */
    1982           3 :                 return false;
    1983             :         }
    1984             : 
    1985           8 :         assert(path_id->trid.trtype != SPDK_NVME_TRANSPORT_PCIE);
    1986             : 
    1987           8 :         SPDK_NOTICELOG("Start failover from %s:%s to %s:%s\n", path_id->trid.traddr,
    1988             :                        path_id->trid.trsvcid,        next_path->trid.traddr, next_path->trid.trsvcid);
    1989             : 
    1990           8 :         spdk_nvme_ctrlr_fail(nvme_ctrlr->ctrlr);
    1991           8 :         nvme_ctrlr->active_path_id = next_path;
    1992           8 :         rc = spdk_nvme_ctrlr_set_trid(nvme_ctrlr->ctrlr, &next_path->trid);
    1993           8 :         assert(rc == 0);
    1994           8 :         TAILQ_REMOVE(&nvme_ctrlr->trids, path_id, link);
    1995           8 :         if (!remove) {
    1996             :                 /** Shuffle the old trid to the end of the list and use the new one.
    1997             :                  * Allows for round robin through multiple connections.
    1998             :                  */
    1999           6 :                 TAILQ_INSERT_TAIL(&nvme_ctrlr->trids, path_id, link);
    2000             :         } else {
    2001           2 :                 free(path_id);
    2002             :         }
    2003             : 
    2004           8 :         if (start || next_path->last_failed_tsc == 0) {
    2005             :                 /* bdev_nvme_failover_ctrlr() is just called or the next trid is not failed
    2006             :                  * or used yet. Try the next trid now.
    2007             :                  */
    2008           7 :                 return true;
    2009             :         }
    2010             : 
    2011           1 :         if (spdk_get_ticks() > next_path->last_failed_tsc + spdk_get_ticks_hz() *
    2012           1 :             nvme_ctrlr->opts.reconnect_delay_sec) {
    2013             :                 /* Enough backoff passed since the next trid failed. Try the next trid now. */
    2014           0 :                 return true;
    2015             :         }
    2016             : 
    2017             :         /* The next trid will be tried after reconnect_delay_sec seconds. */
    2018           1 :         return false;
    2019             : }
    2020             : 
    2021             : static bool
    2022          69 : bdev_nvme_check_ctrlr_loss_timeout(struct nvme_ctrlr *nvme_ctrlr)
    2023             : {
    2024             :         int32_t elapsed;
    2025             : 
    2026          69 :         if (nvme_ctrlr->opts.ctrlr_loss_timeout_sec == 0 ||
    2027          37 :             nvme_ctrlr->opts.ctrlr_loss_timeout_sec == -1) {
    2028          43 :                 return false;
    2029             :         }
    2030             : 
    2031          26 :         elapsed = (spdk_get_ticks() - nvme_ctrlr->reset_start_tsc) / spdk_get_ticks_hz();
    2032          26 :         if (elapsed >= nvme_ctrlr->opts.ctrlr_loss_timeout_sec) {
    2033           6 :                 return true;
    2034             :         } else {
    2035          20 :                 return false;
    2036             :         }
    2037             : }
    2038             : 
    2039             : static bool
    2040          12 : bdev_nvme_check_fast_io_fail_timeout(struct nvme_ctrlr *nvme_ctrlr)
    2041             : {
    2042             :         uint32_t elapsed;
    2043             : 
    2044          12 :         if (nvme_ctrlr->opts.fast_io_fail_timeout_sec == 0) {
    2045           8 :                 return false;
    2046             :         }
    2047             : 
    2048           4 :         elapsed = (spdk_get_ticks() - nvme_ctrlr->reset_start_tsc) / spdk_get_ticks_hz();
    2049           4 :         if (elapsed >= nvme_ctrlr->opts.fast_io_fail_timeout_sec) {
    2050           2 :                 return true;
    2051             :         } else {
    2052           2 :                 return false;
    2053             :         }
    2054             : }
    2055             : 
    2056             : static void bdev_nvme_reset_ctrlr_complete(struct nvme_ctrlr *nvme_ctrlr, bool success);
    2057             : 
    2058             : static void
    2059          52 : nvme_ctrlr_disconnect(struct nvme_ctrlr *nvme_ctrlr, nvme_ctrlr_disconnected_cb cb_fn)
    2060             : {
    2061             :         int rc;
    2062             : 
    2063          52 :         rc = spdk_nvme_ctrlr_disconnect(nvme_ctrlr->ctrlr);
    2064          52 :         if (rc != 0) {
    2065             :                 /* Disconnect fails if ctrlr is already resetting or removed. In this case,
    2066             :                  * fail the reset sequence immediately.
    2067             :                  */
    2068           1 :                 bdev_nvme_reset_ctrlr_complete(nvme_ctrlr, false);
    2069           1 :                 return;
    2070             :         }
    2071             : 
    2072             :         /* spdk_nvme_ctrlr_disconnect() may complete asynchronously later by polling adminq.
    2073             :          * Set callback here to execute the specified operation after ctrlr is really disconnected.
    2074             :          */
    2075          51 :         assert(nvme_ctrlr->disconnected_cb == NULL);
    2076          51 :         nvme_ctrlr->disconnected_cb = cb_fn;
    2077             : 
    2078             :         /* During disconnection, reduce the period to poll adminq more often. */
    2079          51 :         bdev_nvme_change_adminq_poll_period(nvme_ctrlr, 0);
    2080             : }
    2081             : 
    2082             : enum bdev_nvme_op_after_reset {
    2083             :         OP_NONE,
    2084             :         OP_COMPLETE_PENDING_DESTRUCT,
    2085             :         OP_DESTRUCT,
    2086             :         OP_DELAYED_RECONNECT,
    2087             :         OP_FAILOVER,
    2088             : };
    2089             : 
    2090             : typedef enum bdev_nvme_op_after_reset _bdev_nvme_op_after_reset;
    2091             : 
    2092             : static _bdev_nvme_op_after_reset
    2093          51 : bdev_nvme_check_op_after_reset(struct nvme_ctrlr *nvme_ctrlr, bool success)
    2094             : {
    2095          51 :         if (nvme_ctrlr_can_be_unregistered(nvme_ctrlr)) {
    2096             :                 /* Complete pending destruct after reset completes. */
    2097           0 :                 return OP_COMPLETE_PENDING_DESTRUCT;
    2098          51 :         } else if (nvme_ctrlr->pending_failover) {
    2099           3 :                 nvme_ctrlr->pending_failover = false;
    2100           3 :                 nvme_ctrlr->reset_start_tsc = 0;
    2101           3 :                 return OP_FAILOVER;
    2102          48 :         } else if (success || nvme_ctrlr->opts.reconnect_delay_sec == 0) {
    2103          34 :                 nvme_ctrlr->reset_start_tsc = 0;
    2104          34 :                 return OP_NONE;
    2105          14 :         } else if (bdev_nvme_check_ctrlr_loss_timeout(nvme_ctrlr)) {
    2106           2 :                 return OP_DESTRUCT;
    2107             :         } else {
    2108          12 :                 if (bdev_nvme_check_fast_io_fail_timeout(nvme_ctrlr)) {
    2109           2 :                         nvme_ctrlr->fast_io_fail_timedout = true;
    2110             :                 }
    2111          12 :                 return OP_DELAYED_RECONNECT;
    2112             :         }
    2113             : }
    2114             : 
    2115             : static int bdev_nvme_delete_ctrlr(struct nvme_ctrlr *nvme_ctrlr, bool hotplug);
    2116             : static void bdev_nvme_reconnect_ctrlr(struct nvme_ctrlr *nvme_ctrlr);
    2117             : 
    2118             : static int
    2119           9 : bdev_nvme_reconnect_delay_timer_expired(void *ctx)
    2120             : {
    2121           9 :         struct nvme_ctrlr *nvme_ctrlr = ctx;
    2122             : 
    2123             :         SPDK_DTRACE_PROBE1(bdev_nvme_ctrlr_reconnect_delay, nvme_ctrlr->nbdev_ctrlr->name);
    2124           9 :         pthread_mutex_lock(&nvme_ctrlr->mutex);
    2125             : 
    2126           9 :         spdk_poller_unregister(&nvme_ctrlr->reconnect_delay_timer);
    2127             : 
    2128           9 :         if (!nvme_ctrlr->reconnect_is_delayed) {
    2129           0 :                 pthread_mutex_unlock(&nvme_ctrlr->mutex);
    2130           0 :                 return SPDK_POLLER_BUSY;
    2131             :         }
    2132             : 
    2133           9 :         nvme_ctrlr->reconnect_is_delayed = false;
    2134             : 
    2135           9 :         if (nvme_ctrlr->destruct) {
    2136           0 :                 pthread_mutex_unlock(&nvme_ctrlr->mutex);
    2137           0 :                 return SPDK_POLLER_BUSY;
    2138             :         }
    2139             : 
    2140           9 :         assert(nvme_ctrlr->resetting == false);
    2141           9 :         nvme_ctrlr->resetting = true;
    2142             : 
    2143           9 :         pthread_mutex_unlock(&nvme_ctrlr->mutex);
    2144             : 
    2145           9 :         spdk_poller_resume(nvme_ctrlr->adminq_timer_poller);
    2146             : 
    2147           9 :         bdev_nvme_reconnect_ctrlr(nvme_ctrlr);
    2148           9 :         return SPDK_POLLER_BUSY;
    2149             : }
    2150             : 
    2151             : static void
    2152          12 : bdev_nvme_start_reconnect_delay_timer(struct nvme_ctrlr *nvme_ctrlr)
    2153             : {
    2154          12 :         spdk_poller_pause(nvme_ctrlr->adminq_timer_poller);
    2155             : 
    2156          12 :         assert(nvme_ctrlr->reconnect_is_delayed == false);
    2157          12 :         nvme_ctrlr->reconnect_is_delayed = true;
    2158             : 
    2159          12 :         assert(nvme_ctrlr->reconnect_delay_timer == NULL);
    2160          12 :         nvme_ctrlr->reconnect_delay_timer = SPDK_POLLER_REGISTER(bdev_nvme_reconnect_delay_timer_expired,
    2161             :                                             nvme_ctrlr,
    2162             :                                             nvme_ctrlr->opts.reconnect_delay_sec * SPDK_SEC_TO_USEC);
    2163          12 : }
    2164             : 
    2165             : static void remove_discovery_entry(struct nvme_ctrlr *nvme_ctrlr);
    2166             : 
    2167             : static void
    2168          49 : _bdev_nvme_reset_ctrlr_complete(struct nvme_ctrlr *nvme_ctrlr, void *ctx, int status)
    2169             : {
    2170          49 :         bool success = (ctx == NULL);
    2171          49 :         bdev_nvme_ctrlr_op_cb ctrlr_op_cb_fn = nvme_ctrlr->ctrlr_op_cb_fn;
    2172          49 :         void *ctrlr_op_cb_arg = nvme_ctrlr->ctrlr_op_cb_arg;
    2173             :         enum bdev_nvme_op_after_reset op_after_reset;
    2174             : 
    2175          49 :         assert(nvme_ctrlr->thread == spdk_get_thread());
    2176             : 
    2177          49 :         nvme_ctrlr->ctrlr_op_cb_fn = NULL;
    2178          49 :         nvme_ctrlr->ctrlr_op_cb_arg = NULL;
    2179             : 
    2180          49 :         if (!success) {
    2181          21 :                 SPDK_ERRLOG("Resetting controller failed.\n");
    2182             :         } else {
    2183          28 :                 SPDK_NOTICELOG("Resetting controller successful.\n");
    2184             :         }
    2185             : 
    2186          49 :         pthread_mutex_lock(&nvme_ctrlr->mutex);
    2187          49 :         nvme_ctrlr->resetting = false;
    2188          49 :         nvme_ctrlr->dont_retry = false;
    2189          49 :         nvme_ctrlr->in_failover = false;
    2190             : 
    2191          49 :         op_after_reset = bdev_nvme_check_op_after_reset(nvme_ctrlr, success);
    2192          49 :         pthread_mutex_unlock(&nvme_ctrlr->mutex);
    2193             : 
    2194             :         /* Delay callbacks when the next operation is a failover. */
    2195          49 :         if (ctrlr_op_cb_fn && op_after_reset != OP_FAILOVER) {
    2196          10 :                 ctrlr_op_cb_fn(ctrlr_op_cb_arg, success ? 0 : -1);
    2197             :         }
    2198             : 
    2199          49 :         switch (op_after_reset) {
    2200           0 :         case OP_COMPLETE_PENDING_DESTRUCT:
    2201           0 :                 nvme_ctrlr_unregister(nvme_ctrlr);
    2202           0 :                 break;
    2203           2 :         case OP_DESTRUCT:
    2204           2 :                 bdev_nvme_delete_ctrlr(nvme_ctrlr, false);
    2205           2 :                 remove_discovery_entry(nvme_ctrlr);
    2206           2 :                 break;
    2207          12 :         case OP_DELAYED_RECONNECT:
    2208          12 :                 nvme_ctrlr_disconnect(nvme_ctrlr, bdev_nvme_start_reconnect_delay_timer);
    2209          12 :                 break;
    2210           3 :         case OP_FAILOVER:
    2211           3 :                 nvme_ctrlr->ctrlr_op_cb_fn = ctrlr_op_cb_fn;
    2212           3 :                 nvme_ctrlr->ctrlr_op_cb_arg = ctrlr_op_cb_arg;
    2213           3 :                 bdev_nvme_failover_ctrlr(nvme_ctrlr);
    2214           3 :                 break;
    2215          32 :         default:
    2216          32 :                 break;
    2217             :         }
    2218          49 : }
    2219             : 
    2220             : static void
    2221          51 : bdev_nvme_reset_ctrlr_complete(struct nvme_ctrlr *nvme_ctrlr, bool success)
    2222             : {
    2223          51 :         pthread_mutex_lock(&nvme_ctrlr->mutex);
    2224          51 :         if (!success) {
    2225             :                 /* Connecting the active trid failed. Set the next alternate trid to the
    2226             :                  * active trid if it exists.
    2227             :                  */
    2228          23 :                 if (bdev_nvme_failover_trid(nvme_ctrlr, false, false)) {
    2229             :                         /* The next alternate trid exists and is ready to try. Try it now. */
    2230           2 :                         pthread_mutex_unlock(&nvme_ctrlr->mutex);
    2231             : 
    2232           2 :                         nvme_ctrlr_disconnect(nvme_ctrlr, bdev_nvme_reconnect_ctrlr);
    2233           2 :                         return;
    2234             :                 }
    2235             : 
    2236             :                 /* We came here if there is no alternate trid or if the next trid exists but
    2237             :                  * is not ready to try. We will try the active trid after reconnect_delay_sec
    2238             :                  * seconds if it is non-zero or at the next reset call otherwise.
    2239             :                  */
    2240             :         } else {
    2241             :                 /* Connecting the active trid succeeded. Clear the last failed time because it
    2242             :                  * means the trid is failed if its last failed time is non-zero.
    2243             :                  */
    2244          28 :                 nvme_ctrlr->active_path_id->last_failed_tsc = 0;
    2245             :         }
    2246          49 :         pthread_mutex_unlock(&nvme_ctrlr->mutex);
    2247             : 
    2248             :         /* Make sure we clear any pending resets before returning. */
    2249          49 :         nvme_ctrlr_for_each_channel(nvme_ctrlr,
    2250             :                                     bdev_nvme_complete_pending_resets,
    2251             :                                     success ? NULL : (void *)0x1,
    2252             :                                     _bdev_nvme_reset_ctrlr_complete);
    2253             : }
    2254             : 
                       /* Completion callback for the per-channel qpair teardown that
                        * bdev_nvme_reset_create_qpairs_done() starts when qpair creation failed.
                        * Reports the reset as failed; ctx and status are not used.
                        */
    2255             : static void
    2256           0 : bdev_nvme_reset_create_qpairs_failed(struct nvme_ctrlr *nvme_ctrlr, void *ctx, int status)
    2257             : {
    2258           0 :         bdev_nvme_reset_ctrlr_complete(nvme_ctrlr, false);
    2259           0 : }
    2260             : 
                       /* Per-channel step that tears down the channel's I/O qpair.
                        *
                        * The channel's cached I/O path is dropped first. If the channel currently
                        * owns a qpair, that qpair is disconnected and the iterator is parked in
                        * ctrlr_ch->reset_iter instead of being continued here; otherwise the
                        * iteration moves on immediately with status 0. ctx is not used.
                        */
    2261             : static void
    2262          64 : bdev_nvme_reset_destroy_qpair(struct nvme_ctrlr_channel_iter *i,
    2263             :                               struct nvme_ctrlr *nvme_ctrlr,
    2264             :                               struct nvme_ctrlr_channel *ctrlr_ch, void *ctx)
    2265             : {
    2266             :         struct nvme_qpair *nvme_qpair;
    2267             : 
    2268          64 :         nvme_qpair = ctrlr_ch->qpair;
    2269          64 :         assert(nvme_qpair != NULL);
    2270             : 
    2271          64 :         _bdev_nvme_clear_io_path_cache(nvme_qpair);
    2272             : 
    2273          64 :         if (nvme_qpair->qpair != NULL) {
                                       /* NOTE(review): presumably makes requests aborted by the disconnect
                                        * complete with "do not retry" while the ctrlr has dont_retry set -
                                        * confirm against spdk_nvme_qpair_set_abort_dnr().
                                        */
    2274          53 :                 if (nvme_qpair->ctrlr->dont_retry) {
    2275          39 :                         spdk_nvme_qpair_set_abort_dnr(nvme_qpair->qpair, true);
    2276             :                 }
    2277          53 :                 spdk_nvme_ctrlr_disconnect_io_qpair(nvme_qpair->qpair);
    2278             : 
    2279             :                 /* The current full reset sequence will move to the next
    2280             :                  * ctrlr_channel after the qpair is actually disconnected.
    2281             :                  */
    2282          53 :                 assert(ctrlr_ch->reset_iter == NULL);
    2283          53 :                 ctrlr_ch->reset_iter = i;
    2284             :         } else {
                                       /* Nothing to disconnect on this channel. */
    2285          11 :                 nvme_ctrlr_for_each_channel_continue(i, 0);
    2286             :         }
    2287          64 : }
    2288             : 
                       /* Completion callback for the per-channel qpair creation pass started by
                        * bdev_nvme_reconnect_ctrlr_poll().
                        *
                        * status == 0 completes the reset successfully. Any other status triggers
                        * a second per-channel pass that destroys the qpairs again and then ends in
                        * bdev_nvme_reset_create_qpairs_failed(). ctx is not used.
                        */
    2289             : static void
    2290          28 : bdev_nvme_reset_create_qpairs_done(struct nvme_ctrlr *nvme_ctrlr, void *ctx, int status)
    2291             : {
    2292          28 :         if (status == 0) {
    2293          28 :                 bdev_nvme_reset_ctrlr_complete(nvme_ctrlr, true);
    2294             :         } else {
    2295             :                 /* Delete the added qpairs and quiesce ctrlr to make the states clean. */
    2296           0 :                 nvme_ctrlr_for_each_channel(nvme_ctrlr,
    2297             :                                             bdev_nvme_reset_destroy_qpair,
    2298             :                                             NULL,
    2299             :                                             bdev_nvme_reset_create_qpairs_failed);
    2300             :         }
    2301          28 : }
    2302             : 
                       /* Poller callback registered by bdev_nvme_reset_create_qpair(); ctx is the
                        * struct nvme_ctrlr_channel whose qpair is being connected.
                        *
                        * Once the qpair reports connected, the poller unregisters itself and the
                        * iterator parked in ctrlr_ch->reset_iter is continued with status 0.
                        * Every path returns SPDK_POLLER_BUSY.
                        */
    2303             : static int
    2304          45 : bdev_nvme_reset_check_qpair_connected(void *ctx)
    2305             : {
    2306          45 :         struct nvme_ctrlr_channel *ctrlr_ch = ctx;
    2307             : 
    2308          45 :         if (ctrlr_ch->reset_iter == NULL) {
    2309             :                 /* The qpair already failed to connect and the reset sequence is being aborted. */
    2310           0 :                 assert(ctrlr_ch->connect_poller == NULL);
    2311           0 :                 assert(ctrlr_ch->qpair->qpair == NULL);
    2312           0 :                 return SPDK_POLLER_BUSY;
    2313             :         }
    2314             : 
    2315          45 :         assert(ctrlr_ch->qpair->qpair != NULL);
    2316             : 
                               /* Still connecting; check again on the next poll. */
    2317          45 :         if (!spdk_nvme_qpair_is_connected(ctrlr_ch->qpair->qpair)) {
    2318           0 :                 return SPDK_POLLER_BUSY;
    2319             :         }
    2320             : 
    2321          45 :         spdk_poller_unregister(&ctrlr_ch->connect_poller);
    2322             : 
    2323             :         /* The qpair finished connecting. Move to the next ctrlr_channel. */
    2324          45 :         nvme_ctrlr_for_each_channel_continue(ctrlr_ch->reset_iter, 0);
    2325          45 :         ctrlr_ch->reset_iter = NULL;
    2326             : 
                               /* Unless auto failback is disabled, drop the cached I/O path of this
                                * channel's qpair now that it is connected again.
                                */
    2327          45 :         if (!g_opts.disable_auto_failback) {
    2328          30 :                 _bdev_nvme_clear_io_path_cache(ctrlr_ch->qpair);
    2329             :         }
    2330             : 
    2331          45 :         return SPDK_POLLER_BUSY;
    2332             : }
    2333             : 
                       /* Per-channel step that recreates the channel's I/O qpair.
                        *
                        * On success a zero-period poller (bdev_nvme_reset_check_qpair_connected)
                        * is registered and the iterator is parked in ctrlr_ch->reset_iter until
                        * that poller continues it. On failure the iteration is continued right
                        * away with the error code from bdev_nvme_create_qpair(). nvme_ctrlr and
                        * ctx are not used.
                        */
    2334             : static void
    2335          45 : bdev_nvme_reset_create_qpair(struct nvme_ctrlr_channel_iter *i,
    2336             :                              struct nvme_ctrlr *nvme_ctrlr,
    2337             :                              struct nvme_ctrlr_channel *ctrlr_ch,
    2338             :                              void *ctx)
    2339             : {
    2340             :         int rc;
    2341             : 
    2342          45 :         rc = bdev_nvme_create_qpair(ctrlr_ch->qpair);
    2343          45 :         if (rc == 0) {
    2344          45 :                 ctrlr_ch->connect_poller = SPDK_POLLER_REGISTER(bdev_nvme_reset_check_qpair_connected,
    2345             :                                            ctrlr_ch, 0);
    2346             : 
    2347             :                 /* The current full reset sequence will move to the next
    2348             :                  * ctrlr_channel after the qpair is actually connected.
    2349             :                  */
    2350          45 :                 assert(ctrlr_ch->reset_iter == NULL);
    2351          45 :                 ctrlr_ch->reset_iter = i;
    2352             :         } else {
    2353           0 :                 nvme_ctrlr_for_each_channel_continue(i, rc);
    2354             :         }
    2355          45 : }
    2356             : 
                       /* Walk the namespaces tracked as active by nvme_ctrlr and clear the ns
                        * pointer of every one that the underlying spdk_nvme_ctrlr no longer
                        * reports as active. The nvme_ns entry itself is kept.
                        */
    2357             : static void
    2358          28 : nvme_ctrlr_check_namespaces(struct nvme_ctrlr *nvme_ctrlr)
    2359             : {
    2360          28 :         struct spdk_nvme_ctrlr *ctrlr = nvme_ctrlr->ctrlr;
    2361             :         struct nvme_ns *nvme_ns;
    2362             : 
    2363          28 :         for (nvme_ns = nvme_ctrlr_get_first_active_ns(nvme_ctrlr);
    2364          41 :              nvme_ns != NULL;
    2365          13 :              nvme_ns = nvme_ctrlr_get_next_active_ns(nvme_ctrlr, nvme_ns)) {
    2366          13 :                 if (!spdk_nvme_ctrlr_is_active_ns(ctrlr, nvme_ns->id)) {
    2367           1 :                         SPDK_DEBUGLOG(bdev_nvme, "NSID %u was removed during reset.\n", nvme_ns->id);
    2368             :                         /* NS can be added again. Just nullify nvme_ns->ns. */
    2369           1 :                         nvme_ns->ns = NULL;
    2370             :                 }
    2371             :         }
    2372          28 : }
    2373             : 
    2374             : 
                       /* Poller callback registered by bdev_nvme_reconnect_ctrlr(); arg is the
                        * struct nvme_ctrlr being reconnected.
                        *
                        * Polls the asynchronous reconnect. While it returns -EAGAIN the poller
                        * stays registered. Otherwise the poller is unregistered and either the
                        * I/O qpairs are recreated on every channel (rc == 0) or the reset is
                        * completed as failed. Every path returns SPDK_POLLER_BUSY.
                        */
    2375             : static int
    2376          50 : bdev_nvme_reconnect_ctrlr_poll(void *arg)
    2377             : {
    2378          50 :         struct nvme_ctrlr *nvme_ctrlr = arg;
                               /* NOTE(review): this initial value is always overwritten by the
                                * reconnect poll result below before rc is read.
                                */
    2379          50 :         int rc = -ETIMEDOUT;
    2380             : 
    2381          50 :         if (bdev_nvme_check_ctrlr_loss_timeout(nvme_ctrlr)) {
    2382             :                 /* Mark the ctrlr as failed. The next call to
    2383             :                  * spdk_nvme_ctrlr_reconnect_poll_async() will then
    2384             :                  * do the necessary cleanup and return failure.
    2385             :                  */
    2386           2 :                 spdk_nvme_ctrlr_fail(nvme_ctrlr->ctrlr);
    2387             :         }
    2388             : 
    2389          50 :         rc = spdk_nvme_ctrlr_reconnect_poll_async(nvme_ctrlr->ctrlr);
    2390          50 :         if (rc == -EAGAIN) {
                                       /* Reconnect still in progress. */
    2391           0 :                 return SPDK_POLLER_BUSY;
    2392             :         }
    2393             : 
    2394          50 :         spdk_poller_unregister(&nvme_ctrlr->reset_detach_poller);
    2395          50 :         if (rc == 0) {
    2396          28 :                 nvme_ctrlr_check_namespaces(nvme_ctrlr);
    2397             : 
    2398             :                 /* Recreate all of the I/O queue pairs */
    2399          28 :                 nvme_ctrlr_for_each_channel(nvme_ctrlr,
    2400             :                                             bdev_nvme_reset_create_qpair,
    2401             :                                             NULL,
    2402             :                                             bdev_nvme_reset_create_qpairs_done);
    2403             :         } else {
    2404          22 :                 bdev_nvme_reset_ctrlr_complete(nvme_ctrlr, false);
    2405             :         }
    2406          50 :         return SPDK_POLLER_BUSY;
    2407             : }
    2408             : 
                       /* Start an asynchronous reconnect of the controller and register
                        * bdev_nvme_reconnect_ctrlr_poll() as a zero-period poller, stored in
                        * nvme_ctrlr->reset_detach_poller, to drive it. That poller slot must be
                        * free on entry (asserted).
                        */
    2409             : static void
    2410          50 : bdev_nvme_reconnect_ctrlr(struct nvme_ctrlr *nvme_ctrlr)
    2411             : {
    2412          50 :         spdk_nvme_ctrlr_reconnect_async(nvme_ctrlr->ctrlr);
    2413             : 
    2414             :         SPDK_DTRACE_PROBE1(bdev_nvme_ctrlr_reconnect, nvme_ctrlr->nbdev_ctrlr->name);
    2415          50 :         assert(nvme_ctrlr->reset_detach_poller == NULL);
    2416          50 :         nvme_ctrlr->reset_detach_poller = SPDK_POLLER_REGISTER(bdev_nvme_reconnect_ctrlr_poll,
    2417             :                                           nvme_ctrlr, 0);
    2418          50 : }
    2419             : 
                       /* Completion callback for the per-channel qpair teardown started by
                        * bdev_nvme_reset_destroy_qpairs(). status is asserted to be 0; ctx is not
                        * used.
                        *
                        * A non-fabrics controller goes straight to reconnect. A fabrics controller
                        * is disconnected first, with bdev_nvme_reconnect_ctrlr handed to
                        * nvme_ctrlr_disconnect() as the follow-up step.
                        */
    2420             : static void
    2421          37 : bdev_nvme_reset_destroy_qpair_done(struct nvme_ctrlr *nvme_ctrlr, void *ctx, int status)
    2422             : {
    2423             :         SPDK_DTRACE_PROBE1(bdev_nvme_ctrlr_reset, nvme_ctrlr->nbdev_ctrlr->name);
    2424          37 :         assert(status == 0);
    2425             : 
    2426          37 :         if (!spdk_nvme_ctrlr_is_fabrics(nvme_ctrlr->ctrlr)) {
    2427           0 :                 bdev_nvme_reconnect_ctrlr(nvme_ctrlr);
    2428             :         } else {
    2429          37 :                 nvme_ctrlr_disconnect(nvme_ctrlr, bdev_nvme_reconnect_ctrlr);
    2430             :         }
    2431          37 : }
    2432             : 
                       /* Run bdev_nvme_reset_destroy_qpair() on every channel of the controller,
                        * then bdev_nvme_reset_destroy_qpair_done() once the iteration finishes.
                        */
    2433             : static void
    2434          37 : bdev_nvme_reset_destroy_qpairs(struct nvme_ctrlr *nvme_ctrlr)
    2435             : {
    2436          37 :         nvme_ctrlr_for_each_channel(nvme_ctrlr,
    2437             :                                     bdev_nvme_reset_destroy_qpair,
    2438             :                                     NULL,
    2439             :                                     bdev_nvme_reset_destroy_qpair_done);
    2440          37 : }
    2441             : 
                       /* Thread message handler (sent by bdev_nvme_reset_ctrlr() and
                        * bdev_nvme_enable_ctrlr()); ctx is the struct nvme_ctrlr.
                        *
                        * Drops the reconnect delay timer, resumes the admin queue poller and
                        * starts the reconnect immediately. Must run on nvme_ctrlr->thread with
                        * resetting already set (both asserted).
                        */
    2442             : static void
    2443           3 : bdev_nvme_reconnect_ctrlr_now(void *ctx)
    2444             : {
    2445           3 :         struct nvme_ctrlr *nvme_ctrlr = ctx;
    2446             : 
    2447           3 :         assert(nvme_ctrlr->resetting == true);
    2448           3 :         assert(nvme_ctrlr->thread == spdk_get_thread());
    2449             : 
    2450           3 :         spdk_poller_unregister(&nvme_ctrlr->reconnect_delay_timer);
    2451             : 
    2452           3 :         spdk_poller_resume(nvme_ctrlr->adminq_timer_poller);
    2453             : 
    2454           3 :         bdev_nvme_reconnect_ctrlr(nvme_ctrlr);
    2455           3 : }
    2456             : 
                       /* Thread message handler (sent by bdev_nvme_reset_ctrlr()) that begins the
                        * reset sequence; ctx is the struct nvme_ctrlr.
                        *
                        * A non-fabrics controller is disconnected first, with
                        * bdev_nvme_reset_destroy_qpairs as the follow-up step. A fabrics
                        * controller has its qpairs destroyed first. Must run on
                        * nvme_ctrlr->thread with resetting already set (both asserted).
                        */
    2457             : static void
    2458          37 : _bdev_nvme_reset_ctrlr(void *ctx)
    2459             : {
    2460          37 :         struct nvme_ctrlr *nvme_ctrlr = ctx;
    2461             : 
    2462          37 :         assert(nvme_ctrlr->resetting == true);
    2463          37 :         assert(nvme_ctrlr->thread == spdk_get_thread());
    2464             : 
    2465          37 :         if (!spdk_nvme_ctrlr_is_fabrics(nvme_ctrlr->ctrlr)) {
    2466           0 :                 nvme_ctrlr_disconnect(nvme_ctrlr, bdev_nvme_reset_destroy_qpairs);
    2467             :         } else {
    2468          37 :                 bdev_nvme_reset_destroy_qpairs(nvme_ctrlr);
    2469             :         }
    2470          37 : }
    2471             : 
                       /* Request a reset of the controller.
                        *
                        * The state checks and updates happen under nvme_ctrlr->mutex; the actual
                        * work is sent as a message to nvme_ctrlr->thread.
                        *
                        * Returns 0 if the reset was started, -ENXIO if the controller is being
                        * destructed, -EBUSY if a reset is already in progress, or -EALREADY if
                        * the controller is disabled.
                        */
    2472             : static int
    2473          35 : bdev_nvme_reset_ctrlr(struct nvme_ctrlr *nvme_ctrlr)
    2474             : {
    2475             :         spdk_msg_fn msg_fn;
    2476             : 
    2477          35 :         pthread_mutex_lock(&nvme_ctrlr->mutex);
    2478          35 :         if (nvme_ctrlr->destruct) {
    2479           3 :                 pthread_mutex_unlock(&nvme_ctrlr->mutex);
    2480           3 :                 return -ENXIO;
    2481             :         }
    2482             : 
    2483          32 :         if (nvme_ctrlr->resetting) {
    2484           7 :                 pthread_mutex_unlock(&nvme_ctrlr->mutex);
    2485           7 :                 SPDK_NOTICELOG("Unable to perform reset, already in progress.\n");
    2486           7 :                 return -EBUSY;
    2487             :         }
    2488             : 
    2489          25 :         if (nvme_ctrlr->disabled) {
    2490           0 :                 pthread_mutex_unlock(&nvme_ctrlr->mutex);
    2491           0 :                 SPDK_NOTICELOG("Unable to perform reset. Controller is disabled.\n");
    2492           0 :                 return -EALREADY;
    2493             :         }
    2494             : 
    2495          25 :         nvme_ctrlr->resetting = true;
    2496          25 :         nvme_ctrlr->dont_retry = true;
    2497             : 
                               /* If a delayed reconnect is pending, skip the teardown and reconnect
                                * right away; otherwise run the full reset sequence.
                                */
    2498          25 :         if (nvme_ctrlr->reconnect_is_delayed) {
    2499           1 :                 SPDK_DEBUGLOG(bdev_nvme, "Reconnect is already scheduled.\n");
    2500           1 :                 msg_fn = bdev_nvme_reconnect_ctrlr_now;
    2501           1 :                 nvme_ctrlr->reconnect_is_delayed = false;
    2502             :         } else {
    2503          24 :                 msg_fn = _bdev_nvme_reset_ctrlr;
    2504          24 :                 assert(nvme_ctrlr->reset_start_tsc == 0);
    2505             :         }
    2506             : 
    2507          25 :         nvme_ctrlr->reset_start_tsc = spdk_get_ticks();
    2508             : 
    2509          25 :         pthread_mutex_unlock(&nvme_ctrlr->mutex);
    2510             : 
    2511          25 :         spdk_thread_send_msg(nvme_ctrlr->thread, msg_fn, nvme_ctrlr);
    2512          25 :         return 0;
    2513             : }
    2514             : 
                       /* Re-enable a disabled controller by reconnecting it.
                        *
                        * Under nvme_ctrlr->mutex the disabled flag is cleared, resetting is set
                        * and the start tick is recorded; bdev_nvme_reconnect_ctrlr_now() is then
                        * sent to nvme_ctrlr->thread.
                        *
                        * Returns 0 if the reconnect was started, -ENXIO if the controller is
                        * being destructed, -EBUSY if a reset is in progress, or -EALREADY if the
                        * controller is not disabled.
                        */
    2515             : static int
    2516           3 : bdev_nvme_enable_ctrlr(struct nvme_ctrlr *nvme_ctrlr)
    2517             : {
    2518           3 :         pthread_mutex_lock(&nvme_ctrlr->mutex);
    2519           3 :         if (nvme_ctrlr->destruct) {
    2520           0 :                 pthread_mutex_unlock(&nvme_ctrlr->mutex);
    2521           0 :                 return -ENXIO;
    2522             :         }
    2523             : 
    2524           3 :         if (nvme_ctrlr->resetting) {
    2525           0 :                 pthread_mutex_unlock(&nvme_ctrlr->mutex);
    2526           0 :                 return -EBUSY;
    2527             :         }
    2528             : 
    2529           3 :         if (!nvme_ctrlr->disabled) {
    2530           1 :                 pthread_mutex_unlock(&nvme_ctrlr->mutex);
    2531           1 :                 return -EALREADY;
    2532             :         }
    2533             : 
    2534           2 :         nvme_ctrlr->disabled = false;
    2535           2 :         nvme_ctrlr->resetting = true;
    2536             : 
    2537           2 :         nvme_ctrlr->reset_start_tsc = spdk_get_ticks();
    2538             : 
    2539           2 :         pthread_mutex_unlock(&nvme_ctrlr->mutex);
    2540             : 
    2541           2 :         spdk_thread_send_msg(nvme_ctrlr->thread, bdev_nvme_reconnect_ctrlr_now, nvme_ctrlr);
    2542           2 :         return 0;
    2543             : }
    2544             : 
                       /* Final step of disabling a controller; completion callback of the
                        * per-channel pass started by bdev_nvme_disable_ctrlr_complete(). ctx and
                        * status are not used.
                        *
                        * Clears the in-progress flags, marks the controller disabled, pauses the
                        * admin queue poller, reports success (0) to the stored operation callback
                        * if one is set, and unregisters the controller when a destruct was
                        * pending. Must run on nvme_ctrlr->thread (asserted).
                        */
    2545             : static void
    2546           2 : _bdev_nvme_disable_ctrlr_complete(struct nvme_ctrlr *nvme_ctrlr, void *ctx, int status)
    2547             : {
                               /* Take a copy of the callback before the fields are cleared below. */
    2548           2 :         bdev_nvme_ctrlr_op_cb ctrlr_op_cb_fn = nvme_ctrlr->ctrlr_op_cb_fn;
    2549           2 :         void *ctrlr_op_cb_arg = nvme_ctrlr->ctrlr_op_cb_arg;
    2550             :         enum bdev_nvme_op_after_reset op_after_disable;
    2551             : 
    2552           2 :         assert(nvme_ctrlr->thread == spdk_get_thread());
    2553             : 
    2554           2 :         nvme_ctrlr->ctrlr_op_cb_fn = NULL;
    2555           2 :         nvme_ctrlr->ctrlr_op_cb_arg = NULL;
    2556             : 
    2557           2 :         pthread_mutex_lock(&nvme_ctrlr->mutex);
    2558             : 
    2559           2 :         nvme_ctrlr->resetting = false;
    2560           2 :         nvme_ctrlr->dont_retry = false;
    2561             : 
    2562           2 :         op_after_disable = bdev_nvme_check_op_after_reset(nvme_ctrlr, true);
    2563             : 
    2564           2 :         nvme_ctrlr->disabled = true;
    2565           2 :         spdk_poller_pause(nvme_ctrlr->adminq_timer_poller);
    2566             : 
    2567           2 :         pthread_mutex_unlock(&nvme_ctrlr->mutex);
    2568             : 
                               /* The callback is invoked after the mutex has been released. */
    2569           2 :         if (ctrlr_op_cb_fn) {
    2570           0 :                 ctrlr_op_cb_fn(ctrlr_op_cb_arg, 0);
    2571             :         }
    2572             : 
    2573           2 :         switch (op_after_disable) {
    2574           0 :         case OP_COMPLETE_PENDING_DESTRUCT:
    2575           0 :                 nvme_ctrlr_unregister(nvme_ctrlr);
    2576           0 :                 break;
    2577           2 :         default:
    2578           2 :                 break;
    2579             :         }
    2580             : 
    2581           2 : }
    2582             : 
                       /* Run bdev_nvme_complete_pending_resets() on every channel with a NULL
                        * context, then finish the disable in _bdev_nvme_disable_ctrlr_complete().
                        */
    2583             : static void
    2584           2 : bdev_nvme_disable_ctrlr_complete(struct nvme_ctrlr *nvme_ctrlr)
    2585             : {
    2586             :         /* Make sure we clear any pending resets before returning. */
    2587           2 :         nvme_ctrlr_for_each_channel(nvme_ctrlr,
    2588             :                                     bdev_nvme_complete_pending_resets,
    2589             :                                     NULL,
    2590             :                                     _bdev_nvme_disable_ctrlr_complete);
    2591           2 : }
    2592             : 
                       /* Completion callback for the per-channel qpair teardown started by
                        * bdev_nvme_disable_destroy_qpairs(). status is asserted to be 0; ctx is
                        * not used.
                        *
                        * A non-fabrics controller completes the disable directly. A fabrics
                        * controller is disconnected first, with bdev_nvme_disable_ctrlr_complete
                        * handed to nvme_ctrlr_disconnect() as the follow-up step.
                        */
    2593             : static void
    2594           1 : bdev_nvme_disable_destroy_qpairs_done(struct nvme_ctrlr *nvme_ctrlr, void *ctx, int status)
    2595             : {
    2596           1 :         assert(status == 0);
    2597             : 
    2598           1 :         if (!spdk_nvme_ctrlr_is_fabrics(nvme_ctrlr->ctrlr)) {
    2599           0 :                 bdev_nvme_disable_ctrlr_complete(nvme_ctrlr);
    2600             :         } else {
    2601           1 :                 nvme_ctrlr_disconnect(nvme_ctrlr, bdev_nvme_disable_ctrlr_complete);
    2602             :         }
    2603           1 : }
    2604             : 
                       /* Run bdev_nvme_reset_destroy_qpair() on every channel of the controller,
                        * then bdev_nvme_disable_destroy_qpairs_done() once the iteration finishes.
                        */
    2605             : static void
    2606           1 : bdev_nvme_disable_destroy_qpairs(struct nvme_ctrlr *nvme_ctrlr)
    2607             : {
    2608           1 :         nvme_ctrlr_for_each_channel(nvme_ctrlr,
    2609             :                                     bdev_nvme_reset_destroy_qpair,
    2610             :                                     NULL,
    2611             :                                     bdev_nvme_disable_destroy_qpairs_done);
    2612           1 : }
    2613             : 
                       /* Thread message handler chosen by bdev_nvme_disable_ctrlr() when a
                        * delayed reconnect was pending; ctx is the struct nvme_ctrlr.
                        *
                        * Drops the reconnect delay timer and goes straight to completing the
                        * disable, without any qpair teardown or disconnect. Must run on
                        * nvme_ctrlr->thread with resetting already set (both asserted).
                        */
    2614             : static void
    2615           1 : _bdev_nvme_cancel_reconnect_and_disable_ctrlr(void *ctx)
    2616             : {
    2617           1 :         struct nvme_ctrlr *nvme_ctrlr = ctx;
    2618             : 
    2619           1 :         assert(nvme_ctrlr->resetting == true);
    2620           1 :         assert(nvme_ctrlr->thread == spdk_get_thread());
    2621             : 
    2622           1 :         spdk_poller_unregister(&nvme_ctrlr->reconnect_delay_timer);
    2623             : 
    2624           1 :         bdev_nvme_disable_ctrlr_complete(nvme_ctrlr);
    2625           1 : }
    2626             : 
                       /* Thread message handler chosen by bdev_nvme_disable_ctrlr() when no
                        * delayed reconnect was pending; ctx is the struct nvme_ctrlr.
                        *
                        * A non-fabrics controller is disconnected first, with
                        * bdev_nvme_disable_destroy_qpairs as the follow-up step. A fabrics
                        * controller has its qpairs destroyed first. Must run on
                        * nvme_ctrlr->thread with resetting already set (both asserted).
                        */
    2627             : static void
    2628           1 : _bdev_nvme_disconnect_and_disable_ctrlr(void *ctx)
    2629             : {
    2630           1 :         struct nvme_ctrlr *nvme_ctrlr = ctx;
    2631             : 
    2632           1 :         assert(nvme_ctrlr->resetting == true);
    2633           1 :         assert(nvme_ctrlr->thread == spdk_get_thread());
    2634             : 
    2635           1 :         if (!spdk_nvme_ctrlr_is_fabrics(nvme_ctrlr->ctrlr)) {
    2636           0 :                 nvme_ctrlr_disconnect(nvme_ctrlr, bdev_nvme_disable_destroy_qpairs);
    2637             :         } else {
    2638           1 :                 bdev_nvme_disable_destroy_qpairs(nvme_ctrlr);
    2639             :         }
    2640           1 : }
    2641             : 
                       /* Request that the controller be disabled.
                        *
                        * The state checks and updates happen under nvme_ctrlr->mutex; the actual
                        * work is sent as a message to nvme_ctrlr->thread.
                        *
                        * Returns 0 if the disable was started, -ENXIO if the controller is being
                        * destructed, -EBUSY if a reset is in progress, or -EALREADY if the
                        * controller is already disabled.
                        */
    2642             : static int
    2643           5 : bdev_nvme_disable_ctrlr(struct nvme_ctrlr *nvme_ctrlr)
    2644             : {
    2645             :         spdk_msg_fn msg_fn;
    2646             : 
    2647           5 :         pthread_mutex_lock(&nvme_ctrlr->mutex);
    2648           5 :         if (nvme_ctrlr->destruct) {
    2649           1 :                 pthread_mutex_unlock(&nvme_ctrlr->mutex);
    2650           1 :                 return -ENXIO;
    2651             :         }
    2652             : 
    2653           4 :         if (nvme_ctrlr->resetting) {
    2654           1 :                 pthread_mutex_unlock(&nvme_ctrlr->mutex);
    2655           1 :                 return -EBUSY;
    2656             :         }
    2657             : 
    2658           3 :         if (nvme_ctrlr->disabled) {
    2659           1 :                 pthread_mutex_unlock(&nvme_ctrlr->mutex);
    2660           1 :                 return -EALREADY;
    2661             :         }
    2662             : 
                               /* The disable sequence reuses the resetting flag as its
                                * in-progress marker.
                                */
    2663           2 :         nvme_ctrlr->resetting = true;
    2664           2 :         nvme_ctrlr->dont_retry = true;
    2665             : 
                               /* If a delayed reconnect is pending, only the timer has to be
                                * cancelled; otherwise qpairs and the connection are torn down.
                                */
    2666           2 :         if (nvme_ctrlr->reconnect_is_delayed) {
    2667           1 :                 msg_fn = _bdev_nvme_cancel_reconnect_and_disable_ctrlr;
    2668           1 :                 nvme_ctrlr->reconnect_is_delayed = false;
    2669             :         } else {
    2670           1 :                 msg_fn = _bdev_nvme_disconnect_and_disable_ctrlr;
    2671             :         }
    2672             : 
    2673           2 :         nvme_ctrlr->reset_start_tsc = spdk_get_ticks();
    2674             : 
    2675           2 :         pthread_mutex_unlock(&nvme_ctrlr->mutex);
    2676             : 
    2677           2 :         spdk_thread_send_msg(nvme_ctrlr->thread, msg_fn, nvme_ctrlr);
    2678           2 :         return 0;
    2679             : }
    2680             : 
                       /* Start a reset, enable or disable operation on one controller.
                        *
                        * cb_fn/cb_arg are stored in the controller only when the operation was
                        * started (return value 0); on any error the caller's callback is not
                        * recorded and will not be invoked on behalf of this call.
                        *
                        * Returns the result of the selected bdev_nvme_*_ctrlr() function, or
                        * -EINVAL for an unknown op.
                        */
    2681             : static int
    2682          17 : nvme_ctrlr_op(struct nvme_ctrlr *nvme_ctrlr, enum nvme_ctrlr_op op,
    2683             :               bdev_nvme_ctrlr_op_cb cb_fn, void *cb_arg)
    2684             : {
    2685             :         int rc;
    2686             : 
    2687          17 :         switch (op) {
    2688          16 :         case NVME_CTRLR_OP_RESET:
    2689          16 :                 rc = bdev_nvme_reset_ctrlr(nvme_ctrlr);
    2690          16 :                 break;
    2691           0 :         case NVME_CTRLR_OP_ENABLE:
    2692           0 :                 rc = bdev_nvme_enable_ctrlr(nvme_ctrlr);
    2693           0 :                 break;
    2694           0 :         case NVME_CTRLR_OP_DISABLE:
    2695           0 :                 rc = bdev_nvme_disable_ctrlr(nvme_ctrlr);
    2696           0 :                 break;
    2697           1 :         default:
    2698           1 :                 rc = -EINVAL;
    2699           1 :                 break;
    2700             :         }
    2701             : 
                               /* NOTE(review): the callback is recorded after the operation's
                                * message has already been sent to nvme_ctrlr->thread, and outside
                                * nvme_ctrlr->mutex - presumably callers run on that same thread so
                                * the message cannot be processed before these stores; confirm.
                                */
    2702          17 :         if (rc == 0) {
    2703           9 :                 assert(nvme_ctrlr->ctrlr_op_cb_fn == NULL);
    2704           9 :                 assert(nvme_ctrlr->ctrlr_op_cb_arg == NULL);
    2705           9 :                 nvme_ctrlr->ctrlr_op_cb_fn = cb_fn;
    2706           9 :                 nvme_ctrlr->ctrlr_op_cb_arg = cb_arg;
    2707             :         }
    2708          17 :         return rc;
    2709             : }
    2710             : 
                       /* Context shared by nvme_ctrlr_op_rpc() and nvme_bdev_ctrlr_op_rpc().
                        * Allocated with calloc() by those entry points and freed after the
                        * caller's callback has been invoked.
                        */
    2711             : struct nvme_ctrlr_op_rpc_ctx {
                               /* Controller the operation was last issued to; only written by the
                                * nvme_bdev_ctrlr_op_rpc() path to walk the controller list.
                                */
    2712             :         struct nvme_ctrlr *nvme_ctrlr;
                               /* Thread that made the request; completions are sent back to it. */
    2713             :         struct spdk_thread *orig_thread;
                               /* Requested operation; only written by nvme_bdev_ctrlr_op_rpc(). */
    2714             :         enum nvme_ctrlr_op op;
                               /* Result handed to cb_fn. */
    2715             :         int rc;
                               /* Caller's completion callback and its argument. */
    2716             :         bdev_nvme_ctrlr_op_cb cb_fn;
    2717             :         void *cb_arg;
    2718             : };
    2719             : 
                       /* Thread message handler sent by nvme_ctrlr_op_rpc_complete() to the
                        * requesting thread; _ctx is the struct nvme_ctrlr_op_rpc_ctx.
                        * Invokes the caller's callback with the stored result and frees the
                        * context.
                        */
    2720             : static void
    2721           4 : _nvme_ctrlr_op_rpc_complete(void *_ctx)
    2722             : {
    2723           4 :         struct nvme_ctrlr_op_rpc_ctx *ctx = _ctx;
    2724             : 
    2725           4 :         assert(ctx != NULL);
    2726           4 :         assert(ctx->cb_fn != NULL);
    2727             : 
    2728           4 :         ctx->cb_fn(ctx->cb_arg, ctx->rc);
    2729             : 
    2730           4 :         free(ctx);
    2731           4 : }
    2732             : 
                       /* Operation-complete callback used by nvme_ctrlr_op_rpc(); cb_arg is the
                        * struct nvme_ctrlr_op_rpc_ctx. Records rc in the context and forwards the
                        * completion to the thread that made the request.
                        */
    2733             : static void
    2734           4 : nvme_ctrlr_op_rpc_complete(void *cb_arg, int rc)
    2735             : {
    2736           4 :         struct nvme_ctrlr_op_rpc_ctx *ctx = cb_arg;
    2737             : 
    2738           4 :         ctx->rc = rc;
    2739             : 
    2740           4 :         spdk_thread_send_msg(ctx->orig_thread, _nvme_ctrlr_op_rpc_complete, ctx);
    2741           4 : }
    2742             : 
                       /* Run a controller operation on a single nvme_ctrlr and report the result
                        * through cb_fn(cb_arg, rc). cb_fn must not be NULL (asserted).
                        *
                        * If the context cannot be allocated, cb_fn is called synchronously with
                        * -ENOMEM. In every other case the callback is delivered through a message
                        * to the calling thread. -EALREADY from nvme_ctrlr_op() is reported to the
                        * caller as success (0); other errors are passed through unchanged.
                        */
    2743             : void
    2744           4 : nvme_ctrlr_op_rpc(struct nvme_ctrlr *nvme_ctrlr, enum nvme_ctrlr_op op,
    2745             :                   bdev_nvme_ctrlr_op_cb cb_fn, void *cb_arg)
    2746             : {
    2747             :         struct nvme_ctrlr_op_rpc_ctx *ctx;
    2748             :         int rc;
    2749             : 
    2750           4 :         assert(cb_fn != NULL);
    2751             : 
    2752           4 :         ctx = calloc(1, sizeof(*ctx));
    2753           4 :         if (ctx == NULL) {
    2754           0 :                 SPDK_ERRLOG("Failed to allocate nvme_ctrlr_op_rpc_ctx.\n");
    2755           0 :                 cb_fn(cb_arg, -ENOMEM);
    2756           0 :                 return;
    2757             :         }
    2758             : 
    2759           4 :         ctx->orig_thread = spdk_get_thread();
    2760           4 :         ctx->cb_fn = cb_fn;
    2761           4 :         ctx->cb_arg = cb_arg;
    2762             : 
    2763           4 :         rc = nvme_ctrlr_op(nvme_ctrlr, op, nvme_ctrlr_op_rpc_complete, ctx);
    2764           4 :         if (rc == 0) {
                                       /* Started; nvme_ctrlr_op_rpc_complete() is now the stored
                                        * operation callback and owns the context.
                                        */
    2765           1 :                 return;
    2766           3 :         } else if (rc == -EALREADY) {
    2767           0 :                 rc = 0;
    2768             :         }
    2769             : 
                               /* The operation did not start, so complete the request here. */
    2770           3 :         nvme_ctrlr_op_rpc_complete(ctx, rc);
    2771             : }
    2772             : 
    2773             : static void nvme_bdev_ctrlr_op_rpc_continue(void *cb_arg, int rc);
    2774             : 
                       /* Thread message handler sent by nvme_bdev_ctrlr_op_rpc_continue() to the
                        * requesting thread; _ctx is the struct nvme_ctrlr_op_rpc_ctx.
                        *
                        * If the previous controller's operation failed, or there is no next
                        * controller in the list, the caller's callback is invoked with ctx->rc
                        * and the context is freed. Otherwise the same operation is issued to the
                        * next controller.
                        */
    2775             : static void
    2776           2 : _nvme_bdev_ctrlr_op_rpc_continue(void *_ctx)
    2777             : {
    2778           2 :         struct nvme_ctrlr_op_rpc_ctx *ctx = _ctx;
    2779             :         struct nvme_ctrlr *prev_nvme_ctrlr, *next_nvme_ctrlr;
    2780             :         int rc;
    2781             : 
    2782           2 :         prev_nvme_ctrlr = ctx->nvme_ctrlr;
    2783           2 :         ctx->nvme_ctrlr = NULL;
    2784             : 
    2785           2 :         if (ctx->rc != 0) {
    2786           0 :                 goto complete;
    2787             :         }
    2788             : 
    2789           2 :         next_nvme_ctrlr = TAILQ_NEXT(prev_nvme_ctrlr, tailq);
    2790           2 :         if (next_nvme_ctrlr == NULL) {
    2791           1 :                 goto complete;
    2792             :         }
    2793             : 
    2794           1 :         rc = nvme_ctrlr_op(next_nvme_ctrlr, ctx->op, nvme_bdev_ctrlr_op_rpc_continue, ctx);
    2795           1 :         if (rc == 0) {
    2796           1 :                 ctx->nvme_ctrlr = next_nvme_ctrlr;
    2797           1 :                 return;
    2798           0 :         } else if (rc == -EALREADY) {
                                       /* NOTE(review): this branch records next_nvme_ctrlr and maps the
                                        * result to success, but control then falls through to
                                        * "complete", so controllers after next_nvme_ctrlr are never
                                        * visited. nvme_bdev_ctrlr_op_rpc() handles the same case by
                                        * calling nvme_bdev_ctrlr_op_rpc_continue() instead - confirm
                                        * whether stopping the walk here is intended.
                                        */
    2799           0 :                 ctx->nvme_ctrlr = next_nvme_ctrlr;
    2800           0 :                 rc = 0;
    2801             :         }
    2802             : 
    2803           0 :         ctx->rc = rc;
    2804             : 
    2805           1 : complete:
    2806           1 :         ctx->cb_fn(ctx->cb_arg, ctx->rc);
    2807           1 :         free(ctx);
    2808             : }
    2809             : 
                       /* Operation-complete callback used by the nvme_bdev_ctrlr_op_rpc() path;
                        * cb_arg is the struct nvme_ctrlr_op_rpc_ctx. Records rc in the context and
                        * sends _nvme_bdev_ctrlr_op_rpc_continue() to the thread that made the
                        * request.
                        */
    2810             : static void
    2811           2 : nvme_bdev_ctrlr_op_rpc_continue(void *cb_arg, int rc)
    2812             : {
    2813           2 :         struct nvme_ctrlr_op_rpc_ctx *ctx = cb_arg;
    2814             : 
    2815           2 :         ctx->rc = rc;
    2816             : 
    2817           2 :         spdk_thread_send_msg(ctx->orig_thread, _nvme_bdev_ctrlr_op_rpc_continue, ctx);
    2818           2 : }
    2819             : 
                       /* Run a controller operation on the controllers of an nvme_bdev_ctrlr, one
                        * at a time starting with the first list entry, and report the result
                        * through cb_fn(cb_arg, rc). cb_fn must not be NULL and the controller list
                        * must not be empty (both asserted).
                        *
                        * If the context cannot be allocated, cb_fn is called synchronously with
                        * -ENOMEM. Otherwise each completion is forwarded to
                        * _nvme_bdev_ctrlr_op_rpc_continue() on the calling thread, which advances
                        * to the next controller or invokes cb_fn.
                        */
    2820             : void
    2821           1 : nvme_bdev_ctrlr_op_rpc(struct nvme_bdev_ctrlr *nbdev_ctrlr, enum nvme_ctrlr_op op,
    2822             :                        bdev_nvme_ctrlr_op_cb cb_fn, void *cb_arg)
    2823             : {
    2824             :         struct nvme_ctrlr_op_rpc_ctx *ctx;
    2825             :         struct nvme_ctrlr *nvme_ctrlr;
    2826             :         int rc;
    2827             : 
    2828           1 :         assert(cb_fn != NULL);
    2829             : 
    2830           1 :         ctx = calloc(1, sizeof(*ctx));
    2831           1 :         if (ctx == NULL) {
    2832           0 :                 SPDK_ERRLOG("Failed to allocate nvme_ctrlr_op_rpc_ctx.\n");
    2833           0 :                 cb_fn(cb_arg, -ENOMEM);
    2834           0 :                 return;
    2835             :         }
    2836             : 
    2837           1 :         ctx->orig_thread = spdk_get_thread();
    2838           1 :         ctx->op = op;
    2839           1 :         ctx->cb_fn = cb_fn;
    2840           1 :         ctx->cb_arg = cb_arg;
    2841             : 
    2842           1 :         nvme_ctrlr = TAILQ_FIRST(&nbdev_ctrlr->ctrlrs);
    2843           1 :         assert(nvme_ctrlr != NULL);
    2844             : 
    2845           1 :         rc = nvme_ctrlr_op(nvme_ctrlr, op, nvme_bdev_ctrlr_op_rpc_continue, ctx);
    2846           1 :         if (rc == 0) {
    2847           1 :                 ctx->nvme_ctrlr = nvme_ctrlr;
    2848           1 :                 return;
    2849           0 :         } else if (rc == -EALREADY) {
                                       /* Treated as success; remember this controller so the
                                        * continuation can move on to the one after it.
                                        */
    2850           0 :                 ctx->nvme_ctrlr = nvme_ctrlr;
    2851           0 :                 rc = 0;
    2852             :         }
    2853             : 
                               /* The operation did not start, so drive the continuation here. */
    2854           0 :         nvme_bdev_ctrlr_op_rpc_continue(ctx, rc);
    2855             : }
    2856             : 
    2857             : static int _bdev_nvme_reset_io(struct nvme_io_path *io_path, struct nvme_bdev_io *bio);
    2858             : 
    /*
     * Final callback of the channel iteration started by
     * bdev_nvme_reset_io_complete(): complete the reset bdev_io.
     *
     * bio->cpl.cdw0 carries the reset outcome: 0 maps to
     * SPDK_BDEV_IO_STATUS_SUCCESS, any other value to
     * SPDK_BDEV_IO_STATUS_FAILED. nbdev and status are not used.
     *
     * ctx: the struct nvme_bdev_io of the reset request.
     */
    2859             : static void
    2860           8 : bdev_nvme_unfreeze_bdev_channel_done(struct nvme_bdev *nbdev, void *ctx, int status)
    2861             : {
    2862           8 :         struct nvme_bdev_io *bio = ctx;
    2863             :         enum spdk_bdev_io_status io_status;
    2864             : 
    2865           8 :         if (bio->cpl.cdw0 == 0) {
    2866           6 :                 io_status = SPDK_BDEV_IO_STATUS_SUCCESS;
    2867             :         } else {
    2868           2 :                 io_status = SPDK_BDEV_IO_STATUS_FAILED;
    2869             :         }
    2870             : 
    2871           8 :         __bdev_nvme_io_complete(spdk_bdev_io_from_ctx(bio), io_status, NULL);
    2872           8 : }
    2873             : 
    /*
     * Per-channel step of the iteration started by bdev_nvme_reset_io_complete().
     *
     * Calls bdev_nvme_abort_retry_ios() for the channel, clears the channel's
     * `resetting` flag and continues the iteration with status 0.
     * nbdev and ctx are not used.
     */
    2874             : static void
    2875          16 : bdev_nvme_unfreeze_bdev_channel(struct nvme_bdev_channel_iter *i,
    2876             :                                 struct nvme_bdev *nbdev,
    2877             :                                 struct nvme_bdev_channel *nbdev_ch, void *ctx)
    2878             : {
    2879          16 :         bdev_nvme_abort_retry_ios(nbdev_ch);
    2880          16 :         nbdev_ch->resetting = false;
    2881             : 
    2882          16 :         nvme_bdev_for_each_channel_continue(i, 0);
    2883          16 : }
    2884             : 
    /*
     * Finish a reset I/O: run bdev_nvme_unfreeze_bdev_channel() on every channel
     * of the bdev that owns the I/O, then bdev_nvme_unfreeze_bdev_channel_done(),
     * which completes the bdev_io.
     *
     * bio: the reset request; it is passed as the iteration context.
     */
    2885             : static void
    2886           8 : bdev_nvme_reset_io_complete(struct nvme_bdev_io *bio)
    2887             : {
    2888           8 :         struct spdk_bdev_io *bdev_io = spdk_bdev_io_from_ctx(bio);
    2889           8 :         struct nvme_bdev *nbdev = (struct nvme_bdev *)bdev_io->bdev->ctxt;
    2890             : 
    2891             :         /* Abort all queued I/Os for retry. */
    2892           8 :         nvme_bdev_for_each_channel(nbdev,
    2893             :                                    bdev_nvme_unfreeze_bdev_channel,
    2894             :                                    bio,
    2895             :                                    bdev_nvme_unfreeze_bdev_channel_done);
    2896           8 : }
    2897             : 
    /*
     * Thread-message handler sent by bdev_nvme_reset_io_continue(): advance a
     * reset I/O to the next I/O path, or finish it.
     *
     * The reset is finished (bdev_nvme_reset_io_complete()) when
     *  - the previous step failed (bio->cpl.cdw0 != 0),
     *  - there is no further io_path after the one just processed, or
     *  - issuing the reset on the next io_path fails, in which case
     *    bio->cpl.cdw0 is set to 1 first.
     *
     * ctx: the struct nvme_bdev_io of the reset request.
     */
    2898             : static void
    2899          11 : _bdev_nvme_reset_io_continue(void *ctx)
    2900             : {
    2901          11 :         struct nvme_bdev_io *bio = ctx;
    2902             :         struct nvme_io_path *prev_io_path, *next_io_path;
    2903             :         int rc;
    2904             : 
                               /* Detach the path that was just processed; _bdev_nvme_reset_io()
                                * asserts that bio->io_path is NULL before it records a new one.
                                */
    2905          11 :         prev_io_path = bio->io_path;
    2906          11 :         bio->io_path = NULL;
    2907             : 
    2908          11 :         if (bio->cpl.cdw0 != 0) {
    2909           2 :                 goto complete;
    2910             :         }
    2911             : 
                               /* NOTE(review): STAILQ_NEXT() dereferences prev_io_path, so this
                                * relies on bio->io_path having been set before the continuation
                                * was scheduled with a success status - confirm for every caller.
                                */
    2912           9 :         next_io_path = STAILQ_NEXT(prev_io_path, stailq);
    2913           9 :         if (next_io_path == NULL) {
    2914           6 :                 goto complete;
    2915             :         }
    2916             : 
    2917           3 :         rc = _bdev_nvme_reset_io(next_io_path, bio);
    2918           3 :         if (rc == 0) {
                                       /* The reset of the next path is pending; its completion
                                        * callback is bdev_nvme_reset_io_continue().
                                        */
    2919           3 :                 return;
    2920             :         }
    2921             : 
    2922           0 :         bio->cpl.cdw0 = 1;
    2923             : 
    2924           8 : complete:
    2925           8 :         bdev_nvme_reset_io_complete(bio);
    2926             : }
    2927             : 
    /*
     * Completion callback given to nvme_ctrlr_op() by _bdev_nvme_reset_io().
     *
     * Encodes the result in bio->cpl.cdw0 (0 for rc == 0, otherwise 1) and
     * defers the rest of the work to _bdev_nvme_reset_io_continue() on the
     * thread returned by spdk_bdev_io_get_thread() for the bdev_io.
     *
     * cb_arg: the struct nvme_bdev_io of the reset request.
     * rc:     result of the reset step that just finished.
     */
    2928             : static void
    2929          11 : bdev_nvme_reset_io_continue(void *cb_arg, int rc)
    2930             : {
    2931          11 :         struct nvme_bdev_io *bio = cb_arg;
    2932          11 :         struct spdk_bdev_io *bdev_io = spdk_bdev_io_from_ctx(bio);
    2933             : 
    2934          11 :         bio->cpl.cdw0 = (rc == 0) ? 0 : 1;
    2935             : 
    2936          11 :         spdk_thread_send_msg(spdk_bdev_io_get_thread(bdev_io), _bdev_nvme_reset_io_continue, bio);
    2937          11 : }
    2938             : 
    /*
     * Issue NVME_CTRLR_OP_RESET to the controller behind `io_path` on behalf of
     * the reset I/O `bio`, with bdev_nvme_reset_io_continue() as the callback.
     *
     * Returns 0 when the reset was started, or when nvme_ctrlr_op() reported
     * -EBUSY and the bio was queued on the channel's pending_resets list; in
     * both cases bio->io_path is set to io_path. Any other error from
     * nvme_ctrlr_op() is returned as is and bio->io_path is left untouched.
     */
    2939             : static int
    2940          11 : _bdev_nvme_reset_io(struct nvme_io_path *io_path, struct nvme_bdev_io *bio)
    2941             : {
    2942             :         struct nvme_ctrlr_channel *ctrlr_ch;
    2943             :         int rc;
    2944             : 
    2945          11 :         rc = nvme_ctrlr_op(io_path->qpair->ctrlr, NVME_CTRLR_OP_RESET,
    2946             :                            bdev_nvme_reset_io_continue, bio);
    2947          11 :         if (rc != 0 && rc != -EBUSY) {
    2948           0 :                 return rc;
    2949             :         }
    2950             : 
                               /* Record the path being reset; _bdev_nvme_reset_io_continue() uses
                                * it to find the next path.
                                */
    2951          11 :         assert(bio->io_path == NULL);
    2952          11 :         bio->io_path = io_path;
    2953             : 
    2954          11 :         if (rc == -EBUSY) {
    2955           5 :                 ctrlr_ch = io_path->qpair->ctrlr_ch;
    2956           5 :                 assert(ctrlr_ch != NULL);
    2957             :                 /*
    2958             :                  * Reset call is queued only if it is from the app framework. This is on purpose so that
    2959             :                  * we don't interfere with the app framework reset strategy. i.e. we are deferring to the
    2960             :                  * upper level. If they are in the middle of a reset, we won't try to schedule another one.
    2961             :                  */
    2962           5 :                 TAILQ_INSERT_TAIL(&ctrlr_ch->pending_resets, bio, retry_link);
    2963             :         }
    2964             : 
    2965          11 :         return 0;
    2966             : }
    2967             : 
    /*
     * Final callback of the channel iteration started by bdev_nvme_reset_io():
     * begin resetting the controllers, starting with the first io_path of the
     * channel the reset I/O was submitted on.
     *
     * bio->cpl.cdw0 is initialised to 0 (success). If the first reset cannot be
     * issued, bdev_nvme_reset_io_continue() is called directly, with -EALREADY
     * converted to success. nbdev and status are not used.
     *
     * ctx: the struct nvme_bdev_io of the reset request.
     */
    2968             : static void
    2969           8 : bdev_nvme_freeze_bdev_channel_done(struct nvme_bdev *nbdev, void *ctx, int status)
    2970             : {
    2971           8 :         struct nvme_bdev_io *bio = ctx;
    2972           8 :         struct spdk_bdev_io *bdev_io = spdk_bdev_io_from_ctx(bio);
    2973             :         struct nvme_bdev_channel *nbdev_ch;
    2974             :         struct nvme_io_path *io_path;
    2975             :         int rc;
    2976             : 
    2977           8 :         nbdev_ch = spdk_io_channel_get_ctx(spdk_bdev_io_get_io_channel(bdev_io));
    2978             : 
    2979           8 :         bio->cpl.cdw0 = 0;
    2980             : 
    2981             :         /* Reset all nvme_ctrlrs of a bdev controller sequentially. */
    2982           8 :         io_path = STAILQ_FIRST(&nbdev_ch->io_path_list);
    2983           8 :         assert(io_path != NULL);
    2984             : 
    2985           8 :         rc = _bdev_nvme_reset_io(io_path, bio);
    2986           8 :         if (rc != 0) {
    2987             :                 /* If the current nvme_ctrlr is disabled, skip it and move to the next nvme_ctrlr. */
    2988           0 :                 rc = (rc == -EALREADY) ? 0 : rc;
    2989             : 
                                       /* NOTE(review): on this path _bdev_nvme_reset_io() returned
                                        * before assigning bio->io_path. The RESET case of
                                        * _bdev_nvme_submit_request() sets io_path to NULL, so when rc
                                        * is mapped to 0 here the deferred
                                        * _bdev_nvme_reset_io_continue() appears to pass NULL to
                                        * STAILQ_NEXT(). This branch has no coverage; confirm whether
                                        * -EALREADY can reach it and, if so, record io_path in
                                        * bio->io_path before continuing.
                                        */
    2990           0 :                 bdev_nvme_reset_io_continue(bio, rc);
    2991             :         }
    2992           8 : }
    2993             : 
    /*
     * Per-channel step of the iteration started by bdev_nvme_reset_io(): set the
     * channel's `resetting` flag and continue the iteration with status 0.
     * nbdev and ctx are not used.
     */
    2994             : static void
    2995          16 : bdev_nvme_freeze_bdev_channel(struct nvme_bdev_channel_iter *i,
    2996             :                               struct nvme_bdev *nbdev,
    2997             :                               struct nvme_bdev_channel *nbdev_ch, void *ctx)
    2998             : {
    2999          16 :         nbdev_ch->resetting = true;
    3000             : 
    3001          16 :         nvme_bdev_for_each_channel_continue(i, 0);
    3002          16 : }
    3003             : 
    /*
     * Entry point for a reset I/O: run bdev_nvme_freeze_bdev_channel() on every
     * channel of `nbdev`, then bdev_nvme_freeze_bdev_channel_done(), which
     * issues the first controller reset.
     *
     * nbdev: bdev whose channels are iterated.
     * bio:   the reset request; it is passed as the iteration context.
     */
    3004             : static void
    3005           8 : bdev_nvme_reset_io(struct nvme_bdev *nbdev, struct nvme_bdev_io *bio)
    3006             : {
    3007           8 :         nvme_bdev_for_each_channel(nbdev,
    3008             :                                    bdev_nvme_freeze_bdev_channel,
    3009             :                                    bio,
    3010             :                                    bdev_nvme_freeze_bdev_channel_done);
    3011           8 : }
    3012             : 
    /*
     * Decide whether a failover can start on `nvme_ctrlr` and update its state.
     *
     * This function takes no lock itself; bdev_nvme_failover_ctrlr() calls it
     * with nvme_ctrlr->mutex held.
     *
     * remove: forwarded to bdev_nvme_failover_trid().
     *
     * Returns:
     *   0            failover may proceed; `resetting` and `in_failover` are set
     *                and reset_start_tsc is recorded.
     *   -ENXIO       the controller is being destructed.
     *   -EINPROGRESS a reset is already running; `pending_failover` was set.
     *   -EBUSY       a failover is already running.
     *   -EALREADY    a reconnect is already scheduled or the controller is
     *                disabled (bdev_nvme_failover_trid() has been called).
     */
    3013             : static int
    3014          19 : bdev_nvme_failover_ctrlr_unsafe(struct nvme_ctrlr *nvme_ctrlr, bool remove)
    3015             : {
    3016          19 :         if (nvme_ctrlr->destruct) {
    3017             :                 /* Don't bother resetting if the controller is in the process of being destructed. */
    3018           2 :                 return -ENXIO;
    3019             :         }
    3020             : 
    3021          17 :         if (nvme_ctrlr->resetting) {
    3022           3 :                 if (!nvme_ctrlr->in_failover) {
    3023           3 :                         SPDK_NOTICELOG("Reset is already in progress. Defer failover until reset completes.\n");
    3024             : 
    3025             :                         /* Defer failover until reset completes. */
    3026           3 :                         nvme_ctrlr->pending_failover = true;
    3027           3 :                         return -EINPROGRESS;
    3028             :                 } else {
    3029           0 :                         SPDK_NOTICELOG("Unable to perform failover, already in progress.\n");
    3030           0 :                         return -EBUSY;
    3031             :                 }
    3032             :         }
    3033             : 
                               /* Runs before the reconnect/disabled checks, so it also takes effect
                                * on the two -EALREADY returns below.
                                */
    3034          14 :         bdev_nvme_failover_trid(nvme_ctrlr, remove, true);
    3035             : 
    3036          14 :         if (nvme_ctrlr->reconnect_is_delayed) {
    3037           1 :                 SPDK_NOTICELOG("Reconnect is already scheduled.\n");
    3038             : 
    3039             :                 /* We rely on the next reconnect for the failover. */
    3040           1 :                 return -EALREADY;
    3041             :         }
    3042             : 
    3043          13 :         if (nvme_ctrlr->disabled) {
    3044           0 :                 SPDK_NOTICELOG("Controller is disabled.\n");
    3045             : 
    3046             :                 /* We rely on the enablement for the failover. */
    3047           0 :                 return -EALREADY;
    3048             :         }
    3049             : 
    3050          13 :         nvme_ctrlr->resetting = true;
    3051          13 :         nvme_ctrlr->in_failover = true;
    3052             : 
    3053          13 :         assert(nvme_ctrlr->reset_start_tsc == 0);
    3054          13 :         nvme_ctrlr->reset_start_tsc = spdk_get_ticks();
    3055             : 
    3056          13 :         return 0;
    3057             : }
    3058             : 
    /*
     * Locked wrapper around bdev_nvme_failover_ctrlr_unsafe() (remove = false).
     *
     * On success _bdev_nvme_reset_ctrlr() is sent as a message to
     * nvme_ctrlr->thread. -EALREADY is reported to the caller as 0; every other
     * value is returned unchanged.
     */
    3059             : static int
    3060          17 : bdev_nvme_failover_ctrlr(struct nvme_ctrlr *nvme_ctrlr)
    3061             : {
    3062             :         int rc;
    3063             : 
    3064          17 :         pthread_mutex_lock(&nvme_ctrlr->mutex);
    3065          17 :         rc = bdev_nvme_failover_ctrlr_unsafe(nvme_ctrlr, false);
    3066          17 :         pthread_mutex_unlock(&nvme_ctrlr->mutex);
    3067             : 
    3068          17 :         if (rc == 0) {
    3069          12 :                 spdk_thread_send_msg(nvme_ctrlr->thread, _bdev_nvme_reset_ctrlr, nvme_ctrlr);
    3070           5 :         } else if (rc == -EALREADY) {
    3071           0 :                 rc = 0;
    3072             :         }
    3073             : 
    3074          17 :         return rc;
    3075             : }
    3076             : 
    3077             : static int bdev_nvme_unmap(struct nvme_bdev_io *bio, uint64_t offset_blocks,
    3078             :                            uint64_t num_blocks);
    3079             : 
    3080             : static int bdev_nvme_write_zeroes(struct nvme_bdev_io *bio, uint64_t offset_blocks,
    3081             :                                   uint64_t num_blocks);
    3082             : 
    3083             : static int bdev_nvme_copy(struct nvme_bdev_io *bio, uint64_t dst_offset_blocks,
    3084             :                           uint64_t src_offset_blocks,
    3085             :                           uint64_t num_blocks);
    3086             : 
    /*
     * Callback passed to spdk_bdev_io_get_buf() by the READ case of
     * _bdev_nvme_submit_request(): issue the read once a buffer is available.
     *
     * The I/O is completed here with an error when
     *  - `success` is false (-EINVAL),
     *  - nvme_io_path_is_available() rejects bio->io_path (-ENXIO), or
     *  - bdev_nvme_readv() returns non-zero (that value).
     * Otherwise nothing is completed here. `ch` is not used.
     */
    3087             : static void
    3088           1 : bdev_nvme_get_buf_cb(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io,
    3089             :                      bool success)
    3090             : {
    3091           1 :         struct nvme_bdev_io *bio = (struct nvme_bdev_io *)bdev_io->driver_ctx;
    3092             :         int ret;
    3093             : 
    3094           1 :         if (!success) {
    3095           0 :                 ret = -EINVAL;
    3096           0 :                 goto exit;
    3097             :         }
    3098             : 
                               /* The path was chosen before the buffer was requested, so it is
                                * checked again before use.
                                */
    3099           1 :         if (spdk_unlikely(!nvme_io_path_is_available(bio->io_path))) {
    3100           0 :                 ret = -ENXIO;
    3101           0 :                 goto exit;
    3102             :         }
    3103             : 
    3104           1 :         ret = bdev_nvme_readv(bio,
    3105             :                               bdev_io->u.bdev.iovs,
    3106             :                               bdev_io->u.bdev.iovcnt,
    3107             :                               bdev_io->u.bdev.md_buf,
    3108             :                               bdev_io->u.bdev.num_blocks,
    3109             :                               bdev_io->u.bdev.offset_blocks,
    3110             :                               bdev_io->u.bdev.dif_check_flags,
    3111             :                               bdev_io->u.bdev.memory_domain,
    3112             :                               bdev_io->u.bdev.memory_domain_ctx,
    3113             :                               bdev_io->u.bdev.accel_sequence);
    3114             : 
    3115           1 : exit:
    3116           1 :         if (spdk_unlikely(ret != 0)) {
    3117           0 :                 bdev_nvme_io_complete(bio, ret);
    3118             :         }
    3119           1 : }
    3120             : 
    /*
     * Dispatch one bdev_io to the handler for its I/O type.
     *
     * Most cases call a bdev_nvme_*() helper and store its return value in rc;
     * a non-zero rc completes the I/O through bdev_nvme_io_complete() at the
     * end of the function. RESET, FLUSH, NVME_ADMIN and ABORT return from
     * inside the switch and never reach that check. Unknown types yield
     * -EINVAL.
     *
     * nbdev_ch: channel the I/O was submitted on (used by NVME_ADMIN and ABORT).
     * bdev_io:  the request; its driver_ctx is the struct nvme_bdev_io.
     */
    3121             : static inline void
    3122          52 : _bdev_nvme_submit_request(struct nvme_bdev_channel *nbdev_ch, struct spdk_bdev_io *bdev_io)
    3123             : {
    3124          52 :         struct nvme_bdev_io *nbdev_io = (struct nvme_bdev_io *)bdev_io->driver_ctx;
    3125          52 :         struct spdk_bdev *bdev = bdev_io->bdev;
    3126             :         struct nvme_bdev_io *nbdev_io_to_abort;
    3127          52 :         int rc = 0;
    3128             : 
    3129          52 :         switch (bdev_io->type) {
    3130           3 :         case SPDK_BDEV_IO_TYPE_READ:
                                       /* Read directly when the first iovec already has a buffer;
                                        * otherwise request one and read from bdev_nvme_get_buf_cb().
                                        */
    3131           3 :                 if (bdev_io->u.bdev.iovs && bdev_io->u.bdev.iovs[0].iov_base) {
    3132             : 
    3133           2 :                         rc = bdev_nvme_readv(nbdev_io,
    3134             :                                              bdev_io->u.bdev.iovs,
    3135             :                                              bdev_io->u.bdev.iovcnt,
    3136             :                                              bdev_io->u.bdev.md_buf,
    3137             :                                              bdev_io->u.bdev.num_blocks,
    3138             :                                              bdev_io->u.bdev.offset_blocks,
    3139             :                                              bdev_io->u.bdev.dif_check_flags,
    3140             :                                              bdev_io->u.bdev.memory_domain,
    3141             :                                              bdev_io->u.bdev.memory_domain_ctx,
    3142             :                                              bdev_io->u.bdev.accel_sequence);
    3143             :                 } else {
    3144           1 :                         spdk_bdev_io_get_buf(bdev_io, bdev_nvme_get_buf_cb,
    3145           1 :                                              bdev_io->u.bdev.num_blocks * bdev->blocklen);
    3146           1 :                         rc = 0;
    3147             :                 }
    3148           3 :                 break;
    3149          25 :         case SPDK_BDEV_IO_TYPE_WRITE:
    3150          25 :                 rc = bdev_nvme_writev(nbdev_io,
    3151             :                                       bdev_io->u.bdev.iovs,
    3152             :                                       bdev_io->u.bdev.iovcnt,
    3153             :                                       bdev_io->u.bdev.md_buf,
    3154             :                                       bdev_io->u.bdev.num_blocks,
    3155             :                                       bdev_io->u.bdev.offset_blocks,
    3156             :                                       bdev_io->u.bdev.dif_check_flags,
    3157             :                                       bdev_io->u.bdev.memory_domain,
    3158             :                                       bdev_io->u.bdev.memory_domain_ctx,
    3159             :                                       bdev_io->u.bdev.accel_sequence,
    3160             :                                       bdev_io->u.bdev.nvme_cdw12,
    3161             :                                       bdev_io->u.bdev.nvme_cdw13);
    3162          25 :                 break;
    3163           1 :         case SPDK_BDEV_IO_TYPE_COMPARE:
    3164           1 :                 rc = bdev_nvme_comparev(nbdev_io,
    3165             :                                         bdev_io->u.bdev.iovs,
    3166             :                                         bdev_io->u.bdev.iovcnt,
    3167             :                                         bdev_io->u.bdev.md_buf,
    3168             :                                         bdev_io->u.bdev.num_blocks,
    3169             :                                         bdev_io->u.bdev.offset_blocks,
    3170             :                                         bdev_io->u.bdev.dif_check_flags);
    3171           1 :                 break;
    3172           2 :         case SPDK_BDEV_IO_TYPE_COMPARE_AND_WRITE:
    3173           2 :                 rc = bdev_nvme_comparev_and_writev(nbdev_io,
    3174             :                                                    bdev_io->u.bdev.iovs,
    3175             :                                                    bdev_io->u.bdev.iovcnt,
    3176             :                                                    bdev_io->u.bdev.fused_iovs,
    3177             :                                                    bdev_io->u.bdev.fused_iovcnt,
    3178             :                                                    bdev_io->u.bdev.md_buf,
    3179             :                                                    bdev_io->u.bdev.num_blocks,
    3180             :                                                    bdev_io->u.bdev.offset_blocks,
    3181             :                                                    bdev_io->u.bdev.dif_check_flags);
    3182           2 :                 break;
    3183           1 :         case SPDK_BDEV_IO_TYPE_UNMAP:
    3184           1 :                 rc = bdev_nvme_unmap(nbdev_io,
    3185             :                                      bdev_io->u.bdev.offset_blocks,
    3186             :                                      bdev_io->u.bdev.num_blocks);
    3187           1 :                 break;
    3188           0 :         case SPDK_BDEV_IO_TYPE_WRITE_ZEROES:
    3189           0 :                 rc =  bdev_nvme_write_zeroes(nbdev_io,
    3190             :                                              bdev_io->u.bdev.offset_blocks,
    3191             :                                              bdev_io->u.bdev.num_blocks);
    3192           0 :                 break;
    3193           8 :         case SPDK_BDEV_IO_TYPE_RESET:
                                       /* The reset sequence uses io_path as its own cursor over the
                                        * channel's paths, so the path chosen by the caller is dropped.
                                        */
    3194           8 :                 nbdev_io->io_path = NULL;
    3195           8 :                 bdev_nvme_reset_io(bdev->ctxt, nbdev_io);
    3196           8 :                 return;
    3197             : 
    3198           1 :         case SPDK_BDEV_IO_TYPE_FLUSH:
                                       /* Completed immediately with status 0; no command is issued. */
    3199           1 :                 bdev_nvme_io_complete(nbdev_io, 0);
    3200           1 :                 return;
    3201             : 
    3202           0 :         case SPDK_BDEV_IO_TYPE_ZONE_APPEND:
    3203           0 :                 rc = bdev_nvme_zone_appendv(nbdev_io,
    3204             :                                             bdev_io->u.bdev.iovs,
    3205             :                                             bdev_io->u.bdev.iovcnt,
    3206             :                                             bdev_io->u.bdev.md_buf,
    3207             :                                             bdev_io->u.bdev.num_blocks,
    3208             :                                             bdev_io->u.bdev.offset_blocks,
    3209             :                                             bdev_io->u.bdev.dif_check_flags);
    3210           0 :                 break;
    3211           0 :         case SPDK_BDEV_IO_TYPE_GET_ZONE_INFO:
    3212           0 :                 rc = bdev_nvme_get_zone_info(nbdev_io,
    3213             :                                              bdev_io->u.zone_mgmt.zone_id,
    3214             :                                              bdev_io->u.zone_mgmt.num_zones,
    3215           0 :                                              bdev_io->u.zone_mgmt.buf);
    3216           0 :                 break;
    3217           0 :         case SPDK_BDEV_IO_TYPE_ZONE_MANAGEMENT:
    3218           0 :                 rc = bdev_nvme_zone_management(nbdev_io,
    3219             :                                                bdev_io->u.zone_mgmt.zone_id,
    3220             :                                                bdev_io->u.zone_mgmt.zone_action);
    3221           0 :                 break;
    3222           5 :         case SPDK_BDEV_IO_TYPE_NVME_ADMIN:
                                       /* The handler is given the channel, not the chosen io_path. */
    3223           5 :                 nbdev_io->io_path = NULL;
    3224           5 :                 bdev_nvme_admin_passthru(nbdev_ch,
    3225             :                                          nbdev_io,
    3226             :                                          &bdev_io->u.nvme_passthru.cmd,
    3227             :                                          bdev_io->u.nvme_passthru.buf,
    3228             :                                          bdev_io->u.nvme_passthru.nbytes);
    3229           5 :                 return;
    3230             : 
    3231           0 :         case SPDK_BDEV_IO_TYPE_NVME_IO:
    3232           0 :                 rc = bdev_nvme_io_passthru(nbdev_io,
    3233             :                                            &bdev_io->u.nvme_passthru.cmd,
    3234             :                                            bdev_io->u.nvme_passthru.buf,
    3235             :                                            bdev_io->u.nvme_passthru.nbytes);
    3236           0 :                 break;
    3237           0 :         case SPDK_BDEV_IO_TYPE_NVME_IO_MD:
    3238           0 :                 rc = bdev_nvme_io_passthru_md(nbdev_io,
    3239             :                                               &bdev_io->u.nvme_passthru.cmd,
    3240             :                                               bdev_io->u.nvme_passthru.buf,
    3241             :                                               bdev_io->u.nvme_passthru.nbytes,
    3242             :                                               bdev_io->u.nvme_passthru.md_buf,
    3243             :                                               bdev_io->u.nvme_passthru.md_len);
    3244           0 :                 break;
    3245           0 :         case SPDK_BDEV_IO_TYPE_NVME_IOV_MD:
    3246           0 :                 rc = bdev_nvme_iov_passthru_md(nbdev_io,
    3247             :                                                &bdev_io->u.nvme_passthru.cmd,
    3248             :                                                bdev_io->u.nvme_passthru.iovs,
    3249             :                                                bdev_io->u.nvme_passthru.iovcnt,
    3250             :                                                bdev_io->u.nvme_passthru.nbytes,
    3251             :                                                bdev_io->u.nvme_passthru.md_buf,
    3252             :                                                bdev_io->u.nvme_passthru.md_len);
    3253           0 :                 break;
    3254           6 :         case SPDK_BDEV_IO_TYPE_ABORT:
                                       /* As with NVME_ADMIN, the handler works from the channel. */
    3255           6 :                 nbdev_io->io_path = NULL;
    3256           6 :                 nbdev_io_to_abort = (struct nvme_bdev_io *)bdev_io->u.abort.bio_to_abort->driver_ctx;
    3257           6 :                 bdev_nvme_abort(nbdev_ch,
    3258             :                                 nbdev_io,
    3259             :                                 nbdev_io_to_abort);
    3260           6 :                 return;
    3261             : 
    3262           0 :         case SPDK_BDEV_IO_TYPE_COPY:
    3263           0 :                 rc = bdev_nvme_copy(nbdev_io,
    3264             :                                     bdev_io->u.bdev.offset_blocks,
    3265             :                                     bdev_io->u.bdev.copy.src_offset_blocks,
    3266             :                                     bdev_io->u.bdev.num_blocks);
    3267           0 :                 break;
    3268           0 :         default:
    3269           0 :                 rc = -EINVAL;
    3270           0 :                 break;
    3271             :         }
    3272             : 
                               /* Submission failed synchronously: complete the I/O with the error. */
    3273          32 :         if (spdk_unlikely(rc != 0)) {
    3274           0 :                 bdev_nvme_io_complete(nbdev_io, rc);
    3275             :         }
    3276             : }
    3277             : 
    /*
     * Submit entry point for a bdev_io on this module's channel.
     *
     * Stamps the submit time, records a TRACE_BDEV_NVME_IO_START trace entry,
     * selects an I/O path with bdev_nvme_find_io_path() and hands the request
     * to _bdev_nvme_submit_request(). When no path is found, the I/O is
     * completed with -ENXIO unless bdev_nvme_io_type_is_admin() accepts its
     * type.
     *
     * ch:      I/O channel whose context is the struct nvme_bdev_channel.
     * bdev_io: the request; its driver_ctx is the struct nvme_bdev_io.
     */
    3278             : static void
    3279          61 : bdev_nvme_submit_request(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io)
    3280             : {
    3281          61 :         struct nvme_bdev_channel *nbdev_ch = spdk_io_channel_get_ctx(ch);
    3282          61 :         struct nvme_bdev_io *nbdev_io = (struct nvme_bdev_io *)bdev_io->driver_ctx;
    3283             : 
    3284          61 :         if (spdk_likely(nbdev_io->submit_tsc == 0)) {
    3285          61 :                 nbdev_io->submit_tsc = spdk_bdev_io_get_submit_tsc(bdev_io);
    3286             :         } else {
    3287             :                 /* There are cases where submit_tsc != 0, i.e. retry I/O.
    3288             :                  * We need to update submit_tsc here.
    3289             :                  */
    3290           0 :                 nbdev_io->submit_tsc = spdk_get_ticks();
    3291             :         }
    3292             : 
    3293          61 :         spdk_trace_record(TRACE_BDEV_NVME_IO_START, 0, 0, (uintptr_t)nbdev_io, (uintptr_t)bdev_io);
    3294          61 :         nbdev_io->io_path = bdev_nvme_find_io_path(nbdev_ch);
    3295          61 :         if (spdk_unlikely(!nbdev_io->io_path)) {
    3296          13 :                 if (!bdev_nvme_io_type_is_admin(bdev_io->type)) {
    3297          12 :                         bdev_nvme_io_complete(nbdev_io, -ENXIO);
    3298          12 :                         return;
    3299             :                 }
    3300             : 
    3301             :                 /* Admin commands do not use the optimal I/O path.
    3302             :                  * Simply fall through even if it is not found.
    3303             :                  */
    3304             :         }
    3305             : 
    3306          49 :         _bdev_nvme_submit_request(nbdev_ch, bdev_io);
    3307             : }
    3308             : 
    /*
     * Report whether a namespace command set identifier is one this code treats
     * as supported: true for SPDK_NVME_CSI_NVM and SPDK_NVME_CSI_ZNS, false for
     * every other value.
     */
    3309             : static bool
    3310           0 : bdev_nvme_is_supported_csi(enum spdk_nvme_csi csi)
    3311             : {
    3312           0 :         switch (csi) {
    3313           0 :         case SPDK_NVME_CSI_NVM:
    3314           0 :                 return true;
    3315           0 :         case SPDK_NVME_CSI_ZNS:
    3316           0 :                 return true;
    3317           0 :         default:
    3318           0 :                 return false;
    3319             :         }
    3320             : }
    3321             : 
    /*
     * Report whether the bdev supports `io_type`.
     *
     * The answer is derived from the first namespace in nbdev->nvme_ns_list and
     * from the controller that owns it. It is always false when that entry has
     * no spdk_nvme_ns attached.
     *
     * ctx:     the struct nvme_bdev being queried.
     * io_type: I/O type to check.
     */
    3322             : static bool
    3323           0 : bdev_nvme_io_type_supported(void *ctx, enum spdk_bdev_io_type io_type)
    3324             : {
    3325           0 :         struct nvme_bdev *nbdev = ctx;
    3326             :         struct nvme_ns *nvme_ns;
    3327             :         struct spdk_nvme_ns *ns;
    3328             :         struct spdk_nvme_ctrlr *ctrlr;
    3329             :         const struct spdk_nvme_ctrlr_data *cdata;
    3330             : 
    3331           0 :         nvme_ns = TAILQ_FIRST(&nbdev->nvme_ns_list);
    3332           0 :         assert(nvme_ns != NULL);
    3333           0 :         ns = nvme_ns->ns;
    3334           0 :         if (ns == NULL) {
    3335           0 :                 return false;
    3336             :         }
    3337             : 
                               /* For a command set not accepted by bdev_nvme_is_supported_csi(),
                                * only the passthrough types listed here are reported as supported.
                                */
    3338           0 :         if (!bdev_nvme_is_supported_csi(spdk_nvme_ns_get_csi(ns))) {
    3339           0 :                 switch (io_type) {
    3340           0 :                 case SPDK_BDEV_IO_TYPE_NVME_ADMIN:
    3341             :                 case SPDK_BDEV_IO_TYPE_NVME_IO:
    3342           0 :                         return true;
    3343             : 
    3344           0 :                 case SPDK_BDEV_IO_TYPE_NVME_IO_MD:
    3345           0 :                         return spdk_nvme_ns_get_md_size(ns) ? true : false;
    3346             : 
    3347           0 :                 default:
    3348           0 :                         return false;
    3349             :                 }
    3350             :         }
    3351             : 
    3352           0 :         ctrlr = spdk_nvme_ns_get_ctrlr(ns);
    3353             : 
    3354           0 :         switch (io_type) {
                               /* Reported as supported without consulting the namespace or
                                * controller.
                                */
    3355           0 :         case SPDK_BDEV_IO_TYPE_READ:
    3356             :         case SPDK_BDEV_IO_TYPE_WRITE:
    3357             :         case SPDK_BDEV_IO_TYPE_RESET:
    3358             :         case SPDK_BDEV_IO_TYPE_FLUSH:
    3359             :         case SPDK_BDEV_IO_TYPE_NVME_ADMIN:
    3360             :         case SPDK_BDEV_IO_TYPE_NVME_IO:
    3361             :         case SPDK_BDEV_IO_TYPE_ABORT:
    3362           0 :                 return true;
    3363             : 
    3364           0 :         case SPDK_BDEV_IO_TYPE_COMPARE:
    3365           0 :                 return spdk_nvme_ns_supports_compare(ns);
    3366             : 
                               /* Requires a non-zero metadata size on the namespace. */
    3367           0 :         case SPDK_BDEV_IO_TYPE_NVME_IO_MD:
    3368           0 :                 return spdk_nvme_ns_get_md_size(ns) ? true : false;
    3369             : 
                               /* The next two depend on bits in the controller data's oncs field. */
    3370           0 :         case SPDK_BDEV_IO_TYPE_UNMAP:
    3371           0 :                 cdata = spdk_nvme_ctrlr_get_data(ctrlr);
    3372           0 :                 return cdata->oncs.dsm;
    3373             : 
    3374           0 :         case SPDK_BDEV_IO_TYPE_WRITE_ZEROES:
    3375           0 :                 cdata = spdk_nvme_ctrlr_get_data(ctrlr);
    3376           0 :                 return cdata->oncs.write_zeroes;
    3377             : 
    3378           0 :         case SPDK_BDEV_IO_TYPE_COMPARE_AND_WRITE:
    3379           0 :                 if (spdk_nvme_ctrlr_get_flags(ctrlr) &
    3380             :                     SPDK_NVME_CTRLR_COMPARE_AND_WRITE_SUPPORTED) {
    3381           0 :                         return true;
    3382             :                 }
    3383           0 :                 return false;
    3384             : 
                               /* Zone operations require the ZNS command set. */
    3385           0 :         case SPDK_BDEV_IO_TYPE_GET_ZONE_INFO:
    3386             :         case SPDK_BDEV_IO_TYPE_ZONE_MANAGEMENT:
    3387           0 :                 return spdk_nvme_ns_get_csi(ns) == SPDK_NVME_CSI_ZNS;
    3388             : 
    3389           0 :         case SPDK_BDEV_IO_TYPE_ZONE_APPEND:
    3390           0 :                 return spdk_nvme_ns_get_csi(ns) == SPDK_NVME_CSI_ZNS &&
    3391           0 :                        spdk_nvme_ctrlr_get_flags(ctrlr) & SPDK_NVME_CTRLR_ZONE_APPEND_SUPPORTED;
    3392             : 
    3393           0 :         case SPDK_BDEV_IO_TYPE_COPY:
    3394           0 :                 cdata = spdk_nvme_ctrlr_get_data(ctrlr);
    3395           0 :                 return cdata->oncs.copy;
    3396             : 
    3397           0 :         default:
    3398           0 :                 return false;
    3399             :         }
    3400             : }
    3401             : 
    3402             : static int
    3403          59 : nvme_qpair_create(struct nvme_ctrlr *nvme_ctrlr, struct nvme_ctrlr_channel *ctrlr_ch)
    3404             : {
    3405             :         struct nvme_qpair *nvme_qpair;
    3406             :         struct spdk_io_channel *pg_ch;
    3407             :         int rc;
    3408             : 
    3409          59 :         nvme_qpair = calloc(1, sizeof(*nvme_qpair));
    3410          59 :         if (!nvme_qpair) {
    3411           0 :                 SPDK_ERRLOG("Failed to alloc nvme_qpair.\n");
    3412           0 :                 return -1;
    3413             :         }
    3414             : 
    3415          59 :         TAILQ_INIT(&nvme_qpair->io_path_list);
    3416             : 
    3417          59 :         nvme_qpair->ctrlr = nvme_ctrlr;
    3418          59 :         nvme_qpair->ctrlr_ch = ctrlr_ch;
    3419             : 
    3420          59 :         pg_ch = spdk_get_io_channel(&g_nvme_bdev_ctrlrs);
    3421          59 :         if (!pg_ch) {
    3422           0 :                 free(nvme_qpair);
    3423           0 :                 return -1;
    3424             :         }
    3425             : 
    3426          59 :         nvme_qpair->group = spdk_io_channel_get_ctx(pg_ch);
    3427             : 
    3428             : #ifdef SPDK_CONFIG_VTUNE
    3429             :         nvme_qpair->group->collect_spin_stat = true;
    3430             : #else
    3431          59 :         nvme_qpair->group->collect_spin_stat = false;
    3432             : #endif
    3433             : 
    3434          59 :         if (!nvme_ctrlr->disabled) {
    3435             :                 /* If a nvme_ctrlr is disabled, don't try to create qpair for it. Qpair will
    3436             :                  * be created when it's enabled.
    3437             :                  */
    3438          59 :                 rc = bdev_nvme_create_qpair(nvme_qpair);
    3439          59 :                 if (rc != 0) {
    3440             :                         /* nvme_ctrlr can't create IO qpair if connection is down.
    3441             :                          * If reconnect_delay_sec is non-zero, creating IO qpair is retried
    3442             :                          * after reconnect_delay_sec seconds. If bdev_retry_count is non-zero,
    3443             :                          * submitted IO will be queued until IO qpair is successfully created.
    3444             :                          *
    3445             :                          * Hence, if both are satisfied, ignore the failure.
    3446             :                          */
    3447           0 :                         if (nvme_ctrlr->opts.reconnect_delay_sec == 0 || g_opts.bdev_retry_count == 0) {
    3448           0 :                                 spdk_put_io_channel(pg_ch);
    3449           0 :                                 free(nvme_qpair);
    3450           0 :                                 return rc;
    3451             :                         }
    3452             :                 }
    3453             :         }
    3454             : 
    3455          59 :         TAILQ_INSERT_TAIL(&nvme_qpair->group->qpair_list, nvme_qpair, tailq);
    3456             : 
    3457          59 :         ctrlr_ch->qpair = nvme_qpair;
    3458             : 
    3459          59 :         pthread_mutex_lock(&nvme_qpair->ctrlr->mutex);
    3460          59 :         nvme_qpair->ctrlr->ref++;
    3461          59 :         pthread_mutex_unlock(&nvme_qpair->ctrlr->mutex);
    3462             : 
    3463          59 :         return 0;
    3464             : }
    3465             : 
    3466             : static int
    3467          59 : bdev_nvme_create_ctrlr_channel_cb(void *io_device, void *ctx_buf)
    3468             : {
    3469          59 :         struct nvme_ctrlr *nvme_ctrlr = io_device;
    3470          59 :         struct nvme_ctrlr_channel *ctrlr_ch = ctx_buf;
    3471             : 
    3472          59 :         TAILQ_INIT(&ctrlr_ch->pending_resets);
    3473             : 
    3474          59 :         return nvme_qpair_create(nvme_ctrlr, ctrlr_ch);
    3475             : }
    3476             : 
    3477             : static void
    3478          59 : nvme_qpair_delete(struct nvme_qpair *nvme_qpair)
    3479             : {
    3480             :         struct nvme_io_path *io_path, *next;
    3481             : 
    3482          59 :         assert(nvme_qpair->group != NULL);
    3483             : 
    3484          96 :         TAILQ_FOREACH_SAFE(io_path, &nvme_qpair->io_path_list, tailq, next) {
    3485          37 :                 TAILQ_REMOVE(&nvme_qpair->io_path_list, io_path, tailq);
    3486          37 :                 nvme_io_path_free(io_path);
    3487             :         }
    3488             : 
    3489          59 :         TAILQ_REMOVE(&nvme_qpair->group->qpair_list, nvme_qpair, tailq);
    3490             : 
    3491          59 :         spdk_put_io_channel(spdk_io_channel_from_ctx(nvme_qpair->group));
    3492             : 
    3493          59 :         nvme_ctrlr_release(nvme_qpair->ctrlr);
    3494             : 
    3495          59 :         free(nvme_qpair);
    3496          59 : }
    3497             : 
    3498             : static void
    3499          59 : bdev_nvme_destroy_ctrlr_channel_cb(void *io_device, void *ctx_buf)
    3500             : {
    3501          59 :         struct nvme_ctrlr_channel *ctrlr_ch = ctx_buf;
    3502             :         struct nvme_qpair *nvme_qpair;
    3503             : 
    3504          59 :         nvme_qpair = ctrlr_ch->qpair;
    3505          59 :         assert(nvme_qpair != NULL);
    3506             : 
    3507          59 :         _bdev_nvme_clear_io_path_cache(nvme_qpair);
    3508             : 
    3509          59 :         if (nvme_qpair->qpair != NULL) {
    3510          45 :                 if (ctrlr_ch->reset_iter == NULL) {
    3511          45 :                         spdk_nvme_ctrlr_disconnect_io_qpair(nvme_qpair->qpair);
    3512             :                 } else {
    3513             :                         /* Skip current ctrlr_channel in a full reset sequence because
    3514             :                          * it is being deleted now. The qpair is already being disconnected.
    3515             :                          * We do not have to restart disconnecting it.
    3516             :                          */
    3517           0 :                         nvme_ctrlr_for_each_channel_continue(ctrlr_ch->reset_iter, 0);
    3518             :                 }
    3519             : 
    3520             :                 /* We cannot release a reference to the poll group now.
    3521             :                  * The qpair may be disconnected asynchronously later.
    3522             :                  * We need to poll it until it is actually disconnected.
    3523             :                  * Just detach the qpair from the deleting ctrlr_channel.
    3524             :                  */
    3525          45 :                 nvme_qpair->ctrlr_ch = NULL;
    3526             :         } else {
    3527          14 :                 assert(ctrlr_ch->reset_iter == NULL);
    3528             : 
    3529          14 :                 nvme_qpair_delete(nvme_qpair);
    3530             :         }
    3531          59 : }
    3532             : 
    3533             : static inline struct spdk_io_channel *
    3534           0 : bdev_nvme_get_accel_channel(struct nvme_poll_group *group)
    3535             : {
    3536           0 :         if (spdk_unlikely(!group->accel_channel)) {
    3537           0 :                 group->accel_channel = spdk_accel_get_io_channel();
    3538           0 :                 if (!group->accel_channel) {
    3539           0 :                         SPDK_ERRLOG("Cannot get the accel_channel for bdev nvme polling group=%p\n",
    3540             :                                     group);
    3541           0 :                         return NULL;
    3542             :                 }
    3543             :         }
    3544             : 
    3545           0 :         return group->accel_channel;
    3546             : }
    3547             : 
    3548             : static void
    3549           0 : bdev_nvme_finish_sequence(void *seq, spdk_nvme_accel_completion_cb cb_fn, void *cb_arg)
    3550             : {
    3551           0 :         spdk_accel_sequence_finish(seq, cb_fn, cb_arg);
    3552           0 : }
    3553             : 
    3554             : static void
    3555           0 : bdev_nvme_abort_sequence(void *seq)
    3556             : {
    3557           0 :         spdk_accel_sequence_abort(seq);
    3558           0 : }
    3559             : 
    3560             : static void
    3561           0 : bdev_nvme_reverse_sequence(void *seq)
    3562             : {
    3563           0 :         spdk_accel_sequence_reverse(seq);
    3564           0 : }
    3565             : 
    3566             : static int
    3567           0 : bdev_nvme_append_crc32c(void *ctx, void **seq, uint32_t *dst, struct iovec *iovs, uint32_t iovcnt,
    3568             :                         struct spdk_memory_domain *domain, void *domain_ctx, uint32_t seed,
    3569             :                         spdk_nvme_accel_step_cb cb_fn, void *cb_arg)
    3570             : {
    3571             :         struct spdk_io_channel *ch;
    3572           0 :         struct nvme_poll_group *group = ctx;
    3573             : 
    3574           0 :         ch = bdev_nvme_get_accel_channel(group);
    3575           0 :         if (spdk_unlikely(ch == NULL)) {
    3576           0 :                 return -ENOMEM;
    3577             :         }
    3578             : 
    3579           0 :         return spdk_accel_append_crc32c((struct spdk_accel_sequence **)seq, ch, dst, iovs, iovcnt,
    3580             :                                         domain, domain_ctx, seed, cb_fn, cb_arg);
    3581             : }
    3582             : 
    3583             : static int
    3584           0 : bdev_nvme_append_copy(void *ctx, void **seq, struct iovec *dst_iovs, uint32_t dst_iovcnt,
    3585             :                       struct spdk_memory_domain *dst_domain, void *dst_domain_ctx,
    3586             :                       struct iovec *src_iovs, uint32_t src_iovcnt,
    3587             :                       struct spdk_memory_domain *src_domain, void *src_domain_ctx,
    3588             :                       spdk_nvme_accel_step_cb cb_fn, void *cb_arg)
    3589             : {
    3590             :         struct spdk_io_channel *ch;
    3591           0 :         struct nvme_poll_group *group = ctx;
    3592             : 
    3593           0 :         ch = bdev_nvme_get_accel_channel(group);
    3594           0 :         if (spdk_unlikely(ch == NULL)) {
    3595           0 :                 return -ENOMEM;
    3596             :         }
    3597             : 
    3598           0 :         return spdk_accel_append_copy((struct spdk_accel_sequence **)seq, ch,
    3599             :                                       dst_iovs, dst_iovcnt, dst_domain, dst_domain_ctx,
    3600             :                                       src_iovs, src_iovcnt, src_domain, src_domain_ctx,
    3601             :                                       cb_fn, cb_arg);
    3602             : }
    3603             : 
    3604             : static struct spdk_nvme_accel_fn_table g_bdev_nvme_accel_fn_table = {
    3605             :         .table_size             = sizeof(struct spdk_nvme_accel_fn_table),
    3606             :         .append_crc32c          = bdev_nvme_append_crc32c,
    3607             :         .append_copy            = bdev_nvme_append_copy,
    3608             :         .finish_sequence        = bdev_nvme_finish_sequence,
    3609             :         .reverse_sequence       = bdev_nvme_reverse_sequence,
    3610             :         .abort_sequence         = bdev_nvme_abort_sequence,
    3611             : };
    3612             : 
    3613             : static int
    3614          44 : bdev_nvme_create_poll_group_cb(void *io_device, void *ctx_buf)
    3615             : {
    3616          44 :         struct nvme_poll_group *group = ctx_buf;
    3617             : 
    3618          44 :         TAILQ_INIT(&group->qpair_list);
    3619             : 
    3620          44 :         group->group = spdk_nvme_poll_group_create(group, &g_bdev_nvme_accel_fn_table);
    3621          44 :         if (group->group == NULL) {
    3622           0 :                 return -1;
    3623             :         }
    3624             : 
    3625          44 :         group->poller = SPDK_POLLER_REGISTER(bdev_nvme_poll, group, g_opts.nvme_ioq_poll_period_us);
    3626             : 
    3627          44 :         if (group->poller == NULL) {
    3628           0 :                 spdk_nvme_poll_group_destroy(group->group);
    3629           0 :                 return -1;
    3630             :         }
    3631             : 
    3632          44 :         return 0;
    3633             : }
    3634             : 
    3635             : static void
    3636          44 : bdev_nvme_destroy_poll_group_cb(void *io_device, void *ctx_buf)
    3637             : {
    3638          44 :         struct nvme_poll_group *group = ctx_buf;
    3639             : 
    3640          44 :         assert(TAILQ_EMPTY(&group->qpair_list));
    3641             : 
    3642          44 :         if (group->accel_channel) {
    3643           0 :                 spdk_put_io_channel(group->accel_channel);
    3644             :         }
    3645             : 
    3646          44 :         spdk_poller_unregister(&group->poller);
    3647          44 :         if (spdk_nvme_poll_group_destroy(group->group)) {
    3648           0 :                 SPDK_ERRLOG("Unable to destroy a poll group for the NVMe bdev module.\n");
    3649           0 :                 assert(false);
    3650             :         }
    3651          44 : }
    3652             : 
    3653             : static struct spdk_io_channel *
    3654           0 : bdev_nvme_get_io_channel(void *ctx)
    3655             : {
    3656           0 :         struct nvme_bdev *nvme_bdev = ctx;
    3657             : 
    3658           0 :         return spdk_get_io_channel(nvme_bdev);
    3659             : }
    3660             : 
    3661             : static void *
    3662           0 : bdev_nvme_get_module_ctx(void *ctx)
    3663             : {
    3664           0 :         struct nvme_bdev *nvme_bdev = ctx;
    3665             :         struct nvme_ns *nvme_ns;
    3666             : 
    3667           0 :         if (!nvme_bdev || nvme_bdev->disk.module != &nvme_if) {
    3668           0 :                 return NULL;
    3669             :         }
    3670             : 
    3671           0 :         nvme_ns = TAILQ_FIRST(&nvme_bdev->nvme_ns_list);
    3672           0 :         if (!nvme_ns) {
    3673           0 :                 return NULL;
    3674             :         }
    3675             : 
    3676           0 :         return nvme_ns->ns;
    3677             : }
    3678             : 
    3679             : static const char *
    3680           0 : _nvme_ana_state_str(enum spdk_nvme_ana_state ana_state)
    3681             : {
    3682           0 :         switch (ana_state) {
    3683           0 :         case SPDK_NVME_ANA_OPTIMIZED_STATE:
    3684           0 :                 return "optimized";
    3685           0 :         case SPDK_NVME_ANA_NON_OPTIMIZED_STATE:
    3686           0 :                 return "non_optimized";
    3687           0 :         case SPDK_NVME_ANA_INACCESSIBLE_STATE:
    3688           0 :                 return "inaccessible";
    3689           0 :         case SPDK_NVME_ANA_PERSISTENT_LOSS_STATE:
    3690           0 :                 return "persistent_loss";
    3691           0 :         case SPDK_NVME_ANA_CHANGE_STATE:
    3692           0 :                 return "change";
    3693           0 :         default:
    3694           0 :                 return NULL;
    3695             :         }
    3696             : }
    3697             : 
    3698             : static int
    3699           8 : bdev_nvme_get_memory_domains(void *ctx, struct spdk_memory_domain **domains, int array_size)
    3700             : {
    3701           8 :         struct spdk_memory_domain **_domains = NULL;
    3702           8 :         struct nvme_bdev *nbdev = ctx;
    3703             :         struct nvme_ns *nvme_ns;
    3704           8 :         int i = 0, _array_size = array_size;
    3705           8 :         int rc = 0;
    3706             : 
    3707          22 :         TAILQ_FOREACH(nvme_ns, &nbdev->nvme_ns_list, tailq) {
    3708          14 :                 if (domains && array_size >= i) {
    3709          11 :                         _domains = &domains[i];
    3710             :                 } else {
    3711           3 :                         _domains = NULL;
    3712             :                 }
    3713          14 :                 rc = spdk_nvme_ctrlr_get_memory_domains(nvme_ns->ctrlr->ctrlr, _domains, _array_size);
    3714          14 :                 if (rc > 0) {
    3715          13 :                         i += rc;
    3716          13 :                         if (_array_size >= rc) {
    3717           9 :                                 _array_size -= rc;
    3718             :                         } else {
    3719           4 :                                 _array_size = 0;
    3720             :                         }
    3721           1 :                 } else if (rc < 0) {
    3722           0 :                         return rc;
    3723             :                 }
    3724             :         }
    3725             : 
    3726           8 :         return i;
    3727             : }
    3728             : 
    3729             : static const char *
    3730           0 : nvme_ctrlr_get_state_str(struct nvme_ctrlr *nvme_ctrlr)
    3731             : {
    3732           0 :         if (nvme_ctrlr->destruct) {
    3733           0 :                 return "deleting";
    3734           0 :         } else if (spdk_nvme_ctrlr_is_failed(nvme_ctrlr->ctrlr)) {
    3735           0 :                 return "failed";
    3736           0 :         } else if (nvme_ctrlr->resetting) {
    3737           0 :                 return "resetting";
    3738           0 :         } else if (nvme_ctrlr->reconnect_is_delayed > 0) {
    3739           0 :                 return "reconnect_is_delayed";
    3740           0 :         } else if (nvme_ctrlr->disabled) {
    3741           0 :                 return "disabled";
    3742             :         } else {
    3743           0 :                 return "enabled";
    3744             :         }
    3745             : }
    3746             : 
    3747             : void
    3748           0 : nvme_ctrlr_info_json(struct spdk_json_write_ctx *w, struct nvme_ctrlr *nvme_ctrlr)
    3749           0 : {
    3750             :         struct spdk_nvme_transport_id *trid;
    3751             :         const struct spdk_nvme_ctrlr_opts *opts;
    3752             :         const struct spdk_nvme_ctrlr_data *cdata;
    3753             :         struct nvme_path_id *path_id;
    3754             :         int32_t numa_id;
    3755             : 
    3756           0 :         spdk_json_write_object_begin(w);
    3757             : 
    3758           0 :         spdk_json_write_named_string(w, "state", nvme_ctrlr_get_state_str(nvme_ctrlr));
    3759             : 
    3760             : #ifdef SPDK_CONFIG_NVME_CUSE
    3761           0 :         size_t cuse_name_size = 128;
    3762           0 :         char cuse_name[cuse_name_size];
    3763             : 
    3764           0 :         int rc = spdk_nvme_cuse_get_ctrlr_name(nvme_ctrlr->ctrlr, cuse_name, &cuse_name_size);
    3765           0 :         if (rc == 0) {
    3766           0 :                 spdk_json_write_named_string(w, "cuse_device", cuse_name);
    3767             :         }
    3768             : #endif
    3769           0 :         trid = &nvme_ctrlr->active_path_id->trid;
    3770           0 :         spdk_json_write_named_object_begin(w, "trid");
    3771           0 :         nvme_bdev_dump_trid_json(trid, w);
    3772           0 :         spdk_json_write_object_end(w);
    3773             : 
    3774           0 :         path_id = TAILQ_NEXT(nvme_ctrlr->active_path_id, link);
    3775           0 :         if (path_id != NULL) {
    3776           0 :                 spdk_json_write_named_array_begin(w, "alternate_trids");
    3777             :                 do {
    3778           0 :                         trid = &path_id->trid;
    3779           0 :                         spdk_json_write_object_begin(w);
    3780           0 :                         nvme_bdev_dump_trid_json(trid, w);
    3781           0 :                         spdk_json_write_object_end(w);
    3782             : 
    3783           0 :                         path_id = TAILQ_NEXT(path_id, link);
    3784           0 :                 } while (path_id != NULL);
    3785           0 :                 spdk_json_write_array_end(w);
    3786             :         }
    3787             : 
    3788           0 :         cdata = spdk_nvme_ctrlr_get_data(nvme_ctrlr->ctrlr);
    3789           0 :         spdk_json_write_named_uint16(w, "cntlid", cdata->cntlid);
    3790             : 
    3791           0 :         opts = spdk_nvme_ctrlr_get_opts(nvme_ctrlr->ctrlr);
    3792           0 :         spdk_json_write_named_object_begin(w, "host");
    3793           0 :         spdk_json_write_named_string(w, "nqn", opts->hostnqn);
    3794           0 :         spdk_json_write_named_string(w, "addr", opts->src_addr);
    3795           0 :         spdk_json_write_named_string(w, "svcid", opts->src_svcid);
    3796           0 :         spdk_json_write_object_end(w);
    3797             : 
    3798           0 :         numa_id = spdk_nvme_ctrlr_get_numa_id(nvme_ctrlr->ctrlr);
    3799           0 :         if (numa_id != SPDK_ENV_NUMA_ID_ANY) {
    3800           0 :                 spdk_json_write_named_uint32(w, "numa_id", numa_id);
    3801             :         }
    3802           0 :         spdk_json_write_object_end(w);
    3803           0 : }
    3804             : 
    3805             : static void
    3806           0 : nvme_namespace_info_json(struct spdk_json_write_ctx *w,
    3807             :                          struct nvme_ns *nvme_ns)
    3808           0 : {
    3809             :         struct spdk_nvme_ns *ns;
    3810             :         struct spdk_nvme_ctrlr *ctrlr;
    3811             :         const struct spdk_nvme_ctrlr_data *cdata;
    3812             :         const struct spdk_nvme_transport_id *trid;
    3813             :         union spdk_nvme_vs_register vs;
    3814             :         const struct spdk_nvme_ns_data *nsdata;
    3815           0 :         char buf[128];
    3816             : 
    3817           0 :         ns = nvme_ns->ns;
    3818           0 :         if (ns == NULL) {
    3819           0 :                 return;
    3820             :         }
    3821             : 
    3822           0 :         ctrlr = spdk_nvme_ns_get_ctrlr(ns);
    3823             : 
    3824           0 :         cdata = spdk_nvme_ctrlr_get_data(ctrlr);
    3825           0 :         trid = spdk_nvme_ctrlr_get_transport_id(ctrlr);
    3826           0 :         vs = spdk_nvme_ctrlr_get_regs_vs(ctrlr);
    3827             : 
    3828           0 :         spdk_json_write_object_begin(w);
    3829             : 
    3830           0 :         if (trid->trtype == SPDK_NVME_TRANSPORT_PCIE) {
    3831           0 :                 spdk_json_write_named_string(w, "pci_address", trid->traddr);
    3832             :         }
    3833             : 
    3834           0 :         spdk_json_write_named_object_begin(w, "trid");
    3835             : 
    3836           0 :         nvme_bdev_dump_trid_json(trid, w);
    3837             : 
    3838           0 :         spdk_json_write_object_end(w);
    3839             : 
    3840             : #ifdef SPDK_CONFIG_NVME_CUSE
    3841           0 :         size_t cuse_name_size = 128;
    3842           0 :         char cuse_name[cuse_name_size];
    3843             : 
    3844           0 :         int rc = spdk_nvme_cuse_get_ns_name(ctrlr, spdk_nvme_ns_get_id(ns),
    3845             :                                             cuse_name, &cuse_name_size);
    3846           0 :         if (rc == 0) {
    3847           0 :                 spdk_json_write_named_string(w, "cuse_device", cuse_name);
    3848             :         }
    3849             : #endif
    3850             : 
    3851           0 :         spdk_json_write_named_object_begin(w, "ctrlr_data");
    3852             : 
    3853           0 :         spdk_json_write_named_uint16(w, "cntlid", cdata->cntlid);
    3854             : 
    3855           0 :         spdk_json_write_named_string_fmt(w, "vendor_id", "0x%04x", cdata->vid);
    3856             : 
    3857           0 :         snprintf(buf, sizeof(cdata->mn) + 1, "%s", cdata->mn);
    3858           0 :         spdk_str_trim(buf);
    3859           0 :         spdk_json_write_named_string(w, "model_number", buf);
    3860             : 
    3861           0 :         snprintf(buf, sizeof(cdata->sn) + 1, "%s", cdata->sn);
    3862           0 :         spdk_str_trim(buf);
    3863           0 :         spdk_json_write_named_string(w, "serial_number", buf);
    3864             : 
    3865           0 :         snprintf(buf, sizeof(cdata->fr) + 1, "%s", cdata->fr);
    3866           0 :         spdk_str_trim(buf);
    3867           0 :         spdk_json_write_named_string(w, "firmware_revision", buf);
    3868             : 
    3869           0 :         if (cdata->subnqn[0] != '\0') {
    3870           0 :                 spdk_json_write_named_string(w, "subnqn", cdata->subnqn);
    3871             :         }
    3872             : 
    3873           0 :         spdk_json_write_named_object_begin(w, "oacs");
    3874             : 
    3875           0 :         spdk_json_write_named_uint32(w, "security", cdata->oacs.security);
    3876           0 :         spdk_json_write_named_uint32(w, "format", cdata->oacs.format);
    3877           0 :         spdk_json_write_named_uint32(w, "firmware", cdata->oacs.firmware);
    3878           0 :         spdk_json_write_named_uint32(w, "ns_manage", cdata->oacs.ns_manage);
    3879             : 
    3880           0 :         spdk_json_write_object_end(w);
    3881             : 
    3882           0 :         spdk_json_write_named_bool(w, "multi_ctrlr", cdata->cmic.multi_ctrlr);
    3883           0 :         spdk_json_write_named_bool(w, "ana_reporting", cdata->cmic.ana_reporting);
    3884             : 
    3885           0 :         spdk_json_write_object_end(w);
    3886             : 
    3887           0 :         spdk_json_write_named_object_begin(w, "vs");
    3888             : 
    3889           0 :         spdk_json_write_name(w, "nvme_version");
    3890           0 :         if (vs.bits.ter) {
    3891           0 :                 spdk_json_write_string_fmt(w, "%u.%u.%u", vs.bits.mjr, vs.bits.mnr, vs.bits.ter);
    3892             :         } else {
    3893           0 :                 spdk_json_write_string_fmt(w, "%u.%u", vs.bits.mjr, vs.bits.mnr);
    3894             :         }
    3895             : 
    3896           0 :         spdk_json_write_object_end(w);
    3897             : 
    3898           0 :         nsdata = spdk_nvme_ns_get_data(ns);
    3899             : 
    3900           0 :         spdk_json_write_named_object_begin(w, "ns_data");
    3901             : 
    3902           0 :         spdk_json_write_named_uint32(w, "id", spdk_nvme_ns_get_id(ns));
    3903             : 
    3904           0 :         if (cdata->cmic.ana_reporting) {
    3905           0 :                 spdk_json_write_named_string(w, "ana_state",
    3906             :                                              _nvme_ana_state_str(nvme_ns->ana_state));
    3907             :         }
    3908             : 
    3909           0 :         spdk_json_write_named_bool(w, "can_share", nsdata->nmic.can_share);
    3910             : 
    3911           0 :         spdk_json_write_object_end(w);
    3912             : 
    3913           0 :         if (cdata->oacs.security) {
    3914           0 :                 spdk_json_write_named_object_begin(w, "security");
    3915             : 
    3916           0 :                 spdk_json_write_named_bool(w, "opal", nvme_ns->bdev->opal);
    3917             : 
    3918           0 :                 spdk_json_write_object_end(w);
    3919             :         }
    3920             : 
    3921           0 :         spdk_json_write_object_end(w);
    3922             : }
    3923             : 
    3924             : static const char *
    3925           0 : nvme_bdev_get_mp_policy_str(struct nvme_bdev *nbdev)
    3926             : {
    3927           0 :         switch (nbdev->mp_policy) {
    3928           0 :         case BDEV_NVME_MP_POLICY_ACTIVE_PASSIVE:
    3929           0 :                 return "active_passive";
    3930           0 :         case BDEV_NVME_MP_POLICY_ACTIVE_ACTIVE:
    3931           0 :                 return "active_active";
    3932           0 :         default:
    3933           0 :                 assert(false);
    3934             :                 return "invalid";
    3935             :         }
    3936             : }
    3937             : 
    3938             : static const char *
    3939           0 : nvme_bdev_get_mp_selector_str(struct nvme_bdev *nbdev)
    3940             : {
    3941           0 :         switch (nbdev->mp_selector) {
    3942           0 :         case BDEV_NVME_MP_SELECTOR_ROUND_ROBIN:
    3943           0 :                 return "round_robin";
    3944           0 :         case BDEV_NVME_MP_SELECTOR_QUEUE_DEPTH:
    3945           0 :                 return "queue_depth";
    3946           0 :         default:
    3947           0 :                 assert(false);
    3948             :                 return "invalid";
    3949             :         }
    3950             : }
    3951             : 
    3952             : static int
    3953           0 : bdev_nvme_dump_info_json(void *ctx, struct spdk_json_write_ctx *w)
    3954             : {
    3955           0 :         struct nvme_bdev *nvme_bdev = ctx;
    3956             :         struct nvme_ns *nvme_ns;
    3957             : 
    3958           0 :         pthread_mutex_lock(&nvme_bdev->mutex);
    3959           0 :         spdk_json_write_named_array_begin(w, "nvme");
    3960           0 :         TAILQ_FOREACH(nvme_ns, &nvme_bdev->nvme_ns_list, tailq) {
    3961           0 :                 nvme_namespace_info_json(w, nvme_ns);
    3962             :         }
    3963           0 :         spdk_json_write_array_end(w);
    3964           0 :         spdk_json_write_named_string(w, "mp_policy", nvme_bdev_get_mp_policy_str(nvme_bdev));
    3965           0 :         if (nvme_bdev->mp_policy == BDEV_NVME_MP_POLICY_ACTIVE_ACTIVE) {
    3966           0 :                 spdk_json_write_named_string(w, "selector", nvme_bdev_get_mp_selector_str(nvme_bdev));
    3967           0 :                 if (nvme_bdev->mp_selector == BDEV_NVME_MP_SELECTOR_ROUND_ROBIN) {
    3968           0 :                         spdk_json_write_named_uint32(w, "rr_min_io", nvme_bdev->rr_min_io);
    3969             :                 }
    3970             :         }
    3971           0 :         pthread_mutex_unlock(&nvme_bdev->mutex);
    3972             : 
    3973           0 :         return 0;
    3974             : }
    3975             : 
    /*
     * write_config_json callback of the bdev function table.
     * Intentionally empty: this module emits no per-bdev configuration.
     */
    static void
    bdev_nvme_write_config_json(struct spdk_bdev *bdev, struct spdk_json_write_ctx *w)
    {
            /* Nothing to write for an individual bdev. */
    }
    3981             : 
    3982             : static uint64_t
    3983           0 : bdev_nvme_get_spin_time(struct spdk_io_channel *ch)
    3984             : {
    3985           0 :         struct nvme_bdev_channel *nbdev_ch = spdk_io_channel_get_ctx(ch);
    3986             :         struct nvme_io_path *io_path;
    3987             :         struct nvme_poll_group *group;
    3988           0 :         uint64_t spin_time = 0;
    3989             : 
    3990           0 :         STAILQ_FOREACH(io_path, &nbdev_ch->io_path_list, stailq) {
    3991           0 :                 group = io_path->qpair->group;
    3992             : 
    3993           0 :                 if (!group || !group->collect_spin_stat) {
    3994           0 :                         continue;
    3995             :                 }
    3996             : 
    3997           0 :                 if (group->end_ticks != 0) {
    3998           0 :                         group->spin_ticks += (group->end_ticks - group->start_ticks);
    3999           0 :                         group->end_ticks = 0;
    4000             :                 }
    4001             : 
    4002           0 :                 spin_time += group->spin_ticks;
    4003           0 :                 group->start_ticks = 0;
    4004           0 :                 group->spin_ticks = 0;
    4005             :         }
    4006             : 
    4007           0 :         return (spin_time * 1000000ULL) / spdk_get_ticks_hz();
    4008             : }
    4009             : 
    4010             : static void
    4011           0 : bdev_nvme_reset_device_stat(void *ctx)
    4012             : {
    4013           0 :         struct nvme_bdev *nbdev = ctx;
    4014             : 
    4015           0 :         if (nbdev->err_stat != NULL) {
    4016           0 :                 memset(nbdev->err_stat, 0, sizeof(struct nvme_error_stat));
    4017             :         }
    4018           0 : }
    4019             : 
    /*
     * Turn an NVMe status description into a string usable as a JSON key:
     * lowercase and underscore-delimited.
     *
     * src is the human readable status text; the result is written to dst,
     * which must provide room for 256 bytes.
     */
    static void
    bdev_nvme_format_nvme_status(char *dst, const char *src)
    {
            enum { STATUS_KEY_LEN = 256 };
            char scratch[STATUS_KEY_LEN];

            /* Handle " - " before the single characters so that it becomes one
             * underscore rather than three.
             */
            spdk_strcpy_replace(dst, STATUS_KEY_LEN, src, " - ", "_");
            spdk_strcpy_replace(scratch, STATUS_KEY_LEN, dst, "-", "_");
            spdk_strcpy_replace(dst, STATUS_KEY_LEN, scratch, " ", "_");
            spdk_strlwr(dst);
    }
    4031             : 
    4032             : static void
    4033           0 : bdev_nvme_dump_device_stat_json(void *ctx, struct spdk_json_write_ctx *w)
    4034             : {
    4035           0 :         struct nvme_bdev *nbdev = ctx;
    4036           0 :         struct spdk_nvme_status status = {};
    4037             :         uint16_t sct, sc;
    4038           0 :         char status_json[256];
    4039             :         const char *status_str;
    4040             : 
    4041           0 :         if (nbdev->err_stat == NULL) {
    4042           0 :                 return;
    4043             :         }
    4044             : 
    4045           0 :         spdk_json_write_named_object_begin(w, "nvme_error");
    4046             : 
    4047           0 :         spdk_json_write_named_object_begin(w, "status_type");
    4048           0 :         for (sct = 0; sct < 8; sct++) {
    4049           0 :                 if (nbdev->err_stat->status_type[sct] == 0) {
    4050           0 :                         continue;
    4051             :                 }
    4052           0 :                 status.sct = sct;
    4053             : 
    4054           0 :                 status_str = spdk_nvme_cpl_get_status_type_string(&status);
    4055           0 :                 assert(status_str != NULL);
    4056           0 :                 bdev_nvme_format_nvme_status(status_json, status_str);
    4057             : 
    4058           0 :                 spdk_json_write_named_uint32(w, status_json, nbdev->err_stat->status_type[sct]);
    4059             :         }
    4060           0 :         spdk_json_write_object_end(w);
    4061             : 
    4062           0 :         spdk_json_write_named_object_begin(w, "status_code");
    4063           0 :         for (sct = 0; sct < 4; sct++) {
    4064           0 :                 status.sct = sct;
    4065           0 :                 for (sc = 0; sc < 256; sc++) {
    4066           0 :                         if (nbdev->err_stat->status[sct][sc] == 0) {
    4067           0 :                                 continue;
    4068             :                         }
    4069           0 :                         status.sc = sc;
    4070             : 
    4071           0 :                         status_str = spdk_nvme_cpl_get_status_string(&status);
    4072           0 :                         assert(status_str != NULL);
    4073           0 :                         bdev_nvme_format_nvme_status(status_json, status_str);
    4074             : 
    4075           0 :                         spdk_json_write_named_uint32(w, status_json, nbdev->err_stat->status[sct][sc]);
    4076             :                 }
    4077             :         }
    4078           0 :         spdk_json_write_object_end(w);
    4079             : 
    4080           0 :         spdk_json_write_object_end(w);
    4081             : }
    4082             : 
    4083             : static bool
    4084           0 : bdev_nvme_accel_sequence_supported(void *ctx, enum spdk_bdev_io_type type)
    4085             : {
    4086           0 :         struct nvme_bdev *nbdev = ctx;
    4087             :         struct spdk_nvme_ctrlr *ctrlr;
    4088             : 
    4089           0 :         if (!g_opts.allow_accel_sequence) {
    4090           0 :                 return false;
    4091             :         }
    4092             : 
    4093           0 :         switch (type) {
    4094           0 :         case SPDK_BDEV_IO_TYPE_WRITE:
    4095             :         case SPDK_BDEV_IO_TYPE_READ:
    4096           0 :                 break;
    4097           0 :         default:
    4098           0 :                 return false;
    4099             :         }
    4100             : 
    4101           0 :         ctrlr = bdev_nvme_get_ctrlr(&nbdev->disk);
    4102           0 :         assert(ctrlr != NULL);
    4103             : 
    4104           0 :         return spdk_nvme_ctrlr_get_flags(ctrlr) & SPDK_NVME_CTRLR_ACCEL_SEQUENCE_SUPPORTED;
    4105             : }
    4106             : 
    4107             : static const struct spdk_bdev_fn_table nvmelib_fn_table = {
    4108             :         .destruct                       = bdev_nvme_destruct,
    4109             :         .submit_request                 = bdev_nvme_submit_request,
    4110             :         .io_type_supported              = bdev_nvme_io_type_supported,
    4111             :         .get_io_channel                 = bdev_nvme_get_io_channel,
    4112             :         .dump_info_json                 = bdev_nvme_dump_info_json,
    4113             :         .write_config_json              = bdev_nvme_write_config_json,
    4114             :         .get_spin_time                  = bdev_nvme_get_spin_time,
    4115             :         .get_module_ctx                 = bdev_nvme_get_module_ctx,
    4116             :         .get_memory_domains             = bdev_nvme_get_memory_domains,
    4117             :         .accel_sequence_supported       = bdev_nvme_accel_sequence_supported,
    4118             :         .reset_device_stat              = bdev_nvme_reset_device_stat,
    4119             :         .dump_device_stat_json          = bdev_nvme_dump_device_stat_json,
    4120             : };
    4121             : 
    /*
     * Callback type used by bdev_nvme_parse_ana_log_page(): it is invoked once
     * per ANA group descriptor with the caller supplied cb_arg. Returning a
     * non-zero value stops the walk; that value becomes the parser's result.
     */
    typedef int (*bdev_nvme_parse_ana_log_page_cb)(const struct spdk_nvme_ana_group_descriptor *desc,
                    void *cb_arg);
    4124             : 
    4125             : static int
    4126          41 : bdev_nvme_parse_ana_log_page(struct nvme_ctrlr *nvme_ctrlr,
    4127             :                              bdev_nvme_parse_ana_log_page_cb cb_fn, void *cb_arg)
    4128             : {
    4129             :         struct spdk_nvme_ana_group_descriptor *copied_desc;
    4130             :         uint8_t *orig_desc;
    4131             :         uint32_t i, desc_size, copy_len;
    4132          41 :         int rc = 0;
    4133             : 
    4134          41 :         if (nvme_ctrlr->ana_log_page == NULL) {
    4135           0 :                 return -EINVAL;
    4136             :         }
    4137             : 
    4138          41 :         copied_desc = nvme_ctrlr->copied_ana_desc;
    4139             : 
    4140          41 :         orig_desc = (uint8_t *)nvme_ctrlr->ana_log_page + sizeof(struct spdk_nvme_ana_page);
    4141          41 :         copy_len = nvme_ctrlr->max_ana_log_page_size - sizeof(struct spdk_nvme_ana_page);
    4142             : 
    4143          71 :         for (i = 0; i < nvme_ctrlr->ana_log_page->num_ana_group_desc; i++) {
    4144          66 :                 memcpy(copied_desc, orig_desc, copy_len);
    4145             : 
    4146          66 :                 rc = cb_fn(copied_desc, cb_arg);
    4147          66 :                 if (rc != 0) {
    4148          36 :                         break;
    4149             :                 }
    4150             : 
    4151          30 :                 desc_size = sizeof(struct spdk_nvme_ana_group_descriptor) +
    4152          30 :                             copied_desc->num_of_nsid * sizeof(uint32_t);
    4153          30 :                 orig_desc += desc_size;
    4154          30 :                 copy_len -= desc_size;
    4155             :         }
    4156             : 
    4157          41 :         return rc;
    4158             : }
    4159             : 
    4160             : static int
    4161           5 : nvme_ns_ana_transition_timedout(void *ctx)
    4162             : {
    4163           5 :         struct nvme_ns *nvme_ns = ctx;
    4164             : 
    4165           5 :         spdk_poller_unregister(&nvme_ns->anatt_timer);
    4166           5 :         nvme_ns->ana_transition_timedout = true;
    4167             : 
    4168           5 :         return SPDK_POLLER_BUSY;
    4169             : }
    4170             : 
    4171             : static void
    4172          45 : _nvme_ns_set_ana_state(struct nvme_ns *nvme_ns,
    4173             :                        const struct spdk_nvme_ana_group_descriptor *desc)
    4174             : {
    4175             :         const struct spdk_nvme_ctrlr_data *cdata;
    4176             : 
    4177          45 :         nvme_ns->ana_group_id = desc->ana_group_id;
    4178          45 :         nvme_ns->ana_state = desc->ana_state;
    4179          45 :         nvme_ns->ana_state_updating = false;
    4180             : 
    4181          45 :         switch (nvme_ns->ana_state) {
    4182          38 :         case SPDK_NVME_ANA_OPTIMIZED_STATE:
    4183             :         case SPDK_NVME_ANA_NON_OPTIMIZED_STATE:
    4184          38 :                 nvme_ns->ana_transition_timedout = false;
    4185          38 :                 spdk_poller_unregister(&nvme_ns->anatt_timer);
    4186          38 :                 break;
    4187             : 
    4188           6 :         case SPDK_NVME_ANA_INACCESSIBLE_STATE:
    4189             :         case SPDK_NVME_ANA_CHANGE_STATE:
    4190           6 :                 if (nvme_ns->anatt_timer != NULL) {
    4191           1 :                         break;
    4192             :                 }
    4193             : 
    4194           5 :                 cdata = spdk_nvme_ctrlr_get_data(nvme_ns->ctrlr->ctrlr);
    4195           5 :                 nvme_ns->anatt_timer = SPDK_POLLER_REGISTER(nvme_ns_ana_transition_timedout,
    4196             :                                        nvme_ns,
    4197             :                                        cdata->anatt * SPDK_SEC_TO_USEC);
    4198           5 :                 break;
    4199           1 :         default:
    4200           1 :                 break;
    4201             :         }
    4202          45 : }
    4203             : 
    4204             : static int
    4205          59 : nvme_ns_set_ana_state(const struct spdk_nvme_ana_group_descriptor *desc, void *cb_arg)
    4206             : {
    4207          59 :         struct nvme_ns *nvme_ns = cb_arg;
    4208             :         uint32_t i;
    4209             : 
    4210          59 :         assert(nvme_ns->ns != NULL);
    4211             : 
    4212          81 :         for (i = 0; i < desc->num_of_nsid; i++) {
    4213          58 :                 if (desc->nsid[i] != spdk_nvme_ns_get_id(nvme_ns->ns)) {
    4214          22 :                         continue;
    4215             :                 }
    4216             : 
    4217          36 :                 _nvme_ns_set_ana_state(nvme_ns, desc);
    4218          36 :                 return 1;
    4219             :         }
    4220             : 
    4221          23 :         return 0;
    4222             : }
    4223             : 
    4224             : static int
    4225           5 : nvme_generate_uuid(const char *sn, uint32_t nsid, struct spdk_uuid *uuid)
    4226             : {
    4227           5 :         int rc = 0;
    4228           5 :         struct spdk_uuid new_uuid, namespace_uuid;
    4229           5 :         char merged_str[SPDK_NVME_CTRLR_SN_LEN + NSID_STR_LEN + 1] = {'\0'};
    4230             :         /* This namespace UUID was generated using uuid_generate() method. */
    4231           5 :         const char *namespace_str = {"edaed2de-24bc-4b07-b559-f47ecbe730fd"};
    4232             :         int size;
    4233             : 
    4234           5 :         assert(strlen(sn) <= SPDK_NVME_CTRLR_SN_LEN);
    4235             : 
    4236           5 :         spdk_uuid_set_null(&new_uuid);
    4237           5 :         spdk_uuid_set_null(&namespace_uuid);
    4238             : 
    4239           5 :         size = snprintf(merged_str, sizeof(merged_str), "%s%"PRIu32, sn, nsid);
    4240           5 :         if (size <= 0 || (unsigned long)size >= sizeof(merged_str)) {
    4241           0 :                 return -EINVAL;
    4242             :         }
    4243             : 
    4244           5 :         spdk_uuid_parse(&namespace_uuid, namespace_str);
    4245             : 
    4246           5 :         rc = spdk_uuid_generate_sha1(&new_uuid, &namespace_uuid, merged_str, size);
    4247           5 :         if (rc == 0) {
    4248           5 :                 memcpy(uuid, &new_uuid, sizeof(struct spdk_uuid));
    4249             :         }
    4250             : 
    4251           5 :         return rc;
    4252             : }
    4253             : 
    4254             : static int
    4255          38 : nvme_disk_create(struct spdk_bdev *disk, const char *base_name,
    4256             :                  struct spdk_nvme_ctrlr *ctrlr, struct spdk_nvme_ns *ns,
    4257             :                  struct spdk_bdev_nvme_ctrlr_opts *bdev_opts, void *ctx)
    4258             : {
    4259             :         const struct spdk_uuid          *uuid;
    4260             :         const uint8_t *nguid;
    4261             :         const struct spdk_nvme_ctrlr_data *cdata;
    4262             :         const struct spdk_nvme_ns_data  *nsdata;
    4263             :         const struct spdk_nvme_ctrlr_opts *opts;
    4264             :         enum spdk_nvme_csi              csi;
    4265             :         uint32_t atomic_bs, phys_bs, bs;
    4266          38 :         char sn_tmp[SPDK_NVME_CTRLR_SN_LEN + 1] = {'\0'};
    4267             :         int rc;
    4268             : 
    4269          38 :         cdata = spdk_nvme_ctrlr_get_data(ctrlr);
    4270          38 :         csi = spdk_nvme_ns_get_csi(ns);
    4271          38 :         opts = spdk_nvme_ctrlr_get_opts(ctrlr);
    4272             : 
    4273          38 :         switch (csi) {
    4274          38 :         case SPDK_NVME_CSI_NVM:
    4275          38 :                 disk->product_name = "NVMe disk";
    4276          38 :                 break;
    4277           0 :         case SPDK_NVME_CSI_ZNS:
    4278           0 :                 disk->product_name = "NVMe ZNS disk";
    4279           0 :                 disk->zoned = true;
    4280           0 :                 disk->zone_size = spdk_nvme_zns_ns_get_zone_size_sectors(ns);
    4281           0 :                 disk->max_zone_append_size = spdk_nvme_zns_ctrlr_get_max_zone_append_size(ctrlr) /
    4282           0 :                                              spdk_nvme_ns_get_extended_sector_size(ns);
    4283           0 :                 disk->max_open_zones = spdk_nvme_zns_ns_get_max_open_zones(ns);
    4284           0 :                 disk->max_active_zones = spdk_nvme_zns_ns_get_max_active_zones(ns);
    4285           0 :                 break;
    4286           0 :         default:
    4287           0 :                 if (bdev_opts->allow_unrecognized_csi) {
    4288           0 :                         disk->product_name = "NVMe Passthrough disk";
    4289           0 :                         break;
    4290             :                 }
    4291           0 :                 SPDK_ERRLOG("unsupported CSI: %u\n", csi);
    4292           0 :                 return -ENOTSUP;
    4293             :         }
    4294             : 
    4295          38 :         nguid = spdk_nvme_ns_get_nguid(ns);
    4296          38 :         if (!nguid) {
    4297          38 :                 uuid = spdk_nvme_ns_get_uuid(ns);
    4298          38 :                 if (uuid) {
    4299          12 :                         disk->uuid = *uuid;
    4300          26 :                 } else if (g_opts.generate_uuids) {
    4301           0 :                         spdk_strcpy_pad(sn_tmp, cdata->sn, SPDK_NVME_CTRLR_SN_LEN, '\0');
    4302           0 :                         rc = nvme_generate_uuid(sn_tmp, spdk_nvme_ns_get_id(ns), &disk->uuid);
    4303           0 :                         if (rc < 0) {
    4304           0 :                                 SPDK_ERRLOG("UUID generation failed (%s)\n", spdk_strerror(-rc));
    4305           0 :                                 return rc;
    4306             :                         }
    4307             :                 }
    4308             :         } else {
    4309           0 :                 memcpy(&disk->uuid, nguid, sizeof(disk->uuid));
    4310             :         }
    4311             : 
    4312          38 :         disk->name = spdk_sprintf_alloc("%sn%d", base_name, spdk_nvme_ns_get_id(ns));
    4313          38 :         if (!disk->name) {
    4314           0 :                 return -ENOMEM;
    4315             :         }
    4316             : 
    4317          38 :         disk->write_cache = 0;
    4318          38 :         if (cdata->vwc.present) {
    4319             :                 /* Enable if the Volatile Write Cache exists */
    4320           0 :                 disk->write_cache = 1;
    4321             :         }
    4322          38 :         if (cdata->oncs.write_zeroes) {
    4323           0 :                 disk->max_write_zeroes = UINT16_MAX + 1;
    4324             :         }
    4325          38 :         disk->blocklen = spdk_nvme_ns_get_extended_sector_size(ns);
    4326          38 :         disk->blockcnt = spdk_nvme_ns_get_num_sectors(ns);
    4327          38 :         disk->max_segment_size = spdk_nvme_ctrlr_get_max_xfer_size(ctrlr);
    4328          38 :         disk->ctratt.raw = cdata->ctratt.raw;
    4329             :         /* NVMe driver will split one request into multiple requests
    4330             :          * based on MDTS and stripe boundary, the bdev layer will use
    4331             :          * max_segment_size and max_num_segments to split one big IO
    4332             :          * into multiple requests, then small request can't run out
    4333             :          * of NVMe internal requests data structure.
    4334             :          */
    4335          38 :         if (opts && opts->io_queue_requests) {
    4336           0 :                 disk->max_num_segments = opts->io_queue_requests / 2;
    4337             :         }
    4338          38 :         if (spdk_nvme_ctrlr_get_flags(ctrlr) & SPDK_NVME_CTRLR_SGL_SUPPORTED) {
    4339             :                 /* The nvme driver will try to split I/O that have too many
    4340             :                  * SGEs, but it doesn't work if that last SGE doesn't end on
    4341             :                  * an aggregate total that is block aligned. The bdev layer has
    4342             :                  * a more robust splitting framework, so use that instead for
    4343             :                  * this case. (See issue #3269.)
    4344             :                  */
    4345           0 :                 uint16_t max_sges = spdk_nvme_ctrlr_get_max_sges(ctrlr);
    4346             : 
    4347           0 :                 if (disk->max_num_segments == 0) {
    4348           0 :                         disk->max_num_segments = max_sges;
    4349             :                 } else {
    4350           0 :                         disk->max_num_segments = spdk_min(disk->max_num_segments, max_sges);
    4351             :                 }
    4352             :         }
    4353          38 :         disk->optimal_io_boundary = spdk_nvme_ns_get_optimal_io_boundary(ns);
    4354             : 
    4355          38 :         nsdata = spdk_nvme_ns_get_data(ns);
    4356          38 :         bs = spdk_nvme_ns_get_sector_size(ns);
    4357          38 :         atomic_bs = bs;
    4358          38 :         phys_bs = bs;
    4359          38 :         if (nsdata->nabo == 0) {
    4360          38 :                 if (nsdata->nsfeat.ns_atomic_write_unit && nsdata->nawupf) {
    4361           0 :                         atomic_bs = bs * (1 + nsdata->nawupf);
    4362             :                 } else {
    4363          38 :                         atomic_bs = bs * (1 + cdata->awupf);
    4364             :                 }
    4365             :         }
    4366          38 :         if (nsdata->nsfeat.optperf) {
    4367           0 :                 phys_bs = bs * (1 + nsdata->npwg);
    4368             :         }
    4369          38 :         disk->phys_blocklen = spdk_min(phys_bs, atomic_bs);
    4370             : 
    4371          38 :         disk->md_len = spdk_nvme_ns_get_md_size(ns);
    4372          38 :         if (disk->md_len != 0) {
    4373           0 :                 disk->md_interleave = nsdata->flbas.extended;
    4374           0 :                 disk->dif_type = (enum spdk_dif_type)spdk_nvme_ns_get_pi_type(ns);
    4375           0 :                 if (disk->dif_type != SPDK_DIF_DISABLE) {
    4376           0 :                         disk->dif_is_head_of_md = nsdata->dps.md_start;
    4377           0 :                         disk->dif_check_flags = bdev_opts->prchk_flags;
    4378           0 :                         disk->dif_pi_format = (enum spdk_dif_pi_format)spdk_nvme_ns_get_pi_format(ns);
    4379             :                 }
    4380             :         }
    4381             : 
    4382          38 :         if (!(spdk_nvme_ctrlr_get_flags(ctrlr) &
    4383             :               SPDK_NVME_CTRLR_COMPARE_AND_WRITE_SUPPORTED)) {
    4384          38 :                 disk->acwu = 0;
    4385           0 :         } else if (nsdata->nsfeat.ns_atomic_write_unit) {
    4386           0 :                 disk->acwu = nsdata->nacwu + 1; /* 0-based */
    4387             :         } else {
    4388           0 :                 disk->acwu = cdata->acwu + 1; /* 0-based */
    4389             :         }
    4390             : 
    4391          38 :         if (cdata->oncs.copy) {
    4392             :                 /* For now bdev interface allows only single segment copy */
    4393           0 :                 disk->max_copy = nsdata->mssrl;
    4394             :         }
    4395             : 
    4396          38 :         disk->ctxt = ctx;
    4397          38 :         disk->fn_table = &nvmelib_fn_table;
    4398          38 :         disk->module = &nvme_if;
    4399             : 
    4400          38 :         disk->numa.id_valid = 1;
    4401          38 :         disk->numa.id = spdk_nvme_ctrlr_get_numa_id(ctrlr);
    4402             : 
    4403          38 :         return 0;
    4404             : }
    4405             : 
    4406             : static struct nvme_bdev *
    4407          38 : nvme_bdev_alloc(void)
    4408             : {
    4409             :         struct nvme_bdev *bdev;
    4410             :         int rc;
    4411             : 
    4412          38 :         bdev = calloc(1, sizeof(*bdev));
    4413          38 :         if (!bdev) {
    4414           0 :                 SPDK_ERRLOG("bdev calloc() failed\n");
    4415           0 :                 return NULL;
    4416             :         }
    4417             : 
    4418          38 :         if (g_opts.nvme_error_stat) {
    4419           0 :                 bdev->err_stat = calloc(1, sizeof(struct nvme_error_stat));
    4420           0 :                 if (!bdev->err_stat) {
    4421           0 :                         SPDK_ERRLOG("err_stat calloc() failed\n");
    4422           0 :                         free(bdev);
    4423           0 :                         return NULL;
    4424             :                 }
    4425             :         }
    4426             : 
    4427          38 :         rc = pthread_mutex_init(&bdev->mutex, NULL);
    4428          38 :         if (rc != 0) {
    4429           0 :                 free(bdev->err_stat);
    4430           0 :                 free(bdev);
    4431           0 :                 return NULL;
    4432             :         }
    4433             : 
    4434          38 :         bdev->ref = 1;
    4435          38 :         bdev->mp_policy = BDEV_NVME_MP_POLICY_ACTIVE_PASSIVE;
    4436          38 :         bdev->mp_selector = BDEV_NVME_MP_SELECTOR_ROUND_ROBIN;
    4437          38 :         bdev->rr_min_io = UINT32_MAX;
    4438          38 :         TAILQ_INIT(&bdev->nvme_ns_list);
    4439             : 
    4440          38 :         return bdev;
    4441             : }
    4442             : 
    4443             : static int
    4444          38 : nvme_bdev_create(struct nvme_ctrlr *nvme_ctrlr, struct nvme_ns *nvme_ns)
    4445             : {
    4446             :         struct nvme_bdev *bdev;
    4447          38 :         struct nvme_bdev_ctrlr *nbdev_ctrlr = nvme_ctrlr->nbdev_ctrlr;
    4448             :         int rc;
    4449             : 
    4450          38 :         bdev = nvme_bdev_alloc();
    4451          38 :         if (bdev == NULL) {
    4452           0 :                 SPDK_ERRLOG("Failed to allocate NVMe bdev\n");
    4453           0 :                 return -ENOMEM;
    4454             :         }
    4455             : 
    4456          38 :         bdev->opal = nvme_ctrlr->opal_dev != NULL;
    4457             : 
    4458          38 :         rc = nvme_disk_create(&bdev->disk, nbdev_ctrlr->name, nvme_ctrlr->ctrlr,
    4459             :                               nvme_ns->ns, &nvme_ctrlr->opts, bdev);
    4460          38 :         if (rc != 0) {
    4461           0 :                 SPDK_ERRLOG("Failed to create NVMe disk\n");
    4462           0 :                 nvme_bdev_free(bdev);
    4463           0 :                 return rc;
    4464             :         }
    4465             : 
    4466          38 :         spdk_io_device_register(bdev,
    4467             :                                 bdev_nvme_create_bdev_channel_cb,
    4468             :                                 bdev_nvme_destroy_bdev_channel_cb,
    4469             :                                 sizeof(struct nvme_bdev_channel),
    4470          38 :                                 bdev->disk.name);
    4471             : 
    4472          38 :         nvme_ns->bdev = bdev;
    4473          38 :         bdev->nsid = nvme_ns->id;
    4474          38 :         TAILQ_INSERT_TAIL(&bdev->nvme_ns_list, nvme_ns, tailq);
    4475             : 
    4476          38 :         bdev->nbdev_ctrlr = nbdev_ctrlr;
    4477          38 :         TAILQ_INSERT_TAIL(&nbdev_ctrlr->bdevs, bdev, tailq);
    4478             : 
    4479          38 :         rc = spdk_bdev_register(&bdev->disk);
    4480          38 :         if (rc != 0) {
    4481           1 :                 SPDK_ERRLOG("spdk_bdev_register() failed\n");
    4482           1 :                 spdk_io_device_unregister(bdev, NULL);
    4483           1 :                 nvme_ns->bdev = NULL;
    4484           1 :                 TAILQ_REMOVE(&nbdev_ctrlr->bdevs, bdev, tailq);
    4485           1 :                 nvme_bdev_free(bdev);
    4486           1 :                 return rc;
    4487             :         }
    4488             : 
    4489          37 :         return 0;
    4490             : }
    4491             : 
    4492             : static bool
    4493          23 : bdev_nvme_compare_ns(struct spdk_nvme_ns *ns1, struct spdk_nvme_ns *ns2)
    4494             : {
    4495             :         const struct spdk_nvme_ns_data *nsdata1, *nsdata2;
    4496             :         const struct spdk_uuid *uuid1, *uuid2;
    4497             : 
    4498          23 :         nsdata1 = spdk_nvme_ns_get_data(ns1);
    4499          23 :         nsdata2 = spdk_nvme_ns_get_data(ns2);
    4500          23 :         uuid1 = spdk_nvme_ns_get_uuid(ns1);
    4501          23 :         uuid2 = spdk_nvme_ns_get_uuid(ns2);
    4502             : 
    4503          45 :         return memcmp(nsdata1->nguid, nsdata2->nguid, sizeof(nsdata1->nguid)) == 0 &&
    4504          22 :                nsdata1->eui64 == nsdata2->eui64 &&
    4505          21 :                ((uuid1 == NULL && uuid2 == NULL) ||
    4506          59 :                 (uuid1 != NULL && uuid2 != NULL && spdk_uuid_compare(uuid1, uuid2) == 0)) &&
    4507          18 :                spdk_nvme_ns_get_csi(ns1) == spdk_nvme_ns_get_csi(ns2);
    4508             : }
    4509             : 
    4510             : static bool
    4511           0 : hotplug_probe_cb(void *cb_ctx, const struct spdk_nvme_transport_id *trid,
    4512             :                  struct spdk_nvme_ctrlr_opts *opts)
    4513             : {
    4514             :         struct nvme_probe_skip_entry *entry;
    4515             : 
    4516           0 :         TAILQ_FOREACH(entry, &g_skipped_nvme_ctrlrs, tailq) {
    4517           0 :                 if (spdk_nvme_transport_id_compare(trid, &entry->trid) == 0) {
    4518           0 :                         return false;
    4519             :                 }
    4520             :         }
    4521             : 
    4522           0 :         opts->arbitration_burst = (uint8_t)g_opts.arbitration_burst;
    4523           0 :         opts->low_priority_weight = (uint8_t)g_opts.low_priority_weight;
    4524           0 :         opts->medium_priority_weight = (uint8_t)g_opts.medium_priority_weight;
    4525           0 :         opts->high_priority_weight = (uint8_t)g_opts.high_priority_weight;
    4526           0 :         opts->disable_read_ana_log_page = true;
    4527             : 
    4528           0 :         SPDK_DEBUGLOG(bdev_nvme, "Attaching to %s\n", trid->traddr);
    4529             : 
    4530           0 :         return true;
    4531             : }
    4532             : 
    4533             : static void
    4534           0 : nvme_abort_cpl(void *ctx, const struct spdk_nvme_cpl *cpl)
    4535             : {
    4536           0 :         struct nvme_ctrlr *nvme_ctrlr = ctx;
    4537             : 
    4538           0 :         if (spdk_nvme_cpl_is_error(cpl)) {
    4539           0 :                 SPDK_WARNLOG("Abort failed. Resetting controller. sc is %u, sct is %u.\n", cpl->status.sc,
    4540             :                              cpl->status.sct);
    4541           0 :                 bdev_nvme_reset_ctrlr(nvme_ctrlr);
    4542           0 :         } else if (cpl->cdw0 & 0x1) {
    4543           0 :                 SPDK_WARNLOG("Specified command could not be aborted.\n");
    4544           0 :                 bdev_nvme_reset_ctrlr(nvme_ctrlr);
    4545             :         }
    4546           0 : }
    4547             : 
    4548             : static void
    4549           0 : timeout_cb(void *cb_arg, struct spdk_nvme_ctrlr *ctrlr,
    4550             :            struct spdk_nvme_qpair *qpair, uint16_t cid)
    4551             : {
    4552           0 :         struct nvme_ctrlr *nvme_ctrlr = cb_arg;
    4553             :         union spdk_nvme_csts_register csts;
    4554             :         int rc;
    4555             : 
    4556           0 :         assert(nvme_ctrlr->ctrlr == ctrlr);
    4557             : 
    4558           0 :         SPDK_WARNLOG("Warning: Detected a timeout. ctrlr=%p qpair=%p cid=%u\n", ctrlr, qpair, cid);
    4559             : 
    4560             :         /* Only try to read CSTS if it's a PCIe controller or we have a timeout on an I/O
    4561             :          * queue.  (Note: qpair == NULL when there's an admin cmd timeout.)  Otherwise we
    4562             :          * would submit another fabrics cmd on the admin queue to read CSTS and check for its
    4563             :          * completion recursively.
    4564             :          */
    4565           0 :         if (nvme_ctrlr->active_path_id->trid.trtype == SPDK_NVME_TRANSPORT_PCIE || qpair != NULL) {
    4566           0 :                 csts = spdk_nvme_ctrlr_get_regs_csts(ctrlr);
    4567           0 :                 if (csts.bits.cfs) {
    4568           0 :                         SPDK_ERRLOG("Controller Fatal Status, reset required\n");
    4569           0 :                         bdev_nvme_reset_ctrlr(nvme_ctrlr);
    4570           0 :                         return;
    4571             :                 }
    4572             :         }
    4573             : 
    4574           0 :         switch (g_opts.action_on_timeout) {
    4575           0 :         case SPDK_BDEV_NVME_TIMEOUT_ACTION_ABORT:
    4576           0 :                 if (qpair) {
    4577             :                         /* Don't send abort to ctrlr when ctrlr is not available. */
    4578           0 :                         pthread_mutex_lock(&nvme_ctrlr->mutex);
    4579           0 :                         if (!nvme_ctrlr_is_available(nvme_ctrlr)) {
    4580           0 :                                 pthread_mutex_unlock(&nvme_ctrlr->mutex);
    4581           0 :                                 SPDK_NOTICELOG("Quit abort. Ctrlr is not available.\n");
    4582           0 :                                 return;
    4583             :                         }
    4584           0 :                         pthread_mutex_unlock(&nvme_ctrlr->mutex);
    4585             : 
    4586           0 :                         rc = spdk_nvme_ctrlr_cmd_abort(ctrlr, qpair, cid,
    4587             :                                                        nvme_abort_cpl, nvme_ctrlr);
    4588           0 :                         if (rc == 0) {
    4589           0 :                                 return;
    4590             :                         }
    4591             : 
    4592           0 :                         SPDK_ERRLOG("Unable to send abort. Resetting, rc is %d.\n", rc);
    4593             :                 }
    4594             : 
    4595             :         /* FALLTHROUGH */
    4596             :         case SPDK_BDEV_NVME_TIMEOUT_ACTION_RESET:
    4597           0 :                 bdev_nvme_reset_ctrlr(nvme_ctrlr);
    4598           0 :                 break;
    4599           0 :         case SPDK_BDEV_NVME_TIMEOUT_ACTION_NONE:
    4600           0 :                 SPDK_DEBUGLOG(bdev_nvme, "No action for nvme controller timeout.\n");
    4601           0 :                 break;
    4602           0 :         default:
    4603           0 :                 SPDK_ERRLOG("An invalid timeout action value is found.\n");
    4604           0 :                 break;
    4605             :         }
    4606             : }
    4607             : 
    4608             : static struct nvme_ns *
    4609          51 : nvme_ns_alloc(void)
    4610             : {
    4611             :         struct nvme_ns *nvme_ns;
    4612             : 
    4613          51 :         nvme_ns = calloc(1, sizeof(struct nvme_ns));
    4614          51 :         if (nvme_ns == NULL) {
    4615           0 :                 return NULL;
    4616             :         }
    4617             : 
    4618          51 :         if (g_opts.io_path_stat) {
    4619           0 :                 nvme_ns->stat = calloc(1, sizeof(struct spdk_bdev_io_stat));
    4620           0 :                 if (nvme_ns->stat == NULL) {
    4621           0 :                         free(nvme_ns);
    4622           0 :                         return NULL;
    4623             :                 }
    4624           0 :                 spdk_bdev_reset_io_stat(nvme_ns->stat, SPDK_BDEV_RESET_STAT_MAXMIN);
    4625             :         }
    4626             : 
    4627          51 :         return nvme_ns;
    4628             : }
    4629             : 
    4630             : static void
    4631          51 : nvme_ns_free(struct nvme_ns *nvme_ns)
    4632             : {
    4633          51 :         free(nvme_ns->stat);
    4634          51 :         free(nvme_ns);
    4635          51 : }
    4636             : 
    4637             : static void
    4638          51 : nvme_ctrlr_populate_namespace_done(struct nvme_ns *nvme_ns, int rc)
    4639             : {
    4640          51 :         struct nvme_ctrlr *nvme_ctrlr = nvme_ns->ctrlr;
    4641          51 :         struct nvme_async_probe_ctx *ctx = nvme_ns->probe_ctx;
    4642             : 
    4643          51 :         if (rc == 0) {
    4644          49 :                 nvme_ns->probe_ctx = NULL;
    4645          49 :                 pthread_mutex_lock(&nvme_ctrlr->mutex);
    4646          49 :                 nvme_ctrlr->ref++;
    4647          49 :                 pthread_mutex_unlock(&nvme_ctrlr->mutex);
    4648             :         } else {
    4649           2 :                 RB_REMOVE(nvme_ns_tree, &nvme_ctrlr->namespaces, nvme_ns);
    4650           2 :                 nvme_ns_free(nvme_ns);
    4651             :         }
    4652             : 
    4653          51 :         if (ctx) {
    4654          50 :                 ctx->populates_in_progress--;
    4655          50 :                 if (ctx->populates_in_progress == 0) {
    4656          12 :                         nvme_ctrlr_populate_namespaces_done(nvme_ctrlr, ctx);
    4657             :                 }
    4658             :         }
    4659          51 : }
    4660             : 
    4661             : static void
    4662           2 : bdev_nvme_add_io_path(struct nvme_bdev_channel_iter *i,
    4663             :                       struct nvme_bdev *nbdev,
    4664             :                       struct nvme_bdev_channel *nbdev_ch, void *ctx)
    4665             : {
    4666           2 :         struct nvme_ns *nvme_ns = ctx;
    4667             :         int rc;
    4668             : 
    4669           2 :         rc = _bdev_nvme_add_io_path(nbdev_ch, nvme_ns);
    4670           2 :         if (rc != 0) {
    4671           0 :                 SPDK_ERRLOG("Failed to add I/O path to bdev_channel dynamically.\n");
    4672             :         }
    4673             : 
    4674           2 :         nvme_bdev_for_each_channel_continue(i, rc);
    4675           2 : }
    4676             : 
    4677             : static void
    4678           2 : bdev_nvme_delete_io_path(struct nvme_bdev_channel_iter *i,
    4679             :                          struct nvme_bdev *nbdev,
    4680             :                          struct nvme_bdev_channel *nbdev_ch, void *ctx)
    4681             : {
    4682           2 :         struct nvme_ns *nvme_ns = ctx;
    4683             :         struct nvme_io_path *io_path;
    4684             : 
    4685           2 :         io_path = _bdev_nvme_get_io_path(nbdev_ch, nvme_ns);
    4686           2 :         if (io_path != NULL) {
    4687           2 :                 _bdev_nvme_delete_io_path(nbdev_ch, io_path);
    4688             :         }
    4689             : 
    4690           2 :         nvme_bdev_for_each_channel_continue(i, 0);
    4691           2 : }
    4692             : 
    4693             : static void
    4694           0 : bdev_nvme_add_io_path_failed(struct nvme_bdev *nbdev, void *ctx, int status)
    4695             : {
    4696           0 :         struct nvme_ns *nvme_ns = ctx;
    4697             : 
    4698           0 :         nvme_ctrlr_populate_namespace_done(nvme_ns, -1);
    4699           0 : }
    4700             : 
    4701             : static void
    4702          12 : bdev_nvme_add_io_path_done(struct nvme_bdev *nbdev, void *ctx, int status)
    4703             : {
    4704          12 :         struct nvme_ns *nvme_ns = ctx;
    4705             : 
    4706          12 :         if (status == 0) {
    4707          12 :                 nvme_ctrlr_populate_namespace_done(nvme_ns, 0);
    4708             :         } else {
    4709             :                 /* Delete the added io_paths and fail populating the namespace. */
    4710           0 :                 nvme_bdev_for_each_channel(nbdev,
    4711             :                                            bdev_nvme_delete_io_path,
    4712             :                                            nvme_ns,
    4713             :                                            bdev_nvme_add_io_path_failed);
    4714             :         }
    4715          12 : }
    4716             : 
    4717             : static int
    4718          13 : nvme_bdev_add_ns(struct nvme_bdev *bdev, struct nvme_ns *nvme_ns)
    4719             : {
    4720             :         struct nvme_ns *tmp_ns;
    4721             :         const struct spdk_nvme_ns_data *nsdata;
    4722             : 
    4723          13 :         nsdata = spdk_nvme_ns_get_data(nvme_ns->ns);
    4724          13 :         if (!nsdata->nmic.can_share) {
    4725           0 :                 SPDK_ERRLOG("Namespace cannot be shared.\n");
    4726           0 :                 return -EINVAL;
    4727             :         }
    4728             : 
    4729          13 :         pthread_mutex_lock(&bdev->mutex);
    4730             : 
    4731          13 :         tmp_ns = TAILQ_FIRST(&bdev->nvme_ns_list);
    4732          13 :         assert(tmp_ns != NULL);
    4733             : 
    4734          13 :         if (tmp_ns->ns != NULL && !bdev_nvme_compare_ns(nvme_ns->ns, tmp_ns->ns)) {
    4735           1 :                 pthread_mutex_unlock(&bdev->mutex);
    4736           1 :                 SPDK_ERRLOG("Namespaces are not identical.\n");
    4737           1 :                 return -EINVAL;
    4738             :         }
    4739             : 
    4740          12 :         bdev->ref++;
    4741          12 :         TAILQ_INSERT_TAIL(&bdev->nvme_ns_list, nvme_ns, tailq);
    4742          12 :         nvme_ns->bdev = bdev;
    4743             : 
    4744          12 :         pthread_mutex_unlock(&bdev->mutex);
    4745             : 
    4746             :         /* Add nvme_io_path to nvme_bdev_channels dynamically. */
    4747          12 :         nvme_bdev_for_each_channel(bdev,
    4748             :                                    bdev_nvme_add_io_path,
    4749             :                                    nvme_ns,
    4750             :                                    bdev_nvme_add_io_path_done);
    4751             : 
    4752          12 :         return 0;
    4753             : }
    4754             : 
    4755             : static void
    4756          51 : nvme_ctrlr_populate_namespace(struct nvme_ctrlr *nvme_ctrlr, struct nvme_ns *nvme_ns)
    4757             : {
    4758             :         struct spdk_nvme_ns     *ns;
    4759             :         struct nvme_bdev        *bdev;
    4760          51 :         int                     rc = 0;
    4761             : 
    4762          51 :         ns = spdk_nvme_ctrlr_get_ns(nvme_ctrlr->ctrlr, nvme_ns->id);
    4763          51 :         if (!ns) {
    4764           0 :                 SPDK_DEBUGLOG(bdev_nvme, "Invalid NS %d\n", nvme_ns->id);
    4765           0 :                 rc = -EINVAL;
    4766           0 :                 goto done;
    4767             :         }
    4768             : 
    4769          51 :         nvme_ns->ns = ns;
    4770          51 :         nvme_ns->ana_state = SPDK_NVME_ANA_OPTIMIZED_STATE;
    4771             : 
    4772          51 :         if (nvme_ctrlr->ana_log_page != NULL) {
    4773          37 :                 bdev_nvme_parse_ana_log_page(nvme_ctrlr, nvme_ns_set_ana_state, nvme_ns);
    4774             :         }
    4775             : 
    4776          51 :         bdev = nvme_bdev_ctrlr_get_bdev(nvme_ctrlr->nbdev_ctrlr, nvme_ns->id);
    4777          51 :         if (bdev == NULL) {
    4778          38 :                 rc = nvme_bdev_create(nvme_ctrlr, nvme_ns);
    4779             :         } else {
    4780          13 :                 rc = nvme_bdev_add_ns(bdev, nvme_ns);
    4781          13 :                 if (rc == 0) {
    4782          12 :                         return;
    4783             :                 }
    4784             :         }
    4785           1 : done:
    4786          39 :         nvme_ctrlr_populate_namespace_done(nvme_ns, rc);
    4787             : }
    4788             : 
    4789             : static void
    4790          49 : nvme_ctrlr_depopulate_namespace_done(struct nvme_ns *nvme_ns)
    4791             : {
    4792          49 :         struct nvme_ctrlr *nvme_ctrlr = nvme_ns->ctrlr;
    4793             : 
    4794          49 :         assert(nvme_ctrlr != NULL);
    4795             : 
    4796          49 :         pthread_mutex_lock(&nvme_ctrlr->mutex);
    4797             : 
    4798          49 :         RB_REMOVE(nvme_ns_tree, &nvme_ctrlr->namespaces, nvme_ns);
    4799             : 
    4800          49 :         if (nvme_ns->bdev != NULL) {
    4801           0 :                 pthread_mutex_unlock(&nvme_ctrlr->mutex);
    4802           0 :                 return;
    4803             :         }
    4804             : 
    4805          49 :         nvme_ns_free(nvme_ns);
    4806          49 :         pthread_mutex_unlock(&nvme_ctrlr->mutex);
    4807             : 
    4808          49 :         nvme_ctrlr_release(nvme_ctrlr);
    4809             : }
    4810             : 
    4811             : static void
    4812          11 : bdev_nvme_delete_io_path_done(struct nvme_bdev *nbdev, void *ctx, int status)
    4813             : {
    4814          11 :         struct nvme_ns *nvme_ns = ctx;
    4815             : 
    4816          11 :         nvme_ctrlr_depopulate_namespace_done(nvme_ns);
    4817          11 : }
    4818             : 
    4819             : static void
    4820          49 : nvme_ctrlr_depopulate_namespace(struct nvme_ctrlr *nvme_ctrlr, struct nvme_ns *nvme_ns)
    4821             : {
    4822             :         struct nvme_bdev *bdev;
    4823             : 
    4824          49 :         spdk_poller_unregister(&nvme_ns->anatt_timer);
    4825             : 
    4826          49 :         bdev = nvme_ns->bdev;
    4827          49 :         if (bdev != NULL) {
    4828          45 :                 pthread_mutex_lock(&bdev->mutex);
    4829             : 
    4830          45 :                 assert(bdev->ref > 0);
    4831          45 :                 bdev->ref--;
    4832          45 :                 if (bdev->ref == 0) {
    4833          34 :                         pthread_mutex_unlock(&bdev->mutex);
    4834             : 
    4835          34 :                         spdk_bdev_unregister(&bdev->disk, NULL, NULL);
    4836             :                 } else {
    4837             :                         /* spdk_bdev_unregister() is not called until the last nvme_ns is
    4838             :                          * depopulated. Hence we need to remove nvme_ns from bdev->nvme_ns_list
    4839             :                          * and clear nvme_ns->bdev here.
    4840             :                          */
    4841          11 :                         TAILQ_REMOVE(&bdev->nvme_ns_list, nvme_ns, tailq);
    4842          11 :                         nvme_ns->bdev = NULL;
    4843             : 
    4844          11 :                         pthread_mutex_unlock(&bdev->mutex);
    4845             : 
    4846             :                         /* Delete nvme_io_paths from nvme_bdev_channels dynamically. After that,
    4847             :                          * we call depopulate_namespace_done() to avoid use-after-free.
    4848             :                          */
    4849          11 :                         nvme_bdev_for_each_channel(bdev,
    4850             :                                                    bdev_nvme_delete_io_path,
    4851             :                                                    nvme_ns,
    4852             :                                                    bdev_nvme_delete_io_path_done);
    4853          11 :                         return;
    4854             :                 }
    4855             :         }
    4856             : 
    4857          38 :         nvme_ctrlr_depopulate_namespace_done(nvme_ns);
    4858             : }
    4859             : 
    4860             : static void
    4861          62 : nvme_ctrlr_populate_namespaces(struct nvme_ctrlr *nvme_ctrlr,
    4862             :                                struct nvme_async_probe_ctx *ctx)
    4863             : {
    4864          62 :         struct spdk_nvme_ctrlr  *ctrlr = nvme_ctrlr->ctrlr;
    4865             :         struct nvme_ns  *nvme_ns, *next;
    4866             :         struct spdk_nvme_ns     *ns;
    4867             :         struct nvme_bdev        *bdev;
    4868             :         uint32_t                nsid;
    4869             :         int                     rc;
    4870             :         uint64_t                num_sectors;
    4871             : 
    4872          62 :         if (ctx) {
    4873             :                 /* Initialize this count to 1 to handle the populate functions
    4874             :                  * calling nvme_ctrlr_populate_namespace_done() immediately.
    4875             :                  */
    4876          46 :                 ctx->populates_in_progress = 1;
    4877             :         }
    4878             : 
    4879             :         /* First loop over our existing namespaces and see if they have been
    4880             :          * removed. */
    4881          62 :         nvme_ns = nvme_ctrlr_get_first_active_ns(nvme_ctrlr);
    4882          66 :         while (nvme_ns != NULL) {
    4883           4 :                 next = nvme_ctrlr_get_next_active_ns(nvme_ctrlr, nvme_ns);
    4884             : 
    4885           4 :                 if (spdk_nvme_ctrlr_is_active_ns(ctrlr, nvme_ns->id)) {
    4886             :                         /* NS is still there or added again. Its attributes may have changed. */
    4887           3 :                         ns = spdk_nvme_ctrlr_get_ns(ctrlr, nvme_ns->id);
    4888           3 :                         if (nvme_ns->ns != ns) {
    4889           1 :                                 assert(nvme_ns->ns == NULL);
    4890           1 :                                 nvme_ns->ns = ns;
    4891           1 :                                 SPDK_DEBUGLOG(bdev_nvme, "NSID %u was added\n", nvme_ns->id);
    4892             :                         }
    4893             : 
    4894           3 :                         num_sectors = spdk_nvme_ns_get_num_sectors(ns);
    4895           3 :                         bdev = nvme_ns->bdev;
    4896           3 :                         assert(bdev != NULL);
    4897           3 :                         if (bdev->disk.blockcnt != num_sectors) {
    4898           1 :                                 SPDK_NOTICELOG("NSID %u is resized: bdev name %s, old size %" PRIu64 ", new size %" PRIu64 "\n",
    4899             :                                                nvme_ns->id,
    4900             :                                                bdev->disk.name,
    4901             :                                                bdev->disk.blockcnt,
    4902             :                                                num_sectors);
    4903           1 :                                 rc = spdk_bdev_notify_blockcnt_change(&bdev->disk, num_sectors);
    4904           1 :                                 if (rc != 0) {
    4905           0 :                                         SPDK_ERRLOG("Could not change num blocks for nvme bdev: name %s, errno: %d.\n",
    4906             :                                                     bdev->disk.name, rc);
    4907             :                                 }
    4908             :                         }
    4909             :                 } else {
    4910             :                         /* Namespace was removed */
    4911           1 :                         nvme_ctrlr_depopulate_namespace(nvme_ctrlr, nvme_ns);
    4912             :                 }
    4913             : 
    4914           4 :                 nvme_ns = next;
    4915             :         }
    4916             : 
    4917             :         /* Loop through all of the namespaces at the nvme level and see if any of them are new */
    4918          62 :         nsid = spdk_nvme_ctrlr_get_first_active_ns(ctrlr);
    4919         116 :         while (nsid != 0) {
    4920          54 :                 nvme_ns = nvme_ctrlr_get_ns(nvme_ctrlr, nsid);
    4921             : 
    4922          54 :                 if (nvme_ns == NULL) {
    4923             :                         /* Found a new one */
    4924          51 :                         nvme_ns = nvme_ns_alloc();
    4925          51 :                         if (nvme_ns == NULL) {
    4926           0 :                                 SPDK_ERRLOG("Failed to allocate namespace\n");
    4927             :                                 /* This just fails to attach the namespace. It may work on a future attempt. */
    4928           0 :                                 continue;
    4929             :                         }
    4930             : 
    4931          51 :                         nvme_ns->id = nsid;
    4932          51 :                         nvme_ns->ctrlr = nvme_ctrlr;
    4933             : 
    4934          51 :                         nvme_ns->bdev = NULL;
    4935             : 
    4936          51 :                         if (ctx) {
    4937          50 :                                 ctx->populates_in_progress++;
    4938             :                         }
    4939          51 :                         nvme_ns->probe_ctx = ctx;
    4940             : 
    4941          51 :                         RB_INSERT(nvme_ns_tree, &nvme_ctrlr->namespaces, nvme_ns);
    4942             : 
    4943          51 :                         nvme_ctrlr_populate_namespace(nvme_ctrlr, nvme_ns);
    4944             :                 }
    4945             : 
    4946          54 :                 nsid = spdk_nvme_ctrlr_get_next_active_ns(ctrlr, nsid);
    4947             :         }
    4948             : 
    4949          62 :         if (ctx) {
    4950             :                 /* Decrement this count now that the loop is over to account
    4951             :                  * for the one we started with.  If the count is then 0, we
    4952             :                  * know any populate_namespace functions completed immediately,
    4953             :                  * so we'll kick the callback here.
    4954             :                  */
    4955          46 :                 ctx->populates_in_progress--;
    4956          46 :                 if (ctx->populates_in_progress == 0) {
    4957          34 :                         nvme_ctrlr_populate_namespaces_done(nvme_ctrlr, ctx);
    4958             :                 }
    4959             :         }
    4960             : 
    4961          62 : }
    4962             : 
    4963             : static void
    4964          61 : nvme_ctrlr_depopulate_namespaces(struct nvme_ctrlr *nvme_ctrlr)
    4965             : {
    4966             :         struct nvme_ns *nvme_ns, *tmp;
    4967             : 
    4968         109 :         RB_FOREACH_SAFE(nvme_ns, nvme_ns_tree, &nvme_ctrlr->namespaces, tmp) {
    4969          48 :                 nvme_ctrlr_depopulate_namespace(nvme_ctrlr, nvme_ns);
    4970             :         }
    4971          61 : }
    4972             : 
    4973             : static uint32_t
    4974          36 : nvme_ctrlr_get_ana_log_page_size(struct nvme_ctrlr *nvme_ctrlr)
    4975             : {
    4976          36 :         struct spdk_nvme_ctrlr *ctrlr = nvme_ctrlr->ctrlr;
    4977             :         const struct spdk_nvme_ctrlr_data *cdata;
    4978          36 :         uint32_t nsid, ns_count = 0;
    4979             : 
    4980          36 :         cdata = spdk_nvme_ctrlr_get_data(ctrlr);
    4981             : 
    4982          36 :         for (nsid = spdk_nvme_ctrlr_get_first_active_ns(ctrlr);
    4983          80 :              nsid != 0; nsid = spdk_nvme_ctrlr_get_next_active_ns(ctrlr, nsid)) {
    4984          44 :                 ns_count++;
    4985             :         }
    4986             : 
    4987          36 :         return sizeof(struct spdk_nvme_ana_page) + cdata->nanagrpid *
    4988          36 :                sizeof(struct spdk_nvme_ana_group_descriptor) + ns_count *
    4989             :                sizeof(uint32_t);
    4990             : }
    4991             : 
    4992             : static int
    4993           7 : nvme_ctrlr_set_ana_states(const struct spdk_nvme_ana_group_descriptor *desc,
    4994             :                           void *cb_arg)
    4995             : {
    4996           7 :         struct nvme_ctrlr *nvme_ctrlr = cb_arg;
    4997             :         struct nvme_ns *nvme_ns;
    4998             :         uint32_t i, nsid;
    4999             : 
    5000          13 :         for (i = 0; i < desc->num_of_nsid; i++) {
    5001           6 :                 nsid = desc->nsid[i];
    5002           6 :                 if (nsid == 0) {
    5003           0 :                         continue;
    5004             :                 }
    5005             : 
    5006           6 :                 nvme_ns = nvme_ctrlr_get_ns(nvme_ctrlr, nsid);
    5007             : 
    5008           6 :                 if (nvme_ns == NULL) {
    5009             :                         /* Target told us that an inactive namespace had an ANA change */
    5010           1 :                         continue;
    5011             :                 }
    5012             : 
    5013           5 :                 _nvme_ns_set_ana_state(nvme_ns, desc);
    5014             :         }
    5015             : 
    5016           7 :         return 0;
    5017             : }
    5018             : 
    5019             : static void
    5020           0 : bdev_nvme_disable_read_ana_log_page(struct nvme_ctrlr *nvme_ctrlr)
    5021             : {
    5022             :         struct nvme_ns *nvme_ns;
    5023             : 
    5024           0 :         spdk_free(nvme_ctrlr->ana_log_page);
    5025           0 :         nvme_ctrlr->ana_log_page = NULL;
    5026             : 
    5027           0 :         for (nvme_ns = nvme_ctrlr_get_first_active_ns(nvme_ctrlr);
    5028           0 :              nvme_ns != NULL;
    5029           0 :              nvme_ns = nvme_ctrlr_get_next_active_ns(nvme_ctrlr, nvme_ns)) {
    5030           0 :                 nvme_ns->ana_state_updating = false;
    5031           0 :                 nvme_ns->ana_state = SPDK_NVME_ANA_OPTIMIZED_STATE;
    5032             :         }
    5033           0 : }
    5034             : 
    5035             : static void
    5036           3 : nvme_ctrlr_read_ana_log_page_done(void *ctx, const struct spdk_nvme_cpl *cpl)
    5037             : {
    5038           3 :         struct nvme_ctrlr *nvme_ctrlr = ctx;
    5039             : 
    5040           3 :         if (cpl != NULL && spdk_nvme_cpl_is_success(cpl)) {
    5041           3 :                 bdev_nvme_parse_ana_log_page(nvme_ctrlr, nvme_ctrlr_set_ana_states,
    5042             :                                              nvme_ctrlr);
    5043             :         } else {
    5044           0 :                 bdev_nvme_disable_read_ana_log_page(nvme_ctrlr);
    5045             :         }
    5046             : 
    5047           3 :         pthread_mutex_lock(&nvme_ctrlr->mutex);
    5048             : 
    5049           3 :         assert(nvme_ctrlr->ana_log_page_updating == true);
    5050           3 :         nvme_ctrlr->ana_log_page_updating = false;
    5051             : 
    5052           3 :         if (nvme_ctrlr_can_be_unregistered(nvme_ctrlr)) {
    5053           0 :                 pthread_mutex_unlock(&nvme_ctrlr->mutex);
    5054             : 
    5055           0 :                 nvme_ctrlr_unregister(nvme_ctrlr);
    5056             :         } else {
    5057           3 :                 pthread_mutex_unlock(&nvme_ctrlr->mutex);
    5058             : 
    5059           3 :                 bdev_nvme_clear_io_path_caches(nvme_ctrlr);
    5060             :         }
    5061           3 : }
    5062             : 
    5063             : static int
    5064           6 : nvme_ctrlr_read_ana_log_page(struct nvme_ctrlr *nvme_ctrlr)
    5065             : {
    5066             :         uint32_t ana_log_page_size;
    5067             :         int rc;
    5068             : 
    5069           6 :         if (nvme_ctrlr->ana_log_page == NULL) {
    5070           0 :                 return -EINVAL;
    5071             :         }
    5072             : 
    5073           6 :         ana_log_page_size = nvme_ctrlr_get_ana_log_page_size(nvme_ctrlr);
    5074             : 
    5075           6 :         if (ana_log_page_size > nvme_ctrlr->max_ana_log_page_size) {
    5076           0 :                 SPDK_ERRLOG("ANA log page size %" PRIu32 " is larger than allowed %" PRIu32 "\n",
    5077             :                             ana_log_page_size, nvme_ctrlr->max_ana_log_page_size);
    5078           0 :                 return -EINVAL;
    5079             :         }
    5080             : 
    5081           6 :         pthread_mutex_lock(&nvme_ctrlr->mutex);
    5082           6 :         if (!nvme_ctrlr_is_available(nvme_ctrlr) ||
    5083             :             nvme_ctrlr->ana_log_page_updating) {
    5084           3 :                 pthread_mutex_unlock(&nvme_ctrlr->mutex);
    5085           3 :                 return -EBUSY;
    5086             :         }
    5087             : 
    5088           3 :         nvme_ctrlr->ana_log_page_updating = true;
    5089           3 :         pthread_mutex_unlock(&nvme_ctrlr->mutex);
    5090             : 
    5091           3 :         rc = spdk_nvme_ctrlr_cmd_get_log_page(nvme_ctrlr->ctrlr,
    5092             :                                               SPDK_NVME_LOG_ASYMMETRIC_NAMESPACE_ACCESS,
    5093             :                                               SPDK_NVME_GLOBAL_NS_TAG,
    5094           3 :                                               nvme_ctrlr->ana_log_page,
    5095             :                                               ana_log_page_size, 0,
    5096             :                                               nvme_ctrlr_read_ana_log_page_done,
    5097             :                                               nvme_ctrlr);
    5098           3 :         if (rc != 0) {
    5099           0 :                 nvme_ctrlr_read_ana_log_page_done(nvme_ctrlr, NULL);
    5100             :         }
    5101             : 
    5102           3 :         return rc;
    5103             : }
    5104             : 
    5105             : static void
    5106           0 : dummy_bdev_event_cb(enum spdk_bdev_event_type type, struct spdk_bdev *bdev, void *ctx)
    5107             : {
    5108           0 : }
    5109             : 
    5110             : struct bdev_nvme_set_preferred_path_ctx {
    5111             :         struct spdk_bdev_desc *desc;
    5112             :         struct nvme_ns *nvme_ns;
    5113             :         bdev_nvme_set_preferred_path_cb cb_fn;
    5114             :         void *cb_arg;
    5115             : };
    5116             : 
    5117             : static void
    5118           3 : bdev_nvme_set_preferred_path_done(struct nvme_bdev *nbdev, void *_ctx, int status)
    5119             : {
    5120           3 :         struct bdev_nvme_set_preferred_path_ctx *ctx = _ctx;
    5121             : 
    5122           3 :         assert(ctx != NULL);
    5123           3 :         assert(ctx->desc != NULL);
    5124           3 :         assert(ctx->cb_fn != NULL);
    5125             : 
    5126           3 :         spdk_bdev_close(ctx->desc);
    5127             : 
    5128           3 :         ctx->cb_fn(ctx->cb_arg, status);
    5129             : 
    5130           3 :         free(ctx);
    5131           3 : }
    5132             : 
    5133             : static void
    5134           2 : _bdev_nvme_set_preferred_path(struct nvme_bdev_channel_iter *i,
    5135             :                               struct nvme_bdev *nbdev,
    5136             :                               struct nvme_bdev_channel *nbdev_ch, void *_ctx)
    5137             : {
    5138           2 :         struct bdev_nvme_set_preferred_path_ctx *ctx = _ctx;
    5139             :         struct nvme_io_path *io_path, *prev;
    5140             : 
    5141           2 :         prev = NULL;
    5142           3 :         STAILQ_FOREACH(io_path, &nbdev_ch->io_path_list, stailq) {
    5143           3 :                 if (io_path->nvme_ns == ctx->nvme_ns) {
    5144           2 :                         break;
    5145             :                 }
    5146           1 :                 prev = io_path;
    5147             :         }
    5148             : 
    5149           2 :         if (io_path != NULL) {
    5150           2 :                 if (prev != NULL) {
    5151           1 :                         STAILQ_REMOVE_AFTER(&nbdev_ch->io_path_list, prev, stailq);
    5152           1 :                         STAILQ_INSERT_HEAD(&nbdev_ch->io_path_list, io_path, stailq);
    5153             :                 }
    5154             : 
    5155             :                 /* We can set io_path to nbdev_ch->current_io_path directly here.
    5156             :                  * However, it needs to be conditional. To simplify the code,
    5157             :                  * just clear nbdev_ch->current_io_path and let find_io_path()
    5158             :                  * fill it.
    5159             :                  *
    5160             :                  * Automatic failback may be disabled. Hence even if the io_path is
    5161             :                  * already at the head, clear nbdev_ch->current_io_path.
    5162             :                  */
    5163           2 :                 bdev_nvme_clear_current_io_path(nbdev_ch);
    5164             :         }
    5165             : 
    5166           2 :         nvme_bdev_for_each_channel_continue(i, 0);
    5167           2 : }
    5168             : 
    5169             : static struct nvme_ns *
    5170           3 : bdev_nvme_set_preferred_ns(struct nvme_bdev *nbdev, uint16_t cntlid)
    5171             : {
    5172             :         struct nvme_ns *nvme_ns, *prev;
    5173             :         const struct spdk_nvme_ctrlr_data *cdata;
    5174             : 
    5175           3 :         prev = NULL;
    5176           6 :         TAILQ_FOREACH(nvme_ns, &nbdev->nvme_ns_list, tailq) {
    5177           6 :                 cdata = spdk_nvme_ctrlr_get_data(nvme_ns->ctrlr->ctrlr);
    5178             : 
    5179           6 :                 if (cdata->cntlid == cntlid) {
    5180           3 :                         break;
    5181             :                 }
    5182           3 :                 prev = nvme_ns;
    5183             :         }
    5184             : 
    5185           3 :         if (nvme_ns != NULL && prev != NULL) {
    5186           2 :                 TAILQ_REMOVE(&nbdev->nvme_ns_list, nvme_ns, tailq);
    5187           2 :                 TAILQ_INSERT_HEAD(&nbdev->nvme_ns_list, nvme_ns, tailq);
    5188             :         }
    5189             : 
    5190           3 :         return nvme_ns;
    5191             : }
    5192             : 
    5193             : /* This function supports only multipath mode. There is only a single I/O path
    5194             :  * for each NVMe-oF controller. Hence, just move the matched I/O path to the
    5195             :  * head of the I/O path list for each NVMe bdev channel.
    5196             :  *
    5197             :  * NVMe bdev channel may be acquired after completing this function. move the
    5198             :  * matched namespace to the head of the namespace list for the NVMe bdev too.
    5199             :  */
    5200             : void
    5201           3 : bdev_nvme_set_preferred_path(const char *name, uint16_t cntlid,
    5202             :                              bdev_nvme_set_preferred_path_cb cb_fn, void *cb_arg)
    5203             : {
    5204             :         struct bdev_nvme_set_preferred_path_ctx *ctx;
    5205             :         struct spdk_bdev *bdev;
    5206             :         struct nvme_bdev *nbdev;
    5207           3 :         int rc = 0;
    5208             : 
    5209           3 :         assert(cb_fn != NULL);
    5210             : 
    5211           3 :         ctx = calloc(1, sizeof(*ctx));
    5212           3 :         if (ctx == NULL) {
    5213           0 :                 SPDK_ERRLOG("Failed to alloc context.\n");
    5214           0 :                 rc = -ENOMEM;
    5215           0 :                 goto err_alloc;
    5216             :         }
    5217             : 
    5218           3 :         ctx->cb_fn = cb_fn;
    5219           3 :         ctx->cb_arg = cb_arg;
    5220             : 
    5221           3 :         rc = spdk_bdev_open_ext(name, false, dummy_bdev_event_cb, NULL, &ctx->desc);
    5222           3 :         if (rc != 0) {
    5223           0 :                 SPDK_ERRLOG("Failed to open bdev %s.\n", name);
    5224           0 :                 goto err_open;
    5225             :         }
    5226             : 
    5227           3 :         bdev = spdk_bdev_desc_get_bdev(ctx->desc);
    5228             : 
    5229           3 :         if (bdev->module != &nvme_if) {
    5230           0 :                 SPDK_ERRLOG("bdev %s is not registered in this module.\n", name);
    5231           0 :                 rc = -ENODEV;
    5232           0 :                 goto err_bdev;
    5233             :         }
    5234             : 
    5235           3 :         nbdev = SPDK_CONTAINEROF(bdev, struct nvme_bdev, disk);
    5236             : 
    5237           3 :         pthread_mutex_lock(&nbdev->mutex);
    5238             : 
    5239           3 :         ctx->nvme_ns = bdev_nvme_set_preferred_ns(nbdev, cntlid);
    5240           3 :         if (ctx->nvme_ns == NULL) {
    5241           0 :                 pthread_mutex_unlock(&nbdev->mutex);
    5242             : 
    5243           0 :                 SPDK_ERRLOG("bdev %s does not have namespace to controller %u.\n", name, cntlid);
    5244           0 :                 rc = -ENODEV;
    5245           0 :                 goto err_bdev;
    5246             :         }
    5247             : 
    5248           3 :         pthread_mutex_unlock(&nbdev->mutex);
    5249             : 
    5250           3 :         nvme_bdev_for_each_channel(nbdev,
    5251             :                                    _bdev_nvme_set_preferred_path,
    5252             :                                    ctx,
    5253             :                                    bdev_nvme_set_preferred_path_done);
    5254           3 :         return;
    5255             : 
    5256           0 : err_bdev:
    5257           0 :         spdk_bdev_close(ctx->desc);
    5258           0 : err_open:
    5259           0 :         free(ctx);
    5260           0 : err_alloc:
    5261           0 :         cb_fn(cb_arg, rc);
    5262             : }
    5263             : 
    5264             : struct bdev_nvme_set_multipath_policy_ctx {
    5265             :         struct spdk_bdev_desc *desc;
    5266             :         spdk_bdev_nvme_set_multipath_policy_cb cb_fn;
    5267             :         void *cb_arg;
    5268             : };
    5269             : 
    5270             : static void
    5271           3 : bdev_nvme_set_multipath_policy_done(struct nvme_bdev *nbdev, void *_ctx, int status)
    5272             : {
    5273           3 :         struct bdev_nvme_set_multipath_policy_ctx *ctx = _ctx;
    5274             : 
    5275           3 :         assert(ctx != NULL);
    5276           3 :         assert(ctx->desc != NULL);
    5277           3 :         assert(ctx->cb_fn != NULL);
    5278             : 
    5279           3 :         spdk_bdev_close(ctx->desc);
    5280             : 
    5281           3 :         ctx->cb_fn(ctx->cb_arg, status);
    5282             : 
    5283           3 :         free(ctx);
    5284           3 : }
    5285             : 
    5286             : static void
    5287           1 : _bdev_nvme_set_multipath_policy(struct nvme_bdev_channel_iter *i,
    5288             :                                 struct nvme_bdev *nbdev,
    5289             :                                 struct nvme_bdev_channel *nbdev_ch, void *ctx)
    5290             : {
    5291           1 :         nbdev_ch->mp_policy = nbdev->mp_policy;
    5292           1 :         nbdev_ch->mp_selector = nbdev->mp_selector;
    5293           1 :         nbdev_ch->rr_min_io = nbdev->rr_min_io;
    5294           1 :         bdev_nvme_clear_current_io_path(nbdev_ch);
    5295             : 
    5296           1 :         nvme_bdev_for_each_channel_continue(i, 0);
    5297           1 : }
    5298             : 
    5299             : void
    5300           3 : spdk_bdev_nvme_set_multipath_policy(const char *name, enum spdk_bdev_nvme_multipath_policy policy,
    5301             :                                     enum spdk_bdev_nvme_multipath_selector selector, uint32_t rr_min_io,
    5302             :                                     spdk_bdev_nvme_set_multipath_policy_cb cb_fn, void *cb_arg)
    5303             : {
    5304             :         struct bdev_nvme_set_multipath_policy_ctx *ctx;
    5305             :         struct spdk_bdev *bdev;
    5306             :         struct nvme_bdev *nbdev;
    5307             :         int rc;
    5308             : 
    5309           3 :         assert(cb_fn != NULL);
    5310             : 
    5311           3 :         switch (policy) {
    5312           1 :         case BDEV_NVME_MP_POLICY_ACTIVE_PASSIVE:
    5313           1 :                 break;
    5314           2 :         case BDEV_NVME_MP_POLICY_ACTIVE_ACTIVE:
    5315             :                 switch (selector) {
    5316           1 :                 case BDEV_NVME_MP_SELECTOR_ROUND_ROBIN:
    5317           1 :                         if (rr_min_io == UINT32_MAX) {
    5318           0 :                                 rr_min_io = 1;
    5319           1 :                         } else if (rr_min_io == 0) {
    5320           0 :                                 rc = -EINVAL;
    5321           0 :                                 goto exit;
    5322             :                         }
    5323           1 :                         break;
    5324           1 :                 case BDEV_NVME_MP_SELECTOR_QUEUE_DEPTH:
    5325           1 :                         break;
    5326           0 :                 default:
    5327           0 :                         rc = -EINVAL;
    5328           0 :                         goto exit;
    5329             :                 }
    5330           2 :                 break;
    5331           0 :         default:
    5332           0 :                 rc = -EINVAL;
    5333           0 :                 goto exit;
    5334             :         }
    5335             : 
    5336           3 :         ctx = calloc(1, sizeof(*ctx));
    5337           3 :         if (ctx == NULL) {
    5338           0 :                 SPDK_ERRLOG("Failed to alloc context.\n");
    5339           0 :                 rc = -ENOMEM;
    5340           0 :                 goto exit;
    5341             :         }
    5342             : 
    5343           3 :         ctx->cb_fn = cb_fn;
    5344           3 :         ctx->cb_arg = cb_arg;
    5345             : 
    5346           3 :         rc = spdk_bdev_open_ext(name, false, dummy_bdev_event_cb, NULL, &ctx->desc);
    5347           3 :         if (rc != 0) {
    5348           0 :                 SPDK_ERRLOG("Failed to open bdev %s.\n", name);
    5349           0 :                 rc = -ENODEV;
    5350           0 :                 goto err_open;
    5351             :         }
    5352             : 
    5353           3 :         bdev = spdk_bdev_desc_get_bdev(ctx->desc);
    5354           3 :         if (bdev->module != &nvme_if) {
    5355           0 :                 SPDK_ERRLOG("bdev %s is not registered in this module.\n", name);
    5356           0 :                 rc = -ENODEV;
    5357           0 :                 goto err_module;
    5358             :         }
    5359           3 :         nbdev = SPDK_CONTAINEROF(bdev, struct nvme_bdev, disk);
    5360             : 
    5361           3 :         pthread_mutex_lock(&nbdev->mutex);
    5362           3 :         nbdev->mp_policy = policy;
    5363           3 :         nbdev->mp_selector = selector;
    5364           3 :         nbdev->rr_min_io = rr_min_io;
    5365           3 :         pthread_mutex_unlock(&nbdev->mutex);
    5366             : 
    5367           3 :         nvme_bdev_for_each_channel(nbdev,
    5368             :                                    _bdev_nvme_set_multipath_policy,
    5369             :                                    ctx,
    5370             :                                    bdev_nvme_set_multipath_policy_done);
    5371           3 :         return;
    5372             : 
    5373           0 : err_module:
    5374           0 :         spdk_bdev_close(ctx->desc);
    5375           0 : err_open:
    5376           0 :         free(ctx);
    5377           0 : exit:
    5378           0 :         cb_fn(cb_arg, rc);
    5379             : }
    5380             : 
    5381             : static void
    5382           3 : aer_cb(void *arg, const struct spdk_nvme_cpl *cpl)
    5383             : {
    5384           3 :         struct nvme_ctrlr *nvme_ctrlr           = arg;
    5385             :         union spdk_nvme_async_event_completion  event;
    5386             : 
    5387           3 :         if (spdk_nvme_cpl_is_error(cpl)) {
    5388           0 :                 SPDK_WARNLOG("AER request execute failed\n");
    5389           0 :                 return;
    5390             :         }
    5391             : 
    5392           3 :         event.raw = cpl->cdw0;
    5393           3 :         if ((event.bits.async_event_type == SPDK_NVME_ASYNC_EVENT_TYPE_NOTICE) &&
    5394           3 :             (event.bits.async_event_info == SPDK_NVME_ASYNC_EVENT_NS_ATTR_CHANGED)) {
    5395           2 :                 nvme_ctrlr_populate_namespaces(nvme_ctrlr, NULL);
    5396           1 :         } else if ((event.bits.async_event_type == SPDK_NVME_ASYNC_EVENT_TYPE_NOTICE) &&
    5397           1 :                    (event.bits.async_event_info == SPDK_NVME_ASYNC_EVENT_ANA_CHANGE)) {
    5398           1 :                 nvme_ctrlr_read_ana_log_page(nvme_ctrlr);
    5399             :         }
    5400             : }
    5401             : 
    5402             : static void
    5403          52 : free_nvme_async_probe_ctx(struct nvme_async_probe_ctx *ctx)
    5404             : {
    5405          52 :         spdk_keyring_put_key(ctx->drv_opts.tls_psk);
    5406          52 :         spdk_keyring_put_key(ctx->drv_opts.dhchap_key);
    5407          52 :         spdk_keyring_put_key(ctx->drv_opts.dhchap_ctrlr_key);
    5408          52 :         free(ctx);
    5409          52 : }
    5410             : 
    5411             : static void
    5412          52 : populate_namespaces_cb(struct nvme_async_probe_ctx *ctx, int rc)
    5413             : {
    5414          52 :         if (ctx->cb_fn) {
    5415          52 :                 ctx->cb_fn(ctx->cb_ctx, ctx->reported_bdevs, rc);
    5416             :         }
    5417             : 
    5418          52 :         ctx->namespaces_populated = true;
    5419          52 :         if (ctx->probe_done) {
    5420             :                 /* The probe was already completed, so we need to free the context
    5421             :                  * here.  This can happen for cases like OCSSD, where we need to
    5422             :                  * send additional commands to the SSD after attach.
    5423             :                  */
    5424          31 :                 free_nvme_async_probe_ctx(ctx);
    5425             :         }
    5426          52 : }
    5427             : 
    5428             : static int
    5429          19 : bdev_nvme_remove_poller(void *ctx)
    5430             : {
    5431          19 :         struct spdk_nvme_transport_id trid_pcie;
    5432             : 
    5433          19 :         if (TAILQ_EMPTY(&g_nvme_bdev_ctrlrs)) {
    5434           1 :                 spdk_poller_unregister(&g_hotplug_poller);
    5435           1 :                 return SPDK_POLLER_IDLE;
    5436             :         }
    5437             : 
    5438          18 :         memset(&trid_pcie, 0, sizeof(trid_pcie));
    5439          18 :         spdk_nvme_trid_populate_transport(&trid_pcie, SPDK_NVME_TRANSPORT_PCIE);
    5440             : 
    5441          18 :         if (spdk_nvme_scan_attached(&trid_pcie)) {
    5442           0 :                 SPDK_ERRLOG_RATELIMIT("spdk_nvme_scan_attached() failed\n");
    5443             :         }
    5444             : 
    5445          18 :         return SPDK_POLLER_BUSY;
    5446             : }
    5447             : 
    5448             : static void
    5449          60 : nvme_ctrlr_create_done(struct nvme_ctrlr *nvme_ctrlr,
    5450             :                        struct nvme_async_probe_ctx *ctx)
    5451             : {
    5452          60 :         spdk_io_device_register(nvme_ctrlr,
    5453             :                                 bdev_nvme_create_ctrlr_channel_cb,
    5454             :                                 bdev_nvme_destroy_ctrlr_channel_cb,
    5455             :                                 sizeof(struct nvme_ctrlr_channel),
    5456          60 :                                 nvme_ctrlr->nbdev_ctrlr->name);
    5457             : 
    5458          60 :         nvme_ctrlr_populate_namespaces(nvme_ctrlr, ctx);
    5459             : 
    5460          60 :         if (g_hotplug_poller == NULL) {
    5461           2 :                 g_hotplug_poller = SPDK_POLLER_REGISTER(bdev_nvme_remove_poller, NULL,
    5462             :                                                         NVME_HOTPLUG_POLL_PERIOD_DEFAULT);
    5463             :         }
    5464          60 : }
    5465             : 
    5466             : static void
    5467          30 : nvme_ctrlr_init_ana_log_page_done(void *_ctx, const struct spdk_nvme_cpl *cpl)
    5468             : {
    5469          30 :         struct nvme_ctrlr *nvme_ctrlr = _ctx;
    5470          30 :         struct nvme_async_probe_ctx *ctx = nvme_ctrlr->probe_ctx;
    5471             : 
    5472          30 :         nvme_ctrlr->probe_ctx = NULL;
    5473             : 
    5474          30 :         if (spdk_nvme_cpl_is_error(cpl)) {
    5475           0 :                 nvme_ctrlr_delete(nvme_ctrlr);
    5476             : 
    5477           0 :                 if (ctx != NULL) {
    5478           0 :                         ctx->reported_bdevs = 0;
    5479           0 :                         populate_namespaces_cb(ctx, -1);
    5480             :                 }
    5481           0 :                 return;
    5482             :         }
    5483             : 
    5484          30 :         nvme_ctrlr_create_done(nvme_ctrlr, ctx);
    5485             : }
    5486             : 
    5487             : static int
    5488          30 : nvme_ctrlr_init_ana_log_page(struct nvme_ctrlr *nvme_ctrlr,
    5489             :                              struct nvme_async_probe_ctx *ctx)
    5490             : {
    5491          30 :         struct spdk_nvme_ctrlr *ctrlr = nvme_ctrlr->ctrlr;
    5492             :         const struct spdk_nvme_ctrlr_data *cdata;
    5493             :         uint32_t ana_log_page_size;
    5494             : 
    5495          30 :         cdata = spdk_nvme_ctrlr_get_data(ctrlr);
    5496             : 
    5497             :         /* Set buffer size enough to include maximum number of allowed namespaces. */
    5498          30 :         ana_log_page_size = sizeof(struct spdk_nvme_ana_page) + cdata->nanagrpid *
    5499          30 :                             sizeof(struct spdk_nvme_ana_group_descriptor) + cdata->mnan *
    5500             :                             sizeof(uint32_t);
    5501             : 
    5502          30 :         nvme_ctrlr->ana_log_page = spdk_zmalloc(ana_log_page_size, 64, NULL,
    5503             :                                                 SPDK_ENV_NUMA_ID_ANY, SPDK_MALLOC_DMA);
    5504          30 :         if (nvme_ctrlr->ana_log_page == NULL) {
    5505           0 :                 SPDK_ERRLOG("could not allocate ANA log page buffer\n");
    5506           0 :                 return -ENXIO;
    5507             :         }
    5508             : 
    5509             :         /* Each descriptor in a ANA log page is not ensured to be 8-bytes aligned.
    5510             :          * Hence copy each descriptor to a temporary area when parsing it.
    5511             :          *
    5512             :          * Allocate a buffer whose size is as large as ANA log page buffer because
    5513             :          * we do not know the size of a descriptor until actually reading it.
    5514             :          */
    5515          30 :         nvme_ctrlr->copied_ana_desc = calloc(1, ana_log_page_size);
    5516          30 :         if (nvme_ctrlr->copied_ana_desc == NULL) {
    5517           0 :                 SPDK_ERRLOG("could not allocate a buffer to parse ANA descriptor\n");
    5518           0 :                 return -ENOMEM;
    5519             :         }
    5520             : 
    5521          30 :         nvme_ctrlr->max_ana_log_page_size = ana_log_page_size;
    5522             : 
    5523          30 :         nvme_ctrlr->probe_ctx = ctx;
    5524             : 
    5525             :         /* Then, set the read size only to include the current active namespaces. */
    5526          30 :         ana_log_page_size = nvme_ctrlr_get_ana_log_page_size(nvme_ctrlr);
    5527             : 
    5528          30 :         if (ana_log_page_size > nvme_ctrlr->max_ana_log_page_size) {
    5529           0 :                 SPDK_ERRLOG("ANA log page size %" PRIu32 " is larger than allowed %" PRIu32 "\n",
    5530             :                             ana_log_page_size, nvme_ctrlr->max_ana_log_page_size);
    5531           0 :                 return -EINVAL;
    5532             :         }
    5533             : 
    5534          30 :         return spdk_nvme_ctrlr_cmd_get_log_page(ctrlr,
    5535             :                                                 SPDK_NVME_LOG_ASYMMETRIC_NAMESPACE_ACCESS,
    5536             :                                                 SPDK_NVME_GLOBAL_NS_TAG,
    5537          30 :                                                 nvme_ctrlr->ana_log_page,
    5538             :                                                 ana_log_page_size, 0,
    5539             :                                                 nvme_ctrlr_init_ana_log_page_done,
    5540             :                                                 nvme_ctrlr);
    5541             : }
    5542             : 
    5543             : /* hostnqn and subnqn were already verified before attaching a controller.
    5544             :  * Hence check only the multipath capability and cntlid here.
    5545             :  */
    5546             : static bool
    5547          16 : bdev_nvme_check_multipath(struct nvme_bdev_ctrlr *nbdev_ctrlr, struct spdk_nvme_ctrlr *ctrlr)
    5548             : {
    5549             :         struct nvme_ctrlr *tmp;
    5550             :         const struct spdk_nvme_ctrlr_data *cdata, *tmp_cdata;
    5551             : 
    5552          16 :         cdata = spdk_nvme_ctrlr_get_data(ctrlr);
    5553             : 
    5554          16 :         if (!cdata->cmic.multi_ctrlr) {
    5555           0 :                 SPDK_ERRLOG("Ctrlr%u does not support multipath.\n", cdata->cntlid);
    5556           0 :                 return false;
    5557             :         }
    5558             : 
    5559          33 :         TAILQ_FOREACH(tmp, &nbdev_ctrlr->ctrlrs, tailq) {
    5560          18 :                 tmp_cdata = spdk_nvme_ctrlr_get_data(tmp->ctrlr);
    5561             : 
    5562          18 :                 if (!tmp_cdata->cmic.multi_ctrlr) {
    5563           0 :                         SPDK_ERRLOG("Ctrlr%u does not support multipath.\n", cdata->cntlid);
    5564           0 :                         return false;
    5565             :                 }
    5566          18 :                 if (cdata->cntlid == tmp_cdata->cntlid) {
    5567           1 :                         SPDK_ERRLOG("cntlid %u are duplicated.\n", tmp_cdata->cntlid);
    5568           1 :                         return false;
    5569             :                 }
    5570             :         }
    5571             : 
    5572          15 :         return true;
    5573             : }
    5574             : 
    5575           1 : SPDK_LOG_DEPRECATION_REGISTER(multipath_config,
    5576             :                               "bdev_nvme_attach_controller.multipath configuration mismatch", "v25.01", 0);
    5577             : 
    5578             : static int
    5579          61 : nvme_bdev_ctrlr_create(const char *name, struct nvme_ctrlr *nvme_ctrlr)
    5580             : {
    5581             :         struct nvme_bdev_ctrlr *nbdev_ctrlr;
    5582          61 :         struct spdk_nvme_ctrlr *ctrlr = nvme_ctrlr->ctrlr;
    5583             :         struct nvme_ctrlr      *nctrlr;
    5584          61 :         int rc = 0;
    5585             : 
    5586          61 :         pthread_mutex_lock(&g_bdev_nvme_mutex);
    5587             : 
    5588          61 :         nbdev_ctrlr = nvme_bdev_ctrlr_get_by_name(name);
    5589          61 :         if (nbdev_ctrlr != NULL) {
    5590          16 :                 if (!bdev_nvme_check_multipath(nbdev_ctrlr, ctrlr)) {
    5591           1 :                         rc = -EINVAL;
    5592           1 :                         goto exit;
    5593             :                 }
    5594          32 :                 TAILQ_FOREACH(nctrlr, &nbdev_ctrlr->ctrlrs, tailq) {
    5595          17 :                         if (nctrlr->opts.multipath != nvme_ctrlr->opts.multipath) {
    5596             :                                 /* All controllers created with the same name must be configured either
    5597             :                                  * for multipath or for failover. Otherwise we have a configuration mismatch.
    5598             :                                  * While this is currently still supported, support for configurations where some
    5599             :                                  * controllers with the same name are configured for multipath, while others
    5600             :                                  * are configured for failover will be removed in release 25.01.
    5601             :                                  * Default mode change: starting from SPDK 25.01, if the user does not provide the
    5602             :                                  * '-x <mode>' parameter in the bdev_nvme_attach_controller RPC call, the default
    5603             :                                  * mode assigned to the controller will be 'multipath'.
    5604             :                                  */
    5605           0 :                                 SPDK_LOG_DEPRECATED(multipath_config);
    5606           0 :                                 break;
    5607             :                         }
    5608             :                 }
    5609             :         } else {
    5610          45 :                 nbdev_ctrlr = calloc(1, sizeof(*nbdev_ctrlr));
    5611          45 :                 if (nbdev_ctrlr == NULL) {
    5612           0 :                         SPDK_ERRLOG("Failed to allocate nvme_bdev_ctrlr.\n");
    5613           0 :                         rc = -ENOMEM;
    5614           0 :                         goto exit;
    5615             :                 }
    5616          45 :                 nbdev_ctrlr->name = strdup(name);
    5617          45 :                 if (nbdev_ctrlr->name == NULL) {
    5618           0 :                         SPDK_ERRLOG("Failed to allocate name of nvme_bdev_ctrlr.\n");
    5619           0 :                         free(nbdev_ctrlr);
    5620           0 :                         goto exit;
    5621             :                 }
    5622          45 :                 TAILQ_INIT(&nbdev_ctrlr->ctrlrs);
    5623          45 :                 TAILQ_INIT(&nbdev_ctrlr->bdevs);
    5624          45 :                 TAILQ_INSERT_TAIL(&g_nvme_bdev_ctrlrs, nbdev_ctrlr, tailq);
    5625             :         }
    5626          60 :         nvme_ctrlr->nbdev_ctrlr = nbdev_ctrlr;
    5627          60 :         TAILQ_INSERT_TAIL(&nbdev_ctrlr->ctrlrs, nvme_ctrlr, tailq);
    5628          61 : exit:
    5629          61 :         pthread_mutex_unlock(&g_bdev_nvme_mutex);
    5630          61 :         return rc;
    5631             : }
    5632             : 
    5633             : static int
    5634          61 : nvme_ctrlr_create(struct spdk_nvme_ctrlr *ctrlr,
    5635             :                   const char *name,
    5636             :                   const struct spdk_nvme_transport_id *trid,
    5637             :                   struct nvme_async_probe_ctx *ctx)
    5638             : {
    5639             :         struct nvme_ctrlr *nvme_ctrlr;
    5640             :         struct nvme_path_id *path_id;
    5641             :         const struct spdk_nvme_ctrlr_data *cdata;
    5642             :         int rc;
    5643             : 
    5644          61 :         nvme_ctrlr = calloc(1, sizeof(*nvme_ctrlr));
    5645          61 :         if (nvme_ctrlr == NULL) {
    5646           0 :                 SPDK_ERRLOG("Failed to allocate device struct\n");
    5647           0 :                 return -ENOMEM;
    5648             :         }
    5649             : 
    5650          61 :         rc = pthread_mutex_init(&nvme_ctrlr->mutex, NULL);
    5651          61 :         if (rc != 0) {
    5652           0 :                 free(nvme_ctrlr);
    5653           0 :                 return rc;
    5654             :         }
    5655             : 
    5656          61 :         TAILQ_INIT(&nvme_ctrlr->trids);
    5657          61 :         RB_INIT(&nvme_ctrlr->namespaces);
    5658             : 
    5659             :         /* Get another reference to the key, so the first one can be released from probe_ctx */
    5660          61 :         if (ctx != NULL) {
    5661          47 :                 if (ctx->drv_opts.tls_psk != NULL) {
    5662           0 :                         nvme_ctrlr->psk = spdk_keyring_get_key(
    5663             :                                                   spdk_key_get_name(ctx->drv_opts.tls_psk));
    5664           0 :                         if (nvme_ctrlr->psk == NULL) {
    5665             :                                 /* Could only happen if the key was removed in the meantime */
    5666           0 :                                 SPDK_ERRLOG("Couldn't get a reference to the key '%s'\n",
    5667             :                                             spdk_key_get_name(ctx->drv_opts.tls_psk));
    5668           0 :                                 rc = -ENOKEY;
    5669           0 :                                 goto err;
    5670             :                         }
    5671             :                 }
    5672             : 
    5673          47 :                 if (ctx->drv_opts.dhchap_key != NULL) {
    5674           0 :                         nvme_ctrlr->dhchap_key = spdk_keyring_get_key(
    5675             :                                                          spdk_key_get_name(ctx->drv_opts.dhchap_key));
    5676           0 :                         if (nvme_ctrlr->dhchap_key == NULL) {
    5677           0 :                                 SPDK_ERRLOG("Couldn't get a reference to the key '%s'\n",
    5678             :                                             spdk_key_get_name(ctx->drv_opts.dhchap_key));
    5679           0 :                                 rc = -ENOKEY;
    5680           0 :                                 goto err;
    5681             :                         }
    5682             :                 }
    5683             : 
    5684          47 :                 if (ctx->drv_opts.dhchap_ctrlr_key != NULL) {
    5685           0 :                         nvme_ctrlr->dhchap_ctrlr_key =
    5686           0 :                                 spdk_keyring_get_key(
    5687             :                                         spdk_key_get_name(ctx->drv_opts.dhchap_ctrlr_key));
    5688           0 :                         if (nvme_ctrlr->dhchap_ctrlr_key == NULL) {
    5689           0 :                                 SPDK_ERRLOG("Couldn't get a reference to the key '%s'\n",
    5690             :                                             spdk_key_get_name(ctx->drv_opts.dhchap_ctrlr_key));
    5691           0 :                                 rc = -ENOKEY;
    5692           0 :                                 goto err;
    5693             :                         }
    5694             :                 }
    5695             :         }
    5696             : 
    5697          61 :         path_id = calloc(1, sizeof(*path_id));
    5698          61 :         if (path_id == NULL) {
    5699           0 :                 SPDK_ERRLOG("Failed to allocate trid entry pointer\n");
    5700           0 :                 rc = -ENOMEM;
    5701           0 :                 goto err;
    5702             :         }
    5703             : 
    5704          61 :         path_id->trid = *trid;
    5705          61 :         if (ctx != NULL) {
    5706          47 :                 memcpy(path_id->hostid.hostaddr, ctx->drv_opts.src_addr, sizeof(path_id->hostid.hostaddr));
    5707          47 :                 memcpy(path_id->hostid.hostsvcid, ctx->drv_opts.src_svcid, sizeof(path_id->hostid.hostsvcid));
    5708             :         }
    5709          61 :         nvme_ctrlr->active_path_id = path_id;
    5710          61 :         TAILQ_INSERT_HEAD(&nvme_ctrlr->trids, path_id, link);
    5711             : 
    5712          61 :         nvme_ctrlr->thread = spdk_get_thread();
    5713          61 :         nvme_ctrlr->ctrlr = ctrlr;
    5714          61 :         nvme_ctrlr->ref = 1;
    5715             : 
    5716          61 :         if (spdk_nvme_ctrlr_is_ocssd_supported(ctrlr)) {
    5717           0 :                 SPDK_ERRLOG("OCSSDs are not supported");
    5718           0 :                 rc = -ENOTSUP;
    5719           0 :                 goto err;
    5720             :         }
    5721             : 
    5722          61 :         if (ctx != NULL) {
    5723          47 :                 memcpy(&nvme_ctrlr->opts, &ctx->bdev_opts, sizeof(ctx->bdev_opts));
    5724             :         } else {
    5725          14 :                 spdk_bdev_nvme_get_default_ctrlr_opts(&nvme_ctrlr->opts);
    5726             :         }
    5727             : 
    5728          61 :         nvme_ctrlr->adminq_timer_poller = SPDK_POLLER_REGISTER(bdev_nvme_poll_adminq, nvme_ctrlr,
    5729             :                                           g_opts.nvme_adminq_poll_period_us);
    5730             : 
    5731          61 :         if (g_opts.timeout_us > 0) {
    5732             :                 /* Register timeout callback. Timeout values for IO vs. admin reqs can be different. */
    5733             :                 /* If timeout_admin_us is 0 (not specified), admin uses same timeout as IO. */
    5734           0 :                 uint64_t adm_timeout_us = (g_opts.timeout_admin_us == 0) ?
    5735           0 :                                           g_opts.timeout_us : g_opts.timeout_admin_us;
    5736           0 :                 spdk_nvme_ctrlr_register_timeout_callback(ctrlr, g_opts.timeout_us,
    5737             :                                 adm_timeout_us, timeout_cb, nvme_ctrlr);
    5738             :         }
    5739             : 
    5740          61 :         spdk_nvme_ctrlr_register_aer_callback(ctrlr, aer_cb, nvme_ctrlr);
    5741          61 :         spdk_nvme_ctrlr_set_remove_cb(ctrlr, remove_cb, nvme_ctrlr);
    5742             : 
    5743          61 :         if (spdk_nvme_ctrlr_get_flags(ctrlr) &
    5744             :             SPDK_NVME_CTRLR_SECURITY_SEND_RECV_SUPPORTED) {
    5745           0 :                 nvme_ctrlr->opal_dev = spdk_opal_dev_construct(ctrlr);
    5746             :         }
    5747             : 
    5748          61 :         rc = nvme_bdev_ctrlr_create(name, nvme_ctrlr);
    5749          61 :         if (rc != 0) {
    5750           1 :                 goto err;
    5751             :         }
    5752             : 
    5753          60 :         cdata = spdk_nvme_ctrlr_get_data(ctrlr);
    5754             : 
    5755          60 :         if (cdata->cmic.ana_reporting) {
    5756          30 :                 rc = nvme_ctrlr_init_ana_log_page(nvme_ctrlr, ctx);
    5757          30 :                 if (rc == 0) {
    5758          30 :                         return 0;
    5759             :                 }
    5760             :         } else {
    5761          30 :                 nvme_ctrlr_create_done(nvme_ctrlr, ctx);
    5762          30 :                 return 0;
    5763             :         }
    5764             : 
    5765           1 : err:
    5766           1 :         nvme_ctrlr_delete(nvme_ctrlr);
    5767           1 :         return rc;
    5768             : }
    5769             : 
    5770             : void
    5771          56 : spdk_bdev_nvme_get_default_ctrlr_opts(struct spdk_bdev_nvme_ctrlr_opts *opts)
    5772             : {
    5773          56 :         opts->prchk_flags = 0;
    5774          56 :         opts->ctrlr_loss_timeout_sec = g_opts.ctrlr_loss_timeout_sec;
    5775          56 :         opts->reconnect_delay_sec = g_opts.reconnect_delay_sec;
    5776          56 :         opts->fast_io_fail_timeout_sec = g_opts.fast_io_fail_timeout_sec;
    5777          56 :         opts->multipath = false;
    5778          56 : }
    5779             : 
    5780             : static void
    5781           0 : attach_cb(void *cb_ctx, const struct spdk_nvme_transport_id *trid,
    5782             :           struct spdk_nvme_ctrlr *ctrlr, const struct spdk_nvme_ctrlr_opts *drv_opts)
    5783             : {
    5784             :         char *name;
    5785             : 
    5786           0 :         name = spdk_sprintf_alloc("HotInNvme%d", g_hot_insert_nvme_controller_index++);
    5787           0 :         if (!name) {
    5788           0 :                 SPDK_ERRLOG("Failed to assign name to NVMe device\n");
    5789           0 :                 return;
    5790             :         }
    5791             : 
    5792           0 :         if (nvme_ctrlr_create(ctrlr, name, trid, NULL) == 0) {
    5793           0 :                 SPDK_DEBUGLOG(bdev_nvme, "Attached to %s (%s)\n", trid->traddr, name);
    5794             :         } else {
    5795           0 :                 SPDK_ERRLOG("Failed to attach to %s (%s)\n", trid->traddr, name);
    5796             :         }
    5797             : 
    5798           0 :         free(name);
    5799             : }
    5800             : 
    5801             : static void
    5802          60 : _nvme_ctrlr_destruct(void *ctx)
    5803             : {
    5804          60 :         struct nvme_ctrlr *nvme_ctrlr = ctx;
    5805             : 
    5806          60 :         nvme_ctrlr_depopulate_namespaces(nvme_ctrlr);
    5807          60 :         nvme_ctrlr_release(nvme_ctrlr);
    5808          60 : }
    5809             : 
    5810             : static int
    5811          57 : bdev_nvme_delete_ctrlr_unsafe(struct nvme_ctrlr *nvme_ctrlr, bool hotplug)
    5812             : {
    5813             :         struct nvme_probe_skip_entry *entry;
    5814             : 
    5815             :         /* The controller's destruction was already started */
    5816          57 :         if (nvme_ctrlr->destruct) {
    5817           0 :                 return -EALREADY;
    5818             :         }
    5819             : 
    5820          57 :         if (!hotplug &&
    5821          57 :             nvme_ctrlr->active_path_id->trid.trtype == SPDK_NVME_TRANSPORT_PCIE) {
    5822           0 :                 entry = calloc(1, sizeof(*entry));
    5823           0 :                 if (!entry) {
    5824           0 :                         return -ENOMEM;
    5825             :                 }
    5826           0 :                 entry->trid = nvme_ctrlr->active_path_id->trid;
    5827           0 :                 TAILQ_INSERT_TAIL(&g_skipped_nvme_ctrlrs, entry, tailq);
    5828             :         }
    5829             : 
    5830          57 :         nvme_ctrlr->destruct = true;
    5831          57 :         return 0;
    5832             : }
    5833             : 
    5834             : static int
    5835           2 : bdev_nvme_delete_ctrlr(struct nvme_ctrlr *nvme_ctrlr, bool hotplug)
    5836             : {
    5837             :         int rc;
    5838             : 
    5839           2 :         pthread_mutex_lock(&nvme_ctrlr->mutex);
    5840           2 :         rc = bdev_nvme_delete_ctrlr_unsafe(nvme_ctrlr, hotplug);
    5841           2 :         pthread_mutex_unlock(&nvme_ctrlr->mutex);
    5842             : 
    5843           2 :         if (rc == 0) {
    5844           2 :                 _nvme_ctrlr_destruct(nvme_ctrlr);
    5845           0 :         } else if (rc == -EALREADY) {
    5846           0 :                 rc = 0;
    5847             :         }
    5848             : 
    5849           2 :         return rc;
    5850             : }
    5851             : 
    5852             : static void
    5853           0 : remove_cb(void *cb_ctx, struct spdk_nvme_ctrlr *ctrlr)
    5854             : {
    5855           0 :         struct nvme_ctrlr *nvme_ctrlr = cb_ctx;
    5856             : 
    5857           0 :         bdev_nvme_delete_ctrlr(nvme_ctrlr, true);
    5858           0 : }
    5859             : 
    5860             : static int
    5861           0 : bdev_nvme_hotplug_probe(void *arg)
    5862             : {
    5863           0 :         if (g_hotplug_probe_ctx == NULL) {
    5864           0 :                 spdk_poller_unregister(&g_hotplug_probe_poller);
    5865           0 :                 return SPDK_POLLER_IDLE;
    5866             :         }
    5867             : 
    5868           0 :         if (spdk_nvme_probe_poll_async(g_hotplug_probe_ctx) != -EAGAIN) {
    5869           0 :                 g_hotplug_probe_ctx = NULL;
    5870           0 :                 spdk_poller_unregister(&g_hotplug_probe_poller);
    5871             :         }
    5872             : 
    5873           0 :         return SPDK_POLLER_BUSY;
    5874             : }
    5875             : 
    5876             : static int
    5877           0 : bdev_nvme_hotplug(void *arg)
    5878             : {
    5879           0 :         struct spdk_nvme_transport_id trid_pcie;
    5880             : 
    5881           0 :         if (g_hotplug_probe_ctx) {
    5882           0 :                 return SPDK_POLLER_BUSY;
    5883             :         }
    5884             : 
    5885           0 :         memset(&trid_pcie, 0, sizeof(trid_pcie));
    5886           0 :         spdk_nvme_trid_populate_transport(&trid_pcie, SPDK_NVME_TRANSPORT_PCIE);
    5887             : 
    5888           0 :         g_hotplug_probe_ctx = spdk_nvme_probe_async(&trid_pcie, NULL,
    5889             :                               hotplug_probe_cb, attach_cb, NULL);
    5890             : 
    5891           0 :         if (g_hotplug_probe_ctx) {
    5892           0 :                 assert(g_hotplug_probe_poller == NULL);
    5893           0 :                 g_hotplug_probe_poller = SPDK_POLLER_REGISTER(bdev_nvme_hotplug_probe, NULL, 1000);
    5894             :         }
    5895             : 
    5896           0 :         return SPDK_POLLER_BUSY;
    5897             : }
    5898             : 
    5899             : void
    5900           0 : bdev_nvme_get_opts(struct spdk_bdev_nvme_opts *opts)
    5901             : {
    5902           0 :         *opts = g_opts;
    5903           0 : }
    5904             : 
    5905             : static bool bdev_nvme_check_io_error_resiliency_params(int32_t ctrlr_loss_timeout_sec,
    5906             :                 uint32_t reconnect_delay_sec,
    5907             :                 uint32_t fast_io_fail_timeout_sec);
    5908             : 
    5909             : static int
    5910           0 : bdev_nvme_validate_opts(const struct spdk_bdev_nvme_opts *opts)
    5911             : {
    5912           0 :         if ((opts->timeout_us == 0) && (opts->timeout_admin_us != 0)) {
    5913             :                 /* Can't set timeout_admin_us without also setting timeout_us */
    5914           0 :                 SPDK_WARNLOG("Invalid options: Can't have (timeout_us == 0) with (timeout_admin_us > 0)\n");
    5915           0 :                 return -EINVAL;
    5916             :         }
    5917             : 
    5918           0 :         if (opts->bdev_retry_count < -1) {
    5919           0 :                 SPDK_WARNLOG("Invalid option: bdev_retry_count can't be less than -1.\n");
    5920           0 :                 return -EINVAL;
    5921             :         }
    5922             : 
    5923           0 :         if (!bdev_nvme_check_io_error_resiliency_params(opts->ctrlr_loss_timeout_sec,
    5924           0 :                         opts->reconnect_delay_sec,
    5925           0 :                         opts->fast_io_fail_timeout_sec)) {
    5926           0 :                 return -EINVAL;
    5927             :         }
    5928             : 
    5929           0 :         return 0;
    5930             : }
    5931             : 
    5932             : int
    5933           0 : bdev_nvme_set_opts(const struct spdk_bdev_nvme_opts *opts)
    5934             : {
    5935             :         int ret;
    5936             : 
    5937           0 :         ret = bdev_nvme_validate_opts(opts);
    5938           0 :         if (ret) {
    5939           0 :                 SPDK_WARNLOG("Failed to set nvme opts.\n");
    5940           0 :                 return ret;
    5941             :         }
    5942             : 
    5943           0 :         if (g_bdev_nvme_init_thread != NULL) {
    5944           0 :                 if (!TAILQ_EMPTY(&g_nvme_bdev_ctrlrs)) {
    5945           0 :                         return -EPERM;
    5946             :                 }
    5947             :         }
    5948             : 
    5949           0 :         if (opts->rdma_srq_size != 0 ||
    5950           0 :             opts->rdma_max_cq_size != 0 ||
    5951           0 :             opts->rdma_cm_event_timeout_ms != 0) {
    5952           0 :                 struct spdk_nvme_transport_opts drv_opts;
    5953             : 
    5954           0 :                 spdk_nvme_transport_get_opts(&drv_opts, sizeof(drv_opts));
    5955           0 :                 if (opts->rdma_srq_size != 0) {
    5956           0 :                         drv_opts.rdma_srq_size = opts->rdma_srq_size;
    5957             :                 }
    5958           0 :                 if (opts->rdma_max_cq_size != 0) {
    5959           0 :                         drv_opts.rdma_max_cq_size = opts->rdma_max_cq_size;
    5960             :                 }
    5961           0 :                 if (opts->rdma_cm_event_timeout_ms != 0) {
    5962           0 :                         drv_opts.rdma_cm_event_timeout_ms = opts->rdma_cm_event_timeout_ms;
    5963             :                 }
    5964             : 
    5965           0 :                 ret = spdk_nvme_transport_set_opts(&drv_opts, sizeof(drv_opts));
    5966           0 :                 if (ret) {
    5967           0 :                         SPDK_ERRLOG("Failed to set NVMe transport opts.\n");
    5968           0 :                         return ret;
    5969             :                 }
    5970             :         }
    5971             : 
    5972           0 :         g_opts = *opts;
    5973             : 
    5974           0 :         return 0;
    5975             : }
    5976             : 
    5977             : struct set_nvme_hotplug_ctx {
    5978             :         uint64_t period_us;
    5979             :         bool enabled;
    5980             :         spdk_msg_fn fn;
    5981             :         void *fn_ctx;
    5982             : };
    5983             : 
    5984             : static void
    5985           0 : set_nvme_hotplug_period_cb(void *_ctx)
    5986             : {
    5987           0 :         struct set_nvme_hotplug_ctx *ctx = _ctx;
    5988             : 
    5989           0 :         spdk_poller_unregister(&g_hotplug_poller);
    5990           0 :         if (ctx->enabled) {
    5991           0 :                 g_hotplug_poller = SPDK_POLLER_REGISTER(bdev_nvme_hotplug, NULL, ctx->period_us);
    5992             :         } else {
    5993           0 :                 g_hotplug_poller = SPDK_POLLER_REGISTER(bdev_nvme_remove_poller, NULL,
    5994             :                                                         NVME_HOTPLUG_POLL_PERIOD_DEFAULT);
    5995             :         }
    5996             : 
    5997           0 :         g_nvme_hotplug_poll_period_us = ctx->period_us;
    5998           0 :         g_nvme_hotplug_enabled = ctx->enabled;
    5999           0 :         if (ctx->fn) {
    6000           0 :                 ctx->fn(ctx->fn_ctx);
    6001             :         }
    6002             : 
    6003           0 :         free(ctx);
    6004           0 : }
    6005             : 
    6006             : int
    6007           0 : bdev_nvme_set_hotplug(bool enabled, uint64_t period_us, spdk_msg_fn cb, void *cb_ctx)
    6008             : {
    6009             :         struct set_nvme_hotplug_ctx *ctx;
    6010             : 
    6011           0 :         if (enabled == true && !spdk_process_is_primary()) {
    6012           0 :                 return -EPERM;
    6013             :         }
    6014             : 
    6015           0 :         ctx = calloc(1, sizeof(*ctx));
    6016           0 :         if (ctx == NULL) {
    6017           0 :                 return -ENOMEM;
    6018             :         }
    6019             : 
    6020           0 :         period_us = period_us == 0 ? NVME_HOTPLUG_POLL_PERIOD_DEFAULT : period_us;
    6021           0 :         ctx->period_us = spdk_min(period_us, NVME_HOTPLUG_POLL_PERIOD_MAX);
    6022           0 :         ctx->enabled = enabled;
    6023           0 :         ctx->fn = cb;
    6024           0 :         ctx->fn_ctx = cb_ctx;
    6025             : 
    6026           0 :         spdk_thread_send_msg(g_bdev_nvme_init_thread, set_nvme_hotplug_period_cb, ctx);
    6027           0 :         return 0;
    6028             : }
    6029             : 
    6030             : static void
    6031          46 : nvme_ctrlr_populate_namespaces_done(struct nvme_ctrlr *nvme_ctrlr,
    6032             :                                     struct nvme_async_probe_ctx *ctx)
    6033             : {
    6034             :         struct nvme_ns  *nvme_ns;
    6035             :         struct nvme_bdev        *nvme_bdev;
    6036             :         size_t                  j;
    6037             : 
    6038          46 :         assert(nvme_ctrlr != NULL);
    6039             : 
    6040          46 :         if (ctx->names == NULL) {
    6041           0 :                 ctx->reported_bdevs = 0;
    6042           0 :                 populate_namespaces_cb(ctx, 0);
    6043           0 :                 return;
    6044             :         }
    6045             : 
    6046             :         /*
    6047             :          * Report the new bdevs that were created in this call.
    6048             :          * There can be more than one bdev per NVMe controller.
    6049             :          */
    6050          46 :         j = 0;
    6051          46 :         nvme_ns = nvme_ctrlr_get_first_active_ns(nvme_ctrlr);
    6052          94 :         while (nvme_ns != NULL) {
    6053          48 :                 nvme_bdev = nvme_ns->bdev;
    6054          48 :                 if (j < ctx->max_bdevs) {
    6055          48 :                         ctx->names[j] = nvme_bdev->disk.name;
    6056          48 :                         j++;
    6057             :                 } else {
    6058           0 :                         SPDK_ERRLOG("Maximum number of namespaces supported per NVMe controller is %du. Unable to return all names of created bdevs\n",
    6059             :                                     ctx->max_bdevs);
    6060           0 :                         ctx->reported_bdevs = 0;
    6061           0 :                         populate_namespaces_cb(ctx, -ERANGE);
    6062           0 :                         return;
    6063             :                 }
    6064             : 
    6065          48 :                 nvme_ns = nvme_ctrlr_get_next_active_ns(nvme_ctrlr, nvme_ns);
    6066             :         }
    6067             : 
    6068          46 :         ctx->reported_bdevs = j;
    6069          46 :         populate_namespaces_cb(ctx, 0);
    6070             : }
    6071             : 
    6072             : static int
    6073           9 : bdev_nvme_check_secondary_trid(struct nvme_ctrlr *nvme_ctrlr,
    6074             :                                struct spdk_nvme_ctrlr *new_ctrlr,
    6075             :                                struct spdk_nvme_transport_id *trid)
    6076             : {
    6077             :         struct nvme_path_id *tmp_trid;
    6078             : 
    6079           9 :         if (trid->trtype == SPDK_NVME_TRANSPORT_PCIE) {
    6080           0 :                 SPDK_ERRLOG("PCIe failover is not supported.\n");
    6081           0 :                 return -ENOTSUP;
    6082             :         }
    6083             : 
    6084             :         /* Currently we only support failover to the same transport type. */
    6085           9 :         if (nvme_ctrlr->active_path_id->trid.trtype != trid->trtype) {
    6086           0 :                 SPDK_WARNLOG("Failover from trtype: %s to a different trtype: %s is not supported currently\n",
    6087             :                              spdk_nvme_transport_id_trtype_str(nvme_ctrlr->active_path_id->trid.trtype),
    6088             :                              spdk_nvme_transport_id_trtype_str(trid->trtype));
    6089           0 :                 return -EINVAL;
    6090             :         }
    6091             : 
    6092             : 
    6093             :         /* Currently we only support failover to the same NQN. */
    6094           9 :         if (strncmp(trid->subnqn, nvme_ctrlr->active_path_id->trid.subnqn, SPDK_NVMF_NQN_MAX_LEN)) {
    6095           0 :                 SPDK_WARNLOG("Failover from subnqn: %s to a different subnqn: %s is not supported currently\n",
    6096             :                              nvme_ctrlr->active_path_id->trid.subnqn, trid->subnqn);
    6097           0 :                 return -EINVAL;
    6098             :         }
    6099             : 
    6100             :         /* Skip all the other checks if we've already registered this path. */
    6101          21 :         TAILQ_FOREACH(tmp_trid, &nvme_ctrlr->trids, link) {
    6102          12 :                 if (!spdk_nvme_transport_id_compare(&tmp_trid->trid, trid)) {
    6103           0 :                         SPDK_WARNLOG("This path (traddr: %s subnqn: %s) is already registered\n", trid->traddr,
    6104             :                                      trid->subnqn);
    6105           0 :                         return -EALREADY;
    6106             :                 }
    6107             :         }
    6108             : 
    6109           9 :         return 0;
    6110             : }
    6111             : 
    6112             : static int
    6113           9 : bdev_nvme_check_secondary_namespace(struct nvme_ctrlr *nvme_ctrlr,
    6114             :                                     struct spdk_nvme_ctrlr *new_ctrlr)
    6115             : {
    6116             :         struct nvme_ns *nvme_ns;
    6117             :         struct spdk_nvme_ns *new_ns;
    6118             : 
    6119           9 :         nvme_ns = nvme_ctrlr_get_first_active_ns(nvme_ctrlr);
    6120           9 :         while (nvme_ns != NULL) {
    6121           0 :                 new_ns = spdk_nvme_ctrlr_get_ns(new_ctrlr, nvme_ns->id);
    6122           0 :                 assert(new_ns != NULL);
    6123             : 
    6124           0 :                 if (!bdev_nvme_compare_ns(nvme_ns->ns, new_ns)) {
    6125           0 :                         return -EINVAL;
    6126             :                 }
    6127             : 
    6128           0 :                 nvme_ns = nvme_ctrlr_get_next_active_ns(nvme_ctrlr, nvme_ns);
    6129             :         }
    6130             : 
    6131           9 :         return 0;
    6132             : }
    6133             : /* Allocate a path entry holding a copy of trid and queue it on nvme_ctrlr->trids behind the active path. */
    6134             : static int
    6135           9 : _bdev_nvme_add_secondary_trid(struct nvme_ctrlr *nvme_ctrlr,
    6136             :                               struct spdk_nvme_transport_id *trid)
    6137             : {
    6138             :         struct nvme_path_id *active_id, *new_trid, *tmp_trid;
    6139             :         /* Returns 0 on success, or -ENOMEM if the new entry cannot be allocated. */
    6140           9 :         new_trid = calloc(1, sizeof(*new_trid)); /* zeroed, so last_failed_tsc starts at 0 */
    6141           9 :         if (new_trid == NULL) {
    6142           0 :                 return -ENOMEM;
    6143             :         }
    6144           9 :         new_trid->trid = *trid;
    6145             :         /* The active path is expected to be the head of the list (asserted below). */
    6146           9 :         active_id = nvme_ctrlr->active_path_id;
    6147           9 :         assert(active_id != NULL);
    6148           9 :         assert(active_id == TAILQ_FIRST(&nvme_ctrlr->trids));
    6149             : 
    6150             :         /* Skip the active trid so that it is not displaced until it has failed. */
    6151           9 :         tmp_trid = TAILQ_NEXT(active_id, link);
    6152           9 :         if (tmp_trid == NULL) {
    6153           6 :                 goto add_tail;
    6154             :         }
    6155             : 
    6156             :         /* A non-zero last failed time means the trid has failed.
    6157             :          * Insert the new alternate trid before any failed trid.
    6158             :          */
    6159           5 :         TAILQ_FOREACH_FROM(tmp_trid, &nvme_ctrlr->trids, link) {
    6160           3 :                 if (tmp_trid->last_failed_tsc != 0) {
    6161           1 :                         TAILQ_INSERT_BEFORE(tmp_trid, new_trid, link);
    6162           1 :                         return 0;
    6163             :                 }
    6164             :         }
    6165             :         /* No failed trid was found after the active one: append at the end. */
    6166           2 : add_tail:
    6167           8 :         TAILQ_INSERT_TAIL(&nvme_ctrlr->trids, new_trid, link);
    6168           8 :         return 0;
    6169             : }
    6170             : 
    6171             : /* Add a secondary path to an existing nvme_ctrlr for failover. The new path is
    6172             :  * first checked for whether it can access the same namespaces as the primary
    6173             :  * path; new_ctrlr is then detached and stays disconnected until failover occurs.
    6174             :  */
    6175             : static int
    6176           9 : bdev_nvme_add_secondary_trid(struct nvme_ctrlr *nvme_ctrlr,
    6177             :                              struct spdk_nvme_ctrlr *new_ctrlr,
    6178             :                              struct spdk_nvme_transport_id *trid)
    6179             : {
    6180             :         int rc;
    6181             :         /* Returns 0 on success, otherwise the error of the first step that failed. */
    6182           9 :         assert(nvme_ctrlr != NULL);
    6183             :         /* Both checks and the list insertion run with nvme_ctrlr->mutex held. */
    6184           9 :         pthread_mutex_lock(&nvme_ctrlr->mutex);
    6185             : 
    6186           9 :         rc = bdev_nvme_check_secondary_trid(nvme_ctrlr, new_ctrlr, trid);
    6187           9 :         if (rc != 0) {
    6188           0 :                 goto exit;
    6189             :         }
    6190             : 
    6191           9 :         rc = bdev_nvme_check_secondary_namespace(nvme_ctrlr, new_ctrlr);
    6192           9 :         if (rc != 0) {
    6193           0 :                 goto exit;
    6194             :         }
    6195             : 
    6196           9 :         rc = _bdev_nvme_add_secondary_trid(nvme_ctrlr, trid);
    6197             : 
    6198           9 : exit:
    6199           9 :         pthread_mutex_unlock(&nvme_ctrlr->mutex);
    6200             :         /* new_ctrlr is detached on both the success and the error paths. */
    6201           9 :         spdk_nvme_detach(new_ctrlr);
    6202             : 
    6203           9 :         return rc;
    6204             : }
    6205             : /* Attach callback used by spdk_bdev_nvme_create() when a new nvme_ctrlr is to be created for ctrlr. */
    6206             : static void
    6207          47 : connect_attach_cb(void *cb_ctx, const struct spdk_nvme_transport_id *trid,
    6208             :                   struct spdk_nvme_ctrlr *ctrlr, const struct spdk_nvme_ctrlr_opts *opts)
    6209             : {
    6210          47 :         struct spdk_nvme_ctrlr_opts *user_opts = cb_ctx;
    6211             :         struct nvme_async_probe_ctx *ctx;
    6212             :         int rc;
    6213             :         /* cb_ctx is the drv_opts member of an nvme_async_probe_ctx; recover the enclosing context. */
    6214          47 :         ctx = SPDK_CONTAINEROF(user_opts, struct nvme_async_probe_ctx, drv_opts);
    6215          47 :         ctx->ctrlr_attached = true; /* read later by bdev_nvme_async_poll() */
    6216             :         /* The trid and opts arguments are unused; the trid saved in ctx is passed on instead. */
    6217          47 :         rc = nvme_ctrlr_create(ctrlr, ctx->base_name, &ctx->trid, ctx);
    6218          47 :         if (rc != 0) {
    6219           1 :                 ctx->reported_bdevs = 0; /* on failure, report the error with zero bdevs */
    6220           1 :                 populate_namespaces_cb(ctx, rc);
    6221             :         }
    6222          47 : }
    6223             : /* Registers the failover_config deprecation that connect_set_failover_cb() logs via SPDK_LOG_DEPRECATED(). */
    6224           1 : SPDK_LOG_DEPRECATION_REGISTER(failover_config,
    6225             :                               "bdev_nvme_attach_controller.failover configuration mismatch", "v25.01", 0);
    6226             : /* Attach callback used by spdk_bdev_nvme_create() to add ctrlr as a failover path of an existing nvme_ctrlr. */
    6227             : static void
    6228           4 : connect_set_failover_cb(void *cb_ctx, const struct spdk_nvme_transport_id *trid,
    6229             :                         struct spdk_nvme_ctrlr *ctrlr,
    6230             :                         const struct spdk_nvme_ctrlr_opts *opts)
    6231             : {
    6232           4 :         struct spdk_nvme_ctrlr_opts *user_opts = cb_ctx;
    6233             :         struct nvme_ctrlr *nvme_ctrlr;
    6234             :         struct nvme_async_probe_ctx *ctx;
    6235             :         int rc;
    6236             :         /* cb_ctx is the drv_opts member of an nvme_async_probe_ctx; recover the enclosing context. */
    6237           4 :         ctx = SPDK_CONTAINEROF(user_opts, struct nvme_async_probe_ctx, drv_opts);
    6238           4 :         ctx->ctrlr_attached = true; /* read later by bdev_nvme_async_poll() */
    6239             :         /* The existing controller is looked up by the base name saved in ctx. */
    6240           4 :         nvme_ctrlr = nvme_ctrlr_get_by_name(ctx->base_name);
    6241           4 :         if (nvme_ctrlr) {
    6242           4 :                 if (nvme_ctrlr->opts.multipath) {
    6243             :                         /* All controllers created with the same name must be configured either
    6244             :                          * for multipath or for failover. Otherwise we have a configuration mismatch.
    6245             :                          * While this is currently still supported, support for configurations where some
    6246             :                          * controllers with the same name are configured for multipath, while others
    6247             :                          * are configured for failover, will be removed in release 25.01.
    6248             :                          * Default mode change: starting from SPDK 25.01, if the user does not provide the
    6249             :                          * '-x <mode>' parameter in the bdev_nvme_attach_controller RPC call, the default
    6250             :                          * mode assigned to the controller will be 'multipath'.
    6251             :                          */
    6252           0 :                         SPDK_LOG_DEPRECATED(failover_config);
    6253             :                 }
    6254           4 :                 rc = bdev_nvme_add_secondary_trid(nvme_ctrlr, ctrlr, &ctx->trid); /* detaches ctrlr itself */
    6255             :         } else {
    6256           0 :                 rc = -ENODEV; /* NOTE(review): ctrlr is not detached on this branch - confirm who releases it */
    6257             :         }
    6258             :         /* This callback always completes the request with zero bdevs and the resulting rc. */
    6259           4 :         ctx->reported_bdevs = 0;
    6260           4 :         populate_namespaces_cb(ctx, rc);
    6261           4 : }
    6262             : /* Poller that drives the async probe; once the probe finishes it unregisters itself and finalizes ctx. */
    6263             : static int
    6264          52 : bdev_nvme_async_poll(void *arg)
    6265             : {
    6266          52 :         struct nvme_async_probe_ctx     *ctx = arg;
    6267             :         int                             rc;
    6268             :         /* Any result other than -EAGAIN is treated as "probe finished". */
    6269          52 :         rc = spdk_nvme_probe_poll_async(ctx->probe_ctx);
    6270          52 :         if (spdk_unlikely(rc != -EAGAIN)) {
    6271          52 :                 ctx->probe_done = true;
    6272          52 :                 spdk_poller_unregister(&ctx->poller);
    6273          52 :                 if (!ctx->ctrlr_attached) {
    6274             :                         /* The probe is done, but no controller was attached.
    6275             :                          * That means we had a failure, so report -EIO back to
    6276             :                          * the caller (usually the RPC). populate_namespaces_cb()
    6277             :                          * will take care of freeing the nvme_async_probe_ctx.
    6278             :                          */
    6279           1 :                         ctx->reported_bdevs = 0;
    6280           1 :                         populate_namespaces_cb(ctx, -EIO);
    6281          51 :                 } else if (ctx->namespaces_populated) {
    6282             :                         /* The namespaces for the attached controller were all
    6283             :                          * populated and the response was already sent to the
    6284             :                          * caller (usually the RPC).  So free the context here.
    6285             :                          */
    6286          21 :                         free_nvme_async_probe_ctx(ctx);
    6287             :                 }
    6288             :         }
    6289             :         /* NOTE(review): attached but not yet populated leaves ctx alone; presumably freed elsewhere - confirm. */
    6290          52 :         return SPDK_POLLER_BUSY;
    6291             : }
    6292             : /* Validate the ctrlr_loss_timeout_sec / reconnect_delay_sec / fast_io_fail_timeout_sec combination. */
    6293             : static bool
    6294          29 : bdev_nvme_check_io_error_resiliency_params(int32_t ctrlr_loss_timeout_sec,
    6295             :                 uint32_t reconnect_delay_sec,
    6296             :                 uint32_t fast_io_fail_timeout_sec)
    6297             : { /* Returns true if the combination is accepted; otherwise logs the reason and returns false. */
    6298          29 :         if (ctrlr_loss_timeout_sec < -1) {
    6299           1 :                 SPDK_ERRLOG("ctrlr_loss_timeout_sec can't be less than -1.\n");
    6300           1 :                 return false;
    6301          28 :         } else if (ctrlr_loss_timeout_sec == -1) { /* -1: no upper bound is applied to the other two values */
    6302          14 :                 if (reconnect_delay_sec == 0) {
    6303           1 :                         SPDK_ERRLOG("reconnect_delay_sec can't be 0 if ctrlr_loss_timeout_sec is not 0.\n");
    6304           1 :                         return false;
    6305          13 :                 } else if (fast_io_fail_timeout_sec != 0 &&
    6306             :                            fast_io_fail_timeout_sec < reconnect_delay_sec) {
    6307           1 :                         SPDK_ERRLOG("reconnect_delay_sec can't be more than fast_io_fail_timeout_sec.\n");
    6308           1 :                         return false;
    6309             :                 }
    6310          14 :         } else if (ctrlr_loss_timeout_sec != 0) { /* positive: it also bounds the other two values */
    6311          11 :                 if (reconnect_delay_sec == 0) {
    6312           1 :                         SPDK_ERRLOG("reconnect_delay_sec can't be 0 if ctrlr_loss_timeout_sec is not 0.\n");
    6313           1 :                         return false;
    6314          10 :                 } else if (reconnect_delay_sec > (uint32_t)ctrlr_loss_timeout_sec) { /* cast is safe: value is > 0 here */
    6315           1 :                         SPDK_ERRLOG("reconnect_delay_sec can't be more than ctrlr_loss_timeout_sec.\n");
    6316           1 :                         return false;
    6317           9 :                 } else if (fast_io_fail_timeout_sec != 0) { /* 0 skips the fast_io_fail checks */
    6318           6 :                         if (fast_io_fail_timeout_sec < reconnect_delay_sec) {
    6319           1 :                                 SPDK_ERRLOG("reconnect_delay_sec can't be more than fast_io_fail_timeout_sec.\n");
    6320           1 :                                 return false;
    6321           5 :                         } else if (fast_io_fail_timeout_sec > (uint32_t)ctrlr_loss_timeout_sec) {
    6322           1 :                                 SPDK_ERRLOG("fast_io_fail_timeout_sec can't be more than ctrlr_loss_timeout_sec.\n");
    6323           1 :                                 return false;
    6324             :                         }
    6325             :                 }
    6326           3 :         } else if (reconnect_delay_sec != 0 || fast_io_fail_timeout_sec != 0) { /* ctrlr_loss_timeout_sec == 0 */
    6327           2 :                 SPDK_ERRLOG("Both reconnect_delay_sec and fast_io_fail_timeout_sec must be 0 if ctrlr_loss_timeout_sec is 0.\n");
    6328           2 :                 return false;
    6329             :         }
    6330             : 
    6331          20 :         return true;
    6332             : }
    6333             : /* Start an asynchronous connect to the controller at trid; returns 0 once the probe and poller are started. */
    6334             : int
    6335          52 : spdk_bdev_nvme_create(struct spdk_nvme_transport_id *trid,
    6336             :                       const char *base_name,
    6337             :                       const char **names,
    6338             :                       uint32_t count,
    6339             :                       spdk_bdev_nvme_create_cb cb_fn,
    6340             :                       void *cb_ctx,
    6341             :                       struct spdk_nvme_ctrlr_opts *drv_opts,
    6342             :                       struct spdk_bdev_nvme_ctrlr_opts *bdev_opts,
    6343             :                       bool multipath)
    6344             : { /* Errors: -EEXIST, -EINVAL, -ENOMEM, -ENOKEY, -ENODEV (see the individual checks below). */
    6345             :         struct nvme_probe_skip_entry *entry, *tmp;
    6346             :         struct nvme_async_probe_ctx *ctx;
    6347             :         spdk_nvme_attach_cb attach_cb;
    6348             :         int len;
    6349             :         /* All argument validation happens before ctx is allocated, so the early returns need no cleanup. */
    6350             :         /* TODO expand this check to include both the host and target TRIDs.
    6351             :          * Only if both are the same should we fail.
    6352             :          */
    6353          52 :         if (nvme_ctrlr_get(trid, drv_opts->hostnqn) != NULL) {
    6354           0 :                 SPDK_ERRLOG("A controller with the provided trid (traddr: %s, hostnqn: %s) "
    6355             :                             "already exists.\n", trid->traddr, drv_opts->hostnqn);
    6356           0 :                 return -EEXIST;
    6357             :         }
    6358             :         /* strnlen() returns at most SPDK_CONTROLLER_NAME_MAX, so equality means the name is too long. */
    6359          52 :         len = strnlen(base_name, SPDK_CONTROLLER_NAME_MAX);
    6360             : 
    6361          52 :         if (len == 0 || len == SPDK_CONTROLLER_NAME_MAX) {
    6362           0 :                 SPDK_ERRLOG("controller name must be between 1 and %d characters\n", SPDK_CONTROLLER_NAME_MAX - 1);
    6363           0 :                 return -EINVAL;
    6364             :         }
    6365             :         /* The timeout parameters are only validated when the caller supplies bdev_opts. */
    6366          52 :         if (bdev_opts != NULL &&
    6367          10 :             !bdev_nvme_check_io_error_resiliency_params(bdev_opts->ctrlr_loss_timeout_sec,
    6368             :                             bdev_opts->reconnect_delay_sec,
    6369             :                             bdev_opts->fast_io_fail_timeout_sec)) {
    6370           0 :                 return -EINVAL;
    6371             :         }
    6372             : 
    6373          52 :         ctx = calloc(1, sizeof(*ctx));
    6374          52 :         if (!ctx) {
    6375           0 :                 return -ENOMEM;
    6376             :         }
    6377          52 :         ctx->base_name = base_name; /* the pointer is stored, not a copy of the string */
    6378          52 :         ctx->names = names;
    6379          52 :         ctx->max_bdevs = count;
    6380          52 :         ctx->cb_fn = cb_fn;
    6381          52 :         ctx->cb_ctx = cb_ctx;
    6382          52 :         ctx->trid = *trid;
    6383             :         /* Use the caller's bdev options if given, otherwise the module defaults. */
    6384          52 :         if (bdev_opts) {
    6385          10 :                 memcpy(&ctx->bdev_opts, bdev_opts, sizeof(*bdev_opts));
    6386             :         } else {
    6387          42 :                 spdk_bdev_nvme_get_default_ctrlr_opts(&ctx->bdev_opts);
    6388             :         }
    6389             :         /* For a PCIe trid, drop any matching entry from g_skipped_nvme_ctrlrs. */
    6390          52 :         if (trid->trtype == SPDK_NVME_TRANSPORT_PCIE) {
    6391           0 :                 TAILQ_FOREACH_SAFE(entry, &g_skipped_nvme_ctrlrs, tailq, tmp) {
    6392           0 :                         if (spdk_nvme_transport_id_compare(trid, &entry->trid) == 0) {
    6393           0 :                                 TAILQ_REMOVE(&g_skipped_nvme_ctrlrs, entry, tailq);
    6394           0 :                                 free(entry);
    6395           0 :                                 break;
    6396             :                         }
    6397             :                 }
    6398             :         }
    6399             :         /* Start from the caller's driver options, then override selected fields (mostly from g_opts). */
    6400          52 :         memcpy(&ctx->drv_opts, drv_opts, sizeof(*drv_opts));
    6401          52 :         ctx->drv_opts.transport_retry_count = g_opts.transport_retry_count;
    6402          52 :         ctx->drv_opts.transport_ack_timeout = g_opts.transport_ack_timeout;
    6403          52 :         ctx->drv_opts.keep_alive_timeout_ms = g_opts.keep_alive_timeout_ms;
    6404          52 :         ctx->drv_opts.disable_read_ana_log_page = true;
    6405          52 :         ctx->drv_opts.transport_tos = g_opts.transport_tos;
    6406             :         /* Resolve the key names in bdev_opts through the keyring; a failed lookup frees ctx and returns -ENOKEY. */
    6407          52 :         if (ctx->bdev_opts.psk != NULL) {
    6408           0 :                 ctx->drv_opts.tls_psk = spdk_keyring_get_key(ctx->bdev_opts.psk);
    6409           0 :                 if (ctx->drv_opts.tls_psk == NULL) {
    6410           0 :                         SPDK_ERRLOG("Could not load PSK: %s\n", ctx->bdev_opts.psk);
    6411           0 :                         free_nvme_async_probe_ctx(ctx);
    6412           0 :                         return -ENOKEY;
    6413             :                 }
    6414             :         }
    6415             : 
    6416          52 :         if (ctx->bdev_opts.dhchap_key != NULL) {
    6417           0 :                 ctx->drv_opts.dhchap_key = spdk_keyring_get_key(ctx->bdev_opts.dhchap_key);
    6418           0 :                 if (ctx->drv_opts.dhchap_key == NULL) {
    6419           0 :                         SPDK_ERRLOG("Could not load DH-HMAC-CHAP key: %s\n",
    6420             :                                     ctx->bdev_opts.dhchap_key);
    6421           0 :                         free_nvme_async_probe_ctx(ctx);
    6422           0 :                         return -ENOKEY;
    6423             :                 }
    6424             :                 /* Digests and DH groups are set only when a DH-HMAC-CHAP key is configured. */
    6425           0 :                 ctx->drv_opts.dhchap_digests = g_opts.dhchap_digests;
    6426           0 :                 ctx->drv_opts.dhchap_dhgroups = g_opts.dhchap_dhgroups;
    6427             :         }
    6428          52 :         if (ctx->bdev_opts.dhchap_ctrlr_key != NULL) {
    6429           0 :                 ctx->drv_opts.dhchap_ctrlr_key =
    6430           0 :                         spdk_keyring_get_key(ctx->bdev_opts.dhchap_ctrlr_key);
    6431           0 :                 if (ctx->drv_opts.dhchap_ctrlr_key == NULL) {
    6432           0 :                         SPDK_ERRLOG("Could not load DH-HMAC-CHAP controller key: %s\n",
    6433             :                                     ctx->bdev_opts.dhchap_ctrlr_key);
    6434           0 :                         free_nvme_async_probe_ctx(ctx);
    6435           0 :                         return -ENOKEY;
    6436             :                 }
    6437             :         }
    6438             :         /* New name, or multipath requested: create a controller. Existing name without multipath: add a failover path. */
    6439          52 :         if (nvme_bdev_ctrlr_get_by_name(base_name) == NULL || multipath) {
    6440          48 :                 attach_cb = connect_attach_cb;
    6441             :         } else {
    6442           4 :                 attach_cb = connect_set_failover_cb;
    6443             :         }
    6444             :         /* &ctx->drv_opts is what the attach callbacks receive as cb_ctx and map back to ctx. */
    6445          52 :         ctx->probe_ctx = spdk_nvme_connect_async(trid, &ctx->drv_opts, attach_cb);
    6446          52 :         if (ctx->probe_ctx == NULL) {
    6447           0 :                 SPDK_ERRLOG("No controller was found with provided trid (traddr: %s)\n", trid->traddr);
    6448           0 :                 free_nvme_async_probe_ctx(ctx);
    6449           0 :                 return -ENODEV;
    6450             :         }
    6451          52 :         ctx->poller = SPDK_POLLER_REGISTER(bdev_nvme_async_poll, ctx, 1000); /* poller owns the rest of the probe */
    6452             : 
    6453          52 :         return 0;
    6454             : }
    6455             : /* State kept by bdev_nvme_delete() while bdev_nvme_delete_complete_poll() waits for a path to disappear. */
    6456             : struct bdev_nvme_delete_ctx {
    6457             :         char                        *name; /* heap copy of the controller name; freed with the context */
    6458             :         struct nvme_path_id         path_id; /* copy of the path filter being deleted */
    6459             :         bdev_nvme_delete_done_fn    delete_done; /* completion callback */
    6460             :         void                        *delete_done_ctx; /* first argument passed to delete_done */
    6461             :         uint64_t                    timeout_ticks; /* tick value after which the wait fails with -ETIMEDOUT */
    6462             :         struct spdk_poller          *poller; /* poller running bdev_nvme_delete_complete_poll() */
    6463             : };
    6464             : /* Free a delete context together with its owned name string; a NULL ctx is a no-op. */
    6465             : static void
    6466           2 : free_bdev_nvme_delete_ctx(struct bdev_nvme_delete_ctx *ctx)
    6467             : {
    6468           2 :         if (ctx != NULL) {
    6469           1 :                 free(ctx->name);
    6470           1 :                 free(ctx);
    6471             :         }
    6472           2 : }
    6473             : /* Return true if p matches every field that is set in path_id; zero or all-zero fields act as wildcards. */
    6474             : static bool
    6475          75 : nvme_path_id_compare(struct nvme_path_id *p, const struct nvme_path_id *path_id)
    6476             : {
    6477          75 :         if (path_id->trid.trtype != 0) {
    6478          21 :                 if (path_id->trid.trtype == SPDK_NVME_TRANSPORT_CUSTOM) { /* custom transports are compared by name only */
    6479           0 :                         if (strcasecmp(path_id->trid.trstring, p->trid.trstring) != 0) {
    6480           0 :                                 return false;
    6481             :                         }
    6482             :                 } else {
    6483          21 :                         if (path_id->trid.trtype != p->trid.trtype) {
    6484           0 :                                 return false;
    6485             :                         }
    6486             :                 }
    6487             :         }
    6488             :         /* traddr and trsvcid are compared case-insensitively. */
    6489          75 :         if (!spdk_mem_all_zero(path_id->trid.traddr, sizeof(path_id->trid.traddr))) {
    6490          21 :                 if (strcasecmp(path_id->trid.traddr, p->trid.traddr) != 0) {
    6491          11 :                         return false;
    6492             :                 }
    6493             :         }
    6494             : 
    6495          64 :         if (path_id->trid.adrfam != 0) {
    6496           0 :                 if (path_id->trid.adrfam != p->trid.adrfam) {
    6497           0 :                         return false;
    6498             :                 }
    6499             :         }
    6500             : 
    6501          64 :         if (!spdk_mem_all_zero(path_id->trid.trsvcid, sizeof(path_id->trid.trsvcid))) {
    6502          10 :                 if (strcasecmp(path_id->trid.trsvcid, p->trid.trsvcid) != 0) {
    6503           0 :                         return false;
    6504             :                 }
    6505             :         }
    6506             :         /* subnqn, hostaddr and hostsvcid are compared case-sensitively. */
    6507          64 :         if (!spdk_mem_all_zero(path_id->trid.subnqn, sizeof(path_id->trid.subnqn))) {
    6508          10 :                 if (strcmp(path_id->trid.subnqn, p->trid.subnqn) != 0) {
    6509           0 :                         return false;
    6510             :                 }
    6511             :         }
    6512             : 
    6513          64 :         if (!spdk_mem_all_zero(path_id->hostid.hostaddr, sizeof(path_id->hostid.hostaddr))) {
    6514           0 :                 if (strcmp(path_id->hostid.hostaddr, p->hostid.hostaddr) != 0) {
    6515           0 :                         return false;
    6516             :                 }
    6517             :         }
    6518             : 
    6519          64 :         if (!spdk_mem_all_zero(path_id->hostid.hostsvcid, sizeof(path_id->hostid.hostsvcid))) {
    6520           0 :                 if (strcmp(path_id->hostid.hostsvcid, p->hostid.hostsvcid) != 0) {
    6521           0 :                         return false;
    6522             :                 }
    6523             :         }
    6524             :         /* No set field disagreed; a completely empty path_id therefore matches any p. */
    6525          64 :         return true;
    6526             : }
    6527             : /* Return true if any nvme_ctrlr under the nvme_bdev_ctrlr called name has a path matching path_id. */
    6528             : static bool
    6529           2 : nvme_path_id_exists(const char *name, const struct nvme_path_id *path_id)
    6530             : {
    6531             :         struct nvme_bdev_ctrlr  *nbdev_ctrlr;
    6532             :         struct nvme_ctrlr       *ctrlr;
    6533             :         struct nvme_path_id     *p;
    6534             :         /* Lock order: g_bdev_nvme_mutex first, then each ctrlr->mutex; every return path releases both. */
    6535           2 :         pthread_mutex_lock(&g_bdev_nvme_mutex);
    6536           2 :         nbdev_ctrlr = nvme_bdev_ctrlr_get_by_name(name);
    6537           2 :         if (!nbdev_ctrlr) {
    6538           1 :                 pthread_mutex_unlock(&g_bdev_nvme_mutex);
    6539           1 :                 return false; /* an unknown name means no path exists */
    6540             :         }
    6541             : 
    6542           1 :         TAILQ_FOREACH(ctrlr, &nbdev_ctrlr->ctrlrs, tailq) {
    6543           1 :                 pthread_mutex_lock(&ctrlr->mutex);
    6544           1 :                 TAILQ_FOREACH(p, &ctrlr->trids, link) {
    6545           1 :                         if (nvme_path_id_compare(p, path_id)) {
    6546           1 :                                 pthread_mutex_unlock(&ctrlr->mutex);
    6547           1 :                                 pthread_mutex_unlock(&g_bdev_nvme_mutex);
    6548           1 :                                 return true;
    6549             :                         }
    6550             :                 }
    6551           0 :                 pthread_mutex_unlock(&ctrlr->mutex);
    6552             :         }
    6553           0 :         pthread_mutex_unlock(&g_bdev_nvme_mutex);
    6554             : 
    6555           0 :         return false;
    6556             : }
    6557             : /* Poller that waits for the deleted path to disappear, then reports the result and frees the context. */
    6558             : static int
    6559           2 : bdev_nvme_delete_complete_poll(void *arg)
    6560             : {
    6561           2 :         struct bdev_nvme_delete_ctx     *ctx = arg;
    6562           2 :         int                             rc = 0;
    6563             :         /* While the path still exists, keep polling until timeout_ticks has passed. */
    6564           2 :         if (nvme_path_id_exists(ctx->name, &ctx->path_id)) {
    6565           1 :                 if (ctx->timeout_ticks > spdk_get_ticks()) {
    6566           1 :                         return SPDK_POLLER_BUSY;
    6567             :                 }
    6568             : 
    6569           0 :                 SPDK_ERRLOG("NVMe path '%s' still exists after delete\n", ctx->name);
    6570           0 :                 rc = -ETIMEDOUT;
    6571             :         }
    6572             :         /* Finished (path gone or timed out): stop polling before invoking the callback and freeing ctx. */
    6573           1 :         spdk_poller_unregister(&ctx->poller);
    6574             : 
    6575           1 :         ctx->delete_done(ctx->delete_done_ctx, rc); /* rc is 0 or -ETIMEDOUT */
    6576           1 :         free_bdev_nvme_delete_ctx(ctx);
    6577             : 
    6578           1 :         return SPDK_POLLER_BUSY;
    6579             : }
    6580             : /* Remove the paths of nvme_ctrlr that match path_id; a matching active path triggers delete or failover. */
    6581             : static int
    6582          64 : _bdev_nvme_delete(struct nvme_ctrlr *nvme_ctrlr, const struct nvme_path_id *path_id)
    6583             : {
    6584             :         struct nvme_path_id     *p, *t;
    6585             :         spdk_msg_fn             msg_fn;
    6586          64 :         int                     rc = -ENXIO;
    6587             :         /* rc stays -ENXIO unless at least one path is removed or the active path matches. */
    6588          64 :         pthread_mutex_lock(&nvme_ctrlr->mutex);
    6589             :         /* Walk from the tail towards the head and stop at the first entry, which is handled after the loop. */
    6590          74 :         TAILQ_FOREACH_REVERSE_SAFE(p, &nvme_ctrlr->trids, nvme_paths, link, t) {
    6591          74 :                 if (p == TAILQ_FIRST(&nvme_ctrlr->trids)) {
    6592          64 :                         break;
    6593             :                 }
    6594             : 
    6595          10 :                 if (!nvme_path_id_compare(p, path_id)) {
    6596           3 :                         continue;
    6597             :                 }
    6598             : 
    6599             :                 /* We are not using the specified path. */
    6600           7 :                 TAILQ_REMOVE(&nvme_ctrlr->trids, p, link);
    6601           7 :                 free(p);
    6602           7 :                 rc = 0;
    6603             :         }
    6604             :         /* Here p is the first entry of the list, or NULL if the list was empty. */
    6605          64 :         if (p == NULL || !nvme_path_id_compare(p, path_id)) {
    6606           8 :                 pthread_mutex_unlock(&nvme_ctrlr->mutex);
    6607           8 :                 return rc;
    6608             :         }
    6609             : 
    6610             :         /* If we made it here, then this path is a match! Now we need to remove it. */
    6611             : 
    6612             :         /* This is the active path in use right now. The active path is always the first in the list. */
    6613          56 :         assert(p == nvme_ctrlr->active_path_id);
    6614             : 
    6615          56 :         if (!TAILQ_NEXT(p, link)) {
    6616             :                 /* The current path is the only path. */
    6617          55 :                 msg_fn = _nvme_ctrlr_destruct;
    6618          55 :                 rc = bdev_nvme_delete_ctrlr_unsafe(nvme_ctrlr, false);
    6619             :         } else {
    6620             :                 /* There is an alternative path. */
    6621           1 :                 msg_fn = _bdev_nvme_reset_ctrlr;
    6622           1 :                 rc = bdev_nvme_failover_ctrlr_unsafe(nvme_ctrlr, true);
    6623             :         }
    6624             : 
    6625          56 :         pthread_mutex_unlock(&nvme_ctrlr->mutex);
    6626             :         /* The follow-up work is sent to the controller's thread only after the mutex is released. */
    6627          56 :         if (rc == 0) {
    6628          56 :                 spdk_thread_send_msg(nvme_ctrlr->thread, msg_fn, nvme_ctrlr);
    6629           0 :         } else if (rc == -EALREADY) { /* -EALREADY is reported to the caller as success */
    6630           0 :                 rc = 0;
    6631             :         }
    6632             : 
    6633          56 :         return rc;
    6634             : }
    6635             : 
                       /*
                        * Delete the path(s) matching path_id from every nvme_ctrlr that belongs to
                        * the nvme_bdev_ctrlr called name.
                        *
                        * name            - name of the nvme_bdev_ctrlr to look up; must not be NULL.
                        * path_id         - path to delete; must not be NULL. It is copied by value
                        *                   into the completion context when delete_done is set.
                        * delete_done     - optional completion callback. When it is not NULL and at
                        *                   least one deletion succeeded, a context is allocated and
                        *                   bdev_nvme_delete_complete_poll is registered as a poller.
                        * delete_done_ctx - opaque argument stored alongside delete_done.
                        *
                        * Returns 0 on success, -EINVAL for a NULL name/path_id, -ENODEV when no
                        * nvme_bdev_ctrlr has that name, -ENXIO when no call to _bdev_nvme_delete
                        * returned 0, any other negative value returned by _bdev_nvme_delete, or
                        * -ENOMEM when setting up the completion context or poller fails.
                        *
                        * NOTE(review): the -ENOMEM paths are reached only after _bdev_nvme_delete
                        * has already returned 0 for at least one nvme_ctrlr, so the caller sees an
                        * error although the deletion was already requested.
                        */
    6636             : int
    6637          49 : bdev_nvme_delete(const char *name, const struct nvme_path_id *path_id,
    6638             :                  bdev_nvme_delete_done_fn delete_done, void *delete_done_ctx)
    6639             : {
    6640             :         struct nvme_bdev_ctrlr          *nbdev_ctrlr;
    6641             :         struct nvme_ctrlr               *nvme_ctrlr, *tmp_nvme_ctrlr;
    6642          49 :         struct bdev_nvme_delete_ctx     *ctx = NULL;
    6643          49 :         int                             rc = -ENXIO, _rc;
    6644             : 
    6645          49 :         if (name == NULL || path_id == NULL) {
    6646           0 :                 rc = -EINVAL;
    6647           0 :                 goto exit;
    6648             :         }
    6649             : 
                               /* The lookup and the walk over the ctrlr list happen under g_bdev_nvme_mutex;
                                * every exit from this section unlocks it before jumping to exit.
                                */
    6650          49 :         pthread_mutex_lock(&g_bdev_nvme_mutex);
    6651             : 
    6652          49 :         nbdev_ctrlr = nvme_bdev_ctrlr_get_by_name(name);
    6653          49 :         if (nbdev_ctrlr == NULL) {
    6654           0 :                 pthread_mutex_unlock(&g_bdev_nvme_mutex);
    6655             : 
    6656           0 :                 SPDK_ERRLOG("Failed to find NVMe bdev controller\n");
    6657           0 :                 rc = -ENODEV;
    6658           0 :                 goto exit;
    6659             :         }
    6660             : 
                               /* -ENXIO from _bdev_nvme_delete is not treated as fatal here: the walk simply
                                * continues with the next nvme_ctrlr. Any other negative value aborts the walk.
                                */
    6661         113 :         TAILQ_FOREACH_SAFE(nvme_ctrlr, &nbdev_ctrlr->ctrlrs, tailq, tmp_nvme_ctrlr) {
    6662          64 :                 _rc = _bdev_nvme_delete(nvme_ctrlr, path_id);
    6663          64 :                 if (_rc < 0 && _rc != -ENXIO) {
    6664           0 :                         pthread_mutex_unlock(&g_bdev_nvme_mutex);
    6665           0 :                         rc = _rc;
    6666           0 :                         goto exit;
    6667          64 :                 } else if (_rc == 0) {
    6668             :                         /* We traverse all remaining nvme_ctrlrs even if one nvme_ctrlr
    6669             :                          * was deleted successfully. To remember the successful deletion,
    6670             :                          * overwrite rc only if _rc is zero.
    6671             :                          */
    6672          58 :                         rc = 0;
    6673             :                 }
    6674             :         }
    6675             : 
    6676          49 :         pthread_mutex_unlock(&g_bdev_nvme_mutex);
    6677             : 
                               /* Without a completion callback (or on failure) there is nothing to track. */
    6678          49 :         if (rc != 0 || delete_done == NULL) {
    6679          48 :                 goto exit;
    6680             :         }
    6681             : 
    6682           1 :         ctx = calloc(1, sizeof(*ctx));
    6683           1 :         if (ctx == NULL) {
    6684           0 :                 SPDK_ERRLOG("Failed to allocate context for bdev_nvme_delete\n");
    6685           0 :                 rc = -ENOMEM;
    6686           0 :                 goto exit;
    6687             :         }
    6688             : 
    6689           1 :         ctx->name = strdup(name);
    6690           1 :         if (ctx->name == NULL) {
    6691           0 :                 SPDK_ERRLOG("Failed to copy controller name for deletion\n");
    6692           0 :                 rc = -ENOMEM;
    6693           0 :                 goto exit;
    6694             :         }
    6695             : 
    6696           1 :         ctx->delete_done = delete_done;
    6697           1 :         ctx->delete_done_ctx = delete_done_ctx;
    6698           1 :         ctx->path_id = *path_id;
                               /* Deadline is 10 * spdk_get_ticks_hz() ticks from now (10 seconds, assuming
                                * spdk_get_ticks_hz() is ticks per second). The poller period argument is 1000;
                                * presumably microseconds - confirm against SPDK_POLLER_REGISTER.
                                */
    6699           1 :         ctx->timeout_ticks = spdk_get_ticks() + 10 * spdk_get_ticks_hz();
    6700           1 :         ctx->poller = SPDK_POLLER_REGISTER(bdev_nvme_delete_complete_poll, ctx, 1000);
    6701           1 :         if (ctx->poller == NULL) {
    6702           0 :                 SPDK_ERRLOG("Failed to register bdev_nvme_delete poller\n");
    6703           0 :                 rc = -ENOMEM;
    6704           0 :                 goto exit;
    6705             :         }
    6706             : 
    6707           1 : exit:
                               /* ctx is still NULL on the early error paths, so the helper is also called
                                * with NULL. On success the context is left alive for the registered poller.
                                */
    6708          49 :         if (rc != 0) {
    6709           1 :                 free_bdev_nvme_delete_ctx(ctx);
    6710             :         }
    6711             : 
    6712          49 :         return rc;
    6713             : }
    6714             : 
                       /*
                        * Info-level log helper for the discovery service. Prefixes the message with
                        * "Discovery[<traddr>:<trsvcid>] " taken from ctx->trid.
                        *
                        * NOTE(review): the expansion already ends with ';' and uses ctx without
                        * parentheses. A call site that adds its own ';' therefore produces an extra
                        * empty statement, which breaks an unbraced if/else; pass a plain identifier
                        * as ctx.
                        */
    6715             : #define DISCOVERY_INFOLOG(ctx, format, ...) \
    6716             :         SPDK_INFOLOG(bdev_nvme, "Discovery[%s:%s] " format, ctx->trid.traddr, ctx->trid.trsvcid, ##__VA_ARGS__);
    6717             : 
                       /*
                        * Error-level counterpart of DISCOVERY_INFOLOG with the same prefix. The same
                        * caveats about the trailing ';' and the unparenthesized ctx apply.
                        */
    6718             : #define DISCOVERY_ERRLOG(ctx, format, ...) \
    6719             :         SPDK_ERRLOG("Discovery[%s:%s] " format, ctx->trid.traddr, ctx->trid.trsvcid, ##__VA_ARGS__);
    6720             : 
                       /*
                        * One entry learned from a discovery log page. Instances are linked either on
                        * discovery_ctx.nvm_entry_ctxs or on discovery_ctx.discovery_entry_ctxs.
                        *
                        * name     - controller name handed to spdk_bdev_nvme_create()/bdev_nvme_delete().
                        * trid     - transport ID; for NVM entries it is built from entry by
                        *            build_trid_from_log_page_entry().
                        * drv_opts - driver options initialised to the defaults, with hostnqn copied
                        *            from the owning discovery_ctx.
                        * entry    - raw copy of the log page entry, compared byte-wise (memcmp)
                        *            against newer log pages.
                        * tailq    - list linkage.
                        * ctx      - owning discovery context.
                        */
    6721             : struct discovery_entry_ctx {
    6722             :         char                                            name[128];
    6723             :         struct spdk_nvme_transport_id                   trid;
    6724             :         struct spdk_nvme_ctrlr_opts                     drv_opts;
    6725             :         struct spdk_nvmf_discovery_log_page_entry       entry;
    6726             :         TAILQ_ENTRY(discovery_entry_ctx)                tailq;
    6727             :         struct discovery_ctx                            *ctx;
    6728             : };
    6729             : 
                       /*
                        * State of one discovery service instance. Instances are linked on
                        * g_discovery_ctxs.
                        *
                        * Fields whose role is visible in this file:
                        * name                 - heap string freed by free_discovery_ctx(); used as the
                        *                        prefix of generated controller names (name + index).
                        * start_cb_fn/cb_ctx   - start completion callback and its argument; both are
                        *                        cleared once the callback has been invoked.
                        * stop_cb_fn           - stop completion callback; shares cb_ctx.
                        * ctrlr                - the attached discovery controller.
                        * detach_ctx           - non-NULL while an asynchronous detach is being polled.
                        * log_page             - log page currently being processed; freed and reset
                        *                        by discovery_remove_controllers().
                        * nvm_entry_ctxs       - entries for which an NVM controller attach was started.
                        * discovery_entry_ctxs - entries that refer to discovery subsystems; rebuilt
                        *                        from scratch on every log page.
                        * rc                   - status recorded by complete_discovery_start().
                        * index                - counter appended to name when a new subsystem is named.
                        * attach_in_progress   - number of spdk_bdev_nvme_create() calls whose
                        *                        completion callback has not run yet.
                        * hostnqn              - heap string copied into each entry's drv_opts.hostnqn.
                        */
    6730             : struct discovery_ctx {
    6731             :         char                                    *name;
    6732             :         spdk_bdev_nvme_start_discovery_fn       start_cb_fn;
    6733             :         spdk_bdev_nvme_stop_discovery_fn        stop_cb_fn;
    6734             :         void                                    *cb_ctx;
    6735             :         struct spdk_nvme_probe_ctx              *probe_ctx;
    6736             :         struct spdk_nvme_detach_ctx             *detach_ctx;
    6737             :         struct spdk_nvme_ctrlr                  *ctrlr;
    6738             :         struct spdk_nvme_transport_id           trid;
    6739             :         struct discovery_entry_ctx              *entry_ctx_in_use;
    6740             :         struct spdk_poller                      *poller;
    6741             :         struct spdk_nvme_ctrlr_opts             drv_opts;
    6742             :         struct spdk_bdev_nvme_ctrlr_opts        bdev_opts;
    6743             :         struct spdk_nvmf_discovery_log_page     *log_page;
    6744             :         TAILQ_ENTRY(discovery_ctx)              tailq;
    6745             :         TAILQ_HEAD(, discovery_entry_ctx)       nvm_entry_ctxs;
    6746             :         TAILQ_HEAD(, discovery_entry_ctx)       discovery_entry_ctxs;
    6747             :         int                                     rc;
    6748             :         bool                                    wait_for_attach;
    6749             :         uint64_t                                timeout_ticks;
    6750             :         /* Denotes that the discovery service is being started. We're waiting
    6751             :          * for the initial connection to the discovery controller to be
    6752             :          * established and attach discovered NVM ctrlrs.
    6753             :          */
    6754             :         bool                                    initializing;
    6755             :         /* Denotes if a discovery is currently in progress for this context.
    6756             :          * That includes connecting to newly discovered subsystems.  Used to
    6757             :          * ensure we do not start a new discovery until an existing one is
    6758             :          * complete.
    6759             :          */
    6760             :         bool                                    in_progress;
    6761             : 
    6762             :         /* Denotes if another discovery is needed after the one in progress
    6763             :          * completes.  Set when we receive an AER completion while a discovery
    6764             :          * is already in progress.
    6765             :          */
    6766             :         bool                                    pending;
    6767             : 
    6768             :         /* Signal to the discovery context poller that it should stop the
    6769             :          * discovery service, including detaching from the current discovery
    6770             :          * controller.
    6771             :          */
    6772             :         bool                                    stop;
    6773             : 
    6774             :         struct spdk_thread                      *calling_thread;
    6775             :         uint32_t                                index;
    6776             :         uint32_t                                attach_in_progress;
    6777             :         char                                    *hostnqn;
    6778             : 
    6779             :         /* Denotes if the discovery service was started by the mdns discovery.
    6780             :          */
    6781             :         bool                                    from_mdns_discovery_service;
    6782             : };
    6783             : 
                       /* List of all discovery contexts known to this module. */
    6784             : TAILQ_HEAD(discovery_ctxs, discovery_ctx);
    6785             : static struct discovery_ctxs g_discovery_ctxs = TAILQ_HEAD_INITIALIZER(g_discovery_ctxs);
    6786             : 
                       /* Forward declaration: discovery_complete() calls it before its definition. */
    6787             : static void get_discovery_log_page(struct discovery_ctx *ctx);
    6788             : 
                       /*
                        * Release a discovery context: frees log_page, hostnqn and name, then the
                        * context itself. The entry lists and entry_ctx_in_use are not touched here;
                        * _stop_discovery() is the visible place that releases those.
                        */
    6789             : static void
    6790           0 : free_discovery_ctx(struct discovery_ctx *ctx)
    6791             : {
    6792           0 :         free(ctx->log_page);
    6793           0 :         free(ctx->hostnqn);
    6794           0 :         free(ctx->name);
    6795           0 :         free(ctx);
    6796           0 : }
    6797             : 
                       /*
                        * Mark the current discovery round as finished: clears initializing and
                        * in_progress. If another round was requested in the meantime (pending), the
                        * flag is cleared and a new log page request is issued immediately.
                        */
    6798             : static void
    6799           0 : discovery_complete(struct discovery_ctx *ctx)
    6800             : {
    6801           0 :         ctx->initializing = false;
    6802           0 :         ctx->in_progress = false;
    6803           0 :         if (ctx->pending) {
    6804           0 :                 ctx->pending = false;
    6805           0 :                 get_discovery_log_page(ctx);
    6806             :         }
    6807           0 : }
    6808             : 
                       /*
                        * Fill trid from a discovery log page entry.
                        *
                        * trtype and adrfam are copied as-is. traddr and trsvcid are copied with the
                        * full size of the entry fields, subnqn with sizeof(trid->subnqn) - 1 bytes.
                        * Each of the three strings is then cut at its first space character, if any.
                        *
                        * trid  - destination; expected to be zeroed by the caller (see the comment
                        *         on the subnqn copy below).
                        * entry - source log page entry; not modified.
                        *
                        * NOTE(review): the strchr() calls need a 0 byte after the copied data. This
                        * assumes trid->traddr and trid->trsvcid are at least one byte larger than
                        * the corresponding entry fields - TODO confirm against the struct definitions.
                        */
    6809             : static void
    6810           0 : build_trid_from_log_page_entry(struct spdk_nvme_transport_id *trid,
    6811             :                                struct spdk_nvmf_discovery_log_page_entry *entry)
    6812             : {
    6813             :         char *space;
    6814             : 
    6815           0 :         trid->trtype = entry->trtype;
    6816           0 :         trid->adrfam = entry->adrfam;
    6817           0 :         memcpy(trid->traddr, entry->traddr, sizeof(entry->traddr));
    6818           0 :         memcpy(trid->trsvcid, entry->trsvcid, sizeof(entry->trsvcid));
    6819             :         /* Because the source buffer (entry->subnqn) is longer than trid->subnqn, and
    6820             :          * before call to this function trid->subnqn is zeroed out, we need
    6821             :          * to copy sizeof(trid->subnqn) minus one byte to make sure the last character
    6822             :          * remains 0. Then we can shorten the string (replace ' ' with 0) if required
    6823             :          */
    6824           0 :         memcpy(trid->subnqn, entry->subnqn, sizeof(trid->subnqn) - 1);
    6825             : 
    6826             :         /* We want the traddr, trsvcid and subnqn fields to be NULL-terminated.
    6827             :          * But the log page entries typically pad them with spaces, not zeroes.
    6828             :          * So add a NULL terminator to each of these fields at the appropriate
    6829             :          * location.
    6830             :          */
    6831           0 :         space = strchr(trid->traddr, ' ');
    6832           0 :         if (space) {
    6833           0 :                 *space = 0;
    6834             :         }
    6835           0 :         space = strchr(trid->trsvcid, ' ');
    6836           0 :         if (space) {
    6837           0 :                 *space = 0;
    6838             :         }
    6839           0 :         space = strchr(trid->subnqn, ' ');
    6840           0 :         if (space) {
    6841           0 :                 *space = 0;
    6842             :         }
    6843           0 : }
    6844             : 
                       /*
                        * Message handler that tears down the entries of a discovery context.
                        *
                        * While any attach is still outstanding (attach_in_progress > 0) it re-sends
                        * itself to the current thread and returns, so the teardown is retried later.
                        * Otherwise it sets ctx->stop, requests deletion of every NVM entry's path via
                        * bdev_nvme_delete() (return value ignored, no completion callback), frees
                        * both entry lists and releases entry_ctx_in_use.
                        *
                        * The context itself is not freed here; ctx->stop is the flag the discovery
                        * poller acts on.
                        */
    6845             : static void
    6846           0 : _stop_discovery(void *_ctx)
    6847             : {
    6848           0 :         struct discovery_ctx *ctx = _ctx;
    6849             : 
    6850           0 :         if (ctx->attach_in_progress > 0) {
    6851           0 :                 spdk_thread_send_msg(spdk_get_thread(), _stop_discovery, ctx);
    6852           0 :                 return;
    6853             :         }
    6854             : 
    6855           0 :         ctx->stop = true;
    6856             : 
    6857           0 :         while (!TAILQ_EMPTY(&ctx->nvm_entry_ctxs)) {
    6858             :                 struct discovery_entry_ctx *entry_ctx;
    6859           0 :                 struct nvme_path_id path = {};
    6860             : 
    6861           0 :                 entry_ctx = TAILQ_FIRST(&ctx->nvm_entry_ctxs);
    6862           0 :                 path.trid = entry_ctx->trid;
    6863           0 :                 bdev_nvme_delete(entry_ctx->name, &path, NULL, NULL);
    6864           0 :                 TAILQ_REMOVE(&ctx->nvm_entry_ctxs, entry_ctx, tailq);
    6865           0 :                 free(entry_ctx);
    6866             :         }
    6867             : 
    6868           0 :         while (!TAILQ_EMPTY(&ctx->discovery_entry_ctxs)) {
    6869             :                 struct discovery_entry_ctx *entry_ctx;
    6870             : 
    6871           0 :                 entry_ctx = TAILQ_FIRST(&ctx->discovery_entry_ctxs);
    6872           0 :                 TAILQ_REMOVE(&ctx->discovery_entry_ctxs, entry_ctx, tailq);
    6873           0 :                 free(entry_ctx);
    6874             :         }
    6875             : 
    6876           0 :         free(ctx->entry_ctx_in_use);
    6877           0 :         ctx->entry_ctx_in_use = NULL;
    6878             : }
    6879             : 
                       /*
                        * Begin stopping a discovery context.
                        *
                        * Stores cb_fn/cb_ctx as the stop callback and its argument (overwriting
                        * ctx->cb_ctx), logs when attaches are still outstanding, then hands over to
                        * _stop_discovery(), which defers itself until those attaches have finished.
                        */
    6880             : static void
    6881           0 : stop_discovery(struct discovery_ctx *ctx, spdk_bdev_nvme_stop_discovery_fn cb_fn, void *cb_ctx)
    6882             : {
    6883           0 :         ctx->stop_cb_fn = cb_fn;
    6884           0 :         ctx->cb_ctx = cb_ctx;
    6885             : 
    6886           0 :         if (ctx->attach_in_progress > 0) {
    6887           0 :                 DISCOVERY_INFOLOG(ctx, "stopping discovery with attach_in_progress: %"PRIu32"\n",
    6888             :                                   ctx->attach_in_progress);
    6889             :         }
    6890             : 
    6891           0 :         _stop_discovery(ctx);
    6892           0 : }
    6893             : 
                       /*
                        * Drop every NVM discovery entry that refers to the first path of nvme_ctrlr.
                        *
                        * For each discovery context, each entry on nvm_entry_ctxs has its transport
                        * ID rebuilt from the stored log page entry and compared with the first
                        * element of nvme_ctrlr->trids. Matching entries are unlinked and freed, and
                        * the owning discovery controller is marked failed.
                        *
                        * NOTE(review): path_id is dereferenced without a NULL check as soon as any
                        * entry exists, and d_ctx->ctrlr is passed to spdk_nvme_ctrlr_fail() without
                        * a NULL check. Presumably both are guaranteed by the callers - confirm.
                        */
    6894             : static void
    6895           2 : remove_discovery_entry(struct nvme_ctrlr *nvme_ctrlr)
    6896             : {
    6897             :         struct discovery_ctx *d_ctx;
    6898             :         struct nvme_path_id *path_id;
    6899           2 :         struct spdk_nvme_transport_id trid = {};
    6900             :         struct discovery_entry_ctx *entry_ctx, *tmp;
    6901             : 
    6902           2 :         path_id = TAILQ_FIRST(&nvme_ctrlr->trids);
    6903             : 
    6904           2 :         TAILQ_FOREACH(d_ctx, &g_discovery_ctxs, tailq) {
    6905           0 :                 TAILQ_FOREACH_SAFE(entry_ctx, &d_ctx->nvm_entry_ctxs, tailq, tmp) {
                                               /* trid is reused across iterations without being re-zeroed. */
    6906           0 :                         build_trid_from_log_page_entry(&trid, &entry_ctx->entry);
    6907           0 :                         if (spdk_nvme_transport_id_compare(&trid, &path_id->trid) != 0) {
    6908           0 :                                 continue;
    6909             :                         }
    6910             : 
    6911           0 :                         TAILQ_REMOVE(&d_ctx->nvm_entry_ctxs, entry_ctx, tailq);
    6912           0 :                         free(entry_ctx);
    6913           0 :                         DISCOVERY_INFOLOG(d_ctx, "Remove discovery entry: %s:%s:%s\n",
    6914             :                                           trid.subnqn, trid.traddr, trid.trsvcid);
    6915             : 
    6916             :                         /* Fail discovery ctrlr to force reattach attempt */
    6917           0 :                         spdk_nvme_ctrlr_fail(d_ctx->ctrlr);
    6918             :                 }
    6919             :         }
    6920           2 : }
    6921             : 
                       /*
                        * Second half of processing a discovery log page: remove NVM entries that are
                        * no longer listed.
                        *
                        * Every entry on ctx->nvm_entry_ctxs is compared byte-wise (memcmp over the
                        * whole entry) against all numrec records of ctx->log_page. Entries without
                        * an identical record get their path deleted through bdev_nvme_delete()
                        * (return value ignored) and are freed. Afterwards the log page is freed,
                        * ctx->log_page is reset and discovery_complete() is called.
                        *
                        * ctx->log_page is dereferenced unconditionally, so it must be non-NULL here.
                        */
    6922             : static void
    6923           0 : discovery_remove_controllers(struct discovery_ctx *ctx)
    6924             : {
    6925           0 :         struct spdk_nvmf_discovery_log_page *log_page = ctx->log_page;
    6926             :         struct discovery_entry_ctx *entry_ctx, *tmp;
    6927             :         struct spdk_nvmf_discovery_log_page_entry *new_entry, *old_entry;
    6928           0 :         struct spdk_nvme_transport_id old_trid = {};
    6929             :         uint64_t numrec, i;
    6930             :         bool found;
    6931             : 
    6932           0 :         numrec = from_le64(&log_page->numrec);
    6933           0 :         TAILQ_FOREACH_SAFE(entry_ctx, &ctx->nvm_entry_ctxs, tailq, tmp) {
    6934           0 :                 found = false;
    6935           0 :                 old_entry = &entry_ctx->entry;
                                       /* old_trid is only used for the log messages below. */
    6936           0 :                 build_trid_from_log_page_entry(&old_trid, old_entry);
    6937           0 :                 for (i = 0; i < numrec; i++) {
    6938           0 :                         new_entry = &log_page->entries[i];
    6939           0 :                         if (!memcmp(old_entry, new_entry, sizeof(*old_entry))) {
    6940           0 :                                 DISCOVERY_INFOLOG(ctx, "NVM %s:%s:%s found again\n",
    6941             :                                                   old_trid.subnqn, old_trid.traddr, old_trid.trsvcid);
    6942           0 :                                 found = true;
    6943           0 :                                 break;
    6944             :                         }
    6945             :                 }
    6946           0 :                 if (!found) {
    6947           0 :                         struct nvme_path_id path = {};
    6948             : 
    6949           0 :                         DISCOVERY_INFOLOG(ctx, "NVM %s:%s:%s not found\n",
    6950             :                                           old_trid.subnqn, old_trid.traddr, old_trid.trsvcid);
    6951             : 
    6952           0 :                         path.trid = entry_ctx->trid;
    6953           0 :                         bdev_nvme_delete(entry_ctx->name, &path, NULL, NULL);
    6954           0 :                         TAILQ_REMOVE(&ctx->nvm_entry_ctxs, entry_ctx, tailq);
    6955           0 :                         free(entry_ctx);
    6956             :                 }
    6957             :         }
    6958           0 :         free(log_page);
    6959           0 :         ctx->log_page = NULL;
    6960           0 :         discovery_complete(ctx);
    6961           0 : }
    6962             : 
                       /*
                        * Report the outcome of starting the discovery service.
                        *
                        * Clears the attach timeout (timeout_ticks = 0) and records status in ctx->rc.
                        * If a start callback is registered it is invoked with (cb_ctx, status) and
                        * then both start_cb_fn and cb_ctx are cleared, so the callback fires at most
                        * once per registration.
                        */
    6963             : static void
    6964           0 : complete_discovery_start(struct discovery_ctx *ctx, int status)
    6965             : {
    6966           0 :         ctx->timeout_ticks = 0;
    6967           0 :         ctx->rc = status;
    6968           0 :         if (ctx->start_cb_fn) {
    6969           0 :                 ctx->start_cb_fn(ctx->cb_ctx, status);
    6970           0 :                 ctx->start_cb_fn = NULL;
    6971           0 :                 ctx->cb_ctx = NULL;
    6972             :         }
    6973           0 : }
    6974             : 
                       /*
                        * Completion callback handed to spdk_bdev_nvme_create() for each newly
                        * discovered NVM entry.
                        *
                        * cb_ctx - the discovery_entry_ctx whose attach finished.
                        *
                        * Decrements attach_in_progress. When the last outstanding attach finishes,
                        * the start is completed with ctx->rc; then either the discovery is stopped
                        * (still initializing and ctx->rc != 0) or stale controllers are removed.
                        *
                        * NOTE(review): bdev_count and rc are not used; only ctx->rc is evaluated, so
                        * a failed attach reported through rc does not influence the outcome here.
                        * Also, complete_discovery_start() clears ctx->cb_ctx when a start callback
                        * was set, so the value forwarded to stop_discovery() may be NULL.
                        */
    6975             : static void
    6976           0 : discovery_attach_controller_done(void *cb_ctx, size_t bdev_count, int rc)
    6977             : {
    6978           0 :         struct discovery_entry_ctx *entry_ctx = cb_ctx;
    6979           0 :         struct discovery_ctx *ctx = entry_ctx->ctx;
    6980             : 
    6981           0 :         DISCOVERY_INFOLOG(ctx, "attach %s done\n", entry_ctx->name);
    6982           0 :         ctx->attach_in_progress--;
    6983           0 :         if (ctx->attach_in_progress == 0) {
    6984           0 :                 complete_discovery_start(ctx, ctx->rc);
    6985           0 :                 if (ctx->initializing && ctx->rc != 0) {
    6986           0 :                         DISCOVERY_ERRLOG(ctx, "stopping discovery due to errors: %d\n", ctx->rc);
    6987           0 :                         stop_discovery(ctx, NULL, ctx->cb_ctx);
    6988             :                 } else {
    6989           0 :                         discovery_remove_controllers(ctx);
    6990             :                 }
    6991             :         }
    6992           0 : }
    6993             : 
                       /*
                        * Allocate a zero-initialised discovery_entry_ctx for the given transport ID.
                        *
                        * The new entry points back to ctx, holds a copy of *trid, and has drv_opts
                        * set to the default controller options with hostnqn taken from ctx->hostnqn.
                        * name and entry stay zeroed, and the entry is not linked into any list.
                        *
                        * Returns the new entry, owned by the caller, or NULL when the allocation
                        * fails (an error is logged in that case).
                        */
    6994             : static struct discovery_entry_ctx *
    6995           0 : create_discovery_entry_ctx(struct discovery_ctx *ctx, struct spdk_nvme_transport_id *trid)
    6996             : {
    6997             :         struct discovery_entry_ctx *new_ctx;
    6998             : 
    6999           0 :         new_ctx = calloc(1, sizeof(*new_ctx));
    7000           0 :         if (new_ctx == NULL) {
    7001           0 :                 DISCOVERY_ERRLOG(ctx, "could not allocate new entry_ctx\n");
    7002           0 :                 return NULL;
    7003             :         }
    7004             : 
    7005           0 :         new_ctx->ctx = ctx;
    7006           0 :         memcpy(&new_ctx->trid, trid, sizeof(*trid));
    7007           0 :         spdk_nvme_ctrlr_get_default_ctrlr_opts(&new_ctx->drv_opts, sizeof(new_ctx->drv_opts));
    7008           0 :         snprintf(new_ctx->drv_opts.hostnqn, sizeof(new_ctx->drv_opts.hostnqn), "%s", ctx->hostnqn);
    7009           0 :         return new_ctx;
    7010             : }
    7011             : 
                       /*
                        * Completion callback for spdk_nvme_ctrlr_get_discovery_log_page().
                        *
                        * cb_arg   - the discovery_ctx that requested the page.
                        * rc / cpl - completion status; any failure is logged and processing stops.
                        * log_page - the retrieved page. It is stored in ctx->log_page and later
                        *            freed by discovery_remove_controllers().
                        *
                        * Processing per record:
                        *  - discovery-subsystem records become fresh entries on discovery_entry_ctxs
                        *    (the old list is emptied first);
                        *  - other records that are not byte-identical to an existing NVM entry
                        *    start a new controller attach via spdk_bdev_nvme_create().
                        * If no attach was started, stale controllers are removed right away;
                        * otherwise discovery_attach_controller_done() does that after the last
                        * attach has completed.
                        *
                        * NOTE(review): the early error return does not reset ctx->in_progress, which
                        * get_discovery_log_page() set to true. No code visible here clears it on
                        * that path, so later AERs would only set pending - confirm.
                        */
    7012             : static void
    7013           0 : discovery_log_page_cb(void *cb_arg, int rc, const struct spdk_nvme_cpl *cpl,
    7014             :                       struct spdk_nvmf_discovery_log_page *log_page)
    7015             : {
    7016           0 :         struct discovery_ctx *ctx = cb_arg;
    7017             :         struct discovery_entry_ctx *entry_ctx, *tmp;
    7018             :         struct spdk_nvmf_discovery_log_page_entry *new_entry, *old_entry;
    7019             :         uint64_t numrec, i;
    7020             :         bool found;
    7021             : 
    7022           0 :         if (rc || spdk_nvme_cpl_is_error(cpl)) {
    7023           0 :                 DISCOVERY_ERRLOG(ctx, "could not get discovery log page\n");
    7024           0 :                 return;
    7025             :         }
    7026             : 
    7027           0 :         ctx->log_page = log_page;
    7028           0 :         assert(ctx->attach_in_progress == 0);
    7029           0 :         numrec = from_le64(&log_page->numrec);
                               /* The list of discovery-subsystem entries is rebuilt for every log page. */
    7030           0 :         TAILQ_FOREACH_SAFE(entry_ctx, &ctx->discovery_entry_ctxs, tailq, tmp) {
    7031           0 :                 TAILQ_REMOVE(&ctx->discovery_entry_ctxs, entry_ctx, tailq);
    7032           0 :                 free(entry_ctx);
    7033             :         }
    7034           0 :         for (i = 0; i < numrec; i++) {
    7035           0 :                 found = false;
    7036           0 :                 new_entry = &log_page->entries[i];
    7037           0 :                 if (new_entry->subtype == SPDK_NVMF_SUBTYPE_DISCOVERY_CURRENT ||
    7038           0 :                     new_entry->subtype == SPDK_NVMF_SUBTYPE_DISCOVERY) {
    7039             :                         struct discovery_entry_ctx *new_ctx;
    7040           0 :                         struct spdk_nvme_transport_id trid = {};
    7041             : 
    7042           0 :                         build_trid_from_log_page_entry(&trid, new_entry);
    7043           0 :                         new_ctx = create_discovery_entry_ctx(ctx, &trid);
    7044           0 :                         if (new_ctx == NULL) {
                                                       /* create_discovery_entry_ctx() has already logged this
                                                        * failure, so the message appears twice.
                                                        */
    7045           0 :                                 DISCOVERY_ERRLOG(ctx, "could not allocate new entry_ctx\n");
    7046           0 :                                 break;
    7047             :                         }
    7048             : 
    7049           0 :                         TAILQ_INSERT_TAIL(&ctx->discovery_entry_ctxs, new_ctx, tailq);
    7050           0 :                         continue;
    7051             :                 }
                                       /* Skip records that are byte-identical to an NVM entry of this context. */
    7052           0 :                 TAILQ_FOREACH(entry_ctx, &ctx->nvm_entry_ctxs, tailq) {
    7053           0 :                         old_entry = &entry_ctx->entry;
    7054           0 :                         if (!memcmp(new_entry, old_entry, sizeof(*new_entry))) {
    7055           0 :                                 found = true;
    7056           0 :                                 break;
    7057             :                         }
    7058             :                 }
    7059           0 :                 if (!found) {
    7060           0 :                         struct discovery_entry_ctx *subnqn_ctx = NULL, *new_ctx;
    7061             :                         struct discovery_ctx *d_ctx;
    7062             : 
                                               /* Look in every discovery context for an entry with the same
                                                * subnqn. subnqn_ctx is NULL after an inner loop that ran to
                                                * completion, and non-NULL only when a match broke out of it.
                                                */
    7063           0 :                         TAILQ_FOREACH(d_ctx, &g_discovery_ctxs, tailq) {
    7064           0 :                                 TAILQ_FOREACH(subnqn_ctx, &d_ctx->nvm_entry_ctxs, tailq) {
    7065           0 :                                         if (!memcmp(subnqn_ctx->entry.subnqn, new_entry->subnqn,
    7066             :                                                     sizeof(new_entry->subnqn))) {
    7067           0 :                                                 break;
    7068             :                                         }
    7069             :                                 }
    7070           0 :                                 if (subnqn_ctx) {
    7071           0 :                                         break;
    7072             :                                 }
    7073             :                         }
    7074             : 
    7075           0 :                         new_ctx = calloc(1, sizeof(*new_ctx));
    7076           0 :                         if (new_ctx == NULL) {
    7077           0 :                                 DISCOVERY_ERRLOG(ctx, "could not allocate new entry_ctx\n");
    7078           0 :                                 break;
    7079             :                         }
    7080             : 
    7081           0 :                         new_ctx->ctx = ctx;
    7082           0 :                         memcpy(&new_ctx->entry, new_entry, sizeof(*new_entry));
    7083           0 :                         build_trid_from_log_page_entry(&new_ctx->trid, new_entry);
                                               /* A known subnqn reuses the existing controller name (new path);
                                                * otherwise a new name is generated from ctx->name and ctx->index.
                                                */
    7084           0 :                         if (subnqn_ctx) {
    7085           0 :                                 snprintf(new_ctx->name, sizeof(new_ctx->name), "%s", subnqn_ctx->name);
    7086           0 :                                 DISCOVERY_INFOLOG(ctx, "NVM %s:%s:%s new path for %s\n",
    7087             :                                                   new_ctx->trid.subnqn, new_ctx->trid.traddr, new_ctx->trid.trsvcid,
    7088             :                                                   new_ctx->name);
    7089             :                         } else {
    7090           0 :                                 snprintf(new_ctx->name, sizeof(new_ctx->name), "%s%d", ctx->name, ctx->index++);
    7091           0 :                                 DISCOVERY_INFOLOG(ctx, "NVM %s:%s:%s new subsystem %s\n",
    7092             :                                                   new_ctx->trid.subnqn, new_ctx->trid.traddr, new_ctx->trid.trsvcid,
    7093             :                                                   new_ctx->name);
    7094             :                         }
    7095           0 :                         spdk_nvme_ctrlr_get_default_ctrlr_opts(&new_ctx->drv_opts, sizeof(new_ctx->drv_opts));
    7096           0 :                         snprintf(new_ctx->drv_opts.hostnqn, sizeof(new_ctx->drv_opts.hostnqn), "%s", ctx->hostnqn);
    7097           0 :                         rc = spdk_bdev_nvme_create(&new_ctx->trid, new_ctx->name, NULL, 0,
    7098             :                                                    discovery_attach_controller_done, new_ctx,
    7099             :                                                    &new_ctx->drv_opts, &ctx->bdev_opts, true);
    7100           0 :                         if (rc == 0) {
    7101           0 :                                 TAILQ_INSERT_TAIL(&ctx->nvm_entry_ctxs, new_ctx, tailq);
    7102           0 :                                 ctx->attach_in_progress++;
    7103             :                         } else {
                                                       /* NOTE(review): new_ctx is neither linked nor freed on this
                                                        * path; whether the callee keeps it is not visible here.
                                                        */
    7104           0 :                                 DISCOVERY_ERRLOG(ctx, "spdk_bdev_nvme_create failed (%s)\n", spdk_strerror(-rc));
    7105             :                         }
    7106             :                 }
    7107             :         }
    7108             : 
    7109           0 :         if (ctx->attach_in_progress == 0) {
    7110           0 :                 discovery_remove_controllers(ctx);
    7111             :         }
    7112             : }
    7113             : 
                       /*
                        * Start a discovery round: marks the context as in progress and asks the
                        * discovery controller for its log page, with discovery_log_page_cb() as the
                        * completion callback. Must not be called while a round is in progress
                        * (asserted).
                        *
                        * NOTE(review): when the request cannot be submitted (rc != 0) the error is
                        * only logged; in_progress stays true and the "sent discovery log page
                        * command" message is still emitted.
                        */
    7114             : static void
    7115           0 : get_discovery_log_page(struct discovery_ctx *ctx)
    7116             : {
    7117             :         int rc;
    7118             : 
    7119           0 :         assert(ctx->in_progress == false);
    7120           0 :         ctx->in_progress = true;
    7121           0 :         rc = spdk_nvme_ctrlr_get_discovery_log_page(ctx->ctrlr, discovery_log_page_cb, ctx);
    7122           0 :         if (rc != 0) {
    7123           0 :                 DISCOVERY_ERRLOG(ctx, "could not get discovery log page\n");
    7124             :         }
    7125           0 :         DISCOVERY_INFOLOG(ctx, "sent discovery log page command\n");
    7126           0 : }
    7127             : 
                       /*
                        * Asynchronous event callback registered on the discovery controller.
                        *
                        * arg - the discovery_ctx.
                        * cpl - AER completion; bits 23:16 of cdw0 are taken as the log page ID.
                        *
                        * Failed completions and events for any log page other than
                        * SPDK_NVME_LOG_DISCOVERY are logged and ignored. Otherwise a new discovery
                        * round is started, or, if one is already running, pending is set so that
                        * discovery_complete() starts another round afterwards.
                        */
    7128             : static void
    7129           0 : discovery_aer_cb(void *arg, const struct spdk_nvme_cpl *cpl)
    7130             : {
    7131           0 :         struct discovery_ctx *ctx = arg;
    7132           0 :         uint32_t log_page_id = (cpl->cdw0 & 0xFF0000) >> 16;
    7133             : 
    7134           0 :         if (spdk_nvme_cpl_is_error(cpl)) {
    7135           0 :                 DISCOVERY_ERRLOG(ctx, "aer failed\n");
    7136           0 :                 return;
    7137             :         }
    7138             : 
    7139           0 :         if (log_page_id != SPDK_NVME_LOG_DISCOVERY) {
    7140           0 :                 DISCOVERY_ERRLOG(ctx, "unexpected log page 0x%x\n", log_page_id);
    7141           0 :                 return;
    7142             :         }
    7143             : 
    7144           0 :         DISCOVERY_INFOLOG(ctx, "got aer\n");
    7145           0 :         if (ctx->in_progress) {
    7146           0 :                 ctx->pending = true;
    7147           0 :                 return;
    7148             :         }
    7149             : 
    7150           0 :         get_discovery_log_page(ctx);
    7151             : }
    7152             : 
                       /*
                        * Attach callback for the discovery controller itself.
                        *
                        * cb_ctx - pointer to the drv_opts member of a discovery_ctx; the enclosing
                        *          context is recovered from it with SPDK_CONTAINEROF.
                        * ctrlr  - the attached controller, stored in ctx->ctrlr.
                        * trid and opts are not used.
                        *
                        * Clears ctx->probe_ctx. If ctx->rc already holds an error, that is logged
                        * and no AER callback is registered; otherwise discovery_aer_cb() is
                        * registered on the new controller.
                        */
    7153             : static void
    7154           0 : discovery_attach_cb(void *cb_ctx, const struct spdk_nvme_transport_id *trid,
    7155             :                     struct spdk_nvme_ctrlr *ctrlr, const struct spdk_nvme_ctrlr_opts *opts)
    7156             : {
    7157           0 :         struct spdk_nvme_ctrlr_opts *user_opts = cb_ctx;
    7158             :         struct discovery_ctx *ctx;
    7159             : 
    7160           0 :         ctx = SPDK_CONTAINEROF(user_opts, struct discovery_ctx, drv_opts);
    7161             : 
    7162           0 :         DISCOVERY_INFOLOG(ctx, "discovery ctrlr attached\n");
    7163           0 :         ctx->probe_ctx = NULL;
    7164           0 :         ctx->ctrlr = ctrlr;
    7165             : 
    7166           0 :         if (ctx->rc != 0) {
    7167           0 :                 DISCOVERY_ERRLOG(ctx, "encountered error while attaching discovery ctrlr: %d\n",
    7168             :                                  ctx->rc);
    7169           0 :                 return;
    7170             :         }
    7171             : 
    7172           0 :         spdk_nvme_ctrlr_register_aer_callback(ctx->ctrlr, discovery_aer_cb, ctx);
    7173             : }
    7174             : 
    7175             : static int
    7176           0 : discovery_poller(void *arg)
    7177             : {
    7178           0 :         struct discovery_ctx *ctx = arg;
    7179             :         struct spdk_nvme_transport_id *trid;
    7180             :         int rc;
    7181             : 
    7182           0 :         if (ctx->detach_ctx) {
    7183           0 :                 rc = spdk_nvme_detach_poll_async(ctx->detach_ctx);
    7184           0 :                 if (rc != -EAGAIN) {
    7185           0 :                         ctx->detach_ctx = NULL;
    7186           0 :                         ctx->ctrlr = NULL;
    7187             :                 }
    7188           0 :         } else if (ctx->stop) {
    7189           0 :                 if (ctx->ctrlr != NULL) {
    7190           0 :                         rc = spdk_nvme_detach_async(ctx->ctrlr, &ctx->detach_ctx);
    7191           0 :                         if (rc == 0) {
    7192           0 :                                 return SPDK_POLLER_BUSY;
    7193             :                         }
    7194           0 :                         DISCOVERY_ERRLOG(ctx, "could not detach discovery ctrlr\n");
    7195             :                 }
    7196           0 :                 spdk_poller_unregister(&ctx->poller);
    7197           0 :                 TAILQ_REMOVE(&g_discovery_ctxs, ctx, tailq);
    7198           0 :                 assert(ctx->start_cb_fn == NULL);
    7199           0 :                 if (ctx->stop_cb_fn != NULL) {
    7200           0 :                         ctx->stop_cb_fn(ctx->cb_ctx);
    7201             :                 }
    7202           0 :                 free_discovery_ctx(ctx);
    7203           0 :         } else if (ctx->probe_ctx == NULL && ctx->ctrlr == NULL) {
    7204           0 :                 if (ctx->timeout_ticks != 0 && ctx->timeout_ticks < spdk_get_ticks()) {
    7205           0 :                         DISCOVERY_ERRLOG(ctx, "timed out while attaching discovery ctrlr\n");
    7206           0 :                         assert(ctx->initializing);
    7207           0 :                         spdk_poller_unregister(&ctx->poller);
    7208           0 :                         TAILQ_REMOVE(&g_discovery_ctxs, ctx, tailq);
    7209           0 :                         complete_discovery_start(ctx, -ETIMEDOUT);
    7210           0 :                         stop_discovery(ctx, NULL, NULL);
    7211           0 :                         free_discovery_ctx(ctx);
    7212           0 :                         return SPDK_POLLER_BUSY;
    7213             :                 }
    7214             : 
    7215           0 :                 assert(ctx->entry_ctx_in_use == NULL);
    7216           0 :                 ctx->entry_ctx_in_use = TAILQ_FIRST(&ctx->discovery_entry_ctxs);
    7217           0 :                 TAILQ_REMOVE(&ctx->discovery_entry_ctxs, ctx->entry_ctx_in_use, tailq);
    7218           0 :                 trid = &ctx->entry_ctx_in_use->trid;
    7219           0 :                 ctx->probe_ctx = spdk_nvme_connect_async(trid, &ctx->drv_opts, discovery_attach_cb);
    7220           0 :                 if (ctx->probe_ctx) {
    7221           0 :                         spdk_poller_unregister(&ctx->poller);
    7222           0 :                         ctx->poller = SPDK_POLLER_REGISTER(discovery_poller, ctx, 1000);
    7223             :                 } else {
    7224           0 :                         DISCOVERY_ERRLOG(ctx, "could not start discovery connect\n");
    7225           0 :                         TAILQ_INSERT_TAIL(&ctx->discovery_entry_ctxs, ctx->entry_ctx_in_use, tailq);
    7226           0 :                         ctx->entry_ctx_in_use = NULL;
    7227             :                 }
    7228           0 :         } else if (ctx->probe_ctx) {
    7229           0 :                 if (ctx->timeout_ticks != 0 && ctx->timeout_ticks < spdk_get_ticks()) {
    7230           0 :                         DISCOVERY_ERRLOG(ctx, "timed out while attaching discovery ctrlr\n");
    7231           0 :                         complete_discovery_start(ctx, -ETIMEDOUT);
    7232           0 :                         return SPDK_POLLER_BUSY;
    7233             :                 }
    7234             : 
    7235           0 :                 rc = spdk_nvme_probe_poll_async(ctx->probe_ctx);
    7236           0 :                 if (rc != -EAGAIN) {
    7237           0 :                         if (ctx->rc != 0) {
    7238           0 :                                 assert(ctx->initializing);
    7239           0 :                                 stop_discovery(ctx, NULL, ctx->cb_ctx);
    7240             :                         } else {
    7241           0 :                                 assert(rc == 0);
    7242           0 :                                 DISCOVERY_INFOLOG(ctx, "discovery ctrlr connected\n");
    7243           0 :                                 ctx->rc = rc;
    7244           0 :                                 get_discovery_log_page(ctx);
    7245             :                         }
    7246             :                 }
    7247             :         } else {
    7248           0 :                 if (ctx->timeout_ticks != 0 && ctx->timeout_ticks < spdk_get_ticks()) {
    7249           0 :                         DISCOVERY_ERRLOG(ctx, "timed out while attaching NVM ctrlrs\n");
    7250           0 :                         complete_discovery_start(ctx, -ETIMEDOUT);
    7251             :                         /* We need to wait until all NVM ctrlrs are attached before we stop the
    7252             :                          * discovery service to make sure we don't detach a ctrlr that is still
    7253             :                          * being attached.
    7254             :                          */
    7255           0 :                         if (ctx->attach_in_progress == 0) {
    7256           0 :                                 stop_discovery(ctx, NULL, ctx->cb_ctx);
    7257           0 :                                 return SPDK_POLLER_BUSY;
    7258             :                         }
    7259             :                 }
    7260             : 
    7261           0 :                 rc = spdk_nvme_ctrlr_process_admin_completions(ctx->ctrlr);
    7262           0 :                 if (rc < 0) {
    7263           0 :                         spdk_poller_unregister(&ctx->poller);
    7264           0 :                         ctx->poller = SPDK_POLLER_REGISTER(discovery_poller, ctx, 1000 * 1000);
    7265           0 :                         TAILQ_INSERT_TAIL(&ctx->discovery_entry_ctxs, ctx->entry_ctx_in_use, tailq);
    7266           0 :                         ctx->entry_ctx_in_use = NULL;
    7267             : 
    7268           0 :                         rc = spdk_nvme_detach_async(ctx->ctrlr, &ctx->detach_ctx);
    7269           0 :                         if (rc != 0) {
    7270           0 :                                 DISCOVERY_ERRLOG(ctx, "could not detach discovery ctrlr\n");
    7271           0 :                                 ctx->ctrlr = NULL;
    7272             :                         }
    7273             :                 }
    7274             :         }
    7275             : 
    7276           0 :         return SPDK_POLLER_BUSY;
    7277             : }
    7278             : 
    7279             : static void
    7280           0 : start_discovery_poller(void *arg)
    7281             : {
    7282           0 :         struct discovery_ctx *ctx = arg;
    7283             : 
    7284           0 :         TAILQ_INSERT_TAIL(&g_discovery_ctxs, ctx, tailq);
    7285           0 :         ctx->poller = SPDK_POLLER_REGISTER(discovery_poller, ctx, 1000 * 1000);
    7286           0 : }
    7287             : 
    7288             : int
    7289           0 : bdev_nvme_start_discovery(struct spdk_nvme_transport_id *trid,
    7290             :                           const char *base_name,
    7291             :                           struct spdk_nvme_ctrlr_opts *drv_opts,
    7292             :                           struct spdk_bdev_nvme_ctrlr_opts *bdev_opts,
    7293             :                           uint64_t attach_timeout,
    7294             :                           bool from_mdns,
    7295             :                           spdk_bdev_nvme_start_discovery_fn cb_fn, void *cb_ctx)
    7296             : {
    7297             :         struct discovery_ctx *ctx;
    7298             :         struct discovery_entry_ctx *discovery_entry_ctx;
    7299             : 
    7300           0 :         snprintf(trid->subnqn, sizeof(trid->subnqn), "%s", SPDK_NVMF_DISCOVERY_NQN);
    7301           0 :         TAILQ_FOREACH(ctx, &g_discovery_ctxs, tailq) {
    7302           0 :                 if (strcmp(ctx->name, base_name) == 0) {
    7303           0 :                         return -EEXIST;
    7304             :                 }
    7305             : 
    7306           0 :                 if (ctx->entry_ctx_in_use != NULL) {
    7307           0 :                         if (!spdk_nvme_transport_id_compare(trid, &ctx->entry_ctx_in_use->trid)) {
    7308           0 :                                 return -EEXIST;
    7309             :                         }
    7310             :                 }
    7311             : 
    7312           0 :                 TAILQ_FOREACH(discovery_entry_ctx, &ctx->discovery_entry_ctxs, tailq) {
    7313           0 :                         if (!spdk_nvme_transport_id_compare(trid, &discovery_entry_ctx->trid)) {
    7314           0 :                                 return -EEXIST;
    7315             :                         }
    7316             :                 }
    7317             :         }
    7318             : 
    7319           0 :         ctx = calloc(1, sizeof(*ctx));
    7320           0 :         if (ctx == NULL) {
    7321           0 :                 return -ENOMEM;
    7322             :         }
    7323             : 
    7324           0 :         ctx->name = strdup(base_name);
    7325           0 :         if (ctx->name == NULL) {
    7326           0 :                 free_discovery_ctx(ctx);
    7327           0 :                 return -ENOMEM;
    7328             :         }
    7329           0 :         memcpy(&ctx->drv_opts, drv_opts, sizeof(*drv_opts));
    7330           0 :         memcpy(&ctx->bdev_opts, bdev_opts, sizeof(*bdev_opts));
    7331           0 :         ctx->from_mdns_discovery_service = from_mdns;
    7332           0 :         ctx->bdev_opts.from_discovery_service = true;
    7333           0 :         ctx->calling_thread = spdk_get_thread();
    7334           0 :         ctx->start_cb_fn = cb_fn;
    7335           0 :         ctx->cb_ctx = cb_ctx;
    7336           0 :         ctx->initializing = true;
    7337           0 :         if (ctx->start_cb_fn) {
    7338             :                 /* We can use this when dumping json to denote if this RPC parameter
    7339             :                  * was specified or not.
    7340             :                  */
    7341           0 :                 ctx->wait_for_attach = true;
    7342             :         }
    7343           0 :         if (attach_timeout != 0) {
    7344           0 :                 ctx->timeout_ticks = spdk_get_ticks() + attach_timeout *
    7345           0 :                                      spdk_get_ticks_hz() / 1000ull;
    7346             :         }
    7347           0 :         TAILQ_INIT(&ctx->nvm_entry_ctxs);
    7348           0 :         TAILQ_INIT(&ctx->discovery_entry_ctxs);
    7349           0 :         memcpy(&ctx->trid, trid, sizeof(*trid));
    7350             :         /* Even if user did not specify hostnqn, we can still strdup("\0"); */
    7351           0 :         ctx->hostnqn = strdup(ctx->drv_opts.hostnqn);
    7352           0 :         if (ctx->hostnqn == NULL) {
    7353           0 :                 free_discovery_ctx(ctx);
    7354           0 :                 return -ENOMEM;
    7355             :         }
    7356           0 :         discovery_entry_ctx = create_discovery_entry_ctx(ctx, trid);
    7357           0 :         if (discovery_entry_ctx == NULL) {
    7358           0 :                 DISCOVERY_ERRLOG(ctx, "could not allocate new entry_ctx\n");
    7359           0 :                 free_discovery_ctx(ctx);
    7360           0 :                 return -ENOMEM;
    7361             :         }
    7362             : 
    7363           0 :         TAILQ_INSERT_TAIL(&ctx->discovery_entry_ctxs, discovery_entry_ctx, tailq);
    7364           0 :         spdk_thread_send_msg(g_bdev_nvme_init_thread, start_discovery_poller, ctx);
    7365           0 :         return 0;
    7366             : }
    7367             : 
    7368             : int
    7369           0 : bdev_nvme_stop_discovery(const char *name, spdk_bdev_nvme_stop_discovery_fn cb_fn, void *cb_ctx)
    7370             : {
    7371             :         struct discovery_ctx *ctx;
    7372             : 
    7373           0 :         TAILQ_FOREACH(ctx, &g_discovery_ctxs, tailq) {
    7374           0 :                 if (strcmp(name, ctx->name) == 0) {
    7375           0 :                         if (ctx->stop) {
    7376           0 :                                 return -EALREADY;
    7377             :                         }
    7378             :                         /* If we're still starting the discovery service and ->rc is non-zero, we're
    7379             :                          * going to stop it as soon as we can
    7380             :                          */
    7381           0 :                         if (ctx->initializing && ctx->rc != 0) {
    7382           0 :                                 return -EALREADY;
    7383             :                         }
    7384           0 :                         stop_discovery(ctx, cb_fn, cb_ctx);
    7385           0 :                         return 0;
    7386             :                 }
    7387             :         }
    7388             : 
    7389           0 :         return -ENOENT;
    7390             : }
    7391             : 
    7392             : static int
    7393           1 : bdev_nvme_library_init(void)
    7394             : {
    7395           1 :         g_bdev_nvme_init_thread = spdk_get_thread();
    7396             : 
    7397           1 :         spdk_io_device_register(&g_nvme_bdev_ctrlrs, bdev_nvme_create_poll_group_cb,
    7398             :                                 bdev_nvme_destroy_poll_group_cb,
    7399             :                                 sizeof(struct nvme_poll_group),  "nvme_poll_groups");
    7400             : 
    7401           1 :         return 0;
    7402             : }
    7403             : 
    7404             : static void
    7405           1 : bdev_nvme_fini_destruct_ctrlrs(void)
    7406             : {
    7407             :         struct nvme_bdev_ctrlr *nbdev_ctrlr;
    7408             :         struct nvme_ctrlr *nvme_ctrlr;
    7409             : 
    7410           1 :         pthread_mutex_lock(&g_bdev_nvme_mutex);
    7411           1 :         TAILQ_FOREACH(nbdev_ctrlr, &g_nvme_bdev_ctrlrs, tailq) {
    7412           0 :                 TAILQ_FOREACH(nvme_ctrlr, &nbdev_ctrlr->ctrlrs, tailq) {
    7413           0 :                         pthread_mutex_lock(&nvme_ctrlr->mutex);
    7414           0 :                         if (nvme_ctrlr->destruct) {
    7415             :                                 /* This controller's destruction was already started
    7416             :                                  * before the application started shutting down
    7417             :                                  */
    7418           0 :                                 pthread_mutex_unlock(&nvme_ctrlr->mutex);
    7419           0 :                                 continue;
    7420             :                         }
    7421           0 :                         nvme_ctrlr->destruct = true;
    7422           0 :                         pthread_mutex_unlock(&nvme_ctrlr->mutex);
    7423             : 
    7424           0 :                         spdk_thread_send_msg(nvme_ctrlr->thread, _nvme_ctrlr_destruct,
    7425             :                                              nvme_ctrlr);
    7426             :                 }
    7427             :         }
    7428             : 
    7429           1 :         g_bdev_nvme_module_finish = true;
    7430           1 :         if (TAILQ_EMPTY(&g_nvme_bdev_ctrlrs)) {
    7431           1 :                 pthread_mutex_unlock(&g_bdev_nvme_mutex);
    7432           1 :                 spdk_io_device_unregister(&g_nvme_bdev_ctrlrs, NULL);
    7433           1 :                 spdk_bdev_module_fini_done();
    7434           1 :                 return;
    7435             :         }
    7436             : 
    7437           0 :         pthread_mutex_unlock(&g_bdev_nvme_mutex);
    7438             : }
    7439             : 
    7440             : static void
    7441           0 : check_discovery_fini(void *arg)
    7442             : {
    7443           0 :         if (TAILQ_EMPTY(&g_discovery_ctxs)) {
    7444           0 :                 bdev_nvme_fini_destruct_ctrlrs();
    7445             :         }
    7446           0 : }
    7447             : 
    7448             : static void
    7449           1 : bdev_nvme_library_fini(void)
    7450             : {
    7451             :         struct nvme_probe_skip_entry *entry, *entry_tmp;
    7452             :         struct discovery_ctx *ctx;
    7453             : 
    7454           1 :         spdk_poller_unregister(&g_hotplug_poller);
    7455           1 :         free(g_hotplug_probe_ctx);
    7456           1 :         g_hotplug_probe_ctx = NULL;
    7457             : 
    7458           1 :         TAILQ_FOREACH_SAFE(entry, &g_skipped_nvme_ctrlrs, tailq, entry_tmp) {
    7459           0 :                 TAILQ_REMOVE(&g_skipped_nvme_ctrlrs, entry, tailq);
    7460           0 :                 free(entry);
    7461             :         }
    7462             : 
    7463           1 :         assert(spdk_get_thread() == g_bdev_nvme_init_thread);
    7464           1 :         if (TAILQ_EMPTY(&g_discovery_ctxs)) {
    7465           1 :                 bdev_nvme_fini_destruct_ctrlrs();
    7466             :         } else {
    7467           0 :                 TAILQ_FOREACH(ctx, &g_discovery_ctxs, tailq) {
    7468           0 :                         stop_discovery(ctx, check_discovery_fini, NULL);
    7469             :                 }
    7470             :         }
    7471           1 : }
    7472             : 
    7473             : static void
    7474           0 : bdev_nvme_verify_pi_error(struct nvme_bdev_io *bio)
    7475             : {
    7476           0 :         struct spdk_bdev_io *bdev_io = spdk_bdev_io_from_ctx(bio);
    7477           0 :         struct spdk_bdev *bdev = bdev_io->bdev;
    7478           0 :         struct spdk_dif_ctx dif_ctx;
    7479           0 :         struct spdk_dif_error err_blk = {};
    7480             :         int rc;
    7481           0 :         struct spdk_dif_ctx_init_ext_opts dif_opts;
    7482             : 
    7483           0 :         dif_opts.size = SPDK_SIZEOF(&dif_opts, dif_pi_format);
    7484           0 :         dif_opts.dif_pi_format = bdev->dif_pi_format;
    7485           0 :         rc = spdk_dif_ctx_init(&dif_ctx,
    7486           0 :                                bdev->blocklen, bdev->md_len, bdev->md_interleave,
    7487           0 :                                bdev->dif_is_head_of_md, bdev->dif_type,
    7488             :                                bdev_io->u.bdev.dif_check_flags,
    7489           0 :                                bdev_io->u.bdev.offset_blocks, 0, 0, 0, 0, &dif_opts);
    7490           0 :         if (rc != 0) {
    7491           0 :                 SPDK_ERRLOG("Initialization of DIF context failed\n");
    7492           0 :                 return;
    7493             :         }
    7494             : 
    7495           0 :         if (bdev->md_interleave) {
    7496           0 :                 rc = spdk_dif_verify(bdev_io->u.bdev.iovs, bdev_io->u.bdev.iovcnt,
    7497           0 :                                      bdev_io->u.bdev.num_blocks, &dif_ctx, &err_blk);
    7498             :         } else {
    7499           0 :                 struct iovec md_iov = {
    7500           0 :                         .iov_base       = bdev_io->u.bdev.md_buf,
    7501           0 :                         .iov_len        = bdev_io->u.bdev.num_blocks * bdev->md_len,
    7502             :                 };
    7503             : 
    7504           0 :                 rc = spdk_dix_verify(bdev_io->u.bdev.iovs, bdev_io->u.bdev.iovcnt,
    7505           0 :                                      &md_iov, bdev_io->u.bdev.num_blocks, &dif_ctx, &err_blk);
    7506             :         }
    7507             : 
    7508           0 :         if (rc != 0) {
    7509           0 :                 SPDK_ERRLOG("DIF error detected. type=%d, offset=%" PRIu32 "\n",
    7510             :                             err_blk.err_type, err_blk.err_offset);
    7511             :         } else {
    7512           0 :                 SPDK_ERRLOG("Hardware reported PI error but SPDK could not find any.\n");
    7513             :         }
    7514             : }
    7515             : 
    7516             : static void
    7517           0 : bdev_nvme_no_pi_readv_done(void *ref, const struct spdk_nvme_cpl *cpl)
    7518             : {
    7519           0 :         struct nvme_bdev_io *bio = ref;
    7520             : 
    7521           0 :         if (spdk_nvme_cpl_is_success(cpl)) {
    7522             :                 /* Run PI verification for read data buffer. */
    7523           0 :                 bdev_nvme_verify_pi_error(bio);
    7524             :         }
    7525             : 
    7526             :         /* Return original completion status */
    7527           0 :         bdev_nvme_io_complete_nvme_status(bio, &bio->cpl);
    7528           0 : }
    7529             : 
    7530             : static void
    7531           3 : bdev_nvme_readv_done(void *ref, const struct spdk_nvme_cpl *cpl)
    7532             : {
    7533           3 :         struct nvme_bdev_io *bio = ref;
    7534           3 :         struct spdk_bdev_io *bdev_io = spdk_bdev_io_from_ctx(bio);
    7535             :         int ret;
    7536             : 
    7537           3 :         if (spdk_unlikely(spdk_nvme_cpl_is_pi_error(cpl))) {
    7538           0 :                 SPDK_ERRLOG("readv completed with PI error (sct=%d, sc=%d)\n",
    7539             :                             cpl->status.sct, cpl->status.sc);
    7540             : 
    7541             :                 /* Save completion status to use after verifying PI error. */
    7542           0 :                 bio->cpl = *cpl;
    7543             : 
    7544           0 :                 if (spdk_likely(nvme_io_path_is_available(bio->io_path))) {
    7545             :                         /* Read without PI checking to verify PI error. */
    7546           0 :                         ret = bdev_nvme_no_pi_readv(bio,
    7547             :                                                     bdev_io->u.bdev.iovs,
    7548             :                                                     bdev_io->u.bdev.iovcnt,
    7549             :                                                     bdev_io->u.bdev.md_buf,
    7550             :                                                     bdev_io->u.bdev.num_blocks,
    7551             :                                                     bdev_io->u.bdev.offset_blocks);
    7552           0 :                         if (ret == 0) {
    7553           0 :                                 return;
    7554             :                         }
    7555             :                 }
    7556             :         }
    7557             : 
    7558           3 :         bdev_nvme_io_complete_nvme_status(bio, cpl);
    7559             : }
    7560             : 
    7561             : static void
    7562          25 : bdev_nvme_writev_done(void *ref, const struct spdk_nvme_cpl *cpl)
    7563             : {
    7564          25 :         struct nvme_bdev_io *bio = ref;
    7565             : 
    7566          25 :         if (spdk_unlikely(spdk_nvme_cpl_is_pi_error(cpl))) {
    7567           0 :                 SPDK_ERRLOG("writev completed with PI error (sct=%d, sc=%d)\n",
    7568             :                             cpl->status.sct, cpl->status.sc);
    7569             :                 /* Run PI verification for write data buffer if PI error is detected. */
    7570           0 :                 bdev_nvme_verify_pi_error(bio);
    7571             :         }
    7572             : 
    7573          25 :         bdev_nvme_io_complete_nvme_status(bio, cpl);
    7574          25 : }
    7575             : 
    7576             : static void
    7577           0 : bdev_nvme_zone_appendv_done(void *ref, const struct spdk_nvme_cpl *cpl)
    7578             : {
    7579           0 :         struct nvme_bdev_io *bio = ref;
    7580           0 :         struct spdk_bdev_io *bdev_io = spdk_bdev_io_from_ctx(bio);
    7581             : 
    7582             :         /* spdk_bdev_io_get_append_location() requires that the ALBA is stored in offset_blocks.
    7583             :          * Additionally, offset_blocks has to be set before calling bdev_nvme_verify_pi_error().
    7584             :          */
    7585           0 :         bdev_io->u.bdev.offset_blocks = *(uint64_t *)&cpl->cdw0;
    7586             : 
    7587           0 :         if (spdk_nvme_cpl_is_pi_error(cpl)) {
    7588           0 :                 SPDK_ERRLOG("zone append completed with PI error (sct=%d, sc=%d)\n",
    7589             :                             cpl->status.sct, cpl->status.sc);
    7590             :                 /* Run PI verification for zone append data buffer if PI error is detected. */
    7591           0 :                 bdev_nvme_verify_pi_error(bio);
    7592             :         }
    7593             : 
    7594           0 :         bdev_nvme_io_complete_nvme_status(bio, cpl);
    7595           0 : }
    7596             : 
    7597             : static void
    7598           1 : bdev_nvme_comparev_done(void *ref, const struct spdk_nvme_cpl *cpl)
    7599             : {
    7600           1 :         struct nvme_bdev_io *bio = ref;
    7601             : 
    7602           1 :         if (spdk_nvme_cpl_is_pi_error(cpl)) {
    7603           0 :                 SPDK_ERRLOG("comparev completed with PI error (sct=%d, sc=%d)\n",
    7604             :                             cpl->status.sct, cpl->status.sc);
    7605             :                 /* Run PI verification for compare data buffer if PI error is detected. */
    7606           0 :                 bdev_nvme_verify_pi_error(bio);
    7607             :         }
    7608             : 
    7609           1 :         bdev_nvme_io_complete_nvme_status(bio, cpl);
    7610           1 : }
    7611             : 
    7612             : static void
    7613           4 : bdev_nvme_comparev_and_writev_done(void *ref, const struct spdk_nvme_cpl *cpl)
    7614             : {
    7615           4 :         struct nvme_bdev_io *bio = ref;
    7616             : 
    7617             :         /* Compare operation completion */
    7618           4 :         if (!bio->first_fused_completed) {
    7619             :                 /* Save compare result for write callback */
    7620           2 :                 bio->cpl = *cpl;
    7621           2 :                 bio->first_fused_completed = true;
    7622           2 :                 return;
    7623             :         }
    7624             : 
    7625             :         /* Write operation completion */
    7626           2 :         if (spdk_nvme_cpl_is_error(&bio->cpl)) {
    7627             :                 /* If bio->cpl is already an error, it means the compare operation failed.  In that case,
    7628             :                  * complete the IO with the compare operation's status.
    7629             :                  */
    7630           1 :                 if (!spdk_nvme_cpl_is_error(cpl)) {
    7631           1 :                         SPDK_ERRLOG("Unexpected write success after compare failure.\n");
    7632             :                 }
    7633             : 
    7634           1 :                 bdev_nvme_io_complete_nvme_status(bio, &bio->cpl);
    7635             :         } else {
    7636           1 :                 bdev_nvme_io_complete_nvme_status(bio, cpl);
    7637             :         }
    7638             : }
    7639             : 
    /*
     * Generic completion callback: complete the bdev I/O behind ref (a
     * struct nvme_bdev_io) with the NVMe status in cpl, with no extra processing.
     */
    static void
    bdev_nvme_queued_done(void *ref, const struct spdk_nvme_cpl *cpl)
    {
            struct nvme_bdev_io *queued_bio = ref;

            bdev_nvme_io_complete_nvme_status(queued_bio, cpl);
    }
    7647             : 
    7648             : static int
    7649           0 : fill_zone_from_report(struct spdk_bdev_zone_info *info, struct spdk_nvme_zns_zone_desc *desc)
    7650             : {
    7651           0 :         switch (desc->zt) {
    7652           0 :         case SPDK_NVME_ZONE_TYPE_SEQWR:
    7653           0 :                 info->type = SPDK_BDEV_ZONE_TYPE_SEQWR;
    7654           0 :                 break;
    7655           0 :         default:
    7656           0 :                 SPDK_ERRLOG("Invalid zone type: %#x in zone report\n", desc->zt);
    7657           0 :                 return -EIO;
    7658             :         }
    7659             : 
    7660           0 :         switch (desc->zs) {
    7661           0 :         case SPDK_NVME_ZONE_STATE_EMPTY:
    7662           0 :                 info->state = SPDK_BDEV_ZONE_STATE_EMPTY;
    7663           0 :                 break;
    7664           0 :         case SPDK_NVME_ZONE_STATE_IOPEN:
    7665           0 :                 info->state = SPDK_BDEV_ZONE_STATE_IMP_OPEN;
    7666           0 :                 break;
    7667           0 :         case SPDK_NVME_ZONE_STATE_EOPEN:
    7668           0 :                 info->state = SPDK_BDEV_ZONE_STATE_EXP_OPEN;
    7669           0 :                 break;
    7670           0 :         case SPDK_NVME_ZONE_STATE_CLOSED:
    7671           0 :                 info->state = SPDK_BDEV_ZONE_STATE_CLOSED;
    7672           0 :                 break;
    7673           0 :         case SPDK_NVME_ZONE_STATE_RONLY:
    7674           0 :                 info->state = SPDK_BDEV_ZONE_STATE_READ_ONLY;
    7675           0 :                 break;
    7676           0 :         case SPDK_NVME_ZONE_STATE_FULL:
    7677           0 :                 info->state = SPDK_BDEV_ZONE_STATE_FULL;
    7678           0 :                 break;
    7679           0 :         case SPDK_NVME_ZONE_STATE_OFFLINE:
    7680           0 :                 info->state = SPDK_BDEV_ZONE_STATE_OFFLINE;
    7681           0 :                 break;
    7682           0 :         default:
    7683           0 :                 SPDK_ERRLOG("Invalid zone state: %#x in zone report\n", desc->zs);
    7684           0 :                 return -EIO;
    7685             :         }
    7686             : 
    7687           0 :         info->zone_id = desc->zslba;
    7688           0 :         info->write_pointer = desc->wp;
    7689           0 :         info->capacity = desc->zcap;
    7690             : 
    7691           0 :         return 0;
    7692             : }
    7693             : 
    7694             : static void
    7695           0 : bdev_nvme_get_zone_info_done(void *ref, const struct spdk_nvme_cpl *cpl)
    7696             : {
    7697           0 :         struct nvme_bdev_io *bio = ref;
    7698           0 :         struct spdk_bdev_io *bdev_io = spdk_bdev_io_from_ctx(bio);
    7699           0 :         uint64_t zone_id = bdev_io->u.zone_mgmt.zone_id;
    7700           0 :         uint32_t zones_to_copy = bdev_io->u.zone_mgmt.num_zones;
    7701           0 :         struct spdk_bdev_zone_info *info = bdev_io->u.zone_mgmt.buf;
    7702             :         uint64_t max_zones_per_buf, i;
    7703             :         uint32_t zone_report_bufsize;
    7704             :         struct spdk_nvme_ns *ns;
    7705             :         struct spdk_nvme_qpair *qpair;
    7706             :         int ret;
    7707             : 
    7708           0 :         if (spdk_nvme_cpl_is_error(cpl)) {
    7709           0 :                 goto out_complete_io_nvme_cpl;
    7710             :         }
    7711             : 
    7712           0 :         if (spdk_unlikely(!nvme_io_path_is_available(bio->io_path))) {
    7713           0 :                 ret = -ENXIO;
    7714           0 :                 goto out_complete_io_ret;
    7715             :         }
    7716             : 
    7717           0 :         ns = bio->io_path->nvme_ns->ns;
    7718           0 :         qpair = bio->io_path->qpair->qpair;
    7719             : 
    7720           0 :         zone_report_bufsize = spdk_nvme_ns_get_max_io_xfer_size(ns);
    7721           0 :         max_zones_per_buf = (zone_report_bufsize - sizeof(*bio->zone_report_buf)) /
    7722             :                             sizeof(bio->zone_report_buf->descs[0]);
    7723             : 
    7724           0 :         if (bio->zone_report_buf->nr_zones > max_zones_per_buf) {
    7725           0 :                 ret = -EINVAL;
    7726           0 :                 goto out_complete_io_ret;
    7727             :         }
    7728             : 
    7729           0 :         if (!bio->zone_report_buf->nr_zones) {
    7730           0 :                 ret = -EINVAL;
    7731           0 :                 goto out_complete_io_ret;
    7732             :         }
    7733             : 
    7734           0 :         for (i = 0; i < bio->zone_report_buf->nr_zones && bio->handled_zones < zones_to_copy; i++) {
    7735           0 :                 ret = fill_zone_from_report(&info[bio->handled_zones],
    7736           0 :                                             &bio->zone_report_buf->descs[i]);
    7737           0 :                 if (ret) {
    7738           0 :                         goto out_complete_io_ret;
    7739             :                 }
    7740           0 :                 bio->handled_zones++;
    7741             :         }
    7742             : 
    7743           0 :         if (bio->handled_zones < zones_to_copy) {
    7744           0 :                 uint64_t zone_size_lba = spdk_nvme_zns_ns_get_zone_size_sectors(ns);
    7745           0 :                 uint64_t slba = zone_id + (zone_size_lba * bio->handled_zones);
    7746             : 
    7747           0 :                 memset(bio->zone_report_buf, 0, zone_report_bufsize);
    7748           0 :                 ret = spdk_nvme_zns_report_zones(ns, qpair,
    7749           0 :                                                  bio->zone_report_buf, zone_report_bufsize,
    7750             :                                                  slba, SPDK_NVME_ZRA_LIST_ALL, true,
    7751             :                                                  bdev_nvme_get_zone_info_done, bio);
    7752           0 :                 if (!ret) {
    7753           0 :                         return;
    7754             :                 } else {
    7755           0 :                         goto out_complete_io_ret;
    7756             :                 }
    7757             :         }
    7758             : 
    7759           0 : out_complete_io_nvme_cpl:
    7760           0 :         free(bio->zone_report_buf);
    7761           0 :         bio->zone_report_buf = NULL;
    7762           0 :         bdev_nvme_io_complete_nvme_status(bio, cpl);
    7763           0 :         return;
    7764             : 
    7765           0 : out_complete_io_ret:
    7766           0 :         free(bio->zone_report_buf);
    7767           0 :         bio->zone_report_buf = NULL;
    7768           0 :         bdev_nvme_io_complete(bio, ret);
    7769             : }
    7770             : 
    /*
     * Completion callback that forwards the NVMe completion, unchanged, to
     * bdev_nvme_io_complete_nvme_status().
     *
     * ref - the struct nvme_bdev_io that issued the command.
     * cpl - NVMe completion of the command.
     */
    static void
    bdev_nvme_zone_management_done(void *ref, const struct spdk_nvme_cpl *cpl)
    {
            bdev_nvme_io_complete_nvme_status((struct nvme_bdev_io *)ref, cpl);
    }
    7778             : 
    7779             : static void
    7780           4 : bdev_nvme_admin_passthru_complete_nvme_status(void *ctx)
    7781             : {
    7782           4 :         struct nvme_bdev_io *bio = ctx;
    7783           4 :         struct spdk_bdev_io *bdev_io = spdk_bdev_io_from_ctx(bio);
    7784           4 :         const struct spdk_nvme_cpl *cpl = &bio->cpl;
    7785             : 
    7786           4 :         assert(bdev_nvme_io_type_is_admin(bdev_io->type));
    7787             : 
    7788           4 :         __bdev_nvme_io_complete(bdev_io, 0, cpl);
    7789           4 : }
    7790             : 
    7791             : static void
    7792           3 : bdev_nvme_abort_complete(void *ctx)
    7793             : {
    7794           3 :         struct nvme_bdev_io *bio = ctx;
    7795           3 :         struct spdk_bdev_io *bdev_io = spdk_bdev_io_from_ctx(bio);
    7796             : 
    7797           3 :         if (spdk_nvme_cpl_is_abort_success(&bio->cpl)) {
    7798           3 :                 __bdev_nvme_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_SUCCESS, NULL);
    7799             :         } else {
    7800           0 :                 __bdev_nvme_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED, NULL);
    7801             :         }
    7802           3 : }
    7803             : 
    7804             : static void
    7805           3 : bdev_nvme_abort_done(void *ref, const struct spdk_nvme_cpl *cpl)
    7806             : {
    7807           3 :         struct nvme_bdev_io *bio = ref;
    7808           3 :         struct spdk_bdev_io *bdev_io = spdk_bdev_io_from_ctx(bio);
    7809             : 
    7810           3 :         bio->cpl = *cpl;
    7811           3 :         spdk_thread_send_msg(spdk_bdev_io_get_thread(bdev_io), bdev_nvme_abort_complete, bio);
    7812           3 : }
    7813             : 
    7814             : static void
    7815           4 : bdev_nvme_admin_passthru_done(void *ref, const struct spdk_nvme_cpl *cpl)
    7816             : {
    7817           4 :         struct nvme_bdev_io *bio = ref;
    7818           4 :         struct spdk_bdev_io *bdev_io = spdk_bdev_io_from_ctx(bio);
    7819             : 
    7820           4 :         bio->cpl = *cpl;
    7821           4 :         spdk_thread_send_msg(spdk_bdev_io_get_thread(bdev_io),
    7822             :                              bdev_nvme_admin_passthru_complete_nvme_status, bio);
    7823           4 : }
    7824             : 
    7825             : static void
    7826           0 : bdev_nvme_queued_reset_sgl(void *ref, uint32_t sgl_offset)
    7827             : {
    7828           0 :         struct nvme_bdev_io *bio = ref;
    7829             :         struct iovec *iov;
    7830             : 
    7831           0 :         bio->iov_offset = sgl_offset;
    7832           0 :         for (bio->iovpos = 0; bio->iovpos < bio->iovcnt; bio->iovpos++) {
    7833           0 :                 iov = &bio->iovs[bio->iovpos];
    7834           0 :                 if (bio->iov_offset < iov->iov_len) {
    7835           0 :                         break;
    7836             :                 }
    7837             : 
    7838           0 :                 bio->iov_offset -= iov->iov_len;
    7839             :         }
    7840           0 : }
    7841             : 
    7842             : static int
    7843           0 : bdev_nvme_queued_next_sge(void *ref, void **address, uint32_t *length)
    7844             : {
    7845           0 :         struct nvme_bdev_io *bio = ref;
    7846             :         struct iovec *iov;
    7847             : 
    7848           0 :         assert(bio->iovpos < bio->iovcnt);
    7849             : 
    7850           0 :         iov = &bio->iovs[bio->iovpos];
    7851             : 
    7852           0 :         *address = iov->iov_base;
    7853           0 :         *length = iov->iov_len;
    7854             : 
    7855           0 :         if (bio->iov_offset) {
    7856           0 :                 assert(bio->iov_offset <= iov->iov_len);
    7857           0 :                 *address += bio->iov_offset;
    7858           0 :                 *length -= bio->iov_offset;
    7859             :         }
    7860             : 
    7861           0 :         bio->iov_offset += *length;
    7862           0 :         if (bio->iov_offset == iov->iov_len) {
    7863           0 :                 bio->iovpos++;
    7864           0 :                 bio->iov_offset = 0;
    7865             :         }
    7866             : 
    7867           0 :         return 0;
    7868             : }
    7869             : 
    7870             : static void
    7871           0 : bdev_nvme_queued_reset_fused_sgl(void *ref, uint32_t sgl_offset)
    7872             : {
    7873           0 :         struct nvme_bdev_io *bio = ref;
    7874             :         struct iovec *iov;
    7875             : 
    7876           0 :         bio->fused_iov_offset = sgl_offset;
    7877           0 :         for (bio->fused_iovpos = 0; bio->fused_iovpos < bio->fused_iovcnt; bio->fused_iovpos++) {
    7878           0 :                 iov = &bio->fused_iovs[bio->fused_iovpos];
    7879           0 :                 if (bio->fused_iov_offset < iov->iov_len) {
    7880           0 :                         break;
    7881             :                 }
    7882             : 
    7883           0 :                 bio->fused_iov_offset -= iov->iov_len;
    7884             :         }
    7885           0 : }
    7886             : 
    7887             : static int
    7888           0 : bdev_nvme_queued_next_fused_sge(void *ref, void **address, uint32_t *length)
    7889             : {
    7890           0 :         struct nvme_bdev_io *bio = ref;
    7891             :         struct iovec *iov;
    7892             : 
    7893           0 :         assert(bio->fused_iovpos < bio->fused_iovcnt);
    7894             : 
    7895           0 :         iov = &bio->fused_iovs[bio->fused_iovpos];
    7896             : 
    7897           0 :         *address = iov->iov_base;
    7898           0 :         *length = iov->iov_len;
    7899             : 
    7900           0 :         if (bio->fused_iov_offset) {
    7901           0 :                 assert(bio->fused_iov_offset <= iov->iov_len);
    7902           0 :                 *address += bio->fused_iov_offset;
    7903           0 :                 *length -= bio->fused_iov_offset;
    7904             :         }
    7905             : 
    7906           0 :         bio->fused_iov_offset += *length;
    7907           0 :         if (bio->fused_iov_offset == iov->iov_len) {
    7908           0 :                 bio->fused_iovpos++;
    7909           0 :                 bio->fused_iov_offset = 0;
    7910             :         }
    7911             : 
    7912           0 :         return 0;
    7913             : }
    7914             : 
    7915             : static int
    7916           0 : bdev_nvme_no_pi_readv(struct nvme_bdev_io *bio, struct iovec *iov, int iovcnt,
    7917             :                       void *md, uint64_t lba_count, uint64_t lba)
    7918             : {
    7919             :         int rc;
    7920             : 
    7921           0 :         SPDK_DEBUGLOG(bdev_nvme, "read %" PRIu64 " blocks with offset %#" PRIx64 " without PI check\n",
    7922             :                       lba_count, lba);
    7923             : 
    7924           0 :         bio->iovs = iov;
    7925           0 :         bio->iovcnt = iovcnt;
    7926           0 :         bio->iovpos = 0;
    7927           0 :         bio->iov_offset = 0;
    7928             : 
    7929           0 :         rc = spdk_nvme_ns_cmd_readv_with_md(bio->io_path->nvme_ns->ns,
    7930           0 :                                             bio->io_path->qpair->qpair,
    7931             :                                             lba, lba_count,
    7932             :                                             bdev_nvme_no_pi_readv_done, bio, 0,
    7933             :                                             bdev_nvme_queued_reset_sgl, bdev_nvme_queued_next_sge,
    7934             :                                             md, 0, 0);
    7935             : 
    7936           0 :         if (rc != 0 && rc != -ENOMEM) {
    7937           0 :                 SPDK_ERRLOG("no_pi_readv failed: rc = %d\n", rc);
    7938             :         }
    7939           0 :         return rc;
    7940             : }
    7941             : 
    7942             : static int
    7943           3 : bdev_nvme_readv(struct nvme_bdev_io *bio, struct iovec *iov, int iovcnt,
    7944             :                 void *md, uint64_t lba_count, uint64_t lba, uint32_t flags,
    7945             :                 struct spdk_memory_domain *domain, void *domain_ctx,
    7946             :                 struct spdk_accel_sequence *seq)
    7947             : {
    7948           3 :         struct spdk_nvme_ns *ns = bio->io_path->nvme_ns->ns;
    7949           3 :         struct spdk_nvme_qpair *qpair = bio->io_path->qpair->qpair;
    7950             :         int rc;
    7951             : 
    7952           3 :         SPDK_DEBUGLOG(bdev_nvme, "read %" PRIu64 " blocks with offset %#" PRIx64 "\n",
    7953             :                       lba_count, lba);
    7954             : 
    7955           3 :         bio->iovs = iov;
    7956           3 :         bio->iovcnt = iovcnt;
    7957           3 :         bio->iovpos = 0;
    7958           3 :         bio->iov_offset = 0;
    7959             : 
    7960           3 :         if (domain != NULL || seq != NULL) {
    7961           1 :                 bio->ext_opts.size = SPDK_SIZEOF(&bio->ext_opts, accel_sequence);
    7962           1 :                 bio->ext_opts.memory_domain = domain;
    7963           1 :                 bio->ext_opts.memory_domain_ctx = domain_ctx;
    7964           1 :                 bio->ext_opts.io_flags = flags;
    7965           1 :                 bio->ext_opts.metadata = md;
    7966           1 :                 bio->ext_opts.accel_sequence = seq;
    7967             : 
    7968           1 :                 if (iovcnt == 1) {
    7969           1 :                         rc = spdk_nvme_ns_cmd_read_ext(ns, qpair, iov[0].iov_base, lba, lba_count, bdev_nvme_readv_done,
    7970             :                                                        bio, &bio->ext_opts);
    7971             :                 } else {
    7972           0 :                         rc = spdk_nvme_ns_cmd_readv_ext(ns, qpair, lba, lba_count,
    7973             :                                                         bdev_nvme_readv_done, bio,
    7974             :                                                         bdev_nvme_queued_reset_sgl,
    7975             :                                                         bdev_nvme_queued_next_sge,
    7976             :                                                         &bio->ext_opts);
    7977             :                 }
    7978           2 :         } else if (iovcnt == 1) {
    7979           2 :                 rc = spdk_nvme_ns_cmd_read_with_md(ns, qpair, iov[0].iov_base,
    7980             :                                                    md, lba, lba_count, bdev_nvme_readv_done,
    7981             :                                                    bio, flags, 0, 0);
    7982             :         } else {
    7983           0 :                 rc = spdk_nvme_ns_cmd_readv_with_md(ns, qpair, lba, lba_count,
    7984             :                                                     bdev_nvme_readv_done, bio, flags,
    7985             :                                                     bdev_nvme_queued_reset_sgl,
    7986             :                                                     bdev_nvme_queued_next_sge, md, 0, 0);
    7987             :         }
    7988             : 
    7989           3 :         if (spdk_unlikely(rc != 0 && rc != -ENOMEM)) {
    7990           0 :                 SPDK_ERRLOG("readv failed: rc = %d\n", rc);
    7991             :         }
    7992           3 :         return rc;
    7993             : }
    7994             : 
    7995             : static int
    7996          25 : bdev_nvme_writev(struct nvme_bdev_io *bio, struct iovec *iov, int iovcnt,
    7997             :                  void *md, uint64_t lba_count, uint64_t lba, uint32_t flags,
    7998             :                  struct spdk_memory_domain *domain, void *domain_ctx,
    7999             :                  struct spdk_accel_sequence *seq,
    8000             :                  union spdk_bdev_nvme_cdw12 cdw12, union spdk_bdev_nvme_cdw13 cdw13)
    8001             : {
    8002          25 :         struct spdk_nvme_ns *ns = bio->io_path->nvme_ns->ns;
    8003          25 :         struct spdk_nvme_qpair *qpair = bio->io_path->qpair->qpair;
    8004             :         int rc;
    8005             : 
    8006          25 :         SPDK_DEBUGLOG(bdev_nvme, "write %" PRIu64 " blocks with offset %#" PRIx64 "\n",
    8007             :                       lba_count, lba);
    8008             : 
    8009          25 :         bio->iovs = iov;
    8010          25 :         bio->iovcnt = iovcnt;
    8011          25 :         bio->iovpos = 0;
    8012          25 :         bio->iov_offset = 0;
    8013             : 
    8014          25 :         if (domain != NULL || seq != NULL) {
    8015           0 :                 bio->ext_opts.size = SPDK_SIZEOF(&bio->ext_opts, accel_sequence);
    8016           0 :                 bio->ext_opts.memory_domain = domain;
    8017           0 :                 bio->ext_opts.memory_domain_ctx = domain_ctx;
    8018           0 :                 bio->ext_opts.io_flags = flags | SPDK_NVME_IO_FLAGS_DIRECTIVE(cdw12.write.dtype);
    8019           0 :                 bio->ext_opts.cdw13 = cdw13.raw;
    8020           0 :                 bio->ext_opts.metadata = md;
    8021           0 :                 bio->ext_opts.accel_sequence = seq;
    8022             : 
    8023           0 :                 if (iovcnt == 1) {
    8024           0 :                         rc = spdk_nvme_ns_cmd_write_ext(ns, qpair, iov[0].iov_base, lba, lba_count, bdev_nvme_writev_done,
    8025             :                                                         bio, &bio->ext_opts);
    8026             :                 } else {
    8027           0 :                         rc = spdk_nvme_ns_cmd_writev_ext(ns, qpair, lba, lba_count,
    8028             :                                                          bdev_nvme_writev_done, bio,
    8029             :                                                          bdev_nvme_queued_reset_sgl,
    8030             :                                                          bdev_nvme_queued_next_sge,
    8031             :                                                          &bio->ext_opts);
    8032             :                 }
    8033          25 :         } else if (iovcnt == 1) {
    8034          25 :                 rc = spdk_nvme_ns_cmd_write_with_md(ns, qpair, iov[0].iov_base,
    8035             :                                                     md, lba, lba_count, bdev_nvme_writev_done,
    8036             :                                                     bio, flags, 0, 0);
    8037             :         } else {
    8038           0 :                 rc = spdk_nvme_ns_cmd_writev_with_md(ns, qpair, lba, lba_count,
    8039             :                                                      bdev_nvme_writev_done, bio, flags,
    8040             :                                                      bdev_nvme_queued_reset_sgl,
    8041             :                                                      bdev_nvme_queued_next_sge, md, 0, 0);
    8042             :         }
    8043             : 
    8044          25 :         if (spdk_unlikely(rc != 0 && rc != -ENOMEM)) {
    8045           0 :                 SPDK_ERRLOG("writev failed: rc = %d\n", rc);
    8046             :         }
    8047          25 :         return rc;
    8048             : }
    8049             : 
    8050             : static int
    8051           0 : bdev_nvme_zone_appendv(struct nvme_bdev_io *bio, struct iovec *iov, int iovcnt,
    8052             :                        void *md, uint64_t lba_count, uint64_t zslba,
    8053             :                        uint32_t flags)
    8054             : {
    8055           0 :         struct spdk_nvme_ns *ns = bio->io_path->nvme_ns->ns;
    8056           0 :         struct spdk_nvme_qpair *qpair = bio->io_path->qpair->qpair;
    8057             :         int rc;
    8058             : 
    8059           0 :         SPDK_DEBUGLOG(bdev_nvme, "zone append %" PRIu64 " blocks to zone start lba %#" PRIx64 "\n",
    8060             :                       lba_count, zslba);
    8061             : 
    8062           0 :         bio->iovs = iov;
    8063           0 :         bio->iovcnt = iovcnt;
    8064           0 :         bio->iovpos = 0;
    8065           0 :         bio->iov_offset = 0;
    8066             : 
    8067           0 :         if (iovcnt == 1) {
    8068           0 :                 rc = spdk_nvme_zns_zone_append_with_md(ns, qpair, iov[0].iov_base, md, zslba,
    8069             :                                                        lba_count,
    8070             :                                                        bdev_nvme_zone_appendv_done, bio,
    8071             :                                                        flags,
    8072             :                                                        0, 0);
    8073             :         } else {
    8074           0 :                 rc = spdk_nvme_zns_zone_appendv_with_md(ns, qpair, zslba, lba_count,
    8075             :                                                         bdev_nvme_zone_appendv_done, bio, flags,
    8076             :                                                         bdev_nvme_queued_reset_sgl, bdev_nvme_queued_next_sge,
    8077             :                                                         md, 0, 0);
    8078             :         }
    8079             : 
    8080           0 :         if (rc != 0 && rc != -ENOMEM) {
    8081           0 :                 SPDK_ERRLOG("zone append failed: rc = %d\n", rc);
    8082             :         }
    8083           0 :         return rc;
    8084             : }
    8085             : 
    8086             : static int
    8087           1 : bdev_nvme_comparev(struct nvme_bdev_io *bio, struct iovec *iov, int iovcnt,
    8088             :                    void *md, uint64_t lba_count, uint64_t lba,
    8089             :                    uint32_t flags)
    8090             : {
    8091             :         int rc;
    8092             : 
    8093           1 :         SPDK_DEBUGLOG(bdev_nvme, "compare %" PRIu64 " blocks with offset %#" PRIx64 "\n",
    8094             :                       lba_count, lba);
    8095             : 
    8096           1 :         bio->iovs = iov;
    8097           1 :         bio->iovcnt = iovcnt;
    8098           1 :         bio->iovpos = 0;
    8099           1 :         bio->iov_offset = 0;
    8100             : 
    8101           1 :         rc = spdk_nvme_ns_cmd_comparev_with_md(bio->io_path->nvme_ns->ns,
    8102           1 :                                                bio->io_path->qpair->qpair,
    8103             :                                                lba, lba_count,
    8104             :                                                bdev_nvme_comparev_done, bio, flags,
    8105             :                                                bdev_nvme_queued_reset_sgl, bdev_nvme_queued_next_sge,
    8106             :                                                md, 0, 0);
    8107             : 
    8108           1 :         if (rc != 0 && rc != -ENOMEM) {
    8109           0 :                 SPDK_ERRLOG("comparev failed: rc = %d\n", rc);
    8110             :         }
    8111           1 :         return rc;
    8112             : }
    8113             : 
    8114             : static int
    8115           2 : bdev_nvme_comparev_and_writev(struct nvme_bdev_io *bio, struct iovec *cmp_iov, int cmp_iovcnt,
    8116             :                               struct iovec *write_iov, int write_iovcnt,
    8117             :                               void *md, uint64_t lba_count, uint64_t lba, uint32_t flags)
    8118             : {
    8119           2 :         struct spdk_nvme_ns *ns = bio->io_path->nvme_ns->ns;
    8120           2 :         struct spdk_nvme_qpair *qpair = bio->io_path->qpair->qpair;
    8121           2 :         struct spdk_bdev_io *bdev_io = spdk_bdev_io_from_ctx(bio);
    8122             :         int rc;
    8123             : 
    8124           2 :         SPDK_DEBUGLOG(bdev_nvme, "compare and write %" PRIu64 " blocks with offset %#" PRIx64 "\n",
    8125             :                       lba_count, lba);
    8126             : 
    8127           2 :         bio->iovs = cmp_iov;
    8128           2 :         bio->iovcnt = cmp_iovcnt;
    8129           2 :         bio->iovpos = 0;
    8130           2 :         bio->iov_offset = 0;
    8131           2 :         bio->fused_iovs = write_iov;
    8132           2 :         bio->fused_iovcnt = write_iovcnt;
    8133           2 :         bio->fused_iovpos = 0;
    8134           2 :         bio->fused_iov_offset = 0;
    8135             : 
    8136           2 :         if (bdev_io->num_retries == 0) {
    8137           2 :                 bio->first_fused_submitted = false;
    8138           2 :                 bio->first_fused_completed = false;
    8139             :         }
    8140             : 
    8141           2 :         if (!bio->first_fused_submitted) {
    8142           2 :                 flags |= SPDK_NVME_IO_FLAGS_FUSE_FIRST;
    8143           2 :                 memset(&bio->cpl, 0, sizeof(bio->cpl));
    8144             : 
    8145           2 :                 rc = spdk_nvme_ns_cmd_comparev_with_md(ns, qpair, lba, lba_count,
    8146             :                                                        bdev_nvme_comparev_and_writev_done, bio, flags,
    8147             :                                                        bdev_nvme_queued_reset_sgl, bdev_nvme_queued_next_sge, md, 0, 0);
    8148           2 :                 if (rc == 0) {
    8149           2 :                         bio->first_fused_submitted = true;
    8150           2 :                         flags &= ~SPDK_NVME_IO_FLAGS_FUSE_FIRST;
    8151             :                 } else {
    8152           0 :                         if (rc != -ENOMEM) {
    8153           0 :                                 SPDK_ERRLOG("compare failed: rc = %d\n", rc);
    8154             :                         }
    8155           0 :                         return rc;
    8156             :                 }
    8157             :         }
    8158             : 
    8159           2 :         flags |= SPDK_NVME_IO_FLAGS_FUSE_SECOND;
    8160             : 
    8161           2 :         rc = spdk_nvme_ns_cmd_writev_with_md(ns, qpair, lba, lba_count,
    8162             :                                              bdev_nvme_comparev_and_writev_done, bio, flags,
    8163             :                                              bdev_nvme_queued_reset_fused_sgl, bdev_nvme_queued_next_fused_sge, md, 0, 0);
    8164           2 :         if (rc != 0 && rc != -ENOMEM) {
    8165           0 :                 SPDK_ERRLOG("write failed: rc = %d\n", rc);
    8166           0 :                 rc = 0;
    8167             :         }
    8168             : 
    8169           2 :         return rc;
    8170             : }
    8171             : 
    8172             : static int
    8173           1 : bdev_nvme_unmap(struct nvme_bdev_io *bio, uint64_t offset_blocks, uint64_t num_blocks)
    8174             : {
    8175           1 :         struct spdk_nvme_dsm_range dsm_ranges[SPDK_NVME_DATASET_MANAGEMENT_MAX_RANGES];
    8176             :         struct spdk_nvme_dsm_range *range;
    8177             :         uint64_t offset, remaining;
    8178             :         uint64_t num_ranges_u64;
    8179             :         uint16_t num_ranges;
    8180             :         int rc;
    8181             : 
    8182           1 :         num_ranges_u64 = (num_blocks + SPDK_NVME_DATASET_MANAGEMENT_RANGE_MAX_BLOCKS - 1) /
    8183             :                          SPDK_NVME_DATASET_MANAGEMENT_RANGE_MAX_BLOCKS;
    8184           1 :         if (num_ranges_u64 > SPDK_COUNTOF(dsm_ranges)) {
    8185           0 :                 SPDK_ERRLOG("Unmap request for %" PRIu64 " blocks is too large\n", num_blocks);
    8186           0 :                 return -EINVAL;
    8187             :         }
    8188           1 :         num_ranges = (uint16_t)num_ranges_u64;
    8189             : 
    8190           1 :         offset = offset_blocks;
    8191           1 :         remaining = num_blocks;
    8192           1 :         range = &dsm_ranges[0];
    8193             : 
    8194             :         /* Fill max-size ranges until the remaining blocks fit into one range */
    8195           1 :         while (remaining > SPDK_NVME_DATASET_MANAGEMENT_RANGE_MAX_BLOCKS) {
    8196           0 :                 range->attributes.raw = 0;
    8197           0 :                 range->length = SPDK_NVME_DATASET_MANAGEMENT_RANGE_MAX_BLOCKS;
    8198           0 :                 range->starting_lba = offset;
    8199             : 
    8200           0 :                 offset += SPDK_NVME_DATASET_MANAGEMENT_RANGE_MAX_BLOCKS;
    8201           0 :                 remaining -= SPDK_NVME_DATASET_MANAGEMENT_RANGE_MAX_BLOCKS;
    8202           0 :                 range++;
    8203             :         }
    8204             : 
    8205             :         /* Final range describes the remaining blocks */
    8206           1 :         range->attributes.raw = 0;
    8207           1 :         range->length = remaining;
    8208           1 :         range->starting_lba = offset;
    8209             : 
    8210           1 :         rc = spdk_nvme_ns_cmd_dataset_management(bio->io_path->nvme_ns->ns,
    8211           1 :                         bio->io_path->qpair->qpair,
    8212             :                         SPDK_NVME_DSM_ATTR_DEALLOCATE,
    8213             :                         dsm_ranges, num_ranges,
    8214             :                         bdev_nvme_queued_done, bio);
    8215             : 
    8216           1 :         return rc;
    8217             : }
    8218             : 
    8219             : static int
    8220           0 : bdev_nvme_write_zeroes(struct nvme_bdev_io *bio, uint64_t offset_blocks, uint64_t num_blocks)
    8221             : {
    8222           0 :         if (num_blocks > UINT16_MAX + 1) {
    8223           0 :                 SPDK_ERRLOG("NVMe write zeroes is limited to 16-bit block count\n");
    8224           0 :                 return -EINVAL;
    8225             :         }
    8226             : 
    8227           0 :         return spdk_nvme_ns_cmd_write_zeroes(bio->io_path->nvme_ns->ns,
    8228           0 :                                              bio->io_path->qpair->qpair,
    8229             :                                              offset_blocks, num_blocks,
    8230             :                                              bdev_nvme_queued_done, bio,
    8231             :                                              0);
    8232             : }
    8233             : 
    8234             : static int
    8235           0 : bdev_nvme_get_zone_info(struct nvme_bdev_io *bio, uint64_t zone_id, uint32_t num_zones,
    8236             :                         struct spdk_bdev_zone_info *info)
    8237             : {
    8238           0 :         struct spdk_nvme_ns *ns = bio->io_path->nvme_ns->ns;
    8239           0 :         struct spdk_nvme_qpair *qpair = bio->io_path->qpair->qpair;
    8240           0 :         uint32_t zone_report_bufsize = spdk_nvme_ns_get_max_io_xfer_size(ns);
    8241           0 :         uint64_t zone_size = spdk_nvme_zns_ns_get_zone_size_sectors(ns);
    8242           0 :         uint64_t total_zones = spdk_nvme_zns_ns_get_num_zones(ns);
    8243             : 
    8244           0 :         if (zone_id % zone_size != 0) {
    8245           0 :                 return -EINVAL;
    8246             :         }
    8247             : 
    8248           0 :         if (num_zones > total_zones || !num_zones) {
    8249           0 :                 return -EINVAL;
    8250             :         }
    8251             : 
    8252           0 :         assert(!bio->zone_report_buf);
    8253           0 :         bio->zone_report_buf = calloc(1, zone_report_bufsize);
    8254           0 :         if (!bio->zone_report_buf) {
    8255           0 :                 return -ENOMEM;
    8256             :         }
    8257             : 
    8258           0 :         bio->handled_zones = 0;
    8259             : 
    8260           0 :         return spdk_nvme_zns_report_zones(ns, qpair, bio->zone_report_buf, zone_report_bufsize,
    8261             :                                           zone_id, SPDK_NVME_ZRA_LIST_ALL, true,
    8262             :                                           bdev_nvme_get_zone_info_done, bio);
    8263             : }
    8264             : 
    8265             : static int
    8266           0 : bdev_nvme_zone_management(struct nvme_bdev_io *bio, uint64_t zone_id,
    8267             :                           enum spdk_bdev_zone_action action)
    8268             : {
    8269           0 :         struct spdk_nvme_ns *ns = bio->io_path->nvme_ns->ns;
    8270           0 :         struct spdk_nvme_qpair *qpair = bio->io_path->qpair->qpair;
    8271             : 
    8272           0 :         switch (action) {
    8273           0 :         case SPDK_BDEV_ZONE_CLOSE:
    8274           0 :                 return spdk_nvme_zns_close_zone(ns, qpair, zone_id, false,
    8275             :                                                 bdev_nvme_zone_management_done, bio);
    8276           0 :         case SPDK_BDEV_ZONE_FINISH:
    8277           0 :                 return spdk_nvme_zns_finish_zone(ns, qpair, zone_id, false,
    8278             :                                                  bdev_nvme_zone_management_done, bio);
    8279           0 :         case SPDK_BDEV_ZONE_OPEN:
    8280           0 :                 return spdk_nvme_zns_open_zone(ns, qpair, zone_id, false,
    8281             :                                                bdev_nvme_zone_management_done, bio);
    8282           0 :         case SPDK_BDEV_ZONE_RESET:
    8283           0 :                 return spdk_nvme_zns_reset_zone(ns, qpair, zone_id, false,
    8284             :                                                 bdev_nvme_zone_management_done, bio);
    8285           0 :         case SPDK_BDEV_ZONE_OFFLINE:
    8286           0 :                 return spdk_nvme_zns_offline_zone(ns, qpair, zone_id, false,
    8287             :                                                   bdev_nvme_zone_management_done, bio);
    8288           0 :         default:
    8289           0 :                 return -EINVAL;
    8290             :         }
    8291             : }
    8292             : 
    8293             : static void
    8294           5 : bdev_nvme_admin_passthru(struct nvme_bdev_channel *nbdev_ch, struct nvme_bdev_io *bio,
    8295             :                          struct spdk_nvme_cmd *cmd, void *buf, size_t nbytes)
    8296             : {
    8297             :         struct nvme_io_path *io_path;
    8298             :         struct nvme_ctrlr *nvme_ctrlr;
    8299             :         uint32_t max_xfer_size;
    8300           5 :         int rc = -ENXIO;
    8301             : 
    8302             :         /* Choose the first ctrlr which is not failed. */
    8303           8 :         STAILQ_FOREACH(io_path, &nbdev_ch->io_path_list, stailq) {
    8304           7 :                 nvme_ctrlr = io_path->qpair->ctrlr;
    8305             : 
    8306             :                 /* We should skip any unavailable nvme_ctrlr rather than checking
    8307             :                  * if the return value of spdk_nvme_ctrlr_cmd_admin_raw() is -ENXIO.
    8308             :                  */
    8309           7 :                 if (!nvme_ctrlr_is_available(nvme_ctrlr)) {
    8310           3 :                         continue;
    8311             :                 }
    8312             : 
    8313           4 :                 max_xfer_size = spdk_nvme_ctrlr_get_max_xfer_size(nvme_ctrlr->ctrlr);
    8314             : 
    8315           4 :                 if (nbytes > max_xfer_size) {
    8316           0 :                         SPDK_ERRLOG("nbytes is greater than MDTS %" PRIu32 ".\n", max_xfer_size);
    8317           0 :                         rc = -EINVAL;
    8318           0 :                         goto err;
    8319             :                 }
    8320             : 
    8321           4 :                 rc = spdk_nvme_ctrlr_cmd_admin_raw(nvme_ctrlr->ctrlr, cmd, buf, (uint32_t)nbytes,
    8322             :                                                    bdev_nvme_admin_passthru_done, bio);
    8323           4 :                 if (rc == 0) {
    8324           4 :                         return;
    8325             :                 }
    8326             :         }
    8327             : 
    8328           1 : err:
    8329           1 :         bdev_nvme_admin_complete(bio, rc);
    8330             : }
    8331             : 
    8332             : static int
    8333           0 : bdev_nvme_io_passthru(struct nvme_bdev_io *bio, struct spdk_nvme_cmd *cmd,
    8334             :                       void *buf, size_t nbytes)
    8335             : {
    8336           0 :         struct spdk_nvme_ns *ns = bio->io_path->nvme_ns->ns;
    8337           0 :         struct spdk_nvme_qpair *qpair = bio->io_path->qpair->qpair;
    8338           0 :         uint32_t max_xfer_size = spdk_nvme_ns_get_max_io_xfer_size(ns);
    8339           0 :         struct spdk_nvme_ctrlr *ctrlr = spdk_nvme_ns_get_ctrlr(ns);
    8340             : 
    8341           0 :         if (nbytes > max_xfer_size) {
    8342           0 :                 SPDK_ERRLOG("nbytes is greater than MDTS %" PRIu32 ".\n", max_xfer_size);
    8343           0 :                 return -EINVAL;
    8344             :         }
    8345             : 
    8346             :         /*
    8347             :          * Each NVMe bdev is a specific namespace, and all NVMe I/O commands require a nsid,
    8348             :          * so fill it out automatically.
    8349             :          */
    8350           0 :         cmd->nsid = spdk_nvme_ns_get_id(ns);
    8351             : 
    8352           0 :         return spdk_nvme_ctrlr_cmd_io_raw(ctrlr, qpair, cmd, buf,
    8353             :                                           (uint32_t)nbytes, bdev_nvme_queued_done, bio);
    8354             : }
    8355             : 
    8356             : static int
    8357           0 : bdev_nvme_io_passthru_md(struct nvme_bdev_io *bio, struct spdk_nvme_cmd *cmd,
    8358             :                          void *buf, size_t nbytes, void *md_buf, size_t md_len)
    8359             : {
    8360           0 :         struct spdk_nvme_ns *ns = bio->io_path->nvme_ns->ns;
    8361           0 :         struct spdk_nvme_qpair *qpair = bio->io_path->qpair->qpair;
    8362           0 :         size_t nr_sectors = nbytes / spdk_nvme_ns_get_extended_sector_size(ns);
    8363           0 :         uint32_t max_xfer_size = spdk_nvme_ns_get_max_io_xfer_size(ns);
    8364           0 :         struct spdk_nvme_ctrlr *ctrlr = spdk_nvme_ns_get_ctrlr(ns);
    8365             : 
    8366           0 :         if (nbytes > max_xfer_size) {
    8367           0 :                 SPDK_ERRLOG("nbytes is greater than MDTS %" PRIu32 ".\n", max_xfer_size);
    8368           0 :                 return -EINVAL;
    8369             :         }
    8370             : 
    8371           0 :         if (md_len != nr_sectors * spdk_nvme_ns_get_md_size(ns)) {
    8372           0 :                 SPDK_ERRLOG("invalid meta data buffer size\n");
    8373           0 :                 return -EINVAL;
    8374             :         }
    8375             : 
    8376             :         /*
    8377             :          * Each NVMe bdev is a specific namespace, and all NVMe I/O commands require a nsid,
    8378             :          * so fill it out automatically.
    8379             :          */
    8380           0 :         cmd->nsid = spdk_nvme_ns_get_id(ns);
    8381             : 
    8382           0 :         return spdk_nvme_ctrlr_cmd_io_raw_with_md(ctrlr, qpair, cmd, buf,
    8383             :                         (uint32_t)nbytes, md_buf, bdev_nvme_queued_done, bio);
    8384             : }
    8385             : 
    8386             : static int
    8387           0 : bdev_nvme_iov_passthru_md(struct nvme_bdev_io *bio,
    8388             :                           struct spdk_nvme_cmd *cmd, struct iovec *iov, int iovcnt,
    8389             :                           size_t nbytes, void *md_buf, size_t md_len)
    8390             : {
    8391           0 :         struct spdk_nvme_ns *ns = bio->io_path->nvme_ns->ns;
    8392           0 :         struct spdk_nvme_qpair *qpair = bio->io_path->qpair->qpair;
    8393           0 :         size_t nr_sectors = nbytes / spdk_nvme_ns_get_extended_sector_size(ns);
    8394           0 :         uint32_t max_xfer_size = spdk_nvme_ns_get_max_io_xfer_size(ns);
    8395           0 :         struct spdk_nvme_ctrlr *ctrlr = spdk_nvme_ns_get_ctrlr(ns);
    8396             : 
    8397           0 :         bio->iovs = iov;
    8398           0 :         bio->iovcnt = iovcnt;
    8399           0 :         bio->iovpos = 0;
    8400           0 :         bio->iov_offset = 0;
    8401             : 
    8402           0 :         if (nbytes > max_xfer_size) {
    8403           0 :                 SPDK_ERRLOG("nbytes is greater than MDTS %" PRIu32 ".\n", max_xfer_size);
    8404           0 :                 return -EINVAL;
    8405             :         }
    8406             : 
    8407           0 :         if (md_len != nr_sectors * spdk_nvme_ns_get_md_size(ns)) {
    8408           0 :                 SPDK_ERRLOG("invalid meta data buffer size\n");
    8409           0 :                 return -EINVAL;
    8410             :         }
    8411             : 
    8412             :         /*
    8413             :          * Each NVMe bdev is a specific namespace, and all NVMe I/O commands
    8414             :          * require a nsid, so fill it out automatically.
    8415             :          */
    8416           0 :         cmd->nsid = spdk_nvme_ns_get_id(ns);
    8417             : 
    8418           0 :         return spdk_nvme_ctrlr_cmd_iov_raw_with_md(
    8419             :                        ctrlr, qpair, cmd, (uint32_t)nbytes, md_buf, bdev_nvme_queued_done, bio,
    8420             :                        bdev_nvme_queued_reset_sgl, bdev_nvme_queued_next_sge);
    8421             : }
    8422             : 
    8423             : static void
    8424           6 : bdev_nvme_abort(struct nvme_bdev_channel *nbdev_ch, struct nvme_bdev_io *bio,
    8425             :                 struct nvme_bdev_io *bio_to_abort)
    8426             : {
    8427             :         struct nvme_io_path *io_path;
    8428           6 :         int rc = 0;
    8429             : 
    8430           6 :         rc = bdev_nvme_abort_retry_io(nbdev_ch, bio_to_abort);
    8431           6 :         if (rc == 0) {
    8432           1 :                 bdev_nvme_admin_complete(bio, 0);
    8433           1 :                 return;
    8434             :         }
    8435             : 
    8436           5 :         io_path = bio_to_abort->io_path;
    8437           5 :         if (io_path != NULL) {
    8438           3 :                 rc = spdk_nvme_ctrlr_cmd_abort_ext(io_path->qpair->ctrlr->ctrlr,
    8439           3 :                                                    io_path->qpair->qpair,
    8440             :                                                    bio_to_abort,
    8441             :                                                    bdev_nvme_abort_done, bio);
    8442             :         } else {
    8443           3 :                 STAILQ_FOREACH(io_path, &nbdev_ch->io_path_list, stailq) {
    8444           2 :                         rc = spdk_nvme_ctrlr_cmd_abort_ext(io_path->qpair->ctrlr->ctrlr,
    8445             :                                                            NULL,
    8446             :                                                            bio_to_abort,
    8447             :                                                            bdev_nvme_abort_done, bio);
    8448             : 
    8449           2 :                         if (rc != -ENOENT) {
    8450           1 :                                 break;
    8451             :                         }
    8452             :                 }
    8453             :         }
    8454             : 
    8455           5 :         if (rc != 0) {
    8456             :                 /* If no command was found or there was any error, complete the abort
    8457             :                  * request with failure.
    8458             :                  */
    8459           2 :                 bdev_nvme_admin_complete(bio, rc);
    8460             :         }
    8461             : }
    8462             : 
    8463             : static int
    8464           0 : bdev_nvme_copy(struct nvme_bdev_io *bio, uint64_t dst_offset_blocks, uint64_t src_offset_blocks,
    8465             :                uint64_t num_blocks)
    8466             : {
    8467           0 :         struct spdk_nvme_scc_source_range range = {
    8468             :                 .slba = src_offset_blocks,
    8469           0 :                 .nlb = num_blocks - 1
    8470             :         };
    8471             : 
    8472           0 :         return spdk_nvme_ns_cmd_copy(bio->io_path->nvme_ns->ns,
    8473           0 :                                      bio->io_path->qpair->qpair,
    8474             :                                      &range, 1, dst_offset_blocks,
    8475             :                                      bdev_nvme_queued_done, bio);
    8476             : }
    8477             : 
    8478             : static void
    8479           0 : bdev_nvme_opts_config_json(struct spdk_json_write_ctx *w)
    8480             : {
    8481             :         const char *action;
    8482             :         uint32_t i;
    8483             : 
    8484           0 :         if (g_opts.action_on_timeout == SPDK_BDEV_NVME_TIMEOUT_ACTION_RESET) {
    8485           0 :                 action = "reset";
    8486           0 :         } else if (g_opts.action_on_timeout == SPDK_BDEV_NVME_TIMEOUT_ACTION_ABORT) {
    8487           0 :                 action = "abort";
    8488             :         } else {
    8489           0 :                 action = "none";
    8490             :         }
    8491             : 
    8492           0 :         spdk_json_write_object_begin(w);
    8493             : 
    8494           0 :         spdk_json_write_named_string(w, "method", "bdev_nvme_set_options");
    8495             : 
    8496           0 :         spdk_json_write_named_object_begin(w, "params");
    8497           0 :         spdk_json_write_named_string(w, "action_on_timeout", action);
    8498           0 :         spdk_json_write_named_uint64(w, "timeout_us", g_opts.timeout_us);
    8499           0 :         spdk_json_write_named_uint64(w, "timeout_admin_us", g_opts.timeout_admin_us);
    8500           0 :         spdk_json_write_named_uint32(w, "keep_alive_timeout_ms", g_opts.keep_alive_timeout_ms);
    8501           0 :         spdk_json_write_named_uint32(w, "arbitration_burst", g_opts.arbitration_burst);
    8502           0 :         spdk_json_write_named_uint32(w, "low_priority_weight", g_opts.low_priority_weight);
    8503           0 :         spdk_json_write_named_uint32(w, "medium_priority_weight", g_opts.medium_priority_weight);
    8504           0 :         spdk_json_write_named_uint32(w, "high_priority_weight", g_opts.high_priority_weight);
    8505           0 :         spdk_json_write_named_uint64(w, "nvme_adminq_poll_period_us", g_opts.nvme_adminq_poll_period_us);
    8506           0 :         spdk_json_write_named_uint64(w, "nvme_ioq_poll_period_us", g_opts.nvme_ioq_poll_period_us);
    8507           0 :         spdk_json_write_named_uint32(w, "io_queue_requests", g_opts.io_queue_requests);
    8508           0 :         spdk_json_write_named_bool(w, "delay_cmd_submit", g_opts.delay_cmd_submit);
    8509           0 :         spdk_json_write_named_uint32(w, "transport_retry_count", g_opts.transport_retry_count);
    8510           0 :         spdk_json_write_named_int32(w, "bdev_retry_count", g_opts.bdev_retry_count);
    8511           0 :         spdk_json_write_named_uint8(w, "transport_ack_timeout", g_opts.transport_ack_timeout);
    8512           0 :         spdk_json_write_named_int32(w, "ctrlr_loss_timeout_sec", g_opts.ctrlr_loss_timeout_sec);
    8513           0 :         spdk_json_write_named_uint32(w, "reconnect_delay_sec", g_opts.reconnect_delay_sec);
    8514           0 :         spdk_json_write_named_uint32(w, "fast_io_fail_timeout_sec", g_opts.fast_io_fail_timeout_sec);
    8515           0 :         spdk_json_write_named_bool(w, "disable_auto_failback", g_opts.disable_auto_failback);
    8516           0 :         spdk_json_write_named_bool(w, "generate_uuids", g_opts.generate_uuids);
    8517           0 :         spdk_json_write_named_uint8(w, "transport_tos", g_opts.transport_tos);
    8518           0 :         spdk_json_write_named_bool(w, "nvme_error_stat", g_opts.nvme_error_stat);
    8519           0 :         spdk_json_write_named_uint32(w, "rdma_srq_size", g_opts.rdma_srq_size);
    8520           0 :         spdk_json_write_named_bool(w, "io_path_stat", g_opts.io_path_stat);
    8521           0 :         spdk_json_write_named_bool(w, "allow_accel_sequence", g_opts.allow_accel_sequence);
    8522           0 :         spdk_json_write_named_uint32(w, "rdma_max_cq_size", g_opts.rdma_max_cq_size);
    8523           0 :         spdk_json_write_named_uint16(w, "rdma_cm_event_timeout_ms", g_opts.rdma_cm_event_timeout_ms);
    8524           0 :         spdk_json_write_named_array_begin(w, "dhchap_digests");
    8525           0 :         for (i = 0; i < 32; ++i) {
    8526           0 :                 if (g_opts.dhchap_digests & SPDK_BIT(i)) {
    8527           0 :                         spdk_json_write_string(w, spdk_nvme_dhchap_get_digest_name(i));
    8528             :                 }
    8529             :         }
    8530           0 :         spdk_json_write_array_end(w);
    8531           0 :         spdk_json_write_named_array_begin(w, "dhchap_dhgroups");
    8532           0 :         for (i = 0; i < 32; ++i) {
    8533           0 :                 if (g_opts.dhchap_dhgroups & SPDK_BIT(i)) {
    8534           0 :                         spdk_json_write_string(w, spdk_nvme_dhchap_get_dhgroup_name(i));
    8535             :                 }
    8536             :         }
    8537             : 
    8538           0 :         spdk_json_write_array_end(w);
    8539           0 :         spdk_json_write_object_end(w);
    8540             : 
    8541           0 :         spdk_json_write_object_end(w);
    8542           0 : }
    8543             : 
    8544             : static void
    8545           0 : bdev_nvme_discovery_config_json(struct spdk_json_write_ctx *w, struct discovery_ctx *ctx)
    8546             : {
    8547           0 :         struct spdk_nvme_transport_id trid;
    8548             : 
    8549           0 :         spdk_json_write_object_begin(w);
    8550             : 
    8551           0 :         spdk_json_write_named_string(w, "method", "bdev_nvme_start_discovery");
    8552             : 
    8553           0 :         spdk_json_write_named_object_begin(w, "params");
    8554           0 :         spdk_json_write_named_string(w, "name", ctx->name);
    8555           0 :         spdk_json_write_named_string(w, "hostnqn", ctx->hostnqn);
    8556             : 
    8557           0 :         trid = ctx->trid;
    8558           0 :         memset(trid.subnqn, 0, sizeof(trid.subnqn));
    8559           0 :         nvme_bdev_dump_trid_json(&trid, w);
    8560             : 
    8561           0 :         spdk_json_write_named_bool(w, "wait_for_attach", ctx->wait_for_attach);
    8562           0 :         spdk_json_write_named_int32(w, "ctrlr_loss_timeout_sec", ctx->bdev_opts.ctrlr_loss_timeout_sec);
    8563           0 :         spdk_json_write_named_uint32(w, "reconnect_delay_sec", ctx->bdev_opts.reconnect_delay_sec);
    8564           0 :         spdk_json_write_named_uint32(w, "fast_io_fail_timeout_sec",
    8565             :                                      ctx->bdev_opts.fast_io_fail_timeout_sec);
    8566           0 :         spdk_json_write_object_end(w);
    8567             : 
    8568           0 :         spdk_json_write_object_end(w);
    8569           0 : }
    8570             : 
    8571             : #ifdef SPDK_CONFIG_NVME_CUSE
    8572             : static void
    8573           0 : nvme_ctrlr_cuse_config_json(struct spdk_json_write_ctx *w,
    8574             :                             struct nvme_ctrlr *nvme_ctrlr)
    8575           0 : {
    8576           0 :         size_t cuse_name_size = 128;
    8577           0 :         char cuse_name[cuse_name_size];
    8578             : 
    8579           0 :         if (spdk_nvme_cuse_get_ctrlr_name(nvme_ctrlr->ctrlr,
    8580             :                                           cuse_name, &cuse_name_size) != 0) {
    8581           0 :                 return;
    8582             :         }
    8583             : 
    8584           0 :         spdk_json_write_object_begin(w);
    8585             : 
    8586           0 :         spdk_json_write_named_string(w, "method", "bdev_nvme_cuse_register");
    8587             : 
    8588           0 :         spdk_json_write_named_object_begin(w, "params");
    8589           0 :         spdk_json_write_named_string(w, "name", nvme_ctrlr->nbdev_ctrlr->name);
    8590           0 :         spdk_json_write_object_end(w);
    8591             : 
    8592           0 :         spdk_json_write_object_end(w);
    8593             : }
    8594             : #endif
    8595             : 
    8596             : static void
    8597           0 : nvme_ctrlr_config_json(struct spdk_json_write_ctx *w,
    8598             :                        struct nvme_ctrlr *nvme_ctrlr,
    8599             :                        struct nvme_path_id *path_id)
    8600             : {
    8601             :         struct spdk_nvme_transport_id   *trid;
    8602             :         const struct spdk_nvme_ctrlr_opts *opts;
    8603             : 
    8604           0 :         if (nvme_ctrlr->opts.from_discovery_service) {
    8605             :                 /* Do not emit an RPC for this - it will be implicitly
    8606             :                  * covered by a separate bdev_nvme_start_discovery or
    8607             :                  * bdev_nvme_start_mdns_discovery RPC.
    8608             :                  */
    8609           0 :                 return;
    8610             :         }
    8611             : 
    8612           0 :         trid = &path_id->trid;
    8613             : 
    8614           0 :         spdk_json_write_object_begin(w);
    8615             : 
    8616           0 :         spdk_json_write_named_string(w, "method", "bdev_nvme_attach_controller");
    8617             : 
    8618           0 :         spdk_json_write_named_object_begin(w, "params");
    8619           0 :         spdk_json_write_named_string(w, "name", nvme_ctrlr->nbdev_ctrlr->name);
    8620           0 :         nvme_bdev_dump_trid_json(trid, w);
    8621           0 :         spdk_json_write_named_bool(w, "prchk_reftag",
    8622           0 :                                    (nvme_ctrlr->opts.prchk_flags & SPDK_NVME_IO_FLAGS_PRCHK_REFTAG) != 0);
    8623           0 :         spdk_json_write_named_bool(w, "prchk_guard",
    8624           0 :                                    (nvme_ctrlr->opts.prchk_flags & SPDK_NVME_IO_FLAGS_PRCHK_GUARD) != 0);
    8625           0 :         spdk_json_write_named_int32(w, "ctrlr_loss_timeout_sec", nvme_ctrlr->opts.ctrlr_loss_timeout_sec);
    8626           0 :         spdk_json_write_named_uint32(w, "reconnect_delay_sec", nvme_ctrlr->opts.reconnect_delay_sec);
    8627           0 :         spdk_json_write_named_uint32(w, "fast_io_fail_timeout_sec",
    8628             :                                      nvme_ctrlr->opts.fast_io_fail_timeout_sec);
    8629           0 :         if (nvme_ctrlr->psk != NULL) {
    8630           0 :                 spdk_json_write_named_string(w, "psk", spdk_key_get_name(nvme_ctrlr->psk));
    8631             :         }
    8632           0 :         if (nvme_ctrlr->dhchap_key != NULL) {
    8633           0 :                 spdk_json_write_named_string(w, "dhchap_key",
    8634             :                                              spdk_key_get_name(nvme_ctrlr->dhchap_key));
    8635             :         }
    8636           0 :         if (nvme_ctrlr->dhchap_ctrlr_key != NULL) {
    8637           0 :                 spdk_json_write_named_string(w, "dhchap_ctrlr_key",
    8638             :                                              spdk_key_get_name(nvme_ctrlr->dhchap_ctrlr_key));
    8639             :         }
    8640           0 :         opts = spdk_nvme_ctrlr_get_opts(nvme_ctrlr->ctrlr);
    8641           0 :         spdk_json_write_named_string(w, "hostnqn", opts->hostnqn);
    8642           0 :         spdk_json_write_named_bool(w, "hdgst", opts->header_digest);
    8643           0 :         spdk_json_write_named_bool(w, "ddgst", opts->data_digest);
    8644           0 :         if (opts->src_addr[0] != '\0') {
    8645           0 :                 spdk_json_write_named_string(w, "hostaddr", opts->src_addr);
    8646             :         }
    8647           0 :         if (opts->src_svcid[0] != '\0') {
    8648           0 :                 spdk_json_write_named_string(w, "hostsvcid", opts->src_svcid);
    8649             :         }
    8650             : 
    8651           0 :         if (nvme_ctrlr->opts.multipath) {
    8652           0 :                 spdk_json_write_named_string(w, "multipath", "multipath");
    8653             :         }
    8654           0 :         spdk_json_write_object_end(w);
    8655             : 
    8656           0 :         spdk_json_write_object_end(w);
    8657             : }
    8658             : 
    8659             : static void
    8660           0 : bdev_nvme_hotplug_config_json(struct spdk_json_write_ctx *w)
    8661             : {
    8662           0 :         spdk_json_write_object_begin(w);
    8663           0 :         spdk_json_write_named_string(w, "method", "bdev_nvme_set_hotplug");
    8664             : 
    8665           0 :         spdk_json_write_named_object_begin(w, "params");
    8666           0 :         spdk_json_write_named_uint64(w, "period_us", g_nvme_hotplug_poll_period_us);
    8667           0 :         spdk_json_write_named_bool(w, "enable", g_nvme_hotplug_enabled);
    8668           0 :         spdk_json_write_object_end(w);
    8669             : 
    8670           0 :         spdk_json_write_object_end(w);
    8671           0 : }
    8672             : 
    8673             : static int
    8674           0 : bdev_nvme_config_json(struct spdk_json_write_ctx *w)
    8675             : {
    8676             :         struct nvme_bdev_ctrlr  *nbdev_ctrlr;
    8677             :         struct nvme_ctrlr       *nvme_ctrlr;
    8678             :         struct discovery_ctx    *ctx;
    8679             :         struct nvme_path_id     *path_id;
    8680             : 
    8681           0 :         bdev_nvme_opts_config_json(w);
    8682             : 
    8683           0 :         pthread_mutex_lock(&g_bdev_nvme_mutex);
    8684             : 
    8685           0 :         TAILQ_FOREACH(nbdev_ctrlr, &g_nvme_bdev_ctrlrs, tailq) {
    8686           0 :                 TAILQ_FOREACH(nvme_ctrlr, &nbdev_ctrlr->ctrlrs, tailq) {
    8687           0 :                         path_id = nvme_ctrlr->active_path_id;
    8688           0 :                         assert(path_id == TAILQ_FIRST(&nvme_ctrlr->trids));
    8689           0 :                         nvme_ctrlr_config_json(w, nvme_ctrlr, path_id);
    8690             : 
    8691           0 :                         path_id = TAILQ_NEXT(path_id, link);
    8692           0 :                         while (path_id != NULL) {
    8693           0 :                                 nvme_ctrlr_config_json(w, nvme_ctrlr, path_id);
    8694           0 :                                 path_id = TAILQ_NEXT(path_id, link);
    8695             :                         }
    8696             : 
    8697             : #ifdef SPDK_CONFIG_NVME_CUSE
    8698           0 :                         nvme_ctrlr_cuse_config_json(w, nvme_ctrlr);
    8699             : #endif
    8700             :                 }
    8701             :         }
    8702             : 
    8703           0 :         TAILQ_FOREACH(ctx, &g_discovery_ctxs, tailq) {
    8704           0 :                 if (!ctx->from_mdns_discovery_service) {
    8705           0 :                         bdev_nvme_discovery_config_json(w, ctx);
    8706             :                 }
    8707             :         }
    8708             : 
    8709           0 :         bdev_nvme_mdns_discovery_config_json(w);
    8710             : 
    8711             :         /* Dump as last parameter to give all NVMe bdevs chance to be constructed
    8712             :          * before enabling hotplug poller.
    8713             :          */
    8714           0 :         bdev_nvme_hotplug_config_json(w);
    8715             : 
    8716           0 :         pthread_mutex_unlock(&g_bdev_nvme_mutex);
    8717           0 :         return 0;
    8718             : }
    8719             : 
    8720             : struct spdk_nvme_ctrlr *
    8721           1 : bdev_nvme_get_ctrlr(struct spdk_bdev *bdev)
    8722             : {
    8723             :         struct nvme_bdev *nbdev;
    8724             :         struct nvme_ns *nvme_ns;
    8725             : 
    8726           1 :         if (!bdev || bdev->module != &nvme_if) {
    8727           0 :                 return NULL;
    8728             :         }
    8729             : 
    8730           1 :         nbdev = SPDK_CONTAINEROF(bdev, struct nvme_bdev, disk);
    8731           1 :         nvme_ns = TAILQ_FIRST(&nbdev->nvme_ns_list);
    8732           1 :         assert(nvme_ns != NULL);
    8733             : 
    8734           1 :         return nvme_ns->ctrlr->ctrlr;
    8735             : }
    8736             : 
    8737             : static bool
    8738          12 : nvme_io_path_is_current(struct nvme_io_path *io_path)
    8739             : {
    8740             :         const struct nvme_bdev_channel *nbdev_ch;
    8741             :         bool current;
    8742             : 
    8743          12 :         if (!nvme_io_path_is_available(io_path)) {
    8744           4 :                 return false;
    8745             :         }
    8746             : 
    8747           8 :         nbdev_ch = io_path->nbdev_ch;
    8748           8 :         if (nbdev_ch == NULL) {
    8749           1 :                 current = false;
    8750           7 :         } else if (nbdev_ch->mp_policy == BDEV_NVME_MP_POLICY_ACTIVE_ACTIVE) {
    8751           3 :                 struct nvme_io_path *optimized_io_path = NULL;
    8752             : 
    8753           6 :                 STAILQ_FOREACH(optimized_io_path, &nbdev_ch->io_path_list, stailq) {
    8754           5 :                         if (optimized_io_path->nvme_ns->ana_state == SPDK_NVME_ANA_OPTIMIZED_STATE) {
    8755           2 :                                 break;
    8756             :                         }
    8757             :                 }
    8758             : 
    8759             :                 /* A non-optimized path is only current if there are no optimized paths. */
    8760           3 :                 current = (io_path->nvme_ns->ana_state == SPDK_NVME_ANA_OPTIMIZED_STATE) ||
    8761             :                           (optimized_io_path == NULL);
    8762             :         } else {
    8763           4 :                 if (nbdev_ch->current_io_path) {
    8764           1 :                         current = (io_path == nbdev_ch->current_io_path);
    8765             :                 } else {
    8766             :                         struct nvme_io_path *first_path;
    8767             : 
    8768             :                         /* We arrived here as there are no optimized paths for active-passive
    8769             :                          * mode. Check if this io_path is the first one available on the list.
    8770             :                          */
    8771           3 :                         current = false;
    8772           3 :                         STAILQ_FOREACH(first_path, &nbdev_ch->io_path_list, stailq) {
    8773           3 :                                 if (nvme_io_path_is_available(first_path)) {
    8774           3 :                                         current = (io_path == first_path);
    8775           3 :                                         break;
    8776             :                                 }
    8777             :                         }
    8778             :                 }
    8779             :         }
    8780             : 
    8781           8 :         return current;
    8782             : }
    8783             : 
    8784             : static struct nvme_ctrlr *
    8785           0 : bdev_nvme_next_ctrlr_unsafe(struct nvme_bdev_ctrlr *nbdev_ctrlr, struct nvme_ctrlr *prev)
    8786             : {
    8787             :         struct nvme_ctrlr *next;
    8788             : 
    8789             :         /* Must be called under g_bdev_nvme_mutex */
    8790           0 :         next = prev != NULL ? TAILQ_NEXT(prev, tailq) : TAILQ_FIRST(&nbdev_ctrlr->ctrlrs);
    8791           0 :         while (next != NULL) {
    8792             :                 /* ref can be 0 when the ctrlr was released, but hasn't been detached yet */
    8793           0 :                 pthread_mutex_lock(&next->mutex);
    8794           0 :                 if (next->ref > 0) {
    8795           0 :                         next->ref++;
    8796           0 :                         pthread_mutex_unlock(&next->mutex);
    8797           0 :                         return next;
    8798             :                 }
    8799             : 
    8800           0 :                 pthread_mutex_unlock(&next->mutex);
    8801           0 :                 next = TAILQ_NEXT(next, tailq);
    8802             :         }
    8803             : 
    8804           0 :         return NULL;
    8805             : }
    8806             : 
    8807             : struct bdev_nvme_set_keys_ctx {
    8808             :         struct nvme_ctrlr       *nctrlr;
    8809             :         struct spdk_key         *dhchap_key;
    8810             :         struct spdk_key         *dhchap_ctrlr_key;
    8811             :         struct spdk_thread      *thread;
    8812             :         bdev_nvme_set_keys_cb   cb_fn;
    8813             :         void                    *cb_ctx;
    8814             :         int                     status;
    8815             : };
    8816             : 
    8817             : static void
    8818           0 : bdev_nvme_free_set_keys_ctx(struct bdev_nvme_set_keys_ctx *ctx)
    8819             : {
    8820           0 :         if (ctx == NULL) {
    8821           0 :                 return;
    8822             :         }
    8823             : 
    8824           0 :         spdk_keyring_put_key(ctx->dhchap_key);
    8825           0 :         spdk_keyring_put_key(ctx->dhchap_ctrlr_key);
    8826           0 :         free(ctx);
    8827             : }
    8828             : 
    8829             : static void
    8830           0 : _bdev_nvme_set_keys_done(void *_ctx)
    8831             : {
    8832           0 :         struct bdev_nvme_set_keys_ctx *ctx = _ctx;
    8833             : 
    8834           0 :         ctx->cb_fn(ctx->cb_ctx, ctx->status);
    8835             : 
    8836           0 :         if (ctx->nctrlr != NULL) {
    8837           0 :                 nvme_ctrlr_release(ctx->nctrlr);
    8838             :         }
    8839           0 :         bdev_nvme_free_set_keys_ctx(ctx);
    8840           0 : }
    8841             : 
    8842             : static void
    8843           0 : bdev_nvme_set_keys_done(struct bdev_nvme_set_keys_ctx *ctx, int status)
    8844             : {
    8845           0 :         ctx->status = status;
    8846           0 :         spdk_thread_exec_msg(ctx->thread, _bdev_nvme_set_keys_done, ctx);
    8847           0 : }
    8848             : 
    8849             : static void bdev_nvme_authenticate_ctrlr(struct bdev_nvme_set_keys_ctx *ctx);
    8850             : 
    8851             : static void
    8852           0 : bdev_nvme_authenticate_ctrlr_continue(struct bdev_nvme_set_keys_ctx *ctx)
    8853             : {
    8854             :         struct nvme_ctrlr *next;
    8855             : 
    8856           0 :         pthread_mutex_lock(&g_bdev_nvme_mutex);
    8857           0 :         next = bdev_nvme_next_ctrlr_unsafe(NULL, ctx->nctrlr);
    8858           0 :         pthread_mutex_unlock(&g_bdev_nvme_mutex);
    8859             : 
    8860           0 :         nvme_ctrlr_release(ctx->nctrlr);
    8861           0 :         ctx->nctrlr = next;
    8862             : 
    8863           0 :         if (next == NULL) {
    8864           0 :                 bdev_nvme_set_keys_done(ctx, 0);
    8865             :         } else {
    8866           0 :                 bdev_nvme_authenticate_ctrlr(ctx);
    8867             :         }
    8868           0 : }
    8869             : 
    /*
     * Completion of the per-channel qpair authentication pass for one
     * controller.  A non-zero status ends the whole operation with that status;
     * zero moves on to the next controller.
     */
    static void
    bdev_nvme_authenticate_qpairs_done(struct spdk_io_channel_iter *i, int status)
    {
            struct bdev_nvme_set_keys_ctx *keys_ctx = spdk_io_channel_iter_get_ctx(i);

            if (status == 0) {
                    bdev_nvme_authenticate_ctrlr_continue(keys_ctx);
            } else {
                    bdev_nvme_set_keys_done(keys_ctx, status);
            }
    }
    8881             : 
    /*
     * Callback for a single qpair authentication.  ctx is the channel iterator
     * that was passed to spdk_nvme_qpair_authenticate(); resume the iteration
     * with the reported status.
     */
    static void
    bdev_nvme_authenticate_qpair_done(void *ctx, int status)
    {
            struct spdk_io_channel_iter *iter = ctx;

            spdk_for_each_channel_continue(iter, status);
    }
    8887             : 
    8888             : static void
    8889           0 : bdev_nvme_authenticate_qpair(struct spdk_io_channel_iter *i)
    8890             : {
    8891           0 :         struct spdk_io_channel *ch = spdk_io_channel_iter_get_channel(i);
    8892           0 :         struct nvme_ctrlr_channel *ctrlr_ch = spdk_io_channel_get_ctx(ch);
    8893           0 :         struct nvme_qpair *qpair = ctrlr_ch->qpair;
    8894             :         int rc;
    8895             : 
    8896           0 :         if (!nvme_qpair_is_connected(qpair)) {
    8897           0 :                 spdk_for_each_channel_continue(i, 0);
    8898           0 :                 return;
    8899             :         }
    8900             : 
    8901           0 :         rc = spdk_nvme_qpair_authenticate(qpair->qpair, bdev_nvme_authenticate_qpair_done, i);
    8902           0 :         if (rc != 0) {
    8903           0 :                 spdk_for_each_channel_continue(i, rc);
    8904             :         }
    8905             : }
    8906             : 
    8907             : static void
    8908           0 : bdev_nvme_authenticate_ctrlr_done(void *_ctx, int status)
    8909             : {
    8910           0 :         struct bdev_nvme_set_keys_ctx *ctx = _ctx;
    8911             : 
    8912           0 :         if (status != 0) {
    8913           0 :                 bdev_nvme_set_keys_done(ctx, status);
    8914           0 :                 return;
    8915             :         }
    8916             : 
    8917           0 :         spdk_for_each_channel(ctx->nctrlr, bdev_nvme_authenticate_qpair, ctx,
    8918             :                               bdev_nvme_authenticate_qpairs_done);
    8919             : }
    8920             : 
    8921             : static void
    8922           0 : bdev_nvme_authenticate_ctrlr(struct bdev_nvme_set_keys_ctx *ctx)
    8923             : {
    8924           0 :         struct spdk_nvme_ctrlr_key_opts opts = {};
    8925           0 :         struct nvme_ctrlr *nctrlr = ctx->nctrlr;
    8926             :         int rc;
    8927             : 
    8928           0 :         opts.size = SPDK_SIZEOF(&opts, dhchap_ctrlr_key);
    8929           0 :         opts.dhchap_key = ctx->dhchap_key;
    8930           0 :         opts.dhchap_ctrlr_key = ctx->dhchap_ctrlr_key;
    8931           0 :         rc = spdk_nvme_ctrlr_set_keys(nctrlr->ctrlr, &opts);
    8932           0 :         if (rc != 0) {
    8933           0 :                 bdev_nvme_set_keys_done(ctx, rc);
    8934           0 :                 return;
    8935             :         }
    8936             : 
    8937           0 :         if (ctx->dhchap_key != NULL) {
    8938           0 :                 rc = spdk_nvme_ctrlr_authenticate(nctrlr->ctrlr,
    8939             :                                                   bdev_nvme_authenticate_ctrlr_done, ctx);
    8940           0 :                 if (rc != 0) {
    8941           0 :                         bdev_nvme_set_keys_done(ctx, rc);
    8942             :                 }
    8943             :         } else {
    8944           0 :                 bdev_nvme_authenticate_ctrlr_continue(ctx);
    8945             :         }
    8946             : }
    8947             : 
    8948             : int
    8949           0 : bdev_nvme_set_keys(const char *name, const char *dhchap_key, const char *dhchap_ctrlr_key,
    8950             :                    bdev_nvme_set_keys_cb cb_fn, void *cb_ctx)
    8951             : {
    8952             :         struct bdev_nvme_set_keys_ctx *ctx;
    8953             :         struct nvme_bdev_ctrlr *nbdev_ctrlr;
    8954             :         struct nvme_ctrlr *nctrlr;
    8955             : 
    8956           0 :         ctx = calloc(1, sizeof(*ctx));
    8957           0 :         if (ctx == NULL) {
    8958           0 :                 return -ENOMEM;
    8959             :         }
    8960             : 
    8961           0 :         if (dhchap_key != NULL) {
    8962           0 :                 ctx->dhchap_key = spdk_keyring_get_key(dhchap_key);
    8963           0 :                 if (ctx->dhchap_key == NULL) {
    8964           0 :                         SPDK_ERRLOG("Could not find key %s for bdev %s\n", dhchap_key, name);
    8965           0 :                         bdev_nvme_free_set_keys_ctx(ctx);
    8966           0 :                         return -ENOKEY;
    8967             :                 }
    8968             :         }
    8969           0 :         if (dhchap_ctrlr_key != NULL) {
    8970           0 :                 ctx->dhchap_ctrlr_key = spdk_keyring_get_key(dhchap_ctrlr_key);
    8971           0 :                 if (ctx->dhchap_ctrlr_key == NULL) {
    8972           0 :                         SPDK_ERRLOG("Could not find key %s for bdev %s\n", dhchap_ctrlr_key, name);
    8973           0 :                         bdev_nvme_free_set_keys_ctx(ctx);
    8974           0 :                         return -ENOKEY;
    8975             :                 }
    8976             :         }
    8977             : 
    8978           0 :         pthread_mutex_lock(&g_bdev_nvme_mutex);
    8979           0 :         nbdev_ctrlr = nvme_bdev_ctrlr_get_by_name(name);
    8980           0 :         if (nbdev_ctrlr == NULL) {
    8981           0 :                 SPDK_ERRLOG("Could not find bdev_ctrlr %s\n", name);
    8982           0 :                 pthread_mutex_unlock(&g_bdev_nvme_mutex);
    8983           0 :                 bdev_nvme_free_set_keys_ctx(ctx);
    8984           0 :                 return -ENODEV;
    8985             :         }
    8986           0 :         nctrlr = bdev_nvme_next_ctrlr_unsafe(nbdev_ctrlr, NULL);
    8987           0 :         if (nctrlr == NULL) {
    8988           0 :                 SPDK_ERRLOG("Could not find any nvme_ctrlrs on bdev_ctrlr %s\n", name);
    8989           0 :                 pthread_mutex_unlock(&g_bdev_nvme_mutex);
    8990           0 :                 bdev_nvme_free_set_keys_ctx(ctx);
    8991           0 :                 return -ENODEV;
    8992             :         }
    8993           0 :         pthread_mutex_unlock(&g_bdev_nvme_mutex);
    8994             : 
    8995           0 :         ctx->nctrlr = nctrlr;
    8996           0 :         ctx->cb_fn = cb_fn;
    8997           0 :         ctx->cb_ctx = cb_ctx;
    8998           0 :         ctx->thread = spdk_get_thread();
    8999             : 
    9000           0 :         bdev_nvme_authenticate_ctrlr(ctx);
    9001             : 
    9002           0 :         return 0;
    9003             : }
    9004             : 
    9005             : void
    9006           0 : nvme_io_path_info_json(struct spdk_json_write_ctx *w, struct nvme_io_path *io_path)
    9007             : {
    9008           0 :         struct nvme_ns *nvme_ns = io_path->nvme_ns;
    9009           0 :         struct nvme_ctrlr *nvme_ctrlr = io_path->qpair->ctrlr;
    9010             :         const struct spdk_nvme_ctrlr_data *cdata;
    9011             :         const struct spdk_nvme_transport_id *trid;
    9012             :         const char *adrfam_str;
    9013             : 
    9014           0 :         spdk_json_write_object_begin(w);
    9015             : 
    9016           0 :         spdk_json_write_named_string(w, "bdev_name", nvme_ns->bdev->disk.name);
    9017             : 
    9018           0 :         cdata = spdk_nvme_ctrlr_get_data(nvme_ctrlr->ctrlr);
    9019           0 :         trid = spdk_nvme_ctrlr_get_transport_id(nvme_ctrlr->ctrlr);
    9020             : 
    9021           0 :         spdk_json_write_named_uint32(w, "cntlid", cdata->cntlid);
    9022           0 :         spdk_json_write_named_bool(w, "current", nvme_io_path_is_current(io_path));
    9023           0 :         spdk_json_write_named_bool(w, "connected", nvme_qpair_is_connected(io_path->qpair));
    9024           0 :         spdk_json_write_named_bool(w, "accessible", nvme_ns_is_accessible(nvme_ns));
    9025             : 
    9026           0 :         spdk_json_write_named_object_begin(w, "transport");
    9027           0 :         spdk_json_write_named_string(w, "trtype", trid->trstring);
    9028           0 :         spdk_json_write_named_string(w, "traddr", trid->traddr);
    9029           0 :         if (trid->trsvcid[0] != '\0') {
    9030           0 :                 spdk_json_write_named_string(w, "trsvcid", trid->trsvcid);
    9031             :         }
    9032           0 :         adrfam_str = spdk_nvme_transport_id_adrfam_str(trid->adrfam);
    9033           0 :         if (adrfam_str) {
    9034           0 :                 spdk_json_write_named_string(w, "adrfam", adrfam_str);
    9035             :         }
    9036           0 :         spdk_json_write_object_end(w);
    9037             : 
    9038           0 :         spdk_json_write_object_end(w);
    9039           0 : }
    9040             : 
    9041             : void
    9042           0 : bdev_nvme_get_discovery_info(struct spdk_json_write_ctx *w)
    9043             : {
    9044             :         struct discovery_ctx *ctx;
    9045             :         struct discovery_entry_ctx *entry_ctx;
    9046             : 
    9047           0 :         spdk_json_write_array_begin(w);
    9048           0 :         TAILQ_FOREACH(ctx, &g_discovery_ctxs, tailq) {
    9049           0 :                 spdk_json_write_object_begin(w);
    9050           0 :                 spdk_json_write_named_string(w, "name", ctx->name);
    9051             : 
    9052           0 :                 spdk_json_write_named_object_begin(w, "trid");
    9053           0 :                 nvme_bdev_dump_trid_json(&ctx->trid, w);
    9054           0 :                 spdk_json_write_object_end(w);
    9055             : 
    9056           0 :                 spdk_json_write_named_array_begin(w, "referrals");
    9057           0 :                 TAILQ_FOREACH(entry_ctx, &ctx->discovery_entry_ctxs, tailq) {
    9058           0 :                         spdk_json_write_object_begin(w);
    9059           0 :                         spdk_json_write_named_object_begin(w, "trid");
    9060           0 :                         nvme_bdev_dump_trid_json(&entry_ctx->trid, w);
    9061           0 :                         spdk_json_write_object_end(w);
    9062           0 :                         spdk_json_write_object_end(w);
    9063             :                 }
    9064           0 :                 spdk_json_write_array_end(w);
    9065             : 
    9066           0 :                 spdk_json_write_object_end(w);
    9067             :         }
    9068           0 :         spdk_json_write_array_end(w);
    9069           0 : }
    9070             : 
    9071           1 : SPDK_LOG_REGISTER_COMPONENT(bdev_nvme)
    9072             : 
    9073             : static void
    9074           0 : bdev_nvme_trace(void)
    9075             : {
    9076           0 :         struct spdk_trace_tpoint_opts opts[] = {
    9077             :                 {
    9078             :                         "BDEV_NVME_IO_START", TRACE_BDEV_NVME_IO_START,
    9079             :                         OWNER_TYPE_NONE, OBJECT_BDEV_NVME_IO, 1,
    9080             :                         {{ "ctx", SPDK_TRACE_ARG_TYPE_PTR, 8 }}
    9081             :                 },
    9082             :                 {
    9083             :                         "BDEV_NVME_IO_DONE", TRACE_BDEV_NVME_IO_DONE,
    9084             :                         OWNER_TYPE_NONE, OBJECT_BDEV_NVME_IO, 0,
    9085             :                         {{ "ctx", SPDK_TRACE_ARG_TYPE_PTR, 8 }}
    9086             :                 }
    9087             :         };
    9088             : 
    9089             : 
    9090           0 :         spdk_trace_register_object(OBJECT_BDEV_NVME_IO, 'N');
    9091           0 :         spdk_trace_register_description_ext(opts, SPDK_COUNTOF(opts));
    9092           0 :         spdk_trace_tpoint_register_relation(TRACE_NVME_PCIE_SUBMIT, OBJECT_BDEV_NVME_IO, 0);
    9093           0 :         spdk_trace_tpoint_register_relation(TRACE_NVME_TCP_SUBMIT, OBJECT_BDEV_NVME_IO, 0);
    9094           0 :         spdk_trace_tpoint_register_relation(TRACE_NVME_PCIE_COMPLETE, OBJECT_BDEV_NVME_IO, 0);
    9095           0 :         spdk_trace_tpoint_register_relation(TRACE_NVME_TCP_COMPLETE, OBJECT_BDEV_NVME_IO, 0);
    9096           0 : }
    9097           1 : SPDK_TRACE_REGISTER_FN(bdev_nvme_trace, "bdev_nvme", TRACE_GROUP_BDEV_NVME)

Generated by: LCOV version 1.15