LCOV - code coverage report
Current view: top level - module/bdev/nvme - bdev_nvme.c (source / functions) Hit Total Coverage
Test: ut_cov_unit.info Lines: 2689 5101 52.7 %
Date: 2024-12-12 08:35:40 Functions: 226 326 69.3 %

          Line data    Source code
       1             : /*   SPDX-License-Identifier: BSD-3-Clause
       2             :  *   Copyright (C) 2016 Intel Corporation. All rights reserved.
       3             :  *   Copyright (c) 2019 Mellanox Technologies LTD. All rights reserved.
       4             :  *   Copyright (c) 2021-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
       5             :  *   Copyright (c) 2022 Dell Inc, or its subsidiaries. All rights reserved.
       6             :  */
       7             : 
       8             : #include "spdk/stdinc.h"
       9             : 
      10             : #include "bdev_nvme.h"
      11             : 
      12             : #include "spdk/accel.h"
      13             : #include "spdk/config.h"
      14             : #include "spdk/endian.h"
      15             : #include "spdk/bdev.h"
      16             : #include "spdk/json.h"
      17             : #include "spdk/keyring.h"
      18             : #include "spdk/likely.h"
      19             : #include "spdk/nvme.h"
      20             : #include "spdk/nvme_ocssd.h"
      21             : #include "spdk/nvme_zns.h"
      22             : #include "spdk/opal.h"
      23             : #include "spdk/thread.h"
      24             : #include "spdk/trace.h"
      25             : #include "spdk/string.h"
      26             : #include "spdk/util.h"
      27             : #include "spdk/uuid.h"
      28             : 
      29             : #include "spdk/bdev_module.h"
      30             : #include "spdk/log.h"
      31             : 
      32             : #include "spdk_internal/usdt.h"
      33             : #include "spdk_internal/trace_defs.h"
      34             : 
      35             : #define CTRLR_STRING(nvme_ctrlr) \
      36             :         (spdk_nvme_trtype_is_fabrics(nvme_ctrlr->active_path_id->trid.trtype) ? \
      37             :         nvme_ctrlr->active_path_id->trid.subnqn : nvme_ctrlr->active_path_id->trid.traddr)
      38             : 
      39             : #define CTRLR_ID(nvme_ctrlr)    (spdk_nvme_ctrlr_get_id(nvme_ctrlr->ctrlr))
      40             : 
      41             : #define NVME_CTRLR_ERRLOG(ctrlr, format, ...) \
      42             :         SPDK_ERRLOG("[%s, %u] " format, CTRLR_STRING(ctrlr), CTRLR_ID(ctrlr), ##__VA_ARGS__);
      43             : 
      44             : #define NVME_CTRLR_WARNLOG(ctrlr, format, ...) \
      45             :         SPDK_WARNLOG("[%s, %u] " format, CTRLR_STRING(ctrlr), CTRLR_ID(ctrlr), ##__VA_ARGS__);
      46             : 
      47             : #define NVME_CTRLR_NOTICELOG(ctrlr, format, ...) \
      48             :         SPDK_NOTICELOG("[%s, %u] " format, CTRLR_STRING(ctrlr), CTRLR_ID(ctrlr), ##__VA_ARGS__);
      49             : 
      50             : #define NVME_CTRLR_INFOLOG(ctrlr, format, ...) \
      51             :         SPDK_INFOLOG(bdev_nvme, "[%s, %u] " format, CTRLR_STRING(ctrlr), CTRLR_ID(ctrlr), ##__VA_ARGS__);
      52             : 
      53             : #ifdef DEBUG
      54             : #define NVME_CTRLR_DEBUGLOG(ctrlr, format, ...) \
      55             :         SPDK_DEBUGLOG(bdev_nvme, "[%s, %u] " format, CTRLR_STRING(ctrlr), CTRLR_ID(ctrlr), ##__VA_ARGS__);
      56             : #else
      57             : #define NVME_CTRLR_DEBUGLOG(ctrlr, ...) do { } while (0)
      58             : #endif
      59             : 
      60             : #define BDEV_STRING(nbdev) (nbdev->disk.name)
      61             : 
      62             : #define NVME_BDEV_ERRLOG(nbdev, format, ...) \
      63             :         SPDK_ERRLOG("[%s] " format, BDEV_STRING(nbdev), ##__VA_ARGS__);
      64             : 
      65             : #define NVME_BDEV_WARNLOG(nbdev, format, ...) \
      66             :         SPDK_WARNLOG("[%s] " format, BDEV_STRING(nbdev), ##__VA_ARGS__);
      67             : 
      68             : #define NVME_BDEV_NOTICELOG(nbdev, format, ...) \
      69             :         SPDK_NOTICELOG("[%s] " format, BDEV_STRING(nbdev), ##__VA_ARGS__);
      70             : 
      71             : #define NVME_BDEV_INFOLOG(nbdev, format, ...) \
      72             :         SPDK_INFOLOG(bdev_nvme, "[%s] " format, BDEV_STRING(nbdev), ##__VA_ARGS__);
      73             : 
      74             : #define SPDK_BDEV_NVME_DEFAULT_DELAY_CMD_SUBMIT true
      75             : #define SPDK_BDEV_NVME_DEFAULT_KEEP_ALIVE_TIMEOUT_IN_MS (10000)
      76             : 
      77             : #define NSID_STR_LEN 10
      78             : 
      79             : #define SPDK_CONTROLLER_NAME_MAX 512
      80             : 
      81             : static int bdev_nvme_config_json(struct spdk_json_write_ctx *w);
      82             : 
      83             : struct nvme_bdev_io {
      84             :         /** array of iovecs to transfer. */
      85             :         struct iovec *iovs;
      86             : 
      87             :         /** Number of iovecs in iovs array. */
      88             :         int iovcnt;
      89             : 
      90             :         /** Current iovec position. */
      91             :         int iovpos;
      92             : 
      93             :         /** Offset in current iovec. */
      94             :         uint32_t iov_offset;
      95             : 
      96             :         /** Offset in current iovec. */
      97             :         uint32_t fused_iov_offset;
      98             : 
      99             :         /** array of iovecs to transfer. */
     100             :         struct iovec *fused_iovs;
     101             : 
     102             :         /** Number of iovecs in iovs array. */
     103             :         int fused_iovcnt;
     104             : 
     105             :         /** Current iovec position. */
     106             :         int fused_iovpos;
     107             : 
     108             :         /** I/O path the current I/O or admin passthrough is submitted on, or the I/O path
     109             :          *  being reset in a reset I/O.
     110             :          */
     111             :         struct nvme_io_path *io_path;
     112             : 
     113             :         /** Saved status for admin passthru completion event, PI error verification, or intermediate compare-and-write status */
     114             :         struct spdk_nvme_cpl cpl;
     115             : 
     116             :         /** Extended IO opts passed by the user to bdev layer and mapped to NVME format */
     117             :         struct spdk_nvme_ns_cmd_ext_io_opts ext_opts;
     118             : 
     119             :         /** Keeps track if first of fused commands was submitted */
     120             :         bool first_fused_submitted;
     121             : 
     122             :         /** Keeps track if first of fused commands was completed */
     123             :         bool first_fused_completed;
     124             : 
     125             :         /* How many times the current I/O was retried. */
     126             :         int32_t retry_count;
     127             : 
     128             :         /** Expiration value in ticks to retry the current I/O. */
     129             :         uint64_t retry_ticks;
     130             : 
     131             :         /** Temporary pointer to zone report buffer */
     132             :         struct spdk_nvme_zns_zone_report *zone_report_buf;
     133             : 
     134             :         /** Keep track of how many zones that have been copied to the spdk_bdev_zone_info struct */
     135             :         uint64_t handled_zones;
     136             : 
     137             :         /* Current tsc at submit time. */
     138             :         uint64_t submit_tsc;
     139             : 
     140             :         /* Used to put nvme_bdev_io into the list */
     141             :         TAILQ_ENTRY(nvme_bdev_io) retry_link;
     142             : };
     143             : 
     144             : struct nvme_probe_skip_entry {
     145             :         struct spdk_nvme_transport_id           trid;
     146             :         TAILQ_ENTRY(nvme_probe_skip_entry)      tailq;
     147             : };
     148             : /* All the controllers deleted by users via RPC are skipped by hotplug monitor */
     149             : static TAILQ_HEAD(, nvme_probe_skip_entry) g_skipped_nvme_ctrlrs = TAILQ_HEAD_INITIALIZER(
     150             :                         g_skipped_nvme_ctrlrs);
     151             : 
     152             : #define BDEV_NVME_DEFAULT_DIGESTS (SPDK_BIT(SPDK_NVMF_DHCHAP_HASH_SHA256) | \
     153             :                                    SPDK_BIT(SPDK_NVMF_DHCHAP_HASH_SHA384) | \
     154             :                                    SPDK_BIT(SPDK_NVMF_DHCHAP_HASH_SHA512))
     155             : 
     156             : #define BDEV_NVME_DEFAULT_DHGROUPS (SPDK_BIT(SPDK_NVMF_DHCHAP_DHGROUP_NULL) | \
     157             :                                     SPDK_BIT(SPDK_NVMF_DHCHAP_DHGROUP_2048) | \
     158             :                                     SPDK_BIT(SPDK_NVMF_DHCHAP_DHGROUP_3072) | \
     159             :                                     SPDK_BIT(SPDK_NVMF_DHCHAP_DHGROUP_4096) | \
     160             :                                     SPDK_BIT(SPDK_NVMF_DHCHAP_DHGROUP_6144) | \
     161             :                                     SPDK_BIT(SPDK_NVMF_DHCHAP_DHGROUP_8192))
     162             : 
     163             : static struct spdk_bdev_nvme_opts g_opts = {
     164             :         .action_on_timeout = SPDK_BDEV_NVME_TIMEOUT_ACTION_NONE,
     165             :         .keep_alive_timeout_ms = SPDK_BDEV_NVME_DEFAULT_KEEP_ALIVE_TIMEOUT_IN_MS,
     166             :         .timeout_us = 0,
     167             :         .timeout_admin_us = 0,
     168             :         .transport_retry_count = 4,
     169             :         .arbitration_burst = 0,
     170             :         .low_priority_weight = 0,
     171             :         .medium_priority_weight = 0,
     172             :         .high_priority_weight = 0,
     173             :         .io_queue_requests = 0,
     174             :         .nvme_adminq_poll_period_us = 10000ULL,
     175             :         .nvme_ioq_poll_period_us = 0,
     176             :         .delay_cmd_submit = SPDK_BDEV_NVME_DEFAULT_DELAY_CMD_SUBMIT,
     177             :         .bdev_retry_count = 3,
     178             :         .ctrlr_loss_timeout_sec = 0,
     179             :         .reconnect_delay_sec = 0,
     180             :         .fast_io_fail_timeout_sec = 0,
     181             :         .transport_ack_timeout = 0,
     182             :         .disable_auto_failback = false,
     183             :         .generate_uuids = false,
     184             :         .transport_tos = 0,
     185             :         .nvme_error_stat = false,
     186             :         .io_path_stat = false,
     187             :         .allow_accel_sequence = false,
     188             :         .dhchap_digests = BDEV_NVME_DEFAULT_DIGESTS,
     189             :         .dhchap_dhgroups = BDEV_NVME_DEFAULT_DHGROUPS,
     190             :         .rdma_umr_per_io = false,
     191             : };
     192             : 
     193             : #define NVME_HOTPLUG_POLL_PERIOD_MAX                    10000000ULL
     194             : #define NVME_HOTPLUG_POLL_PERIOD_DEFAULT                100000ULL
     195             : 
     196             : static int g_hot_insert_nvme_controller_index = 0;
     197             : static uint64_t g_nvme_hotplug_poll_period_us = NVME_HOTPLUG_POLL_PERIOD_DEFAULT;
     198             : static bool g_nvme_hotplug_enabled = false;
     199             : struct spdk_thread *g_bdev_nvme_init_thread;
     200             : static struct spdk_poller *g_hotplug_poller;
     201             : static struct spdk_poller *g_hotplug_probe_poller;
     202             : static struct spdk_nvme_probe_ctx *g_hotplug_probe_ctx;
     203             : 
     204             : static void nvme_ctrlr_populate_namespaces(struct nvme_ctrlr *nvme_ctrlr,
     205             :                 struct nvme_async_probe_ctx *ctx);
     206             : static void nvme_ctrlr_populate_namespaces_done(struct nvme_ctrlr *nvme_ctrlr,
     207             :                 struct nvme_async_probe_ctx *ctx);
     208             : static int bdev_nvme_library_init(void);
     209             : static void bdev_nvme_library_fini(void);
     210             : static void _bdev_nvme_submit_request(struct nvme_bdev_channel *nbdev_ch,
     211             :                                       struct spdk_bdev_io *bdev_io);
     212             : static void bdev_nvme_submit_request(struct spdk_io_channel *ch,
     213             :                                      struct spdk_bdev_io *bdev_io);
     214             : static int bdev_nvme_readv(struct nvme_bdev_io *bio, struct iovec *iov, int iovcnt,
     215             :                            void *md, uint64_t lba_count, uint64_t lba,
     216             :                            uint32_t flags, struct spdk_memory_domain *domain, void *domain_ctx,
     217             :                            struct spdk_accel_sequence *seq);
     218             : static int bdev_nvme_no_pi_readv(struct nvme_bdev_io *bio, struct iovec *iov, int iovcnt,
     219             :                                  void *md, uint64_t lba_count, uint64_t lba);
     220             : static int bdev_nvme_writev(struct nvme_bdev_io *bio, struct iovec *iov, int iovcnt,
     221             :                             void *md, uint64_t lba_count, uint64_t lba,
     222             :                             uint32_t flags, struct spdk_memory_domain *domain, void *domain_ctx,
     223             :                             struct spdk_accel_sequence *seq,
     224             :                             union spdk_bdev_nvme_cdw12 cdw12, union spdk_bdev_nvme_cdw13 cdw13);
     225             : static int bdev_nvme_zone_appendv(struct nvme_bdev_io *bio, struct iovec *iov, int iovcnt,
     226             :                                   void *md, uint64_t lba_count,
     227             :                                   uint64_t zslba, uint32_t flags);
     228             : static int bdev_nvme_comparev(struct nvme_bdev_io *bio, struct iovec *iov, int iovcnt,
     229             :                               void *md, uint64_t lba_count, uint64_t lba,
     230             :                               uint32_t flags);
     231             : static int bdev_nvme_comparev_and_writev(struct nvme_bdev_io *bio,
     232             :                 struct iovec *cmp_iov, int cmp_iovcnt, struct iovec *write_iov,
     233             :                 int write_iovcnt, void *md, uint64_t lba_count, uint64_t lba,
     234             :                 uint32_t flags);
     235             : static int bdev_nvme_get_zone_info(struct nvme_bdev_io *bio, uint64_t zone_id,
     236             :                                    uint32_t num_zones, struct spdk_bdev_zone_info *info);
     237             : static int bdev_nvme_zone_management(struct nvme_bdev_io *bio, uint64_t zone_id,
     238             :                                      enum spdk_bdev_zone_action action);
     239             : static void bdev_nvme_admin_passthru(struct nvme_bdev_channel *nbdev_ch,
     240             :                                      struct nvme_bdev_io *bio,
     241             :                                      struct spdk_nvme_cmd *cmd, void *buf, size_t nbytes);
     242             : static int bdev_nvme_io_passthru(struct nvme_bdev_io *bio, struct spdk_nvme_cmd *cmd,
     243             :                                  void *buf, size_t nbytes);
     244             : static int bdev_nvme_io_passthru_md(struct nvme_bdev_io *bio, struct spdk_nvme_cmd *cmd,
     245             :                                     void *buf, size_t nbytes, void *md_buf, size_t md_len);
     246             : static int bdev_nvme_iov_passthru_md(struct nvme_bdev_io *bio, struct spdk_nvme_cmd *cmd,
     247             :                                      struct iovec *iov, int iovcnt, size_t nbytes,
     248             :                                      void *md_buf, size_t md_len);
     249             : static void bdev_nvme_abort(struct nvme_bdev_channel *nbdev_ch,
     250             :                             struct nvme_bdev_io *bio, struct nvme_bdev_io *bio_to_abort);
     251             : static void bdev_nvme_reset_io(struct nvme_bdev *nbdev, struct nvme_bdev_io *bio);
     252             : static int bdev_nvme_reset_ctrlr(struct nvme_ctrlr *nvme_ctrlr);
     253             : static int bdev_nvme_failover_ctrlr(struct nvme_ctrlr *nvme_ctrlr);
     254             : static void remove_cb(void *cb_ctx, struct spdk_nvme_ctrlr *ctrlr);
     255             : static int nvme_ctrlr_read_ana_log_page(struct nvme_ctrlr *nvme_ctrlr);
     256             : 
     257             : static struct nvme_ns *nvme_ns_alloc(void);
     258             : static void nvme_ns_free(struct nvme_ns *ns);
     259             : 
     260             : static int
     261         176 : nvme_ns_cmp(struct nvme_ns *ns1, struct nvme_ns *ns2)
     262             : {
     263         176 :         return ns1->id < ns2->id ? -1 : ns1->id > ns2->id;
     264             : }
     265             : 
     266        1088 : RB_GENERATE_STATIC(nvme_ns_tree, nvme_ns, node, nvme_ns_cmp);
     267             : 
     268             : struct spdk_nvme_qpair *
     269           1 : bdev_nvme_get_io_qpair(struct spdk_io_channel *ctrlr_io_ch)
     270             : {
     271             :         struct nvme_ctrlr_channel *ctrlr_ch;
     272             : 
     273           1 :         assert(ctrlr_io_ch != NULL);
     274             : 
     275           1 :         ctrlr_ch = spdk_io_channel_get_ctx(ctrlr_io_ch);
     276             : 
     277           1 :         return ctrlr_ch->qpair->qpair;
     278             : }
     279             : 
     280             : static int
     281           0 : bdev_nvme_get_ctx_size(void)
     282             : {
     283           0 :         return sizeof(struct nvme_bdev_io);
     284             : }
     285             : 
     286             : static struct spdk_bdev_module nvme_if = {
     287             :         .name = "nvme",
     288             :         .async_fini = true,
     289             :         .module_init = bdev_nvme_library_init,
     290             :         .module_fini = bdev_nvme_library_fini,
     291             :         .config_json = bdev_nvme_config_json,
     292             :         .get_ctx_size = bdev_nvme_get_ctx_size,
     293             : 
     294             : };
     295           1 : SPDK_BDEV_MODULE_REGISTER(nvme, &nvme_if)
     296             : 
     297             : struct nvme_bdev_ctrlrs g_nvme_bdev_ctrlrs = TAILQ_HEAD_INITIALIZER(g_nvme_bdev_ctrlrs);
     298             : pthread_mutex_t g_bdev_nvme_mutex = PTHREAD_MUTEX_INITIALIZER;
     299             : bool g_bdev_nvme_module_finish;
     300             : 
     301             : struct nvme_bdev_ctrlr *
     302         334 : nvme_bdev_ctrlr_get_by_name(const char *name)
     303             : {
     304             :         struct nvme_bdev_ctrlr *nbdev_ctrlr;
     305             : 
     306         334 :         TAILQ_FOREACH(nbdev_ctrlr, &g_nvme_bdev_ctrlrs, tailq) {
     307         172 :                 if (strcmp(name, nbdev_ctrlr->name) == 0) {
     308         172 :                         break;
     309             :                 }
     310           0 :         }
     311             : 
     312         334 :         return nbdev_ctrlr;
     313             : }
     314             : 
     315             : static struct nvme_ctrlr *
     316          59 : nvme_bdev_ctrlr_get_ctrlr(struct nvme_bdev_ctrlr *nbdev_ctrlr,
     317             :                           const struct spdk_nvme_transport_id *trid, const char *hostnqn)
     318             : {
     319             :         const struct spdk_nvme_ctrlr_opts *opts;
     320             :         struct nvme_ctrlr *nvme_ctrlr;
     321             : 
     322         100 :         TAILQ_FOREACH(nvme_ctrlr, &nbdev_ctrlr->ctrlrs, tailq) {
     323          75 :                 opts = spdk_nvme_ctrlr_get_opts(nvme_ctrlr->ctrlr);
     324          75 :                 if (spdk_nvme_transport_id_compare(trid, &nvme_ctrlr->active_path_id->trid) == 0 &&
     325          34 :                     strcmp(hostnqn, opts->hostnqn) == 0) {
     326          34 :                         break;
     327             :                 }
     328          41 :         }
     329             : 
     330          59 :         return nvme_ctrlr;
     331             : }
     332             : 
     333             : struct nvme_ctrlr *
     334           0 : nvme_bdev_ctrlr_get_ctrlr_by_id(struct nvme_bdev_ctrlr *nbdev_ctrlr,
     335             :                                 uint16_t cntlid)
     336             : {
     337             :         struct nvme_ctrlr *nvme_ctrlr;
     338             :         const struct spdk_nvme_ctrlr_data *cdata;
     339             : 
     340           0 :         TAILQ_FOREACH(nvme_ctrlr, &nbdev_ctrlr->ctrlrs, tailq) {
     341           0 :                 cdata = spdk_nvme_ctrlr_get_data(nvme_ctrlr->ctrlr);
     342           0 :                 if (cdata->cntlid == cntlid) {
     343           0 :                         break;
     344             :                 }
     345           0 :         }
     346             : 
     347           0 :         return nvme_ctrlr;
     348             : }
     349             : 
     350             : static struct nvme_bdev *
     351          75 : nvme_bdev_ctrlr_get_bdev(struct nvme_bdev_ctrlr *nbdev_ctrlr, uint32_t nsid)
     352             : {
     353             :         struct nvme_bdev *nbdev;
     354             : 
     355          75 :         pthread_mutex_lock(&g_bdev_nvme_mutex);
     356         109 :         TAILQ_FOREACH(nbdev, &nbdev_ctrlr->bdevs, tailq) {
     357          69 :                 if (nbdev->nsid == nsid) {
     358          35 :                         break;
     359             :                 }
     360          34 :         }
     361          75 :         pthread_mutex_unlock(&g_bdev_nvme_mutex);
     362             : 
     363          75 :         return nbdev;
     364             : }
     365             : 
     366             : struct nvme_ns *
     367         145 : nvme_ctrlr_get_ns(struct nvme_ctrlr *nvme_ctrlr, uint32_t nsid)
     368             : {
     369             :         struct nvme_ns ns;
     370             : 
     371         145 :         assert(nsid > 0);
     372             : 
     373         145 :         ns.id = nsid;
     374         145 :         return RB_FIND(nvme_ns_tree, &nvme_ctrlr->namespaces, &ns);
     375             : }
     376             : 
     377             : struct nvme_ns *
     378         164 : nvme_ctrlr_get_first_active_ns(struct nvme_ctrlr *nvme_ctrlr)
     379             : {
     380         164 :         return RB_MIN(nvme_ns_tree, &nvme_ctrlr->namespaces);
     381             : }
     382             : 
     383             : struct nvme_ns *
     384          74 : nvme_ctrlr_get_next_active_ns(struct nvme_ctrlr *nvme_ctrlr, struct nvme_ns *ns)
     385             : {
     386          74 :         if (ns == NULL) {
     387           0 :                 return NULL;
     388             :         }
     389             : 
     390          74 :         return RB_NEXT(nvme_ns_tree, &nvme_ctrlr->namespaces, ns);
     391          74 : }
     392             : 
     393             : static struct nvme_ctrlr *
     394          53 : nvme_ctrlr_get(const struct spdk_nvme_transport_id *trid, const char *hostnqn)
     395             : {
     396             :         struct nvme_bdev_ctrlr  *nbdev_ctrlr;
     397          53 :         struct nvme_ctrlr       *nvme_ctrlr = NULL;
     398             : 
     399          53 :         pthread_mutex_lock(&g_bdev_nvme_mutex);
     400          72 :         TAILQ_FOREACH(nbdev_ctrlr, &g_nvme_bdev_ctrlrs, tailq) {
     401          19 :                 nvme_ctrlr = nvme_bdev_ctrlr_get_ctrlr(nbdev_ctrlr, trid, hostnqn);
     402          19 :                 if (nvme_ctrlr != NULL) {
     403           0 :                         break;
     404             :                 }
     405          19 :         }
     406          53 :         pthread_mutex_unlock(&g_bdev_nvme_mutex);
     407             : 
     408          53 :         return nvme_ctrlr;
     409             : }
     410             : 
     411             : struct nvme_ctrlr *
     412         127 : nvme_ctrlr_get_by_name(const char *name)
     413             : {
     414             :         struct nvme_bdev_ctrlr *nbdev_ctrlr;
     415         127 :         struct nvme_ctrlr *nvme_ctrlr = NULL;
     416             : 
     417         127 :         if (name == NULL) {
     418           0 :                 return NULL;
     419             :         }
     420             : 
     421         127 :         pthread_mutex_lock(&g_bdev_nvme_mutex);
     422         127 :         nbdev_ctrlr = nvme_bdev_ctrlr_get_by_name(name);
     423         127 :         if (nbdev_ctrlr != NULL) {
     424          61 :                 nvme_ctrlr = TAILQ_FIRST(&nbdev_ctrlr->ctrlrs);
     425          61 :         }
     426         127 :         pthread_mutex_unlock(&g_bdev_nvme_mutex);
     427             : 
     428         127 :         return nvme_ctrlr;
     429         127 : }
     430             : 
     431             : void
     432           0 : nvme_bdev_ctrlr_for_each(nvme_bdev_ctrlr_for_each_fn fn, void *ctx)
     433             : {
     434             :         struct nvme_bdev_ctrlr *nbdev_ctrlr;
     435             : 
     436           0 :         pthread_mutex_lock(&g_bdev_nvme_mutex);
     437           0 :         TAILQ_FOREACH(nbdev_ctrlr, &g_nvme_bdev_ctrlrs, tailq) {
     438           0 :                 fn(nbdev_ctrlr, ctx);
     439           0 :         }
     440           0 :         pthread_mutex_unlock(&g_bdev_nvme_mutex);
     441           0 : }
     442             : 
     443             : struct nvme_ctrlr_channel_iter {
     444             :         nvme_ctrlr_for_each_channel_msg fn;
     445             :         nvme_ctrlr_for_each_channel_done cpl;
     446             :         struct spdk_io_channel_iter *i;
     447             :         void *ctx;
     448             : };
     449             : 
     450             : void
     451         170 : nvme_ctrlr_for_each_channel_continue(struct nvme_ctrlr_channel_iter *iter, int status)
     452             : {
     453         170 :         spdk_for_each_channel_continue(iter->i, status);
     454         170 : }
     455             : 
     456             : static void
     457         170 : nvme_ctrlr_each_channel_msg(struct spdk_io_channel_iter *i)
     458             : {
     459         170 :         struct nvme_ctrlr_channel_iter *iter = spdk_io_channel_iter_get_ctx(i);
     460         170 :         struct nvme_ctrlr *nvme_ctrlr = spdk_io_channel_iter_get_io_device(i);
     461         170 :         struct spdk_io_channel *ch = spdk_io_channel_iter_get_channel(i);
     462         170 :         struct nvme_ctrlr_channel *ctrlr_ch = spdk_io_channel_get_ctx(ch);
     463             : 
     464         170 :         iter->i = i;
     465         170 :         iter->fn(iter, nvme_ctrlr, ctrlr_ch, iter->ctx);
     466         170 : }
     467             : 
     468             : static void
     469          99 : nvme_ctrlr_each_channel_cpl(struct spdk_io_channel_iter *i, int status)
     470             : {
     471          99 :         struct nvme_ctrlr_channel_iter *iter = spdk_io_channel_iter_get_ctx(i);
     472          99 :         struct nvme_ctrlr *nvme_ctrlr = spdk_io_channel_iter_get_io_device(i);
     473             : 
     474          99 :         iter->i = i;
     475          99 :         iter->cpl(nvme_ctrlr, iter->ctx, status);
     476             : 
     477          99 :         free(iter);
     478          99 : }
     479             : 
     480             : void
     481          99 : nvme_ctrlr_for_each_channel(struct nvme_ctrlr *nvme_ctrlr,
     482             :                             nvme_ctrlr_for_each_channel_msg fn, void *ctx,
     483             :                             nvme_ctrlr_for_each_channel_done cpl)
     484             : {
     485             :         struct nvme_ctrlr_channel_iter *iter;
     486             : 
     487          99 :         assert(nvme_ctrlr != NULL && fn != NULL);
     488             : 
     489          99 :         iter = calloc(1, sizeof(struct nvme_ctrlr_channel_iter));
     490          99 :         if (iter == NULL) {
     491           0 :                 SPDK_ERRLOG("Unable to allocate iterator\n");
     492           0 :                 assert(false);
     493             :                 return;
     494             :         }
     495             : 
     496          99 :         iter->fn = fn;
     497          99 :         iter->cpl = cpl;
     498          99 :         iter->ctx = ctx;
     499             : 
     500         198 :         spdk_for_each_channel(nvme_ctrlr, nvme_ctrlr_each_channel_msg,
     501          99 :                               iter, nvme_ctrlr_each_channel_cpl);
     502          99 : }
     503             : 
     504             : struct nvme_bdev_channel_iter {
     505             :         nvme_bdev_for_each_channel_msg fn;
     506             :         nvme_bdev_for_each_channel_done cpl;
     507             :         struct spdk_io_channel_iter *i;
     508             :         void *ctx;
     509             : };
     510             : 
     511             : void
     512          69 : nvme_bdev_for_each_channel_continue(struct nvme_bdev_channel_iter *iter, int status)
     513             : {
     514          69 :         spdk_for_each_channel_continue(iter->i, status);
     515          69 : }
     516             : 
     517             : static void
     518          69 : nvme_bdev_each_channel_msg(struct spdk_io_channel_iter *i)
     519             : {
     520          69 :         struct nvme_bdev_channel_iter *iter = spdk_io_channel_iter_get_ctx(i);
     521          69 :         struct nvme_bdev *nbdev = spdk_io_channel_iter_get_io_device(i);
     522          69 :         struct spdk_io_channel *ch = spdk_io_channel_iter_get_channel(i);
     523          69 :         struct nvme_bdev_channel *nbdev_ch = spdk_io_channel_get_ctx(ch);
     524             : 
     525          69 :         iter->i = i;
     526          69 :         iter->fn(iter, nbdev, nbdev_ch, iter->ctx);
     527          69 : }
     528             : 
     529             : static void
     530          60 : nvme_bdev_each_channel_cpl(struct spdk_io_channel_iter *i, int status)
     531             : {
     532          60 :         struct nvme_bdev_channel_iter *iter = spdk_io_channel_iter_get_ctx(i);
     533          60 :         struct nvme_bdev *nbdev = spdk_io_channel_iter_get_io_device(i);
     534             : 
     535          60 :         iter->i = i;
     536          60 :         iter->cpl(nbdev, iter->ctx, status);
     537             : 
     538          60 :         free(iter);
     539          60 : }
     540             : 
     541             : void
     542          60 : nvme_bdev_for_each_channel(struct nvme_bdev *nbdev,
     543             :                            nvme_bdev_for_each_channel_msg fn, void *ctx,
     544             :                            nvme_bdev_for_each_channel_done cpl)
     545             : {
     546             :         struct nvme_bdev_channel_iter *iter;
     547             : 
     548          60 :         assert(nbdev != NULL && fn != NULL);
     549             : 
     550          60 :         iter = calloc(1, sizeof(struct nvme_bdev_channel_iter));
     551          60 :         if (iter == NULL) {
     552           0 :                 SPDK_ERRLOG("Unable to allocate iterator\n");
     553           0 :                 assert(false);
     554             :                 return;
     555             :         }
     556             : 
     557          60 :         iter->fn = fn;
     558          60 :         iter->cpl = cpl;
     559          60 :         iter->ctx = ctx;
     560             : 
     561          60 :         spdk_for_each_channel(nbdev, nvme_bdev_each_channel_msg, iter,
     562             :                               nvme_bdev_each_channel_cpl);
     563          60 : }
     564             : 
     565             : void
     566           0 : nvme_bdev_dump_trid_json(const struct spdk_nvme_transport_id *trid, struct spdk_json_write_ctx *w)
     567             : {
     568             :         const char *trtype_str;
     569             :         const char *adrfam_str;
     570             : 
     571           0 :         trtype_str = spdk_nvme_transport_id_trtype_str(trid->trtype);
     572           0 :         if (trtype_str) {
     573           0 :                 spdk_json_write_named_string(w, "trtype", trtype_str);
     574           0 :         }
     575             : 
     576           0 :         adrfam_str = spdk_nvme_transport_id_adrfam_str(trid->adrfam);
     577           0 :         if (adrfam_str) {
     578           0 :                 spdk_json_write_named_string(w, "adrfam", adrfam_str);
     579           0 :         }
     580             : 
     581           0 :         if (trid->traddr[0] != '\0') {
     582           0 :                 spdk_json_write_named_string(w, "traddr", trid->traddr);
     583           0 :         }
     584             : 
     585           0 :         if (trid->trsvcid[0] != '\0') {
     586           0 :                 spdk_json_write_named_string(w, "trsvcid", trid->trsvcid);
     587           0 :         }
     588             : 
     589           0 :         if (trid->subnqn[0] != '\0') {
     590           0 :                 spdk_json_write_named_string(w, "subnqn", trid->subnqn);
     591           0 :         }
     592           0 : }
     593             : 
     594             : static void
     595          61 : nvme_bdev_ctrlr_delete(struct nvme_bdev_ctrlr *nbdev_ctrlr,
     596             :                        struct nvme_ctrlr *nvme_ctrlr)
     597             : {
     598             :         SPDK_DTRACE_PROBE1(bdev_nvme_ctrlr_delete, nvme_ctrlr->nbdev_ctrlr->name);
     599          61 :         pthread_mutex_lock(&g_bdev_nvme_mutex);
     600             : 
     601          61 :         TAILQ_REMOVE(&nbdev_ctrlr->ctrlrs, nvme_ctrlr, tailq);
     602          61 :         if (!TAILQ_EMPTY(&nbdev_ctrlr->ctrlrs)) {
     603          15 :                 pthread_mutex_unlock(&g_bdev_nvme_mutex);
     604             : 
     605          15 :                 return;
     606             :         }
     607          46 :         TAILQ_REMOVE(&g_nvme_bdev_ctrlrs, nbdev_ctrlr, tailq);
     608             : 
     609          46 :         pthread_mutex_unlock(&g_bdev_nvme_mutex);
     610             : 
     611          46 :         assert(TAILQ_EMPTY(&nbdev_ctrlr->bdevs));
     612             : 
     613          46 :         free(nbdev_ctrlr->name);
     614          46 :         free(nbdev_ctrlr);
     615          61 : }
     616             : 
     617             : static void
     618          62 : _nvme_ctrlr_delete(struct nvme_ctrlr *nvme_ctrlr)
     619             : {
     620             :         struct nvme_path_id *path_id, *tmp_path;
     621             :         struct nvme_ns *ns, *tmp_ns;
     622             : 
     623          62 :         free(nvme_ctrlr->copied_ana_desc);
     624          62 :         spdk_free(nvme_ctrlr->ana_log_page);
     625             : 
     626          62 :         if (nvme_ctrlr->opal_dev) {
     627           0 :                 spdk_opal_dev_destruct(nvme_ctrlr->opal_dev);
     628           0 :                 nvme_ctrlr->opal_dev = NULL;
     629           0 :         }
     630             : 
     631          62 :         if (nvme_ctrlr->nbdev_ctrlr) {
     632          61 :                 nvme_bdev_ctrlr_delete(nvme_ctrlr->nbdev_ctrlr, nvme_ctrlr);
     633          61 :         }
     634             : 
     635          62 :         RB_FOREACH_SAFE(ns, nvme_ns_tree, &nvme_ctrlr->namespaces, tmp_ns) {
     636           0 :                 RB_REMOVE(nvme_ns_tree, &nvme_ctrlr->namespaces, ns);
     637           0 :                 nvme_ns_free(ns);
     638           0 :         }
     639             : 
     640         124 :         TAILQ_FOREACH_SAFE(path_id, &nvme_ctrlr->trids, link, tmp_path) {
     641          62 :                 TAILQ_REMOVE(&nvme_ctrlr->trids, path_id, link);
     642          62 :                 free(path_id);
     643          62 :         }
     644             : 
     645          62 :         pthread_mutex_destroy(&nvme_ctrlr->mutex);
     646          62 :         spdk_keyring_put_key(nvme_ctrlr->psk);
     647          62 :         spdk_keyring_put_key(nvme_ctrlr->dhchap_key);
     648          62 :         spdk_keyring_put_key(nvme_ctrlr->dhchap_ctrlr_key);
     649          62 :         free(nvme_ctrlr);
     650             : 
     651          62 :         pthread_mutex_lock(&g_bdev_nvme_mutex);
     652          62 :         if (g_bdev_nvme_module_finish && TAILQ_EMPTY(&g_nvme_bdev_ctrlrs)) {
     653           0 :                 pthread_mutex_unlock(&g_bdev_nvme_mutex);
     654           0 :                 spdk_io_device_unregister(&g_nvme_bdev_ctrlrs, NULL);
     655           0 :                 spdk_bdev_module_fini_done();
     656           0 :                 return;
     657             :         }
     658          62 :         pthread_mutex_unlock(&g_bdev_nvme_mutex);
     659          62 : }
     660             : 
     661             : static int
     662          62 : nvme_detach_poller(void *arg)
     663             : {
     664          62 :         struct nvme_ctrlr *nvme_ctrlr = arg;
     665             :         int rc;
     666             : 
     667          62 :         rc = spdk_nvme_detach_poll_async(nvme_ctrlr->detach_ctx);
     668          62 :         if (rc != -EAGAIN) {
     669          62 :                 spdk_poller_unregister(&nvme_ctrlr->reset_detach_poller);
     670          62 :                 _nvme_ctrlr_delete(nvme_ctrlr);
     671          62 :         }
     672             : 
     673          62 :         return SPDK_POLLER_BUSY;
     674             : }
     675             : 
     676             : static void
     677          62 : nvme_ctrlr_delete(struct nvme_ctrlr *nvme_ctrlr)
     678             : {
     679             :         int rc;
     680             : 
     681          62 :         spdk_poller_unregister(&nvme_ctrlr->reconnect_delay_timer);
     682             : 
     683          62 :         if (spdk_interrupt_mode_is_enabled()) {
     684           0 :                 spdk_interrupt_unregister(&nvme_ctrlr->intr);
     685           0 :         }
     686             : 
     687             :         /* First, unregister the adminq poller, as the driver will poll adminq if necessary */
     688          62 :         spdk_poller_unregister(&nvme_ctrlr->adminq_timer_poller);
     689             : 
     690             :         /* If we got here, the reset/detach poller cannot be active */
     691          62 :         assert(nvme_ctrlr->reset_detach_poller == NULL);
     692          62 :         nvme_ctrlr->reset_detach_poller = SPDK_POLLER_REGISTER(nvme_detach_poller,
     693             :                                           nvme_ctrlr, 1000);
     694          62 :         if (nvme_ctrlr->reset_detach_poller == NULL) {
     695           0 :                 NVME_CTRLR_ERRLOG(nvme_ctrlr, "Failed to register detach poller\n");
     696           0 :                 goto error;
     697             :         }
     698             : 
     699          62 :         rc = spdk_nvme_detach_async(nvme_ctrlr->ctrlr, &nvme_ctrlr->detach_ctx);
     700          62 :         if (rc != 0) {
     701           0 :                 NVME_CTRLR_ERRLOG(nvme_ctrlr, "Failed to detach the NVMe controller\n");
     702           0 :                 goto error;
     703             :         }
     704             : 
     705          62 :         return;
     706             : error:
     707             :         /* We don't have a good way to handle errors here, so just do what we can and delete the
     708             :          * controller without detaching the underlying NVMe device.
     709             :          */
     710           0 :         spdk_poller_unregister(&nvme_ctrlr->reset_detach_poller);
     711           0 :         _nvme_ctrlr_delete(nvme_ctrlr);
     712          62 : }
     713             : 
     714             : static void
     715          61 : nvme_ctrlr_unregister_cb(void *io_device)
     716             : {
     717          61 :         struct nvme_ctrlr *nvme_ctrlr = io_device;
     718             : 
     719          61 :         nvme_ctrlr_delete(nvme_ctrlr);
     720          61 : }
     721             : 
     722             : static void
     723          61 : nvme_ctrlr_unregister(void *ctx)
     724             : {
     725          61 :         struct nvme_ctrlr *nvme_ctrlr = ctx;
     726             : 
     727          61 :         spdk_io_device_unregister(nvme_ctrlr, nvme_ctrlr_unregister_cb);
     728          61 : }
     729             : 
     730             : static bool
     731         252 : nvme_ctrlr_can_be_unregistered(struct nvme_ctrlr *nvme_ctrlr)
     732             : {
     733         252 :         if (!nvme_ctrlr->destruct) {
     734         131 :                 return false;
     735             :         }
     736             : 
     737         121 :         if (nvme_ctrlr->ref > 0) {
     738          60 :                 return false;
     739             :         }
     740             : 
     741          61 :         if (nvme_ctrlr->resetting) {
     742           0 :                 return false;
     743             :         }
     744             : 
     745          61 :         if (nvme_ctrlr->ana_log_page_updating) {
     746           0 :                 return false;
     747             :         }
     748             : 
     749          61 :         if (nvme_ctrlr->io_path_cache_clearing) {
     750           0 :                 return false;
     751             :         }
     752             : 
     753          61 :         return true;
     754         252 : }
     755             : 
     756             : static void
     757         172 : nvme_ctrlr_put_ref(struct nvme_ctrlr *nvme_ctrlr)
     758             : {
     759         172 :         pthread_mutex_lock(&nvme_ctrlr->mutex);
     760             :         SPDK_DTRACE_PROBE2(bdev_nvme_ctrlr_release, nvme_ctrlr->nbdev_ctrlr->name, nvme_ctrlr->ref);
     761             : 
     762         172 :         assert(nvme_ctrlr->ref > 0);
     763         172 :         nvme_ctrlr->ref--;
     764             : 
     765         172 :         if (!nvme_ctrlr_can_be_unregistered(nvme_ctrlr)) {
     766         111 :                 pthread_mutex_unlock(&nvme_ctrlr->mutex);
     767         111 :                 return;
     768             :         }
     769             : 
     770          61 :         pthread_mutex_unlock(&nvme_ctrlr->mutex);
     771             : 
     772          61 :         spdk_thread_exec_msg(nvme_ctrlr->thread, nvme_ctrlr_unregister, nvme_ctrlr);
     773         172 : }
     774             : 
     775             : static void
     776         111 : nvme_ctrlr_get_ref(struct nvme_ctrlr *nvme_ctrlr)
     777             : {
     778         111 :         pthread_mutex_lock(&nvme_ctrlr->mutex);
     779         111 :         nvme_ctrlr->ref++;
     780         111 :         pthread_mutex_unlock(&nvme_ctrlr->mutex);
     781         111 : }
     782             : 
     783             : static void
     784         259 : bdev_nvme_clear_current_io_path(struct nvme_bdev_channel *nbdev_ch)
     785             : {
     786         259 :         nbdev_ch->current_io_path = NULL;
     787         259 :         nbdev_ch->rr_counter = 0;
     788         259 : }
     789             : 
     790             : static struct nvme_io_path *
     791           8 : _bdev_nvme_get_io_path(struct nvme_bdev_channel *nbdev_ch, struct nvme_ns *nvme_ns)
     792             : {
     793             :         struct nvme_io_path *io_path;
     794             : 
     795          16 :         STAILQ_FOREACH(io_path, &nbdev_ch->io_path_list, stailq) {
     796          15 :                 if (io_path->nvme_ns == nvme_ns) {
     797           7 :                         break;
     798             :                 }
     799           8 :         }
     800             : 
     801           8 :         return io_path;
     802             : }
     803             : 
     804             : static struct nvme_io_path *
     805          39 : nvme_io_path_alloc(void)
     806             : {
     807             :         struct nvme_io_path *io_path;
     808             : 
     809          39 :         io_path = calloc(1, sizeof(*io_path));
     810          39 :         if (io_path == NULL) {
     811           0 :                 SPDK_ERRLOG("Failed to alloc io_path.\n");
     812           0 :                 return NULL;
     813             :         }
     814             : 
     815          39 :         if (g_opts.io_path_stat) {
     816           0 :                 io_path->stat = calloc(1, sizeof(struct spdk_bdev_io_stat));
     817           0 :                 if (io_path->stat == NULL) {
     818           0 :                         free(io_path);
     819           0 :                         SPDK_ERRLOG("Failed to alloc io_path stat.\n");
     820           0 :                         return NULL;
     821             :                 }
     822           0 :                 spdk_bdev_reset_io_stat(io_path->stat, SPDK_BDEV_RESET_STAT_MAXMIN);
     823           0 :         }
     824             : 
     825          39 :         return io_path;
     826          39 : }
     827             : 
     828             : static void
     829          39 : nvme_io_path_free(struct nvme_io_path *io_path)
     830             : {
     831          39 :         free(io_path->stat);
     832          39 :         free(io_path);
     833          39 : }
     834             : 
     835             : static int
     836          39 : _bdev_nvme_add_io_path(struct nvme_bdev_channel *nbdev_ch, struct nvme_ns *nvme_ns)
     837             : {
     838             :         struct nvme_io_path *io_path;
     839             :         struct spdk_io_channel *ch;
     840             :         struct nvme_ctrlr_channel *ctrlr_ch;
     841             :         struct nvme_qpair *nvme_qpair;
     842             : 
     843          39 :         io_path = nvme_io_path_alloc();
     844          39 :         if (io_path == NULL) {
     845           0 :                 return -ENOMEM;
     846             :         }
     847             : 
     848          39 :         io_path->nvme_ns = nvme_ns;
     849             : 
     850          39 :         ch = spdk_get_io_channel(nvme_ns->ctrlr);
     851          39 :         if (ch == NULL) {
     852           0 :                 nvme_io_path_free(io_path);
     853           0 :                 SPDK_ERRLOG("Failed to alloc io_channel.\n");
     854           0 :                 return -ENOMEM;
     855             :         }
     856             : 
     857          39 :         ctrlr_ch = spdk_io_channel_get_ctx(ch);
     858             : 
     859          39 :         nvme_qpair = ctrlr_ch->qpair;
     860          39 :         assert(nvme_qpair != NULL);
     861             : 
     862          39 :         io_path->qpair = nvme_qpair;
     863          39 :         TAILQ_INSERT_TAIL(&nvme_qpair->io_path_list, io_path, tailq);
     864             : 
     865          39 :         io_path->nbdev_ch = nbdev_ch;
     866          39 :         STAILQ_INSERT_TAIL(&nbdev_ch->io_path_list, io_path, stailq);
     867             : 
     868          39 :         bdev_nvme_clear_current_io_path(nbdev_ch);
     869             : 
     870          39 :         return 0;
     871          39 : }
     872             : 
     873             : static void
     874          39 : bdev_nvme_clear_retry_io_path(struct nvme_bdev_channel *nbdev_ch,
     875             :                               struct nvme_io_path *io_path)
     876             : {
     877             :         struct nvme_bdev_io *bio;
     878             : 
     879          40 :         TAILQ_FOREACH(bio, &nbdev_ch->retry_io_list, retry_link) {
     880           1 :                 if (bio->io_path == io_path) {
     881           1 :                         bio->io_path = NULL;
     882           1 :                 }
     883           1 :         }
     884          39 : }
     885             : 
     886             : static void
     887          39 : _bdev_nvme_delete_io_path(struct nvme_bdev_channel *nbdev_ch, struct nvme_io_path *io_path)
     888             : {
     889             :         struct spdk_io_channel *ch;
     890             :         struct nvme_qpair *nvme_qpair;
     891             :         struct nvme_ctrlr_channel *ctrlr_ch;
     892             :         struct nvme_bdev *nbdev;
     893             : 
     894          39 :         nbdev = spdk_io_channel_get_io_device(spdk_io_channel_from_ctx(nbdev_ch));
     895             : 
     896             :         /* Add the statistics to nvme_ns before this path is destroyed. */
     897          39 :         pthread_mutex_lock(&nbdev->mutex);
     898          39 :         if (nbdev->ref != 0 && io_path->nvme_ns->stat != NULL && io_path->stat != NULL) {
     899           0 :                 spdk_bdev_add_io_stat(io_path->nvme_ns->stat, io_path->stat);
     900           0 :         }
     901          39 :         pthread_mutex_unlock(&nbdev->mutex);
     902             : 
     903          39 :         bdev_nvme_clear_current_io_path(nbdev_ch);
     904          39 :         bdev_nvme_clear_retry_io_path(nbdev_ch, io_path);
     905             : 
     906          41 :         STAILQ_REMOVE(&nbdev_ch->io_path_list, io_path, nvme_io_path, stailq);
     907          39 :         io_path->nbdev_ch = NULL;
     908             : 
     909          39 :         nvme_qpair = io_path->qpair;
     910          39 :         assert(nvme_qpair != NULL);
     911             : 
     912          39 :         ctrlr_ch = nvme_qpair->ctrlr_ch;
     913          39 :         assert(ctrlr_ch != NULL);
     914             : 
     915          39 :         ch = spdk_io_channel_from_ctx(ctrlr_ch);
     916          39 :         spdk_put_io_channel(ch);
     917             : 
     918             :         /* After an io_path is removed, I/Os submitted to it may complete and update statistics
     919             :          * of the io_path. To avoid heap-use-after-free error from this case, do not free the
     920             :          * io_path here but free the io_path when the associated qpair is freed. It is ensured
     921             :          * that all I/Os submitted to the io_path are completed when the associated qpair is freed.
     922             :          */
     923          39 : }
     924             : 
     925             : static void
     926          26 : _bdev_nvme_delete_io_paths(struct nvme_bdev_channel *nbdev_ch)
     927             : {
     928             :         struct nvme_io_path *io_path, *tmp_io_path;
     929             : 
     930          63 :         STAILQ_FOREACH_SAFE(io_path, &nbdev_ch->io_path_list, stailq, tmp_io_path) {
     931          37 :                 _bdev_nvme_delete_io_path(nbdev_ch, io_path);
     932          37 :         }
     933          26 : }
     934             : 
     935             : static int
     936          26 : bdev_nvme_create_bdev_channel_cb(void *io_device, void *ctx_buf)
     937             : {
     938          26 :         struct nvme_bdev_channel *nbdev_ch = ctx_buf;
     939          26 :         struct nvme_bdev *nbdev = io_device;
     940             :         struct nvme_ns *nvme_ns;
     941             :         int rc;
     942             : 
     943          26 :         STAILQ_INIT(&nbdev_ch->io_path_list);
     944          26 :         TAILQ_INIT(&nbdev_ch->retry_io_list);
     945             : 
     946          26 :         pthread_mutex_lock(&nbdev->mutex);
     947             : 
     948          26 :         nbdev_ch->mp_policy = nbdev->mp_policy;
     949          26 :         nbdev_ch->mp_selector = nbdev->mp_selector;
     950          26 :         nbdev_ch->rr_min_io = nbdev->rr_min_io;
     951             : 
     952          63 :         TAILQ_FOREACH(nvme_ns, &nbdev->nvme_ns_list, tailq) {
     953          37 :                 rc = _bdev_nvme_add_io_path(nbdev_ch, nvme_ns);
     954          37 :                 if (rc != 0) {
     955           0 :                         pthread_mutex_unlock(&nbdev->mutex);
     956             : 
     957           0 :                         _bdev_nvme_delete_io_paths(nbdev_ch);
     958           0 :                         return rc;
     959             :                 }
     960          37 :         }
     961          26 :         pthread_mutex_unlock(&nbdev->mutex);
     962             : 
     963          26 :         return 0;
     964          26 : }
     965             : 
     966             : /* If cpl != NULL, complete the bdev_io with nvme status based on 'cpl'.
     967             :  * If cpl == NULL, complete the bdev_io with bdev status based on 'status'.
     968             :  */
     969             : static inline void
     970          58 : __bdev_nvme_io_complete(struct spdk_bdev_io *bdev_io, enum spdk_bdev_io_status status,
     971             :                         const struct spdk_nvme_cpl *cpl)
     972             : {
     973          58 :         spdk_trace_record(TRACE_BDEV_NVME_IO_DONE, 0, 0, (uintptr_t)bdev_io->driver_ctx,
     974             :                           (uintptr_t)bdev_io);
     975          58 :         if (cpl) {
     976          29 :                 spdk_bdev_io_complete_nvme_status(bdev_io, cpl->cdw0, cpl->status.sct, cpl->status.sc);
     977          29 :         } else {
     978          29 :                 spdk_bdev_io_complete(bdev_io, status);
     979             :         }
     980          58 : }
     981             : 
     982             : static void bdev_nvme_abort_retry_ios(struct nvme_bdev_channel *nbdev_ch);
     983             : 
     984             : static void
     985          26 : bdev_nvme_destroy_bdev_channel_cb(void *io_device, void *ctx_buf)
     986             : {
     987          26 :         struct nvme_bdev_channel *nbdev_ch = ctx_buf;
     988             : 
     989          26 :         bdev_nvme_abort_retry_ios(nbdev_ch);
     990          26 :         _bdev_nvme_delete_io_paths(nbdev_ch);
     991          26 : }
     992             : 
     993             : static inline bool
     994          62 : bdev_nvme_io_type_is_admin(enum spdk_bdev_io_type io_type)
     995             : {
     996          62 :         switch (io_type) {
     997             :         case SPDK_BDEV_IO_TYPE_RESET:
     998             :         case SPDK_BDEV_IO_TYPE_NVME_ADMIN:
     999             :         case SPDK_BDEV_IO_TYPE_ABORT:
    1000           5 :                 return true;
    1001             :         default:
    1002          57 :                 break;
    1003             :         }
    1004             : 
    1005          57 :         return false;
    1006          62 : }
    1007             : 
    1008             : static inline bool
    1009          98 : nvme_ns_is_active(struct nvme_ns *nvme_ns)
    1010             : {
    1011          98 :         if (spdk_unlikely(nvme_ns->ana_state_updating)) {
    1012           1 :                 return false;
    1013             :         }
    1014             : 
    1015          97 :         if (spdk_unlikely(nvme_ns->ns == NULL)) {
    1016           0 :                 return false;
    1017             :         }
    1018             : 
    1019          97 :         return true;
    1020          98 : }
    1021             : 
    1022             : static inline bool
    1023          86 : nvme_ns_is_accessible(struct nvme_ns *nvme_ns)
    1024             : {
    1025          86 :         if (spdk_unlikely(!nvme_ns_is_active(nvme_ns))) {
    1026           1 :                 return false;
    1027             :         }
    1028             : 
    1029          85 :         switch (nvme_ns->ana_state) {
    1030             :         case SPDK_NVME_ANA_OPTIMIZED_STATE:
    1031             :         case SPDK_NVME_ANA_NON_OPTIMIZED_STATE:
    1032          76 :                 return true;
    1033             :         default:
    1034           9 :                 break;
    1035             :         }
    1036             : 
    1037           9 :         return false;
    1038          86 : }
    1039             : 
    1040             : static inline bool
    1041         128 : nvme_qpair_is_connected(struct nvme_qpair *nvme_qpair)
    1042             : {
    1043         128 :         if (spdk_unlikely(nvme_qpair->qpair == NULL)) {
    1044          23 :                 return false;
    1045             :         }
    1046             : 
    1047         105 :         if (spdk_unlikely(spdk_nvme_qpair_get_failure_reason(nvme_qpair->qpair) !=
    1048             :                           SPDK_NVME_QPAIR_FAILURE_NONE)) {
    1049           2 :                 return false;
    1050             :         }
    1051             : 
    1052         103 :         if (spdk_unlikely(nvme_qpair->ctrlr_ch->reset_iter != NULL)) {
    1053           0 :                 return false;
    1054             :         }
    1055             : 
    1056         103 :         return true;
    1057         128 : }
    1058             : 
    1059             : static inline bool
    1060         102 : nvme_io_path_is_available(struct nvme_io_path *io_path)
    1061             : {
    1062         102 :         if (spdk_unlikely(!nvme_qpair_is_connected(io_path->qpair))) {
    1063          16 :                 return false;
    1064             :         }
    1065             : 
    1066          86 :         if (spdk_unlikely(!nvme_ns_is_accessible(io_path->nvme_ns))) {
    1067          10 :                 return false;
    1068             :         }
    1069             : 
    1070          76 :         return true;
    1071         102 : }
    1072             : 
    1073             : static inline bool
    1074           9 : nvme_ctrlr_is_failed(struct nvme_ctrlr *nvme_ctrlr)
    1075             : {
    1076           9 :         if (nvme_ctrlr->destruct) {
    1077           0 :                 return true;
    1078             :         }
    1079             : 
    1080           9 :         if (nvme_ctrlr->fast_io_fail_timedout) {
    1081           2 :                 return true;
    1082             :         }
    1083             : 
    1084           7 :         if (nvme_ctrlr->resetting) {
    1085           5 :                 if (nvme_ctrlr->opts.reconnect_delay_sec != 0) {
    1086           5 :                         return false;
    1087             :                 } else {
    1088           0 :                         return true;
    1089             :                 }
    1090             :         }
    1091             : 
    1092           2 :         if (nvme_ctrlr->reconnect_is_delayed) {
    1093           2 :                 return false;
    1094             :         }
    1095             : 
    1096           0 :         if (nvme_ctrlr->disabled) {
    1097           0 :                 return true;
    1098             :         }
    1099             : 
    1100           0 :         if (spdk_nvme_ctrlr_is_failed(nvme_ctrlr->ctrlr)) {
    1101           0 :                 return true;
    1102             :         } else {
    1103           0 :                 return false;
    1104             :         }
    1105           9 : }
    1106             : 
    1107             : static bool
    1108          20 : nvme_ctrlr_is_available(struct nvme_ctrlr *nvme_ctrlr)
    1109             : {
    1110          20 :         if (nvme_ctrlr->destruct) {
    1111           0 :                 return false;
    1112             :         }
    1113             : 
    1114          20 :         if (spdk_nvme_ctrlr_is_failed(nvme_ctrlr->ctrlr)) {
    1115           3 :                 return false;
    1116             :         }
    1117             : 
    1118          17 :         if (nvme_ctrlr->resetting || nvme_ctrlr->reconnect_is_delayed) {
    1119           1 :                 return false;
    1120             :         }
    1121             : 
    1122          16 :         if (nvme_ctrlr->disabled) {
    1123           0 :                 return false;
    1124             :         }
    1125             : 
    1126          16 :         return true;
    1127          20 : }
    1128             : 
    1129             : /* Simulate circular linked list. */
    1130             : static inline struct nvme_io_path *
    1131          99 : nvme_io_path_get_next(struct nvme_bdev_channel *nbdev_ch, struct nvme_io_path *prev_path)
    1132             : {
    1133             :         struct nvme_io_path *next_path;
    1134             : 
    1135          99 :         if (prev_path != NULL) {
    1136          39 :                 next_path = STAILQ_NEXT(prev_path, stailq);
    1137          39 :                 if (next_path != NULL) {
    1138          14 :                         return next_path;
    1139             :                 }
    1140          25 :         }
    1141             : 
    1142          85 :         return STAILQ_FIRST(&nbdev_ch->io_path_list);
    1143          99 : }
    1144             : 
    1145             : static struct nvme_io_path *
    1146          67 : _bdev_nvme_find_io_path(struct nvme_bdev_channel *nbdev_ch)
    1147             : {
    1148          67 :         struct nvme_io_path *io_path, *start, *non_optimized = NULL;
    1149             : 
    1150          67 :         start = nvme_io_path_get_next(nbdev_ch, nbdev_ch->current_io_path);
    1151             : 
    1152          67 :         io_path = start;
    1153          67 :         do {
    1154          79 :                 if (spdk_likely(nvme_io_path_is_available(io_path))) {
    1155          57 :                         switch (io_path->nvme_ns->ana_state) {
    1156             :                         case SPDK_NVME_ANA_OPTIMIZED_STATE:
    1157          47 :                                 nbdev_ch->current_io_path = io_path;
    1158          47 :                                 return io_path;
    1159             :                         case SPDK_NVME_ANA_NON_OPTIMIZED_STATE:
    1160          10 :                                 if (non_optimized == NULL) {
    1161           7 :                                         non_optimized = io_path;
    1162           7 :                                 }
    1163          10 :                                 break;
    1164             :                         default:
    1165           0 :                                 assert(false);
    1166             :                                 break;
    1167             :                         }
    1168          10 :                 }
    1169          32 :                 io_path = nvme_io_path_get_next(nbdev_ch, io_path);
    1170          32 :         } while (io_path != start);
    1171             : 
    1172          20 :         if (nbdev_ch->mp_policy == BDEV_NVME_MP_POLICY_ACTIVE_ACTIVE) {
    1173             :                 /* We come here only if there is no optimized path. Cache even non_optimized
    1174             :                  * path for load balance across multiple non_optimized paths.
    1175             :                  */
    1176           1 :                 nbdev_ch->current_io_path = non_optimized;
    1177           1 :         }
    1178             : 
    1179          20 :         return non_optimized;
    1180          67 : }
    1181             : 
    1182             : static struct nvme_io_path *
    1183           4 : _bdev_nvme_find_io_path_min_qd(struct nvme_bdev_channel *nbdev_ch)
    1184             : {
    1185             :         struct nvme_io_path *io_path;
    1186           4 :         struct nvme_io_path *optimized = NULL, *non_optimized = NULL;
    1187           4 :         uint32_t opt_min_qd = UINT32_MAX, non_opt_min_qd = UINT32_MAX;
    1188             :         uint32_t num_outstanding_reqs;
    1189             : 
    1190          16 :         STAILQ_FOREACH(io_path, &nbdev_ch->io_path_list, stailq) {
    1191          12 :                 if (spdk_unlikely(!nvme_qpair_is_connected(io_path->qpair))) {
    1192             :                         /* The device is currently resetting. */
    1193           0 :                         continue;
    1194             :                 }
    1195             : 
    1196          12 :                 if (spdk_unlikely(!nvme_ns_is_active(io_path->nvme_ns))) {
    1197           0 :                         continue;
    1198             :                 }
    1199             : 
    1200          12 :                 num_outstanding_reqs = spdk_nvme_qpair_get_num_outstanding_reqs(io_path->qpair->qpair);
    1201          12 :                 switch (io_path->nvme_ns->ana_state) {
    1202             :                 case SPDK_NVME_ANA_OPTIMIZED_STATE:
    1203           6 :                         if (num_outstanding_reqs < opt_min_qd) {
    1204           5 :                                 opt_min_qd = num_outstanding_reqs;
    1205           5 :                                 optimized = io_path;
    1206           5 :                         }
    1207           6 :                         break;
    1208             :                 case SPDK_NVME_ANA_NON_OPTIMIZED_STATE:
    1209           3 :                         if (num_outstanding_reqs < non_opt_min_qd) {
    1210           3 :                                 non_opt_min_qd = num_outstanding_reqs;
    1211           3 :                                 non_optimized = io_path;
    1212           3 :                         }
    1213           3 :                         break;
    1214             :                 default:
    1215           3 :                         break;
    1216             :                 }
    1217          12 :         }
    1218             : 
    1219             :         /* don't cache io path for BDEV_NVME_MP_SELECTOR_QUEUE_DEPTH selector */
    1220           4 :         if (optimized != NULL) {
    1221           3 :                 return optimized;
    1222             :         }
    1223             : 
    1224           1 :         return non_optimized;
    1225           4 : }
    1226             : 
    1227             : static inline struct nvme_io_path *
    1228         105 : bdev_nvme_find_io_path(struct nvme_bdev_channel *nbdev_ch)
    1229             : {
    1230         105 :         if (spdk_likely(nbdev_ch->current_io_path != NULL)) {
    1231          41 :                 if (nbdev_ch->mp_policy == BDEV_NVME_MP_POLICY_ACTIVE_PASSIVE) {
    1232          31 :                         return nbdev_ch->current_io_path;
    1233          10 :                 } else if (nbdev_ch->mp_selector == BDEV_NVME_MP_SELECTOR_ROUND_ROBIN) {
    1234          10 :                         if (++nbdev_ch->rr_counter < nbdev_ch->rr_min_io) {
    1235           3 :                                 return nbdev_ch->current_io_path;
    1236             :                         }
    1237           7 :                         nbdev_ch->rr_counter = 0;
    1238           7 :                 }
    1239           7 :         }
    1240             : 
    1241          71 :         if (nbdev_ch->mp_policy == BDEV_NVME_MP_POLICY_ACTIVE_PASSIVE ||
    1242          14 :             nbdev_ch->mp_selector == BDEV_NVME_MP_SELECTOR_ROUND_ROBIN) {
    1243          67 :                 return _bdev_nvme_find_io_path(nbdev_ch);
    1244             :         } else {
    1245           4 :                 return _bdev_nvme_find_io_path_min_qd(nbdev_ch);
    1246             :         }
    1247         105 : }
    1248             : 
    1249             : /* Return true if there is any io_path whose qpair is active or ctrlr is not failed,
    1250             :  * or false otherwise.
    1251             :  *
    1252             :  * If any io_path has an active qpair but find_io_path() returned NULL, its namespace
    1253             :  * is likely to be non-accessible now but may become accessible.
    1254             :  *
    1255             :  * If any io_path has an unfailed ctrlr but find_io_path() returned NULL, the ctrlr
    1256             :  * is likely to be resetting now but the reset may succeed. A ctrlr is set to unfailed
    1257             :  * when starting to reset it but it is set to failed when the reset failed. Hence, if
    1258             :  * a ctrlr is unfailed, it is likely that it works fine or is resetting.
    1259             :  */
    1260             : static bool
    1261          15 : any_io_path_may_become_available(struct nvme_bdev_channel *nbdev_ch)
    1262             : {
    1263             :         struct nvme_io_path *io_path;
    1264             : 
    1265          15 :         if (nbdev_ch->resetting) {
    1266           1 :                 return false;
    1267             :         }
    1268             : 
    1269          16 :         STAILQ_FOREACH(io_path, &nbdev_ch->io_path_list, stailq) {
    1270          14 :                 if (io_path->nvme_ns->ana_transition_timedout) {
    1271           0 :                         continue;
    1272             :                 }
    1273             : 
    1274          14 :                 if (nvme_qpair_is_connected(io_path->qpair) ||
    1275           9 :                     !nvme_ctrlr_is_failed(io_path->qpair->ctrlr)) {
    1276          12 :                         return true;
    1277             :                 }
    1278           2 :         }
    1279             : 
    1280           2 :         return false;
    1281          15 : }
    1282             : 
    1283             : static void
    1284          14 : bdev_nvme_retry_io(struct nvme_bdev_channel *nbdev_ch, struct spdk_bdev_io *bdev_io)
    1285             : {
    1286          14 :         struct nvme_bdev_io *nbdev_io = (struct nvme_bdev_io *)bdev_io->driver_ctx;
    1287             :         struct spdk_io_channel *ch;
    1288             : 
    1289          14 :         if (nbdev_io->io_path != NULL && nvme_io_path_is_available(nbdev_io->io_path)) {
    1290           3 :                 _bdev_nvme_submit_request(nbdev_ch, bdev_io);
    1291           3 :         } else {
    1292          11 :                 ch = spdk_io_channel_from_ctx(nbdev_ch);
    1293          11 :                 bdev_nvme_submit_request(ch, bdev_io);
    1294             :         }
    1295          14 : }
    1296             : 
    1297             : static int
    1298          14 : bdev_nvme_retry_ios(void *arg)
    1299             : {
    1300          14 :         struct nvme_bdev_channel *nbdev_ch = arg;
    1301             :         struct nvme_bdev_io *bio, *tmp_bio;
    1302             :         uint64_t now, delay_us;
    1303             : 
    1304          14 :         now = spdk_get_ticks();
    1305             : 
    1306          28 :         TAILQ_FOREACH_SAFE(bio, &nbdev_ch->retry_io_list, retry_link, tmp_bio) {
    1307          15 :                 if (bio->retry_ticks > now) {
    1308           1 :                         break;
    1309             :                 }
    1310             : 
    1311          14 :                 TAILQ_REMOVE(&nbdev_ch->retry_io_list, bio, retry_link);
    1312             : 
    1313          14 :                 bdev_nvme_retry_io(nbdev_ch, spdk_bdev_io_from_ctx(bio));
    1314          14 :         }
    1315             : 
    1316          14 :         spdk_poller_unregister(&nbdev_ch->retry_io_poller);
    1317             : 
    1318          14 :         bio = TAILQ_FIRST(&nbdev_ch->retry_io_list);
    1319          14 :         if (bio != NULL) {
    1320           4 :                 delay_us = (bio->retry_ticks - now) * SPDK_SEC_TO_USEC / spdk_get_ticks_hz();
    1321             : 
    1322           4 :                 nbdev_ch->retry_io_poller = SPDK_POLLER_REGISTER(bdev_nvme_retry_ios, nbdev_ch,
    1323             :                                             delay_us);
    1324           4 :         }
    1325             : 
    1326          14 :         return SPDK_POLLER_BUSY;
    1327             : }
    1328             : 
    1329             : static void
    1330          16 : bdev_nvme_queue_retry_io(struct nvme_bdev_channel *nbdev_ch,
    1331             :                          struct nvme_bdev_io *bio, uint64_t delay_ms)
    1332             : {
    1333             :         struct nvme_bdev_io *tmp_bio;
    1334             : 
    1335          16 :         bio->retry_ticks = spdk_get_ticks() + delay_ms * spdk_get_ticks_hz() / 1000ULL;
    1336             : 
    1337          16 :         TAILQ_FOREACH_REVERSE(tmp_bio, &nbdev_ch->retry_io_list, retry_io_head, retry_link) {
    1338           1 :                 if (tmp_bio->retry_ticks <= bio->retry_ticks) {
    1339           1 :                         TAILQ_INSERT_AFTER(&nbdev_ch->retry_io_list, tmp_bio, bio,
    1340             :                                            retry_link);
    1341           1 :                         return;
    1342             :                 }
    1343           0 :         }
    1344             : 
    1345             :         /* No earlier I/Os were found. This I/O must be the new head. */
    1346          15 :         TAILQ_INSERT_HEAD(&nbdev_ch->retry_io_list, bio, retry_link);
    1347             : 
    1348          15 :         spdk_poller_unregister(&nbdev_ch->retry_io_poller);
    1349             : 
    1350          15 :         nbdev_ch->retry_io_poller = SPDK_POLLER_REGISTER(bdev_nvme_retry_ios, nbdev_ch,
    1351             :                                     delay_ms * 1000ULL);
    1352          16 : }
    1353             : 
    1354             : static void
    1355          58 : bdev_nvme_abort_retry_ios(struct nvme_bdev_channel *nbdev_ch)
    1356             : {
    1357             :         struct nvme_bdev_io *bio, *tmp_bio;
    1358             : 
    1359          59 :         TAILQ_FOREACH_SAFE(bio, &nbdev_ch->retry_io_list, retry_link, tmp_bio) {
    1360           1 :                 TAILQ_REMOVE(&nbdev_ch->retry_io_list, bio, retry_link);
    1361           1 :                 __bdev_nvme_io_complete(spdk_bdev_io_from_ctx(bio), SPDK_BDEV_IO_STATUS_ABORTED, NULL);
    1362           1 :         }
    1363             : 
    1364          58 :         spdk_poller_unregister(&nbdev_ch->retry_io_poller);
    1365          58 : }
    1366             : 
    1367             : static int
    1368           6 : bdev_nvme_abort_retry_io(struct nvme_bdev_channel *nbdev_ch,
    1369             :                          struct nvme_bdev_io *bio_to_abort)
    1370             : {
    1371             :         struct nvme_bdev_io *bio;
    1372             : 
    1373           6 :         TAILQ_FOREACH(bio, &nbdev_ch->retry_io_list, retry_link) {
    1374           1 :                 if (bio == bio_to_abort) {
    1375           1 :                         TAILQ_REMOVE(&nbdev_ch->retry_io_list, bio, retry_link);
    1376           1 :                         __bdev_nvme_io_complete(spdk_bdev_io_from_ctx(bio), SPDK_BDEV_IO_STATUS_ABORTED, NULL);
    1377           1 :                         return 0;
    1378             :                 }
    1379           0 :         }
    1380             : 
    1381           5 :         return -ENOENT;
    1382           6 : }
    1383             : 
    1384             : static void
    1385          12 : bdev_nvme_update_nvme_error_stat(struct spdk_bdev_io *bdev_io, const struct spdk_nvme_cpl *cpl)
    1386             : {
    1387             :         struct nvme_bdev *nbdev;
    1388             :         uint16_t sct, sc;
    1389             : 
    1390          12 :         assert(spdk_nvme_cpl_is_error(cpl));
    1391             : 
    1392          12 :         nbdev = bdev_io->bdev->ctxt;
    1393             : 
    1394          12 :         if (nbdev->err_stat == NULL) {
    1395          12 :                 return;
    1396             :         }
    1397             : 
    1398           0 :         sct = cpl->status.sct;
    1399           0 :         sc = cpl->status.sc;
    1400             : 
    1401           0 :         pthread_mutex_lock(&nbdev->mutex);
    1402             : 
    1403           0 :         nbdev->err_stat->status_type[sct]++;
    1404           0 :         switch (sct) {
    1405             :         case SPDK_NVME_SCT_GENERIC:
    1406             :         case SPDK_NVME_SCT_COMMAND_SPECIFIC:
    1407             :         case SPDK_NVME_SCT_MEDIA_ERROR:
    1408             :         case SPDK_NVME_SCT_PATH:
    1409           0 :                 nbdev->err_stat->status[sct][sc]++;
    1410           0 :                 break;
    1411             :         default:
    1412           0 :                 break;
    1413             :         }
    1414             : 
    1415           0 :         pthread_mutex_unlock(&nbdev->mutex);
    1416          12 : }
    1417             : 
    1418             : static inline void
    1419          20 : bdev_nvme_update_io_path_stat(struct nvme_bdev_io *bio)
    1420             : {
    1421          20 :         struct spdk_bdev_io *bdev_io = spdk_bdev_io_from_ctx(bio);
    1422          20 :         uint64_t num_blocks = bdev_io->u.bdev.num_blocks;
    1423          20 :         uint32_t blocklen = bdev_io->bdev->blocklen;
    1424             :         struct spdk_bdev_io_stat *stat;
    1425             :         uint64_t tsc_diff;
    1426             : 
    1427          20 :         if (bio->io_path->stat == NULL) {
    1428          20 :                 return;
    1429             :         }
    1430             : 
    1431           0 :         tsc_diff = spdk_get_ticks() - bio->submit_tsc;
    1432           0 :         stat = bio->io_path->stat;
    1433             : 
    1434           0 :         switch (bdev_io->type) {
    1435             :         case SPDK_BDEV_IO_TYPE_READ:
    1436           0 :                 stat->bytes_read += num_blocks * blocklen;
    1437           0 :                 stat->num_read_ops++;
    1438           0 :                 stat->read_latency_ticks += tsc_diff;
    1439           0 :                 if (stat->max_read_latency_ticks < tsc_diff) {
    1440           0 :                         stat->max_read_latency_ticks = tsc_diff;
    1441           0 :                 }
    1442           0 :                 if (stat->min_read_latency_ticks > tsc_diff) {
    1443           0 :                         stat->min_read_latency_ticks = tsc_diff;
    1444           0 :                 }
    1445           0 :                 break;
    1446             :         case SPDK_BDEV_IO_TYPE_WRITE:
    1447           0 :                 stat->bytes_written += num_blocks * blocklen;
    1448           0 :                 stat->num_write_ops++;
    1449           0 :                 stat->write_latency_ticks += tsc_diff;
    1450           0 :                 if (stat->max_write_latency_ticks < tsc_diff) {
    1451           0 :                         stat->max_write_latency_ticks = tsc_diff;
    1452           0 :                 }
    1453           0 :                 if (stat->min_write_latency_ticks > tsc_diff) {
    1454           0 :                         stat->min_write_latency_ticks = tsc_diff;
    1455           0 :                 }
    1456           0 :                 break;
    1457             :         case SPDK_BDEV_IO_TYPE_UNMAP:
    1458           0 :                 stat->bytes_unmapped += num_blocks * blocklen;
    1459           0 :                 stat->num_unmap_ops++;
    1460           0 :                 stat->unmap_latency_ticks += tsc_diff;
    1461           0 :                 if (stat->max_unmap_latency_ticks < tsc_diff) {
    1462           0 :                         stat->max_unmap_latency_ticks = tsc_diff;
    1463           0 :                 }
    1464           0 :                 if (stat->min_unmap_latency_ticks > tsc_diff) {
    1465           0 :                         stat->min_unmap_latency_ticks = tsc_diff;
    1466           0 :                 }
    1467           0 :                 break;
    1468             :         case SPDK_BDEV_IO_TYPE_ZCOPY:
    1469             :                 /* Track the data in the start phase only */
    1470           0 :                 if (!bdev_io->u.bdev.zcopy.start) {
    1471           0 :                         break;
    1472             :                 }
    1473           0 :                 if (bdev_io->u.bdev.zcopy.populate) {
    1474           0 :                         stat->bytes_read += num_blocks * blocklen;
    1475           0 :                         stat->num_read_ops++;
    1476           0 :                         stat->read_latency_ticks += tsc_diff;
    1477           0 :                         if (stat->max_read_latency_ticks < tsc_diff) {
    1478           0 :                                 stat->max_read_latency_ticks = tsc_diff;
    1479           0 :                         }
    1480           0 :                         if (stat->min_read_latency_ticks > tsc_diff) {
    1481           0 :                                 stat->min_read_latency_ticks = tsc_diff;
    1482           0 :                         }
    1483           0 :                 } else {
    1484           0 :                         stat->bytes_written += num_blocks * blocklen;
    1485           0 :                         stat->num_write_ops++;
    1486           0 :                         stat->write_latency_ticks += tsc_diff;
    1487           0 :                         if (stat->max_write_latency_ticks < tsc_diff) {
    1488           0 :                                 stat->max_write_latency_ticks = tsc_diff;
    1489           0 :                         }
    1490           0 :                         if (stat->min_write_latency_ticks > tsc_diff) {
    1491           0 :                                 stat->min_write_latency_ticks = tsc_diff;
    1492           0 :                         }
    1493             :                 }
    1494           0 :                 break;
    1495             :         case SPDK_BDEV_IO_TYPE_COPY:
    1496           0 :                 stat->bytes_copied += num_blocks * blocklen;
    1497           0 :                 stat->num_copy_ops++;
    1498           0 :                 stat->copy_latency_ticks += tsc_diff;
    1499           0 :                 if (stat->max_copy_latency_ticks < tsc_diff) {
    1500           0 :                         stat->max_copy_latency_ticks = tsc_diff;
    1501           0 :                 }
    1502           0 :                 if (stat->min_copy_latency_ticks > tsc_diff) {
    1503           0 :                         stat->min_copy_latency_ticks = tsc_diff;
    1504           0 :                 }
    1505           0 :                 break;
    1506             :         default:
    1507           0 :                 break;
    1508             :         }
    1509          20 : }
    1510             : 
    1511             : static bool
    1512          11 : bdev_nvme_check_retry_io(struct nvme_bdev_io *bio,
    1513             :                          const struct spdk_nvme_cpl *cpl,
    1514             :                          struct nvme_bdev_channel *nbdev_ch,
    1515             :                          uint64_t *_delay_ms)
    1516             : {
    1517          11 :         struct nvme_io_path *io_path = bio->io_path;
    1518          11 :         struct nvme_ctrlr *nvme_ctrlr = io_path->qpair->ctrlr;
    1519             :         const struct spdk_nvme_ctrlr_data *cdata;
    1520             : 
    1521          15 :         if (spdk_nvme_cpl_is_path_error(cpl) ||
    1522           5 :             spdk_nvme_cpl_is_aborted_sq_deletion(cpl) ||
    1523           0 :             !nvme_io_path_is_available(io_path) ||
    1524           4 :             !nvme_ctrlr_is_available(nvme_ctrlr)) {
    1525          15 :                 bdev_nvme_clear_current_io_path(nbdev_ch);
    1526          15 :                 bio->io_path = NULL;
    1527          15 :                 if (spdk_nvme_cpl_is_ana_error(cpl)) {
    1528           1 :                         if (nvme_ctrlr_read_ana_log_page(nvme_ctrlr) == 0) {
    1529           1 :                                 io_path->nvme_ns->ana_state_updating = true;
    1530           1 :                         }
    1531           1 :                 }
    1532           3 :                 if (!any_io_path_may_become_available(nbdev_ch)) {
    1533           0 :                         return false;
    1534             :                 }
    1535           3 :                 *_delay_ms = 0;
    1536           3 :         } else {
    1537           4 :                 bio->retry_count++;
    1538             : 
    1539           4 :                 cdata = spdk_nvme_ctrlr_get_data(nvme_ctrlr->ctrlr);
    1540             : 
    1541           4 :                 if (cpl->status.crd != 0) {
    1542           1 :                         *_delay_ms = cdata->crdt[cpl->status.crd] * 100;
    1543           1 :                 } else {
    1544           3 :                         *_delay_ms = 0;
    1545             :                 }
    1546             :         }
    1547             : 
    1548           7 :         return true;
    1549           7 : }
    1550             : 
    1551             : static inline void
    1552          40 : bdev_nvme_io_complete_nvme_status(struct nvme_bdev_io *bio,
    1553             :                                   const struct spdk_nvme_cpl *cpl)
    1554             : {
    1555          40 :         struct spdk_bdev_io *bdev_io = spdk_bdev_io_from_ctx(bio);
    1556             :         struct nvme_bdev_channel *nbdev_ch;
    1557             :         uint64_t delay_ms;
    1558             : 
    1559          40 :         assert(!bdev_nvme_io_type_is_admin(bdev_io->type));
    1560             : 
    1561          40 :         if (spdk_likely(spdk_nvme_cpl_is_success(cpl))) {
    1562          20 :                 bdev_nvme_update_io_path_stat(bio);
    1563          20 :                 goto complete;
    1564             :         }
    1565             : 
    1566             :         /* Update error counts before deciding if retry is needed.
    1567             :          * Hence, error counts may be more than the number of I/O errors.
    1568             :          */
    1569          20 :         bdev_nvme_update_nvme_error_stat(bdev_io, cpl);
    1570             : 
    1571          27 :         if (cpl->status.dnr != 0 || spdk_nvme_cpl_is_aborted_by_request(cpl) ||
    1572           2 :             (g_opts.bdev_retry_count != -1 && bio->retry_count >= g_opts.bdev_retry_count)) {
    1573          23 :                 goto complete;
    1574             :         }
    1575             : 
    1576             :         /* At this point we don't know whether the sequence was successfully executed or not, so we
    1577             :          * cannot retry the IO */
    1578           7 :         if (bdev_io->u.bdev.accel_sequence != NULL) {
    1579           0 :                 goto complete;
    1580             :         }
    1581             : 
    1582           7 :         nbdev_ch = spdk_io_channel_get_ctx(spdk_bdev_io_get_io_channel(bdev_io));
    1583             : 
    1584           7 :         if (bdev_nvme_check_retry_io(bio, cpl, nbdev_ch, &delay_ms)) {
    1585           7 :                 bdev_nvme_queue_retry_io(nbdev_ch, bio, delay_ms);
    1586           7 :                 return;
    1587             :         }
    1588             : 
    1589             : complete:
    1590          25 :         bio->retry_count = 0;
    1591          25 :         bio->submit_tsc = 0;
    1592          25 :         bdev_io->u.bdev.accel_sequence = NULL;
    1593          25 :         __bdev_nvme_io_complete(bdev_io, 0, cpl);
    1594          32 : }
    1595             : 
    1596             : static inline void
    1597          13 : bdev_nvme_io_complete(struct nvme_bdev_io *bio, int rc)
    1598             : {
    1599          13 :         struct spdk_bdev_io *bdev_io = spdk_bdev_io_from_ctx(bio);
    1600             :         struct nvme_bdev_channel *nbdev_ch;
    1601             :         enum spdk_bdev_io_status io_status;
    1602             : 
    1603          13 :         assert(!bdev_nvme_io_type_is_admin(bdev_io->type));
    1604             : 
    1605          13 :         switch (rc) {
    1606             :         case 0:
    1607           1 :                 io_status = SPDK_BDEV_IO_STATUS_SUCCESS;
    1608           1 :                 break;
    1609             :         case -ENOMEM:
    1610           0 :                 io_status = SPDK_BDEV_IO_STATUS_NOMEM;
    1611           0 :                 break;
    1612             :         case -ENXIO:
    1613          15 :                 if (g_opts.bdev_retry_count == -1 || bio->retry_count < g_opts.bdev_retry_count) {
    1614          12 :                         nbdev_ch = spdk_io_channel_get_ctx(spdk_bdev_io_get_io_channel(bdev_io));
    1615             : 
    1616          12 :                         bdev_nvme_clear_current_io_path(nbdev_ch);
    1617          12 :                         bio->io_path = NULL;
    1618             : 
    1619          12 :                         if (any_io_path_may_become_available(nbdev_ch)) {
    1620           9 :                                 bdev_nvme_queue_retry_io(nbdev_ch, bio, 1000ULL);
    1621           9 :                                 return;
    1622             :                         }
    1623           3 :                 }
    1624             : 
    1625             :         /* fallthrough */
    1626             :         default:
    1627           3 :                 spdk_accel_sequence_abort(bdev_io->u.bdev.accel_sequence);
    1628           3 :                 bdev_io->u.bdev.accel_sequence = NULL;
    1629           3 :                 io_status = SPDK_BDEV_IO_STATUS_FAILED;
    1630           3 :                 break;
    1631             :         }
    1632             : 
    1633           4 :         bio->retry_count = 0;
    1634           4 :         bio->submit_tsc = 0;
    1635           4 :         __bdev_nvme_io_complete(bdev_io, io_status, NULL);
    1636          13 : }
    1637             : 
    1638             : static inline void
    1639           4 : bdev_nvme_admin_complete(struct nvme_bdev_io *bio, int rc)
    1640             : {
    1641           4 :         struct spdk_bdev_io *bdev_io = spdk_bdev_io_from_ctx(bio);
    1642             :         enum spdk_bdev_io_status io_status;
    1643             : 
    1644           4 :         switch (rc) {
    1645             :         case 0:
    1646           1 :                 io_status = SPDK_BDEV_IO_STATUS_SUCCESS;
    1647           1 :                 break;
    1648             :         case -ENOMEM:
    1649           0 :                 io_status = SPDK_BDEV_IO_STATUS_NOMEM;
    1650           0 :                 break;
    1651           1 :         case -ENXIO:
    1652             :         /* fallthrough */
    1653             :         default:
    1654           3 :                 io_status = SPDK_BDEV_IO_STATUS_FAILED;
    1655           3 :                 break;
    1656             :         }
    1657             : 
    1658           4 :         __bdev_nvme_io_complete(bdev_io, io_status, NULL);
    1659           4 : }
    1660             : 
    1661             : static void
    1662           3 : bdev_nvme_clear_io_path_caches_done(struct nvme_ctrlr *nvme_ctrlr,
    1663             :                                     void *ctx, int status)
    1664             : {
    1665           3 :         pthread_mutex_lock(&nvme_ctrlr->mutex);
    1666             : 
    1667           3 :         assert(nvme_ctrlr->io_path_cache_clearing == true);
    1668           3 :         nvme_ctrlr->io_path_cache_clearing = false;
    1669             : 
    1670           3 :         if (!nvme_ctrlr_can_be_unregistered(nvme_ctrlr)) {
    1671           3 :                 pthread_mutex_unlock(&nvme_ctrlr->mutex);
    1672           3 :                 return;
    1673             :         }
    1674             : 
    1675           0 :         pthread_mutex_unlock(&nvme_ctrlr->mutex);
    1676             : 
    1677           0 :         nvme_ctrlr_unregister(nvme_ctrlr);
    1678           3 : }
    1679             : 
    1680             : static void
    1681         416 : _bdev_nvme_clear_io_path_cache(struct nvme_qpair *nvme_qpair)
    1682             : {
    1683             :         struct nvme_io_path *io_path;
    1684             : 
    1685         651 :         TAILQ_FOREACH(io_path, &nvme_qpair->io_path_list, tailq) {
    1686         235 :                 if (io_path->nbdev_ch == NULL) {
    1687          72 :                         continue;
    1688             :                 }
    1689         163 :                 bdev_nvme_clear_current_io_path(io_path->nbdev_ch);
    1690         163 :         }
    1691         416 : }
    1692             : 
    1693             : static void
    1694           1 : bdev_nvme_clear_io_path_cache(struct nvme_ctrlr_channel_iter *i,
    1695             :                               struct nvme_ctrlr *nvme_ctrlr,
    1696             :                               struct nvme_ctrlr_channel *ctrlr_ch,
    1697             :                               void *ctx)
    1698             : {
    1699           1 :         assert(ctrlr_ch->qpair != NULL);
    1700             : 
    1701           1 :         _bdev_nvme_clear_io_path_cache(ctrlr_ch->qpair);
    1702             : 
    1703           1 :         nvme_ctrlr_for_each_channel_continue(i, 0);
    1704           1 : }
    1705             : 
    1706             : static void
    1707           3 : bdev_nvme_clear_io_path_caches(struct nvme_ctrlr *nvme_ctrlr)
    1708             : {
    1709           3 :         pthread_mutex_lock(&nvme_ctrlr->mutex);
    1710           3 :         if (!nvme_ctrlr_is_available(nvme_ctrlr) ||
    1711           3 :             nvme_ctrlr->io_path_cache_clearing) {
    1712           0 :                 pthread_mutex_unlock(&nvme_ctrlr->mutex);
    1713           0 :                 return;
    1714             :         }
    1715             : 
    1716           3 :         nvme_ctrlr->io_path_cache_clearing = true;
    1717           3 :         pthread_mutex_unlock(&nvme_ctrlr->mutex);
    1718             : 
    1719           3 :         nvme_ctrlr_for_each_channel(nvme_ctrlr,
    1720             :                                     bdev_nvme_clear_io_path_cache,
    1721             :                                     NULL,
    1722             :                                     bdev_nvme_clear_io_path_caches_done);
    1723           3 : }
    1724             : 
    1725             : static struct nvme_qpair *
    1726         119 : nvme_poll_group_get_qpair(struct nvme_poll_group *group, struct spdk_nvme_qpair *qpair)
    1727             : {
    1728             :         struct nvme_qpair *nvme_qpair;
    1729             : 
    1730         136 :         TAILQ_FOREACH(nvme_qpair, &group->qpair_list, tailq) {
    1731         136 :                 if (nvme_qpair->qpair == qpair) {
    1732         119 :                         break;
    1733             :                 }
    1734          17 :         }
    1735             : 
    1736         119 :         return nvme_qpair;
    1737             : }
    1738             : 
    1739             : static void nvme_qpair_delete(struct nvme_qpair *nvme_qpair);
    1740             : 
    1741             : static void
    1742         119 : bdev_nvme_disconnected_qpair_cb(struct spdk_nvme_qpair *qpair, void *poll_group_ctx)
    1743             : {
    1744         119 :         struct nvme_poll_group *group = poll_group_ctx;
    1745             :         struct nvme_qpair *nvme_qpair;
    1746             :         struct nvme_ctrlr *nvme_ctrlr;
    1747             :         struct nvme_ctrlr_channel *ctrlr_ch;
    1748             :         int status;
    1749             : 
    1750         119 :         nvme_qpair = nvme_poll_group_get_qpair(group, qpair);
    1751         119 :         if (nvme_qpair == NULL) {
    1752           0 :                 return;
    1753             :         }
    1754             : 
    1755         119 :         if (nvme_qpair->qpair != NULL) {
    1756         119 :                 spdk_nvme_ctrlr_free_io_qpair(nvme_qpair->qpair);
    1757         119 :                 nvme_qpair->qpair = NULL;
    1758         119 :         }
    1759             : 
    1760         119 :         _bdev_nvme_clear_io_path_cache(nvme_qpair);
    1761             : 
    1762         119 :         nvme_ctrlr = nvme_qpair->ctrlr;
    1763         119 :         ctrlr_ch = nvme_qpair->ctrlr_ch;
    1764             : 
    1765         119 :         if (ctrlr_ch != NULL) {
    1766          74 :                 if (ctrlr_ch->reset_iter != NULL) {
    1767             :                         /* We are in a full reset sequence. */
    1768          69 :                         if (ctrlr_ch->connect_poller != NULL) {
    1769             :                                 /* qpair was failed to connect. Abort the reset sequence. */
    1770           0 :                                 NVME_CTRLR_INFOLOG(nvme_ctrlr,
    1771             :                                                    "qpair %p was failed to connect. abort the reset ctrlr sequence.\n",
    1772             :                                                    qpair);
    1773           0 :                                 spdk_poller_unregister(&ctrlr_ch->connect_poller);
    1774           0 :                                 status = -1;
    1775           0 :                         } else {
    1776             :                                 /* qpair was completed to disconnect. Just move to the next ctrlr_channel. */
    1777          69 :                                 NVME_CTRLR_INFOLOG(nvme_ctrlr,
    1778             :                                                    "qpair %p was disconnected and freed in a reset ctrlr sequence.\n",
    1779             :                                                    qpair);
    1780          69 :                                 status = 0;
    1781             :                         }
    1782          69 :                         nvme_ctrlr_for_each_channel_continue(ctrlr_ch->reset_iter, status);
    1783          69 :                         ctrlr_ch->reset_iter = NULL;
    1784          69 :                 } else {
    1785             :                         /* qpair was disconnected unexpectedly. Reset controller for recovery. */
    1786           5 :                         NVME_CTRLR_INFOLOG(nvme_ctrlr, "qpair %p was disconnected and freed. reset controller.\n",
    1787             :                                            qpair);
    1788           5 :                         bdev_nvme_failover_ctrlr(nvme_ctrlr);
    1789             :                 }
    1790          74 :         } else {
    1791             :                 /* In this case, ctrlr_channel is already deleted. */
    1792          45 :                 NVME_CTRLR_INFOLOG(nvme_ctrlr, "qpair %p was disconnected and freed. delete nvme_qpair.\n",
    1793             :                                    qpair);
    1794          45 :                 nvme_qpair_delete(nvme_qpair);
    1795             :         }
    1796         119 : }
    1797             : 
    1798             : static void
    1799           0 : bdev_nvme_check_io_qpairs(struct nvme_poll_group *group)
    1800             : {
    1801             :         struct nvme_qpair *nvme_qpair;
    1802             : 
    1803           0 :         TAILQ_FOREACH(nvme_qpair, &group->qpair_list, tailq) {
    1804           0 :                 if (nvme_qpair->qpair == NULL || nvme_qpair->ctrlr_ch == NULL) {
    1805           0 :                         continue;
    1806             :                 }
    1807             : 
    1808           0 :                 if (spdk_nvme_qpair_get_failure_reason(nvme_qpair->qpair) !=
    1809             :                     SPDK_NVME_QPAIR_FAILURE_NONE) {
    1810           0 :                         _bdev_nvme_clear_io_path_cache(nvme_qpair);
    1811           0 :                 }
    1812           0 :         }
    1813           0 : }
    1814             : 
    1815             : static int
    1816        1238 : bdev_nvme_poll(void *arg)
    1817             : {
    1818        1238 :         struct nvme_poll_group *group = arg;
    1819             :         int64_t num_completions;
    1820             : 
    1821        1238 :         if (group->collect_spin_stat && group->start_ticks == 0) {
    1822           0 :                 group->start_ticks = spdk_get_ticks();
    1823           0 :         }
    1824             : 
    1825        1238 :         num_completions = spdk_nvme_poll_group_process_completions(group->group, 0,
    1826             :                           bdev_nvme_disconnected_qpair_cb);
    1827        1238 :         if (group->collect_spin_stat) {
    1828           0 :                 if (num_completions > 0) {
    1829           0 :                         if (group->end_ticks != 0) {
    1830           0 :                                 group->spin_ticks += (group->end_ticks - group->start_ticks);
    1831           0 :                                 group->end_ticks = 0;
    1832           0 :                         }
    1833           0 :                         group->start_ticks = 0;
    1834           0 :                 } else {
    1835           0 :                         group->end_ticks = spdk_get_ticks();
    1836             :                 }
    1837           0 :         }
    1838             : 
    1839        1238 :         if (spdk_unlikely(num_completions < 0)) {
    1840           0 :                 bdev_nvme_check_io_qpairs(group);
    1841           0 :         }
    1842             : 
    1843        1238 :         return num_completions > 0 ? SPDK_POLLER_BUSY : SPDK_POLLER_IDLE;
    1844             : }
    1845             : 
    1846             : static int bdev_nvme_poll_adminq(void *arg);
    1847             : 
    1848             : static void
    1849         140 : bdev_nvme_change_adminq_poll_period(struct nvme_ctrlr *nvme_ctrlr, uint64_t new_period_us)
    1850             : {
    1851         140 :         if (spdk_interrupt_mode_is_enabled()) {
    1852           0 :                 return;
    1853             :         }
    1854             : 
    1855         140 :         spdk_poller_unregister(&nvme_ctrlr->adminq_timer_poller);
    1856             : 
    1857         140 :         nvme_ctrlr->adminq_timer_poller = SPDK_POLLER_REGISTER(bdev_nvme_poll_adminq,
    1858             :                                           nvme_ctrlr, new_period_us);
    1859         140 : }
    1860             : 
    1861             : static int
    1862         192 : bdev_nvme_poll_adminq(void *arg)
    1863             : {
    1864             :         int32_t rc;
    1865         192 :         struct nvme_ctrlr *nvme_ctrlr = arg;
    1866             :         nvme_ctrlr_disconnected_cb disconnected_cb;
    1867             : 
    1868         192 :         assert(nvme_ctrlr != NULL);
    1869             : 
    1870         192 :         rc = spdk_nvme_ctrlr_process_admin_completions(nvme_ctrlr->ctrlr);
    1871         192 :         if (rc < 0) {
    1872          88 :                 disconnected_cb = nvme_ctrlr->disconnected_cb;
    1873          88 :                 nvme_ctrlr->disconnected_cb = NULL;
    1874             : 
    1875          88 :                 if (disconnected_cb != NULL) {
    1876         140 :                         bdev_nvme_change_adminq_poll_period(nvme_ctrlr,
    1877          70 :                                                             g_opts.nvme_adminq_poll_period_us);
    1878          70 :                         disconnected_cb(nvme_ctrlr);
    1879          70 :                 } else {
    1880          18 :                         bdev_nvme_failover_ctrlr(nvme_ctrlr);
    1881             :                 }
    1882         192 :         } else if (spdk_nvme_ctrlr_get_admin_qp_failure_reason(nvme_ctrlr->ctrlr) !=
    1883             :                    SPDK_NVME_QPAIR_FAILURE_NONE) {
    1884           0 :                 bdev_nvme_clear_io_path_caches(nvme_ctrlr);
    1885           0 :         }
    1886             : 
    1887         192 :         return rc == 0 ? SPDK_POLLER_IDLE : SPDK_POLLER_BUSY;
    1888             : }
    1889             : 
    1890             : static void
    1891          39 : nvme_bdev_free(void *io_device)
    1892             : {
    1893          39 :         struct nvme_bdev *nbdev = io_device;
    1894             : 
    1895          39 :         pthread_mutex_destroy(&nbdev->mutex);
    1896          39 :         free(nbdev->disk.name);
    1897          39 :         free(nbdev->err_stat);
    1898          39 :         free(nbdev);
    1899          39 : }
    1900             : 
    1901             : static int
    1902          38 : bdev_nvme_destruct(void *ctx)
    1903             : {
    1904          38 :         struct nvme_bdev *nbdev = ctx;
    1905             :         struct nvme_ns *nvme_ns, *tmp_nvme_ns;
    1906             : 
    1907             :         SPDK_DTRACE_PROBE2(bdev_nvme_destruct, nbdev->nbdev_ctrlr->name, nbdev->nsid);
    1908             : 
    1909          38 :         pthread_mutex_lock(&nbdev->mutex);
    1910             : 
    1911          77 :         TAILQ_FOREACH_SAFE(nvme_ns, &nbdev->nvme_ns_list, tailq, tmp_nvme_ns) {
    1912          39 :                 pthread_mutex_lock(&nvme_ns->ctrlr->mutex);
    1913             : 
    1914          39 :                 nvme_ns->bdev = NULL;
    1915             : 
    1916          39 :                 assert(nvme_ns->id > 0);
    1917             : 
    1918          39 :                 if (nvme_ctrlr_get_ns(nvme_ns->ctrlr, nvme_ns->id) == NULL) {
    1919           0 :                         pthread_mutex_unlock(&nvme_ns->ctrlr->mutex);
    1920             : 
    1921           0 :                         nvme_ctrlr_put_ref(nvme_ns->ctrlr);
    1922           0 :                         nvme_ns_free(nvme_ns);
    1923           0 :                 } else {
    1924          39 :                         pthread_mutex_unlock(&nvme_ns->ctrlr->mutex);
    1925             :                 }
    1926          39 :         }
    1927             : 
    1928          38 :         pthread_mutex_unlock(&nbdev->mutex);
    1929             : 
    1930          38 :         pthread_mutex_lock(&g_bdev_nvme_mutex);
    1931          38 :         TAILQ_REMOVE(&nbdev->nbdev_ctrlr->bdevs, nbdev, tailq);
    1932          38 :         pthread_mutex_unlock(&g_bdev_nvme_mutex);
    1933             : 
    1934          38 :         spdk_io_device_unregister(nbdev, nvme_bdev_free);
    1935             : 
    1936          38 :         return 0;
    1937             : }
    1938             : 
    1939             : static int
    1940         120 : bdev_nvme_create_qpair(struct nvme_qpair *nvme_qpair)
    1941             : {
    1942             :         struct nvme_ctrlr *nvme_ctrlr;
    1943             :         struct spdk_nvme_io_qpair_opts opts;
    1944             :         struct spdk_nvme_qpair *qpair;
    1945             :         int rc;
    1946             : 
    1947         120 :         nvme_ctrlr = nvme_qpair->ctrlr;
    1948             : 
    1949         120 :         spdk_nvme_ctrlr_get_default_io_qpair_opts(nvme_ctrlr->ctrlr, &opts, sizeof(opts));
    1950         120 :         opts.create_only = true;
    1951             :         /* In interrupt mode qpairs must be created in sync mode, else it will never be connected.
    1952             :          * delay_cmd_submit must be false as in interrupt mode requests cannot be submitted in
    1953             :          * completion context.
    1954             :          */
    1955         120 :         if (!spdk_interrupt_mode_is_enabled()) {
    1956         120 :                 opts.async_mode = true;
    1957         120 :                 opts.delay_cmd_submit = g_opts.delay_cmd_submit;
    1958         120 :         }
    1959         120 :         opts.io_queue_requests = spdk_max(g_opts.io_queue_requests, opts.io_queue_requests);
    1960         120 :         g_opts.io_queue_requests = opts.io_queue_requests;
    1961             : 
    1962         120 :         qpair = spdk_nvme_ctrlr_alloc_io_qpair(nvme_ctrlr->ctrlr, &opts, sizeof(opts));
    1963         120 :         if (qpair == NULL) {
    1964           0 :                 return -1;
    1965             :         }
    1966             : 
    1967             :         SPDK_DTRACE_PROBE3(bdev_nvme_create_qpair, nvme_ctrlr->nbdev_ctrlr->name,
    1968             :                            spdk_nvme_qpair_get_id(qpair), spdk_thread_get_id(nvme_ctrlr->thread));
    1969             : 
    1970         120 :         assert(nvme_qpair->group != NULL);
    1971             : 
    1972         120 :         rc = spdk_nvme_poll_group_add(nvme_qpair->group->group, qpair);
    1973         120 :         if (rc != 0) {
    1974           0 :                 NVME_CTRLR_ERRLOG(nvme_ctrlr, "Unable to begin polling on NVMe Channel.\n");
    1975           0 :                 goto err;
    1976             :         }
    1977             : 
    1978         120 :         rc = spdk_nvme_ctrlr_connect_io_qpair(nvme_ctrlr->ctrlr, qpair);
    1979         120 :         if (rc != 0) {
    1980           0 :                 NVME_CTRLR_ERRLOG(nvme_ctrlr, "Unable to connect I/O qpair.\n");
    1981           0 :                 goto err;
    1982             :         }
    1983             : 
    1984         120 :         nvme_qpair->qpair = qpair;
    1985             : 
    1986         120 :         if (!g_opts.disable_auto_failback) {
    1987          83 :                 _bdev_nvme_clear_io_path_cache(nvme_qpair);
    1988          83 :         }
    1989             : 
    1990         120 :         NVME_CTRLR_INFOLOG(nvme_ctrlr, "Connecting qpair %p:%u started.\n",
    1991             :                            qpair, spdk_nvme_qpair_get_id(qpair));
    1992             : 
    1993         120 :         return 0;
    1994             : 
    1995             : err:
    1996           0 :         spdk_nvme_ctrlr_free_io_qpair(qpair);
    1997             : 
    1998           0 :         return rc;
    1999         120 : }
    2000             : 
    2001             : static void bdev_nvme_reset_io_continue(void *cb_arg, int rc);
    2002             : 
    2003             : static void
    2004          74 : bdev_nvme_complete_pending_resets(struct nvme_ctrlr *nvme_ctrlr, bool success)
    2005             : {
    2006          74 :         int rc = 0;
    2007             :         struct nvme_bdev_io *bio;
    2008             : 
    2009          74 :         if (!success) {
    2010          33 :                 rc = -1;
    2011          33 :         }
    2012             : 
    2013          86 :         while (!TAILQ_EMPTY(&nvme_ctrlr->pending_resets)) {
    2014          12 :                 bio = TAILQ_FIRST(&nvme_ctrlr->pending_resets);
    2015          12 :                 TAILQ_REMOVE(&nvme_ctrlr->pending_resets, bio, retry_link);
    2016             : 
    2017          12 :                 bdev_nvme_reset_io_continue(bio, rc);
    2018             :         }
    2019          74 : }
    2020             : 
    2021             : /* This function marks the current trid as failed by storing the current ticks
    2022             :  * and then sets the next trid to the active trid within a controller if exists.
    2023             :  *
    2024             :  * The purpose of the boolean return value is to request the caller to disconnect
    2025             :  * the current trid now to try connecting the next trid.
    2026             :  */
    2027             : static bool
    2028          62 : bdev_nvme_failover_trid(struct nvme_ctrlr *nvme_ctrlr, bool remove, bool start)
    2029             : {
    2030             :         struct nvme_path_id *path_id, *next_path;
    2031             :         int rc __attribute__((unused));
    2032             : 
    2033          62 :         path_id = TAILQ_FIRST(&nvme_ctrlr->trids);
    2034          62 :         assert(path_id);
    2035          62 :         assert(path_id == nvme_ctrlr->active_path_id);
    2036          62 :         next_path = TAILQ_NEXT(path_id, link);
    2037             : 
    2038             :         /* Update the last failed time. It means the trid is failed if its last
    2039             :          * failed time is non-zero.
    2040             :          */
    2041          62 :         path_id->last_failed_tsc = spdk_get_ticks();
    2042             : 
    2043          62 :         if (next_path == NULL) {
    2044             :                 /* There is no alternate trid within a controller. */
    2045          51 :                 return false;
    2046             :         }
    2047             : 
    2048          11 :         if (!start && nvme_ctrlr->opts.reconnect_delay_sec == 0) {
    2049             :                 /* Connect is not retried in a controller reset sequence. Connecting
    2050             :                  * the next trid will be done by the next bdev_nvme_failover_ctrlr() call.
    2051             :                  */
    2052           3 :                 return false;
    2053             :         }
    2054             : 
    2055           8 :         assert(path_id->trid.trtype != SPDK_NVME_TRANSPORT_PCIE);
    2056             : 
    2057           8 :         NVME_CTRLR_NOTICELOG(nvme_ctrlr, "Start failover from %s:%s to %s:%s\n",
    2058             :                              path_id->trid.traddr, path_id->trid.trsvcid,
    2059             :                              next_path->trid.traddr, next_path->trid.trsvcid);
    2060             : 
    2061           8 :         spdk_nvme_ctrlr_fail(nvme_ctrlr->ctrlr);
    2062           8 :         nvme_ctrlr->active_path_id = next_path;
    2063           8 :         rc = spdk_nvme_ctrlr_set_trid(nvme_ctrlr->ctrlr, &next_path->trid);
    2064           8 :         assert(rc == 0);
    2065           8 :         TAILQ_REMOVE(&nvme_ctrlr->trids, path_id, link);
    2066           8 :         if (!remove) {
    2067             :                 /** Shuffle the old trid to the end of the list and use the new one.
    2068             :                  * Allows for round robin through multiple connections.
    2069             :                  */
    2070           6 :                 TAILQ_INSERT_TAIL(&nvme_ctrlr->trids, path_id, link);
    2071           6 :         } else {
    2072           2 :                 free(path_id);
    2073             :         }
    2074             : 
    2075           8 :         if (start || next_path->last_failed_tsc == 0) {
    2076             :                 /* bdev_nvme_failover_ctrlr() is just called or the next trid is not failed
    2077             :                  * or used yet. Try the next trid now.
    2078             :                  */
    2079           7 :                 return true;
    2080             :         }
    2081             : 
    2082           2 :         if (spdk_get_ticks() > next_path->last_failed_tsc + spdk_get_ticks_hz() *
    2083           1 :             nvme_ctrlr->opts.reconnect_delay_sec) {
    2084             :                 /* Enough backoff passed since the next trid failed. Try the next trid now. */
    2085           0 :                 return true;
    2086             :         }
    2087             : 
    2088             :         /* The next trid will be tried after reconnect_delay_sec seconds. */
    2089           1 :         return false;
    2090          62 : }
    2091             : 
    2092             : static bool
    2093          88 : bdev_nvme_check_ctrlr_loss_timeout(struct nvme_ctrlr *nvme_ctrlr)
    2094             : {
    2095             :         int32_t elapsed;
    2096             : 
    2097          88 :         if (nvme_ctrlr->opts.ctrlr_loss_timeout_sec == 0 ||
    2098          37 :             nvme_ctrlr->opts.ctrlr_loss_timeout_sec == -1) {
    2099          62 :                 return false;
    2100             :         }
    2101             : 
    2102          26 :         elapsed = (spdk_get_ticks() - nvme_ctrlr->reset_start_tsc) / spdk_get_ticks_hz();
    2103          26 :         if (elapsed >= nvme_ctrlr->opts.ctrlr_loss_timeout_sec) {
    2104           6 :                 return true;
    2105             :         } else {
    2106          20 :                 return false;
    2107             :         }
    2108          88 : }
    2109             : 
    2110             : static bool
    2111          12 : bdev_nvme_check_fast_io_fail_timeout(struct nvme_ctrlr *nvme_ctrlr)
    2112             : {
    2113             :         uint32_t elapsed;
    2114             : 
    2115          12 :         if (nvme_ctrlr->opts.fast_io_fail_timeout_sec == 0) {
    2116           8 :                 return false;
    2117             :         }
    2118             : 
    2119           4 :         elapsed = (spdk_get_ticks() - nvme_ctrlr->reset_start_tsc) / spdk_get_ticks_hz();
    2120           4 :         if (elapsed >= nvme_ctrlr->opts.fast_io_fail_timeout_sec) {
    2121           2 :                 return true;
    2122             :         } else {
    2123           2 :                 return false;
    2124             :         }
    2125          12 : }
    2126             : 
    2127             : static void bdev_nvme_reset_ctrlr_complete(struct nvme_ctrlr *nvme_ctrlr, bool success);
    2128             : 
    2129             : static void
    2130          71 : nvme_ctrlr_disconnect(struct nvme_ctrlr *nvme_ctrlr, nvme_ctrlr_disconnected_cb cb_fn)
    2131             : {
    2132             :         int rc;
    2133             : 
    2134          71 :         NVME_CTRLR_INFOLOG(nvme_ctrlr, "Start disconnecting ctrlr.\n");
    2135             : 
    2136          71 :         rc = spdk_nvme_ctrlr_disconnect(nvme_ctrlr->ctrlr);
    2137          71 :         if (rc != 0) {
    2138           1 :                 NVME_CTRLR_WARNLOG(nvme_ctrlr, "disconnecting ctrlr failed.\n");
    2139             : 
    2140             :                 /* Disconnect fails if ctrlr is already resetting or removed. In this case,
    2141             :                  * fail the reset sequence immediately.
    2142             :                  */
    2143           1 :                 bdev_nvme_reset_ctrlr_complete(nvme_ctrlr, false);
    2144           1 :                 return;
    2145             :         }
    2146             : 
    2147             :         /* spdk_nvme_ctrlr_disconnect() may complete asynchronously later by polling adminq.
    2148             :          * Set callback here to execute the specified operation after ctrlr is really disconnected.
    2149             :          */
    2150          70 :         assert(nvme_ctrlr->disconnected_cb == NULL);
    2151          70 :         nvme_ctrlr->disconnected_cb = cb_fn;
    2152             : 
    2153             :         /* During disconnection, reduce the period to poll adminq more often. */
    2154          70 :         bdev_nvme_change_adminq_poll_period(nvme_ctrlr, 0);
    2155          71 : }
    2156             : 
    2157             : enum bdev_nvme_op_after_reset {
    2158             :         OP_NONE,
    2159             :         OP_COMPLETE_PENDING_DESTRUCT,
    2160             :         OP_DESTRUCT,
    2161             :         OP_DELAYED_RECONNECT,
    2162             :         OP_FAILOVER,
    2163             : };
    2164             : 
    2165             : typedef enum bdev_nvme_op_after_reset _bdev_nvme_op_after_reset;
    2166             : 
    2167             : static _bdev_nvme_op_after_reset
    2168          74 : bdev_nvme_check_op_after_reset(struct nvme_ctrlr *nvme_ctrlr, bool success)
    2169             : {
    2170          74 :         if (nvme_ctrlr_can_be_unregistered(nvme_ctrlr)) {
    2171             :                 /* Complete pending destruct after reset completes. */
    2172           0 :                 return OP_COMPLETE_PENDING_DESTRUCT;
    2173          74 :         } else if (nvme_ctrlr->pending_failover) {
    2174           3 :                 nvme_ctrlr->pending_failover = false;
    2175           3 :                 nvme_ctrlr->reset_start_tsc = 0;
    2176           3 :                 return OP_FAILOVER;
    2177          71 :         } else if (success || nvme_ctrlr->opts.reconnect_delay_sec == 0) {
    2178          57 :                 nvme_ctrlr->reset_start_tsc = 0;
    2179          57 :                 return OP_NONE;
    2180          14 :         } else if (bdev_nvme_check_ctrlr_loss_timeout(nvme_ctrlr)) {
    2181           2 :                 return OP_DESTRUCT;
    2182             :         } else {
    2183          12 :                 if (bdev_nvme_check_fast_io_fail_timeout(nvme_ctrlr)) {
    2184           2 :                         nvme_ctrlr->fast_io_fail_timedout = true;
    2185           2 :                 }
    2186          12 :                 return OP_DELAYED_RECONNECT;
    2187             :         }
    2188          74 : }
    2189             : 
    2190             : static int bdev_nvme_delete_ctrlr(struct nvme_ctrlr *nvme_ctrlr, bool hotplug);
    2191             : static void bdev_nvme_reconnect_ctrlr(struct nvme_ctrlr *nvme_ctrlr);
    2192             : 
    2193             : static int
    2194           9 : bdev_nvme_reconnect_delay_timer_expired(void *ctx)
    2195             : {
    2196           9 :         struct nvme_ctrlr *nvme_ctrlr = ctx;
    2197             : 
    2198             :         SPDK_DTRACE_PROBE1(bdev_nvme_ctrlr_reconnect_delay, nvme_ctrlr->nbdev_ctrlr->name);
    2199           9 :         pthread_mutex_lock(&nvme_ctrlr->mutex);
    2200             : 
    2201           9 :         spdk_poller_unregister(&nvme_ctrlr->reconnect_delay_timer);
    2202             : 
    2203           9 :         if (!nvme_ctrlr->reconnect_is_delayed) {
    2204           0 :                 pthread_mutex_unlock(&nvme_ctrlr->mutex);
    2205           0 :                 return SPDK_POLLER_BUSY;
    2206             :         }
    2207             : 
    2208           9 :         nvme_ctrlr->reconnect_is_delayed = false;
    2209             : 
    2210           9 :         if (nvme_ctrlr->destruct) {
    2211           0 :                 pthread_mutex_unlock(&nvme_ctrlr->mutex);
    2212           0 :                 return SPDK_POLLER_BUSY;
    2213             :         }
    2214             : 
    2215           9 :         assert(nvme_ctrlr->resetting == false);
    2216           9 :         nvme_ctrlr->resetting = true;
    2217             : 
    2218           9 :         pthread_mutex_unlock(&nvme_ctrlr->mutex);
    2219             : 
    2220           9 :         spdk_poller_resume(nvme_ctrlr->adminq_timer_poller);
    2221             : 
    2222           9 :         bdev_nvme_reconnect_ctrlr(nvme_ctrlr);
    2223           9 :         return SPDK_POLLER_BUSY;
    2224           9 : }
    2225             : 
    2226             : static void
    2227          12 : bdev_nvme_start_reconnect_delay_timer(struct nvme_ctrlr *nvme_ctrlr)
    2228             : {
    2229          12 :         spdk_poller_pause(nvme_ctrlr->adminq_timer_poller);
    2230             : 
    2231          12 :         assert(nvme_ctrlr->reconnect_is_delayed == false);
    2232          12 :         nvme_ctrlr->reconnect_is_delayed = true;
    2233             : 
    2234          12 :         assert(nvme_ctrlr->reconnect_delay_timer == NULL);
    2235          12 :         nvme_ctrlr->reconnect_delay_timer = SPDK_POLLER_REGISTER(bdev_nvme_reconnect_delay_timer_expired,
    2236             :                                             nvme_ctrlr,
    2237             :                                             nvme_ctrlr->opts.reconnect_delay_sec * SPDK_SEC_TO_USEC);
    2238          12 : }
    2239             : 
    2240             : static void remove_discovery_entry(struct nvme_ctrlr *nvme_ctrlr);
    2241             : 
    2242             : static void
    2243          74 : bdev_nvme_reset_ctrlr_complete(struct nvme_ctrlr *nvme_ctrlr, bool success)
    2244             : {
    2245          74 :         bdev_nvme_ctrlr_op_cb ctrlr_op_cb_fn = nvme_ctrlr->ctrlr_op_cb_fn;
    2246          74 :         void *ctrlr_op_cb_arg = nvme_ctrlr->ctrlr_op_cb_arg;
    2247             :         enum bdev_nvme_op_after_reset op_after_reset;
    2248             : 
    2249          74 :         assert(nvme_ctrlr->thread == spdk_get_thread());
    2250             : 
    2251          74 :         pthread_mutex_lock(&nvme_ctrlr->mutex);
    2252          74 :         if (!success) {
    2253             :                 /* Connecting the active trid failed. Set the next alternate trid to the
    2254             :                  * active trid if it exists.
    2255             :                  */
    2256          35 :                 if (bdev_nvme_failover_trid(nvme_ctrlr, false, false)) {
    2257             :                         /* The next alternate trid exists and is ready to try. Try it now. */
    2258           2 :                         pthread_mutex_unlock(&nvme_ctrlr->mutex);
    2259             : 
    2260           2 :                         NVME_CTRLR_INFOLOG(nvme_ctrlr, "Try the next alternate trid %s:%s now.\n",
    2261             :                                            nvme_ctrlr->active_path_id->trid.traddr,
    2262             :                                            nvme_ctrlr->active_path_id->trid.trsvcid);
    2263             : 
    2264           2 :                         nvme_ctrlr_disconnect(nvme_ctrlr, bdev_nvme_reconnect_ctrlr);
    2265           2 :                         return;
    2266             :                 }
    2267             : 
    2268             :                 /* We came here if there is no alternate trid or if the next trid exists but
    2269             :                  * is not ready to try. We will try the active trid after reconnect_delay_sec
    2270             :                  * seconds if it is non-zero or at the next reset call otherwise.
    2271             :                  */
    2272          33 :         } else {
    2273             :                 /* Connecting the active trid succeeded. Clear the last failed time because it
    2274             :                  * means the trid is failed if its last failed time is non-zero.
    2275             :                  */
    2276          39 :                 nvme_ctrlr->active_path_id->last_failed_tsc = 0;
    2277             :         }
    2278             : 
    2279          72 :         NVME_CTRLR_INFOLOG(nvme_ctrlr, "Clear pending resets.\n");
    2280             : 
    2281             :         /* Make sure we clear any pending resets before returning. */
    2282          72 :         bdev_nvme_complete_pending_resets(nvme_ctrlr, success);
    2283             : 
    2284          72 :         if (!success) {
    2285          33 :                 NVME_CTRLR_ERRLOG(nvme_ctrlr, "Resetting controller failed.\n");
    2286          33 :         } else {
    2287          39 :                 NVME_CTRLR_NOTICELOG(nvme_ctrlr, "Resetting controller successful.\n");
    2288             :         }
    2289             : 
    2290          72 :         nvme_ctrlr->resetting = false;
    2291          72 :         nvme_ctrlr->dont_retry = false;
    2292          72 :         nvme_ctrlr->in_failover = false;
    2293             : 
    2294          72 :         nvme_ctrlr->ctrlr_op_cb_fn = NULL;
    2295          72 :         nvme_ctrlr->ctrlr_op_cb_arg = NULL;
    2296             : 
    2297          72 :         op_after_reset = bdev_nvme_check_op_after_reset(nvme_ctrlr, success);
    2298          72 :         pthread_mutex_unlock(&nvme_ctrlr->mutex);
    2299             : 
    2300             :         /* Delay callbacks when the next operation is a failover. */
    2301          72 :         if (ctrlr_op_cb_fn && op_after_reset != OP_FAILOVER) {
    2302          18 :                 ctrlr_op_cb_fn(ctrlr_op_cb_arg, success ? 0 : -1);
    2303          18 :         }
    2304             : 
    2305          72 :         switch (op_after_reset) {
    2306             :         case OP_COMPLETE_PENDING_DESTRUCT:
    2307           0 :                 nvme_ctrlr_unregister(nvme_ctrlr);
    2308           0 :                 break;
    2309             :         case OP_DESTRUCT:
    2310           2 :                 bdev_nvme_delete_ctrlr(nvme_ctrlr, false);
    2311           2 :                 remove_discovery_entry(nvme_ctrlr);
    2312           2 :                 break;
    2313             :         case OP_DELAYED_RECONNECT:
    2314          12 :                 nvme_ctrlr_disconnect(nvme_ctrlr, bdev_nvme_start_reconnect_delay_timer);
    2315          12 :                 break;
    2316             :         case OP_FAILOVER:
    2317           3 :                 nvme_ctrlr->ctrlr_op_cb_fn = ctrlr_op_cb_fn;
    2318           3 :                 nvme_ctrlr->ctrlr_op_cb_arg = ctrlr_op_cb_arg;
    2319           3 :                 bdev_nvme_failover_ctrlr(nvme_ctrlr);
    2320           3 :                 break;
    2321             :         default:
    2322          55 :                 break;
    2323             :         }
    2324          74 : }
    2325             : 
    2326             : static void
    2327           0 : bdev_nvme_reset_create_qpairs_failed(struct nvme_ctrlr *nvme_ctrlr, void *ctx, int status)
    2328             : {
    2329           0 :         bdev_nvme_reset_ctrlr_complete(nvme_ctrlr, false);
    2330           0 : }
    2331             : 
    2332             : static void
    2333         110 : bdev_nvme_reset_destroy_qpair(struct nvme_ctrlr_channel_iter *i,
    2334             :                               struct nvme_ctrlr *nvme_ctrlr,
    2335             :                               struct nvme_ctrlr_channel *ctrlr_ch, void *ctx)
    2336             : {
    2337             :         struct nvme_qpair *nvme_qpair;
    2338             :         struct spdk_nvme_qpair *qpair;
    2339             : 
    2340         110 :         nvme_qpair = ctrlr_ch->qpair;
    2341         110 :         assert(nvme_qpair != NULL);
    2342             : 
    2343         110 :         _bdev_nvme_clear_io_path_cache(nvme_qpair);
    2344             : 
    2345         110 :         qpair = nvme_qpair->qpair;
    2346         110 :         if (qpair != NULL) {
    2347          69 :                 NVME_CTRLR_INFOLOG(nvme_ctrlr, "Start disconnecting qpair %p:%u.\n",
    2348             :                                    qpair, spdk_nvme_qpair_get_id(qpair));
    2349             : 
    2350          69 :                 if (nvme_qpair->ctrlr->dont_retry) {
    2351          53 :                         spdk_nvme_qpair_set_abort_dnr(qpair, true);
    2352          53 :                 }
    2353          69 :                 spdk_nvme_ctrlr_disconnect_io_qpair(qpair);
    2354             : 
    2355             :                 /* The current full reset sequence will move to the next
    2356             :                  * ctrlr_channel after the qpair is actually disconnected.
    2357             :                  */
    2358          69 :                 assert(ctrlr_ch->reset_iter == NULL);
    2359          69 :                 ctrlr_ch->reset_iter = i;
    2360          69 :         } else {
    2361          41 :                 nvme_ctrlr_for_each_channel_continue(i, 0);
    2362             :         }
    2363         110 : }
    2364             : 
    2365             : static void
    2366          35 : bdev_nvme_reset_create_qpairs_done(struct nvme_ctrlr *nvme_ctrlr, void *ctx, int status)
    2367             : {
    2368          35 :         if (status == 0) {
    2369          35 :                 NVME_CTRLR_INFOLOG(nvme_ctrlr, "qpairs were created after ctrlr reset.\n");
    2370             : 
    2371          35 :                 bdev_nvme_reset_ctrlr_complete(nvme_ctrlr, true);
    2372          35 :         } else {
    2373           0 :                 NVME_CTRLR_INFOLOG(nvme_ctrlr, "qpairs were failed to create after ctrlr reset.\n");
    2374             : 
    2375             :                 /* Delete the added qpairs and quiesce ctrlr to make the states clean. */
    2376           0 :                 nvme_ctrlr_for_each_channel(nvme_ctrlr,
    2377             :                                             bdev_nvme_reset_destroy_qpair,
    2378             :                                             NULL,
    2379             :                                             bdev_nvme_reset_create_qpairs_failed);
    2380             :         }
    2381          35 : }
    2382             : 
    2383             : static int
    2384          59 : bdev_nvme_reset_check_qpair_connected(void *ctx)
    2385             : {
    2386          59 :         struct nvme_ctrlr_channel *ctrlr_ch = ctx;
    2387          59 :         struct nvme_qpair *nvme_qpair = ctrlr_ch->qpair;
    2388             :         struct spdk_nvme_qpair *qpair;
    2389             : 
    2390          59 :         if (ctrlr_ch->reset_iter == NULL) {
    2391             :                 /* qpair was already failed to connect and the reset sequence is being aborted. */
    2392           0 :                 assert(ctrlr_ch->connect_poller == NULL);
    2393           0 :                 assert(nvme_qpair->qpair == NULL);
    2394             : 
    2395           0 :                 NVME_CTRLR_INFOLOG(nvme_qpair->ctrlr,
    2396             :                                    "qpair was already failed to connect. reset is being aborted.\n");
    2397           0 :                 return SPDK_POLLER_BUSY;
    2398             :         }
    2399             : 
    2400          59 :         qpair = nvme_qpair->qpair;
    2401          59 :         assert(qpair != NULL);
    2402             : 
    2403          59 :         if (!spdk_nvme_qpair_is_connected(qpair)) {
    2404           0 :                 return SPDK_POLLER_BUSY;
    2405             :         }
    2406             : 
    2407          59 :         NVME_CTRLR_INFOLOG(nvme_qpair->ctrlr, "qpair %p:%u was connected.\n",
    2408             :                            qpair, spdk_nvme_qpair_get_id(qpair));
    2409             : 
    2410          59 :         spdk_poller_unregister(&ctrlr_ch->connect_poller);
    2411             : 
    2412             :         /* qpair was completed to connect. Move to the next ctrlr_channel */
    2413          59 :         nvme_ctrlr_for_each_channel_continue(ctrlr_ch->reset_iter, 0);
    2414          59 :         ctrlr_ch->reset_iter = NULL;
    2415             : 
    2416          59 :         if (!g_opts.disable_auto_failback) {
    2417          42 :                 _bdev_nvme_clear_io_path_cache(nvme_qpair);
    2418          42 :         }
    2419             : 
    2420          59 :         return SPDK_POLLER_BUSY;
    2421          59 : }
    2422             : 
    2423             : static void
    2424          59 : bdev_nvme_reset_create_qpair(struct nvme_ctrlr_channel_iter *i,
    2425             :                              struct nvme_ctrlr *nvme_ctrlr,
    2426             :                              struct nvme_ctrlr_channel *ctrlr_ch,
    2427             :                              void *ctx)
    2428             : {
    2429          59 :         struct nvme_qpair *nvme_qpair = ctrlr_ch->qpair;
    2430             :         struct spdk_nvme_qpair *qpair;
    2431          59 :         int rc = 0;
    2432             : 
    2433          59 :         if (nvme_qpair->qpair == NULL) {
    2434          59 :                 rc = bdev_nvme_create_qpair(nvme_qpair);
    2435          59 :         }
    2436          59 :         if (rc == 0) {
    2437          59 :                 ctrlr_ch->connect_poller = SPDK_POLLER_REGISTER(bdev_nvme_reset_check_qpair_connected,
    2438             :                                            ctrlr_ch, 0);
    2439             : 
    2440          59 :                 qpair = nvme_qpair->qpair;
    2441             : 
    2442          59 :                 NVME_CTRLR_INFOLOG(nvme_ctrlr, "Start checking qpair %p:%u to be connected.\n",
    2443             :                                    qpair, spdk_nvme_qpair_get_id(qpair));
    2444             : 
    2445             :                 /* The current full reset sequence will move to the next
    2446             :                  * ctrlr_channel after the qpair is actually connected.
    2447             :                  */
    2448          59 :                 assert(ctrlr_ch->reset_iter == NULL);
    2449          59 :                 ctrlr_ch->reset_iter = i;
    2450          59 :         } else {
    2451           0 :                 nvme_ctrlr_for_each_channel_continue(i, rc);
    2452             :         }
    2453          59 : }
    2454             : 
    2455             : static void
    2456          35 : nvme_ctrlr_check_namespaces(struct nvme_ctrlr *nvme_ctrlr)
    2457             : {
    2458          35 :         struct spdk_nvme_ctrlr *ctrlr = nvme_ctrlr->ctrlr;
    2459             :         struct nvme_ns *nvme_ns;
    2460             : 
    2461          56 :         for (nvme_ns = nvme_ctrlr_get_first_active_ns(nvme_ctrlr);
    2462          56 :              nvme_ns != NULL;
    2463          21 :              nvme_ns = nvme_ctrlr_get_next_active_ns(nvme_ctrlr, nvme_ns)) {
    2464          21 :                 if (!spdk_nvme_ctrlr_is_active_ns(ctrlr, nvme_ns->id)) {
    2465           1 :                         SPDK_DEBUGLOG(bdev_nvme, "NSID %u was removed during reset.\n", nvme_ns->id);
    2466             :                         /* NS can be added again. Just nullify nvme_ns->ns. */
    2467           1 :                         nvme_ns->ns = NULL;
    2468           1 :                 }
    2469          21 :         }
    2470          35 : }
    2471             : 
    2472             : 
    2473             : static int
    2474          69 : bdev_nvme_reconnect_ctrlr_poll(void *arg)
    2475             : {
    2476          69 :         struct nvme_ctrlr *nvme_ctrlr = arg;
    2477             :         struct spdk_nvme_transport_id *trid;
    2478          69 :         int rc = -ETIMEDOUT;
    2479             : 
    2480          69 :         if (bdev_nvme_check_ctrlr_loss_timeout(nvme_ctrlr)) {
    2481             :                 /* Mark the ctrlr as failed. The next call to
    2482             :                  * spdk_nvme_ctrlr_reconnect_poll_async() will then
    2483             :                  * do the necessary cleanup and return failure.
    2484             :                  */
    2485           2 :                 spdk_nvme_ctrlr_fail(nvme_ctrlr->ctrlr);
    2486           2 :         }
    2487             : 
    2488          69 :         rc = spdk_nvme_ctrlr_reconnect_poll_async(nvme_ctrlr->ctrlr);
    2489          69 :         if (rc == -EAGAIN) {
    2490           0 :                 return SPDK_POLLER_BUSY;
    2491             :         }
    2492             : 
    2493          69 :         spdk_poller_unregister(&nvme_ctrlr->reset_detach_poller);
    2494          69 :         if (rc == 0) {
    2495          35 :                 trid = &nvme_ctrlr->active_path_id->trid;
    2496             : 
    2497          35 :                 if (spdk_nvme_trtype_is_fabrics(trid->trtype)) {
    2498          35 :                         NVME_CTRLR_INFOLOG(nvme_ctrlr, "ctrlr was connected to %s:%s. Create qpairs.\n",
    2499             :                                            trid->traddr, trid->trsvcid);
    2500          35 :                 } else {
    2501           0 :                         NVME_CTRLR_INFOLOG(nvme_ctrlr, "ctrlr was connected. Create qpairs.\n");
    2502             :                 }
    2503             : 
    2504          35 :                 nvme_ctrlr_check_namespaces(nvme_ctrlr);
    2505             : 
    2506             :                 /* Recreate all of the I/O queue pairs */
    2507          35 :                 nvme_ctrlr_for_each_channel(nvme_ctrlr,
    2508             :                                             bdev_nvme_reset_create_qpair,
    2509             :                                             NULL,
    2510             :                                             bdev_nvme_reset_create_qpairs_done);
    2511          35 :         } else {
    2512          34 :                 NVME_CTRLR_INFOLOG(nvme_ctrlr, "ctrlr could not be connected.\n");
    2513             : 
    2514          34 :                 bdev_nvme_reset_ctrlr_complete(nvme_ctrlr, false);
    2515             :         }
    2516          69 :         return SPDK_POLLER_BUSY;
    2517          69 : }
    2518             : 
    2519             : static void
    2520          69 : bdev_nvme_reconnect_ctrlr(struct nvme_ctrlr *nvme_ctrlr)
    2521             : {
    2522          69 :         NVME_CTRLR_INFOLOG(nvme_ctrlr, "Start reconnecting ctrlr.\n");
    2523             : 
    2524          69 :         spdk_nvme_ctrlr_reconnect_async(nvme_ctrlr->ctrlr);
    2525             : 
    2526             :         SPDK_DTRACE_PROBE1(bdev_nvme_ctrlr_reconnect, nvme_ctrlr->nbdev_ctrlr->name);
    2527          69 :         assert(nvme_ctrlr->reset_detach_poller == NULL);
    2528          69 :         nvme_ctrlr->reset_detach_poller = SPDK_POLLER_REGISTER(bdev_nvme_reconnect_ctrlr_poll,
    2529             :                                           nvme_ctrlr, 0);
    2530          69 : }
    2531             : 
    2532             : static void
    2533          60 : bdev_nvme_reset_destroy_qpair_done(struct nvme_ctrlr *nvme_ctrlr, void *ctx, int status)
    2534             : {
    2535             :         SPDK_DTRACE_PROBE1(bdev_nvme_ctrlr_reset, nvme_ctrlr->nbdev_ctrlr->name);
    2536          60 :         assert(status == 0);
    2537             : 
    2538          60 :         NVME_CTRLR_INFOLOG(nvme_ctrlr, "qpairs were deleted.\n");
    2539             : 
    2540          60 :         if (nvme_ctrlr->destruct) {
    2541           4 :                 bdev_nvme_reset_ctrlr_complete(nvme_ctrlr, status == 0);
    2542           4 :                 return;
    2543             :         }
    2544             : 
    2545          56 :         if (!spdk_nvme_ctrlr_is_fabrics(nvme_ctrlr->ctrlr)) {
    2546           0 :                 bdev_nvme_reconnect_ctrlr(nvme_ctrlr);
    2547           0 :         } else {
    2548          56 :                 nvme_ctrlr_disconnect(nvme_ctrlr, bdev_nvme_reconnect_ctrlr);
    2549             :         }
    2550          60 : }
    2551             : 
    2552             : static void
    2553          60 : bdev_nvme_reset_destroy_qpairs(struct nvme_ctrlr *nvme_ctrlr)
    2554             : {
    2555          60 :         NVME_CTRLR_INFOLOG(nvme_ctrlr, "Delete qpairs for reset.\n");
    2556             : 
    2557          60 :         nvme_ctrlr_for_each_channel(nvme_ctrlr,
    2558             :                                     bdev_nvme_reset_destroy_qpair,
    2559             :                                     NULL,
    2560             :                                     bdev_nvme_reset_destroy_qpair_done);
    2561          60 : }
    2562             : 
    2563             : static void
    2564           3 : bdev_nvme_reconnect_ctrlr_now(void *ctx)
    2565             : {
    2566           3 :         struct nvme_ctrlr *nvme_ctrlr = ctx;
    2567             : 
    2568           3 :         assert(nvme_ctrlr->resetting == true);
    2569           3 :         assert(nvme_ctrlr->thread == spdk_get_thread());
    2570             : 
    2571           3 :         spdk_poller_unregister(&nvme_ctrlr->reconnect_delay_timer);
    2572             : 
    2573           3 :         spdk_poller_resume(nvme_ctrlr->adminq_timer_poller);
    2574             : 
    2575           3 :         bdev_nvme_reconnect_ctrlr(nvme_ctrlr);
    2576           3 : }
    2577             : 
    2578             : static void
    2579          60 : _bdev_nvme_reset_ctrlr(void *ctx)
    2580             : {
    2581          60 :         struct nvme_ctrlr *nvme_ctrlr = ctx;
    2582             : 
    2583          60 :         assert(nvme_ctrlr->resetting == true);
    2584          60 :         assert(nvme_ctrlr->thread == spdk_get_thread());
    2585             : 
    2586          60 :         if (!spdk_nvme_ctrlr_is_fabrics(nvme_ctrlr->ctrlr)) {
    2587           0 :                 nvme_ctrlr_disconnect(nvme_ctrlr, bdev_nvme_reset_destroy_qpairs);
    2588           0 :         } else {
    2589          60 :                 bdev_nvme_reset_destroy_qpairs(nvme_ctrlr);
    2590             :         }
    2591          60 : }
    2592             : 
    2593             : static int
    2594          50 : bdev_nvme_reset_ctrlr_unsafe(struct nvme_ctrlr *nvme_ctrlr, spdk_msg_fn *msg_fn)
    2595             : {
    2596          50 :         if (nvme_ctrlr->resetting) {
    2597          14 :                 NVME_CTRLR_NOTICELOG(nvme_ctrlr, "Unable to perform reset, already in progress.\n");
    2598          14 :                 return -EBUSY;
    2599             :         }
    2600             : 
    2601          36 :         if (nvme_ctrlr->disabled) {
    2602           1 :                 NVME_CTRLR_NOTICELOG(nvme_ctrlr, "Unable to perform reset. Controller is disabled.\n");
    2603           1 :                 return -EALREADY;
    2604             :         }
    2605             : 
    2606          35 :         nvme_ctrlr->resetting = true;
    2607          35 :         nvme_ctrlr->dont_retry = true;
    2608             : 
    2609          35 :         if (nvme_ctrlr->reconnect_is_delayed) {
    2610           1 :                 NVME_CTRLR_INFOLOG(nvme_ctrlr, "Reconnect is already scheduled.\n");
    2611           1 :                 *msg_fn = bdev_nvme_reconnect_ctrlr_now;
    2612           1 :                 nvme_ctrlr->reconnect_is_delayed = false;
    2613           1 :         } else {
    2614          34 :                 *msg_fn = _bdev_nvme_reset_ctrlr;
    2615          34 :                 assert(nvme_ctrlr->reset_start_tsc == 0);
    2616             :         }
    2617             : 
    2618          35 :         nvme_ctrlr->reset_start_tsc = spdk_get_ticks();
    2619             : 
    2620          35 :         return 0;
    2621          50 : }
    2622             : 
    2623             : static int
    2624          24 : bdev_nvme_reset_ctrlr(struct nvme_ctrlr *nvme_ctrlr)
    2625             : {
    2626             :         spdk_msg_fn msg_fn;
    2627             :         int rc;
    2628             : 
    2629          24 :         pthread_mutex_lock(&nvme_ctrlr->mutex);
    2630          24 :         rc = bdev_nvme_reset_ctrlr_unsafe(nvme_ctrlr, &msg_fn);
    2631          24 :         pthread_mutex_unlock(&nvme_ctrlr->mutex);
    2632             : 
    2633          24 :         if (rc == 0) {
    2634          22 :                 spdk_thread_send_msg(nvme_ctrlr->thread, msg_fn, nvme_ctrlr);
    2635          22 :         }
    2636             : 
    2637          24 :         return rc;
    2638             : }
    2639             : 
    2640             : static int
    2641           3 : bdev_nvme_enable_ctrlr(struct nvme_ctrlr *nvme_ctrlr)
    2642             : {
    2643           3 :         pthread_mutex_lock(&nvme_ctrlr->mutex);
    2644           3 :         if (nvme_ctrlr->destruct) {
    2645           0 :                 pthread_mutex_unlock(&nvme_ctrlr->mutex);
    2646           0 :                 return -ENXIO;
    2647             :         }
    2648             : 
    2649           3 :         if (nvme_ctrlr->resetting) {
    2650           0 :                 pthread_mutex_unlock(&nvme_ctrlr->mutex);
    2651           0 :                 return -EBUSY;
    2652             :         }
    2653             : 
    2654           3 :         if (!nvme_ctrlr->disabled) {
    2655           1 :                 pthread_mutex_unlock(&nvme_ctrlr->mutex);
    2656           1 :                 return -EALREADY;
    2657             :         }
    2658             : 
    2659           2 :         nvme_ctrlr->disabled = false;
    2660           2 :         nvme_ctrlr->resetting = true;
    2661             : 
    2662           2 :         nvme_ctrlr->reset_start_tsc = spdk_get_ticks();
    2663             : 
    2664           2 :         pthread_mutex_unlock(&nvme_ctrlr->mutex);
    2665             : 
    2666           2 :         spdk_thread_send_msg(nvme_ctrlr->thread, bdev_nvme_reconnect_ctrlr_now, nvme_ctrlr);
    2667           2 :         return 0;
    2668           3 : }
    2669             : 
    2670             : static void
    2671           2 : bdev_nvme_disable_ctrlr_complete(struct nvme_ctrlr *nvme_ctrlr)
    2672             : {
    2673           2 :         bdev_nvme_ctrlr_op_cb ctrlr_op_cb_fn = nvme_ctrlr->ctrlr_op_cb_fn;
    2674           2 :         void *ctrlr_op_cb_arg = nvme_ctrlr->ctrlr_op_cb_arg;
    2675             :         enum bdev_nvme_op_after_reset op_after_disable;
    2676             : 
    2677           2 :         assert(nvme_ctrlr->thread == spdk_get_thread());
    2678             : 
    2679           2 :         nvme_ctrlr->ctrlr_op_cb_fn = NULL;
    2680           2 :         nvme_ctrlr->ctrlr_op_cb_arg = NULL;
    2681             : 
    2682           2 :         pthread_mutex_lock(&nvme_ctrlr->mutex);
    2683             : 
    2684           2 :         nvme_ctrlr->resetting = false;
    2685           2 :         nvme_ctrlr->dont_retry = false;
    2686             : 
    2687           2 :         op_after_disable = bdev_nvme_check_op_after_reset(nvme_ctrlr, true);
    2688             : 
    2689           2 :         nvme_ctrlr->disabled = true;
    2690           2 :         spdk_poller_pause(nvme_ctrlr->adminq_timer_poller);
    2691             : 
    2692             :         /* Make sure we clear any pending resets before returning. */
    2693           2 :         bdev_nvme_complete_pending_resets(nvme_ctrlr, true);
    2694             : 
    2695           2 :         pthread_mutex_unlock(&nvme_ctrlr->mutex);
    2696             : 
    2697           2 :         if (ctrlr_op_cb_fn) {
    2698           0 :                 ctrlr_op_cb_fn(ctrlr_op_cb_arg, 0);
    2699           0 :         }
    2700             : 
    2701           2 :         switch (op_after_disable) {
    2702             :         case OP_COMPLETE_PENDING_DESTRUCT:
    2703           0 :                 nvme_ctrlr_unregister(nvme_ctrlr);
    2704           0 :                 break;
    2705             :         default:
    2706           2 :                 break;
    2707             :         }
    2708           2 : }
    2709             : 
    2710             : static void
    2711           1 : bdev_nvme_disable_destroy_qpairs_done(struct nvme_ctrlr *nvme_ctrlr, void *ctx, int status)
    2712             : {
    2713           1 :         assert(status == 0);
    2714             : 
    2715           1 :         if (!spdk_nvme_ctrlr_is_fabrics(nvme_ctrlr->ctrlr)) {
    2716           0 :                 bdev_nvme_disable_ctrlr_complete(nvme_ctrlr);
    2717           0 :         } else {
    2718           1 :                 nvme_ctrlr_disconnect(nvme_ctrlr, bdev_nvme_disable_ctrlr_complete);
    2719             :         }
    2720           1 : }
    2721             : 
    2722             : static void
    2723           1 : bdev_nvme_disable_destroy_qpairs(struct nvme_ctrlr *nvme_ctrlr)
    2724             : {
    2725           1 :         nvme_ctrlr_for_each_channel(nvme_ctrlr,
    2726             :                                     bdev_nvme_reset_destroy_qpair,
    2727             :                                     NULL,
    2728             :                                     bdev_nvme_disable_destroy_qpairs_done);
    2729           1 : }
    2730             : 
    2731             : static void
    2732           1 : _bdev_nvme_cancel_reconnect_and_disable_ctrlr(void *ctx)
    2733             : {
    2734           1 :         struct nvme_ctrlr *nvme_ctrlr = ctx;
    2735             : 
    2736           1 :         assert(nvme_ctrlr->resetting == true);
    2737           1 :         assert(nvme_ctrlr->thread == spdk_get_thread());
    2738             : 
    2739           1 :         spdk_poller_unregister(&nvme_ctrlr->reconnect_delay_timer);
    2740             : 
    2741           1 :         bdev_nvme_disable_ctrlr_complete(nvme_ctrlr);
    2742           1 : }
    2743             : 
    2744             : static void
    2745           1 : _bdev_nvme_disconnect_and_disable_ctrlr(void *ctx)
    2746             : {
    2747           1 :         struct nvme_ctrlr *nvme_ctrlr = ctx;
    2748             : 
    2749           1 :         assert(nvme_ctrlr->resetting == true);
    2750           1 :         assert(nvme_ctrlr->thread == spdk_get_thread());
    2751             : 
    2752           1 :         if (!spdk_nvme_ctrlr_is_fabrics(nvme_ctrlr->ctrlr)) {
    2753           0 :                 nvme_ctrlr_disconnect(nvme_ctrlr, bdev_nvme_disable_destroy_qpairs);
    2754           0 :         } else {
    2755           1 :                 bdev_nvme_disable_destroy_qpairs(nvme_ctrlr);
    2756             :         }
    2757           1 : }
    2758             : 
    2759             : static int
    2760           5 : bdev_nvme_disable_ctrlr(struct nvme_ctrlr *nvme_ctrlr)
    2761             : {
    2762             :         spdk_msg_fn msg_fn;
    2763             : 
    2764           5 :         pthread_mutex_lock(&nvme_ctrlr->mutex);
    2765           5 :         if (nvme_ctrlr->destruct) {
    2766           1 :                 pthread_mutex_unlock(&nvme_ctrlr->mutex);
    2767           1 :                 return -ENXIO;
    2768             :         }
    2769             : 
    2770           4 :         if (nvme_ctrlr->resetting) {
    2771           1 :                 pthread_mutex_unlock(&nvme_ctrlr->mutex);
    2772           1 :                 return -EBUSY;
    2773             :         }
    2774             : 
    2775           3 :         if (nvme_ctrlr->disabled) {
    2776           1 :                 pthread_mutex_unlock(&nvme_ctrlr->mutex);
    2777           1 :                 return -EALREADY;
    2778             :         }
    2779             : 
    2780           2 :         nvme_ctrlr->resetting = true;
    2781           2 :         nvme_ctrlr->dont_retry = true;
    2782             : 
    2783           2 :         if (nvme_ctrlr->reconnect_is_delayed) {
    2784           1 :                 msg_fn = _bdev_nvme_cancel_reconnect_and_disable_ctrlr;
    2785           1 :                 nvme_ctrlr->reconnect_is_delayed = false;
    2786           1 :         } else {
    2787           1 :                 msg_fn = _bdev_nvme_disconnect_and_disable_ctrlr;
    2788             :         }
    2789             : 
    2790           2 :         nvme_ctrlr->reset_start_tsc = spdk_get_ticks();
    2791             : 
    2792           2 :         pthread_mutex_unlock(&nvme_ctrlr->mutex);
    2793             : 
    2794           2 :         spdk_thread_send_msg(nvme_ctrlr->thread, msg_fn, nvme_ctrlr);
    2795           2 :         return 0;
    2796           5 : }
    2797             : 
    2798             : static int
    2799           6 : nvme_ctrlr_op(struct nvme_ctrlr *nvme_ctrlr, enum nvme_ctrlr_op op,
    2800             :               bdev_nvme_ctrlr_op_cb cb_fn, void *cb_arg)
    2801             : {
    2802             :         int rc;
    2803             : 
    2804           6 :         switch (op) {
    2805             :         case NVME_CTRLR_OP_RESET:
    2806           5 :                 rc = bdev_nvme_reset_ctrlr(nvme_ctrlr);
    2807           5 :                 break;
    2808             :         case NVME_CTRLR_OP_ENABLE:
    2809           0 :                 rc = bdev_nvme_enable_ctrlr(nvme_ctrlr);
    2810           0 :                 break;
    2811             :         case NVME_CTRLR_OP_DISABLE:
    2812           0 :                 rc = bdev_nvme_disable_ctrlr(nvme_ctrlr);
    2813           0 :                 break;
    2814             :         default:
    2815           1 :                 rc = -EINVAL;
    2816           1 :                 break;
    2817             :         }
    2818             : 
    2819           6 :         if (rc == 0) {
    2820           4 :                 assert(nvme_ctrlr->ctrlr_op_cb_fn == NULL);
    2821           4 :                 assert(nvme_ctrlr->ctrlr_op_cb_arg == NULL);
    2822           4 :                 nvme_ctrlr->ctrlr_op_cb_fn = cb_fn;
    2823           4 :                 nvme_ctrlr->ctrlr_op_cb_arg = cb_arg;
    2824           4 :         }
    2825           6 :         return rc;
    2826             : }
    2827             : 
    2828             : struct nvme_ctrlr_op_rpc_ctx {
    2829             :         struct nvme_ctrlr *nvme_ctrlr;
    2830             :         struct spdk_thread *orig_thread;
    2831             :         enum nvme_ctrlr_op op;
    2832             :         int rc;
    2833             :         bdev_nvme_ctrlr_op_cb cb_fn;
    2834             :         void *cb_arg;
    2835             : };
    2836             : 
    2837             : static void
    2838           4 : _nvme_ctrlr_op_rpc_complete(void *_ctx)
    2839             : {
    2840           4 :         struct nvme_ctrlr_op_rpc_ctx *ctx = _ctx;
    2841             : 
    2842           4 :         assert(ctx != NULL);
    2843           4 :         assert(ctx->cb_fn != NULL);
    2844             : 
    2845           4 :         ctx->cb_fn(ctx->cb_arg, ctx->rc);
    2846             : 
    2847           4 :         free(ctx);
    2848           4 : }
    2849             : 
    2850             : static void
    2851           4 : nvme_ctrlr_op_rpc_complete(void *cb_arg, int rc)
    2852             : {
    2853           4 :         struct nvme_ctrlr_op_rpc_ctx *ctx = cb_arg;
    2854             : 
    2855           4 :         ctx->rc = rc;
    2856             : 
    2857           4 :         spdk_thread_send_msg(ctx->orig_thread, _nvme_ctrlr_op_rpc_complete, ctx);
    2858           4 : }
    2859             : 
    2860             : void
    2861           4 : nvme_ctrlr_op_rpc(struct nvme_ctrlr *nvme_ctrlr, enum nvme_ctrlr_op op,
    2862             :                   bdev_nvme_ctrlr_op_cb cb_fn, void *cb_arg)
    2863             : {
    2864             :         struct nvme_ctrlr_op_rpc_ctx *ctx;
    2865             :         int rc;
    2866             : 
    2867           4 :         assert(cb_fn != NULL);
    2868             : 
    2869           4 :         ctx = calloc(1, sizeof(*ctx));
    2870           4 :         if (ctx == NULL) {
    2871           0 :                 NVME_CTRLR_ERRLOG(nvme_ctrlr, "Failed to allocate nvme_ctrlr_op_rpc_ctx.\n");
    2872           0 :                 cb_fn(cb_arg, -ENOMEM);
    2873           0 :                 return;
    2874             :         }
    2875             : 
    2876           4 :         ctx->orig_thread = spdk_get_thread();
    2877           4 :         ctx->cb_fn = cb_fn;
    2878           4 :         ctx->cb_arg = cb_arg;
    2879             : 
    2880           4 :         rc = nvme_ctrlr_op(nvme_ctrlr, op, nvme_ctrlr_op_rpc_complete, ctx);
    2881           4 :         if (rc == 0) {
    2882           2 :                 return;
    2883           2 :         } else if (rc == -EALREADY) {
    2884           0 :                 rc = 0;
    2885           0 :         }
    2886             : 
    2887           2 :         nvme_ctrlr_op_rpc_complete(ctx, rc);
    2888           4 : }
    2889             : 
    2890             : static void nvme_bdev_ctrlr_op_rpc_continue(void *cb_arg, int rc);
    2891             : 
    2892             : static void
    2893           2 : _nvme_bdev_ctrlr_op_rpc_continue(void *_ctx)
    2894             : {
    2895           2 :         struct nvme_ctrlr_op_rpc_ctx *ctx = _ctx;
    2896             :         struct nvme_ctrlr *prev_nvme_ctrlr, *next_nvme_ctrlr;
    2897             :         int rc;
    2898             : 
    2899           2 :         prev_nvme_ctrlr = ctx->nvme_ctrlr;
    2900           2 :         ctx->nvme_ctrlr = NULL;
    2901             : 
    2902           2 :         if (ctx->rc != 0) {
    2903           0 :                 goto complete;
    2904             :         }
    2905             : 
    2906           2 :         next_nvme_ctrlr = TAILQ_NEXT(prev_nvme_ctrlr, tailq);
    2907           2 :         if (next_nvme_ctrlr == NULL) {
    2908           1 :                 goto complete;
    2909             :         }
    2910             : 
    2911           1 :         rc = nvme_ctrlr_op(next_nvme_ctrlr, ctx->op, nvme_bdev_ctrlr_op_rpc_continue, ctx);
    2912           1 :         if (rc == 0) {
    2913           1 :                 ctx->nvme_ctrlr = next_nvme_ctrlr;
    2914           1 :                 return;
    2915           0 :         } else if (rc == -EALREADY) {
    2916           0 :                 ctx->nvme_ctrlr = next_nvme_ctrlr;
    2917           0 :                 rc = 0;
    2918           0 :         }
    2919             : 
    2920           0 :         ctx->rc = rc;
    2921             : 
    2922             : complete:
    2923           1 :         ctx->cb_fn(ctx->cb_arg, ctx->rc);
    2924           1 :         free(ctx);
    2925           2 : }
    2926             : 
    2927             : static void
    2928           2 : nvme_bdev_ctrlr_op_rpc_continue(void *cb_arg, int rc)
    2929             : {
    2930           2 :         struct nvme_ctrlr_op_rpc_ctx *ctx = cb_arg;
    2931             : 
    2932           2 :         ctx->rc = rc;
    2933             : 
    2934           2 :         spdk_thread_send_msg(ctx->orig_thread, _nvme_bdev_ctrlr_op_rpc_continue, ctx);
    2935           2 : }
    2936             : 
    2937             : void
    2938           1 : nvme_bdev_ctrlr_op_rpc(struct nvme_bdev_ctrlr *nbdev_ctrlr, enum nvme_ctrlr_op op,
    2939             :                        bdev_nvme_ctrlr_op_cb cb_fn, void *cb_arg)
    2940             : {
    2941             :         struct nvme_ctrlr_op_rpc_ctx *ctx;
    2942             :         struct nvme_ctrlr *nvme_ctrlr;
    2943             :         int rc;
    2944             : 
    2945           1 :         assert(cb_fn != NULL);
    2946             : 
    2947           1 :         ctx = calloc(1, sizeof(*ctx));
    2948           1 :         if (ctx == NULL) {
    2949           0 :                 SPDK_ERRLOG("Failed to allocate nvme_ctrlr_op_rpc_ctx.\n");
    2950           0 :                 cb_fn(cb_arg, -ENOMEM);
    2951           0 :                 return;
    2952             :         }
    2953             : 
    2954           1 :         ctx->orig_thread = spdk_get_thread();
    2955           1 :         ctx->op = op;
    2956           1 :         ctx->cb_fn = cb_fn;
    2957           1 :         ctx->cb_arg = cb_arg;
    2958             : 
    2959           1 :         nvme_ctrlr = TAILQ_FIRST(&nbdev_ctrlr->ctrlrs);
    2960           1 :         assert(nvme_ctrlr != NULL);
    2961             : 
    2962           1 :         rc = nvme_ctrlr_op(nvme_ctrlr, op, nvme_bdev_ctrlr_op_rpc_continue, ctx);
    2963           1 :         if (rc == 0) {
    2964           1 :                 ctx->nvme_ctrlr = nvme_ctrlr;
    2965           1 :                 return;
    2966           0 :         } else if (rc == -EALREADY) {
    2967           0 :                 ctx->nvme_ctrlr = nvme_ctrlr;
    2968           0 :                 rc = 0;
    2969           0 :         }
    2970             : 
    2971           0 :         nvme_bdev_ctrlr_op_rpc_continue(ctx, rc);
    2972           1 : }
    2973             : 
    2974             : static int _bdev_nvme_reset_io(struct nvme_io_path *io_path, struct nvme_bdev_io *bio);
    2975             : 
    2976             : static void
    2977          16 : bdev_nvme_unfreeze_bdev_channel_done(struct nvme_bdev *nbdev, void *ctx, int status)
    2978             : {
    2979          16 :         struct nvme_bdev_io *bio = ctx;
    2980             :         enum spdk_bdev_io_status io_status;
    2981             : 
    2982          16 :         if (bio->cpl.cdw0 == 0) {
    2983          12 :                 io_status = SPDK_BDEV_IO_STATUS_SUCCESS;
    2984          12 :         } else {
    2985           4 :                 io_status = SPDK_BDEV_IO_STATUS_FAILED;
    2986             :         }
    2987             : 
    2988          16 :         NVME_BDEV_INFOLOG(nbdev, "reset_io %p completed, status:%d\n", bio, io_status);
    2989             : 
    2990          16 :         __bdev_nvme_io_complete(spdk_bdev_io_from_ctx(bio), io_status, NULL);
    2991          16 : }
    2992             : 
    2993             : static void
    2994          32 : bdev_nvme_unfreeze_bdev_channel(struct nvme_bdev_channel_iter *i,
    2995             :                                 struct nvme_bdev *nbdev,
    2996             :                                 struct nvme_bdev_channel *nbdev_ch, void *ctx)
    2997             : {
    2998          32 :         bdev_nvme_abort_retry_ios(nbdev_ch);
    2999          32 :         nbdev_ch->resetting = false;
    3000             : 
    3001          32 :         nvme_bdev_for_each_channel_continue(i, 0);
    3002          32 : }
    3003             : 
    3004             : static void
    3005          16 : bdev_nvme_reset_io_complete(struct nvme_bdev_io *bio)
    3006             : {
    3007          16 :         struct spdk_bdev_io *bdev_io = spdk_bdev_io_from_ctx(bio);
    3008          16 :         struct nvme_bdev *nbdev = (struct nvme_bdev *)bdev_io->bdev->ctxt;
    3009             : 
    3010             :         /* Abort all queued I/Os for retry. */
    3011          32 :         nvme_bdev_for_each_channel(nbdev,
    3012             :                                    bdev_nvme_unfreeze_bdev_channel,
    3013          16 :                                    bio,
    3014             :                                    bdev_nvme_unfreeze_bdev_channel_done);
    3015          16 : }
    3016             : 
    3017             : static void
    3018          26 : _bdev_nvme_reset_io_continue(void *ctx)
    3019             : {
    3020          26 :         struct nvme_bdev_io *bio = ctx;
    3021             :         struct nvme_io_path *prev_io_path, *next_io_path;
    3022             :         int rc;
    3023             : 
    3024          26 :         prev_io_path = bio->io_path;
    3025          26 :         bio->io_path = NULL;
    3026             : 
    3027          26 :         next_io_path = STAILQ_NEXT(prev_io_path, stailq);
    3028          26 :         if (next_io_path == NULL) {
    3029          16 :                 goto complete;
    3030             :         }
    3031             : 
    3032          10 :         rc = _bdev_nvme_reset_io(next_io_path, bio);
    3033          10 :         if (rc == 0) {
    3034          10 :                 return;
    3035             :         }
    3036             : 
    3037             : complete:
    3038          16 :         bdev_nvme_reset_io_complete(bio);
    3039          26 : }
    3040             : 
    3041             : static void
    3042          26 : bdev_nvme_reset_io_continue(void *cb_arg, int rc)
    3043             : {
    3044          26 :         struct nvme_bdev_io *bio = cb_arg;
    3045          26 :         struct spdk_bdev_io *bdev_io = spdk_bdev_io_from_ctx(bio);
    3046          26 :         struct nvme_bdev *nbdev = (struct nvme_bdev *)bdev_io->bdev->ctxt;
    3047             : 
    3048          26 :         NVME_BDEV_INFOLOG(nbdev, "continue reset_io %p, rc:%d\n", bio, rc);
    3049             : 
    3050             :         /* Reset status is initialized as "failed". Set to "success" once we have at least one
    3051             :          * successfully reset nvme_ctrlr.
    3052             :          */
    3053          26 :         if (rc == 0) {
    3054          16 :                 bio->cpl.cdw0 = 0;
    3055          16 :         }
    3056             : 
    3057          26 :         spdk_thread_send_msg(spdk_bdev_io_get_thread(bdev_io), _bdev_nvme_reset_io_continue, bio);
    3058          26 : }
    3059             : 
    3060             : static int
    3061          26 : _bdev_nvme_reset_io(struct nvme_io_path *io_path, struct nvme_bdev_io *bio)
    3062             : {
    3063          26 :         struct spdk_bdev_io *bdev_io = spdk_bdev_io_from_ctx(bio);
    3064          26 :         struct nvme_bdev *nbdev = (struct nvme_bdev *)bdev_io->bdev->ctxt;
    3065          26 :         struct nvme_ctrlr *nvme_ctrlr = io_path->qpair->ctrlr;
    3066             :         spdk_msg_fn msg_fn;
    3067             :         int rc;
    3068             : 
    3069          26 :         assert(bio->io_path == NULL);
    3070          26 :         bio->io_path = io_path;
    3071             : 
    3072          26 :         pthread_mutex_lock(&nvme_ctrlr->mutex);
    3073          26 :         rc = bdev_nvme_reset_ctrlr_unsafe(nvme_ctrlr, &msg_fn);
    3074          26 :         if (rc == -EBUSY) {
    3075             :                 /*
    3076             :                  * Reset call is queued only if it is from the app framework. This is on purpose so that
    3077             :                  * we don't interfere with the app framework reset strategy. i.e. we are deferring to the
    3078             :                  * upper level. If they are in the middle of a reset, we won't try to schedule another one.
    3079             :                  */
    3080          12 :                 TAILQ_INSERT_TAIL(&nvme_ctrlr->pending_resets, bio, retry_link);
    3081          12 :         }
    3082          26 :         pthread_mutex_unlock(&nvme_ctrlr->mutex);
    3083             : 
    3084          26 :         if (rc == 0) {
    3085          13 :                 assert(nvme_ctrlr->ctrlr_op_cb_fn == NULL);
    3086          13 :                 assert(nvme_ctrlr->ctrlr_op_cb_arg == NULL);
    3087          13 :                 nvme_ctrlr->ctrlr_op_cb_fn = bdev_nvme_reset_io_continue;
    3088          13 :                 nvme_ctrlr->ctrlr_op_cb_arg = bio;
    3089             : 
    3090          13 :                 spdk_thread_send_msg(nvme_ctrlr->thread, msg_fn, nvme_ctrlr);
    3091             : 
    3092          13 :                 NVME_BDEV_INFOLOG(nbdev, "reset_io %p started resetting ctrlr [%s, %u].\n",
    3093             :                                   bio, CTRLR_STRING(nvme_ctrlr), CTRLR_ID(nvme_ctrlr));
    3094          26 :         } else if (rc == -EBUSY) {
    3095          12 :                 rc = 0;
    3096             : 
    3097          12 :                 NVME_BDEV_INFOLOG(nbdev, "reset_io %p was queued to ctrlr [%s, %u].\n",
    3098             :                                   bio, CTRLR_STRING(nvme_ctrlr), CTRLR_ID(nvme_ctrlr));
    3099          12 :         } else {
    3100           1 :                 NVME_BDEV_INFOLOG(nbdev, "reset_io %p could not reset ctrlr [%s, %u], rc:%d\n",
    3101             :                                   bio, CTRLR_STRING(nvme_ctrlr), CTRLR_ID(nvme_ctrlr), rc);
    3102             :         }
    3103             : 
    3104          26 :         return rc;
    3105             : }
    3106             : 
    3107             : static void
    3108          16 : bdev_nvme_freeze_bdev_channel_done(struct nvme_bdev *nbdev, void *ctx, int status)
    3109             : {
    3110          16 :         struct nvme_bdev_io *bio = ctx;
    3111          16 :         struct spdk_bdev_io *bdev_io = spdk_bdev_io_from_ctx(bio);
    3112             :         struct nvme_bdev_channel *nbdev_ch;
    3113             :         struct nvme_io_path *io_path;
    3114             :         int rc;
    3115             : 
    3116          16 :         nbdev_ch = spdk_io_channel_get_ctx(spdk_bdev_io_get_io_channel(bdev_io));
    3117             : 
    3118             :         /* Initialize with failed status. With multipath it is enough to have at least one successful
    3119             :          * nvme_ctrlr reset. If there is none, reset status will remain failed.
    3120             :          */
    3121          16 :         bio->cpl.cdw0 = 1;
    3122             : 
    3123             :         /* Reset all nvme_ctrlrs of a bdev controller sequentially. */
    3124          16 :         io_path = STAILQ_FIRST(&nbdev_ch->io_path_list);
    3125          16 :         assert(io_path != NULL);
    3126             : 
    3127          16 :         rc = _bdev_nvme_reset_io(io_path, bio);
    3128          16 :         if (rc != 0) {
    3129             :                 /* If the current nvme_ctrlr is disabled, skip it and move to the next nvme_ctrlr. */
    3130           1 :                 rc = (rc == -EALREADY) ? 0 : rc;
    3131             : 
    3132           1 :                 bdev_nvme_reset_io_continue(bio, rc);
    3133           1 :         }
    3134          16 : }
    3135             : 
    3136             : static void
    3137          30 : bdev_nvme_freeze_bdev_channel(struct nvme_bdev_channel_iter *i,
    3138             :                               struct nvme_bdev *nbdev,
    3139             :                               struct nvme_bdev_channel *nbdev_ch, void *ctx)
    3140             : {
    3141          30 :         nbdev_ch->resetting = true;
    3142             : 
    3143          30 :         nvme_bdev_for_each_channel_continue(i, 0);
    3144          30 : }
    3145             : 
    3146             : static void
    3147          15 : bdev_nvme_reset_io(struct nvme_bdev *nbdev, struct nvme_bdev_io *bio)
    3148             : {
    3149          15 :         NVME_BDEV_INFOLOG(nbdev, "reset_io %p started.\n", bio);
    3150             : 
    3151          30 :         nvme_bdev_for_each_channel(nbdev,
    3152             :                                    bdev_nvme_freeze_bdev_channel,
    3153          15 :                                    bio,
    3154             :                                    bdev_nvme_freeze_bdev_channel_done);
    3155          15 : }
    3156             : 
    3157             : static int
    3158          35 : bdev_nvme_failover_ctrlr_unsafe(struct nvme_ctrlr *nvme_ctrlr, bool remove)
    3159             : {
    3160          35 :         if (nvme_ctrlr->destruct) {
    3161             :                 /* Don't bother resetting if the controller is in the process of being destructed. */
    3162           5 :                 return -ENXIO;
    3163             :         }
    3164             : 
    3165          30 :         if (nvme_ctrlr->resetting) {
    3166           3 :                 if (!nvme_ctrlr->in_failover) {
    3167           3 :                         NVME_CTRLR_NOTICELOG(nvme_ctrlr,
    3168             :                                              "Reset is already in progress. Defer failover until reset completes.\n");
    3169             : 
    3170             :                         /* Defer failover until reset completes. */
    3171           3 :                         nvme_ctrlr->pending_failover = true;
    3172           3 :                         return -EINPROGRESS;
    3173             :                 } else {
    3174           0 :                         NVME_CTRLR_NOTICELOG(nvme_ctrlr, "Unable to perform failover, already in progress.\n");
    3175           0 :                         return -EBUSY;
    3176             :                 }
    3177             :         }
    3178             : 
    3179          27 :         bdev_nvme_failover_trid(nvme_ctrlr, remove, true);
    3180             : 
    3181          27 :         if (nvme_ctrlr->reconnect_is_delayed) {
    3182           1 :                 NVME_CTRLR_NOTICELOG(nvme_ctrlr, "Reconnect is already scheduled.\n");
    3183             : 
    3184             :                 /* We rely on the next reconnect for the failover. */
    3185           1 :                 return -EALREADY;
    3186             :         }
    3187             : 
    3188          26 :         if (nvme_ctrlr->disabled) {
    3189           0 :                 NVME_CTRLR_NOTICELOG(nvme_ctrlr, "Controller is disabled.\n");
    3190             : 
    3191             :                 /* We rely on the enablement for the failover. */
    3192           0 :                 return -EALREADY;
    3193             :         }
    3194             : 
    3195          26 :         nvme_ctrlr->resetting = true;
    3196          26 :         nvme_ctrlr->in_failover = true;
    3197             : 
    3198          26 :         assert(nvme_ctrlr->reset_start_tsc == 0);
    3199          26 :         nvme_ctrlr->reset_start_tsc = spdk_get_ticks();
    3200             : 
    3201          26 :         return 0;
    3202          35 : }
    3203             : 
    3204             : static int
    3205          33 : bdev_nvme_failover_ctrlr(struct nvme_ctrlr *nvme_ctrlr)
    3206             : {
    3207             :         int rc;
    3208             : 
    3209          33 :         pthread_mutex_lock(&nvme_ctrlr->mutex);
    3210          33 :         rc = bdev_nvme_failover_ctrlr_unsafe(nvme_ctrlr, false);
    3211          33 :         pthread_mutex_unlock(&nvme_ctrlr->mutex);
    3212             : 
    3213          33 :         if (rc == 0) {
    3214          25 :                 spdk_thread_send_msg(nvme_ctrlr->thread, _bdev_nvme_reset_ctrlr, nvme_ctrlr);
    3215          33 :         } else if (rc == -EALREADY) {
    3216           0 :                 rc = 0;
    3217           0 :         }
    3218             : 
    3219          33 :         return rc;
    3220             : }
    3221             : 
    3222             : static int bdev_nvme_unmap(struct nvme_bdev_io *bio, uint64_t offset_blocks,
    3223             :                            uint64_t num_blocks);
    3224             : 
    3225             : static int bdev_nvme_write_zeroes(struct nvme_bdev_io *bio, uint64_t offset_blocks,
    3226             :                                   uint64_t num_blocks);
    3227             : 
    3228             : static int bdev_nvme_copy(struct nvme_bdev_io *bio, uint64_t dst_offset_blocks,
    3229             :                           uint64_t src_offset_blocks,
    3230             :                           uint64_t num_blocks);
    3231             : 
    3232             : static void
    3233           1 : bdev_nvme_get_buf_cb(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io,
    3234             :                      bool success)
    3235             : {
    3236           1 :         struct nvme_bdev_io *bio = (struct nvme_bdev_io *)bdev_io->driver_ctx;
    3237             :         int ret;
    3238             : 
    3239           1 :         if (!success) {
    3240           0 :                 ret = -EINVAL;
    3241           0 :                 goto exit;
    3242             :         }
    3243             : 
    3244           1 :         if (spdk_unlikely(!nvme_io_path_is_available(bio->io_path))) {
    3245           0 :                 ret = -ENXIO;
    3246           0 :                 goto exit;
    3247             :         }
    3248             : 
    3249           2 :         ret = bdev_nvme_readv(bio,
    3250           1 :                               bdev_io->u.bdev.iovs,
    3251           1 :                               bdev_io->u.bdev.iovcnt,
    3252           1 :                               bdev_io->u.bdev.md_buf,
    3253           1 :                               bdev_io->u.bdev.num_blocks,
    3254           1 :                               bdev_io->u.bdev.offset_blocks,
    3255           1 :                               bdev_io->u.bdev.dif_check_flags,
    3256           1 :                               bdev_io->u.bdev.memory_domain,
    3257           1 :                               bdev_io->u.bdev.memory_domain_ctx,
    3258           1 :                               bdev_io->u.bdev.accel_sequence);
    3259             : 
    3260             : exit:
    3261           1 :         if (spdk_unlikely(ret != 0)) {
    3262           0 :                 bdev_nvme_io_complete(bio, ret);
    3263           0 :         }
    3264           1 : }
    3265             : 
    3266             : static inline void
    3267          59 : _bdev_nvme_submit_request(struct nvme_bdev_channel *nbdev_ch, struct spdk_bdev_io *bdev_io)
    3268             : {
    3269          59 :         struct nvme_bdev_io *nbdev_io = (struct nvme_bdev_io *)bdev_io->driver_ctx;
    3270          59 :         struct spdk_bdev *bdev = bdev_io->bdev;
    3271             :         struct nvme_bdev_io *nbdev_io_to_abort;
    3272          59 :         int rc = 0;
    3273             : 
    3274          59 :         switch (bdev_io->type) {
    3275             :         case SPDK_BDEV_IO_TYPE_READ:
    3276           3 :                 if (bdev_io->u.bdev.iovs && bdev_io->u.bdev.iovs[0].iov_base) {
    3277             : 
    3278           4 :                         rc = bdev_nvme_readv(nbdev_io,
    3279           2 :                                              bdev_io->u.bdev.iovs,
    3280           2 :                                              bdev_io->u.bdev.iovcnt,
    3281           2 :                                              bdev_io->u.bdev.md_buf,
    3282           2 :                                              bdev_io->u.bdev.num_blocks,
    3283           2 :                                              bdev_io->u.bdev.offset_blocks,
    3284           2 :                                              bdev_io->u.bdev.dif_check_flags,
    3285           2 :                                              bdev_io->u.bdev.memory_domain,
    3286           2 :                                              bdev_io->u.bdev.memory_domain_ctx,
    3287           2 :                                              bdev_io->u.bdev.accel_sequence);
    3288           2 :                 } else {
    3289           2 :                         spdk_bdev_io_get_buf(bdev_io, bdev_nvme_get_buf_cb,
    3290           1 :                                              bdev_io->u.bdev.num_blocks * bdev->blocklen);
    3291           1 :                         rc = 0;
    3292             :                 }
    3293           3 :                 break;
    3294             :         case SPDK_BDEV_IO_TYPE_WRITE:
    3295          50 :                 rc = bdev_nvme_writev(nbdev_io,
    3296          25 :                                       bdev_io->u.bdev.iovs,
    3297          25 :                                       bdev_io->u.bdev.iovcnt,
    3298          25 :                                       bdev_io->u.bdev.md_buf,
    3299          25 :                                       bdev_io->u.bdev.num_blocks,
    3300          25 :                                       bdev_io->u.bdev.offset_blocks,
    3301          25 :                                       bdev_io->u.bdev.dif_check_flags,
    3302          25 :                                       bdev_io->u.bdev.memory_domain,
    3303          25 :                                       bdev_io->u.bdev.memory_domain_ctx,
    3304          25 :                                       bdev_io->u.bdev.accel_sequence,
    3305          25 :                                       bdev_io->u.bdev.nvme_cdw12,
    3306          25 :                                       bdev_io->u.bdev.nvme_cdw13);
    3307          25 :                 break;
    3308             :         case SPDK_BDEV_IO_TYPE_COMPARE:
    3309           2 :                 rc = bdev_nvme_comparev(nbdev_io,
    3310           1 :                                         bdev_io->u.bdev.iovs,
    3311           1 :                                         bdev_io->u.bdev.iovcnt,
    3312           1 :                                         bdev_io->u.bdev.md_buf,
    3313           1 :                                         bdev_io->u.bdev.num_blocks,
    3314           1 :                                         bdev_io->u.bdev.offset_blocks,
    3315           1 :                                         bdev_io->u.bdev.dif_check_flags);
    3316           1 :                 break;
    3317             :         case SPDK_BDEV_IO_TYPE_COMPARE_AND_WRITE:
    3318           4 :                 rc = bdev_nvme_comparev_and_writev(nbdev_io,
    3319           2 :                                                    bdev_io->u.bdev.iovs,
    3320           2 :                                                    bdev_io->u.bdev.iovcnt,
    3321           2 :                                                    bdev_io->u.bdev.fused_iovs,
    3322           2 :                                                    bdev_io->u.bdev.fused_iovcnt,
    3323           2 :                                                    bdev_io->u.bdev.md_buf,
    3324           2 :                                                    bdev_io->u.bdev.num_blocks,
    3325           2 :                                                    bdev_io->u.bdev.offset_blocks,
    3326           2 :                                                    bdev_io->u.bdev.dif_check_flags);
    3327           2 :                 break;
    3328             :         case SPDK_BDEV_IO_TYPE_UNMAP:
    3329           2 :                 rc = bdev_nvme_unmap(nbdev_io,
    3330           1 :                                      bdev_io->u.bdev.offset_blocks,
    3331           1 :                                      bdev_io->u.bdev.num_blocks);
    3332           1 :                 break;
    3333             :         case SPDK_BDEV_IO_TYPE_WRITE_ZEROES:
    3334           0 :                 rc =  bdev_nvme_write_zeroes(nbdev_io,
    3335           0 :                                              bdev_io->u.bdev.offset_blocks,
    3336           0 :                                              bdev_io->u.bdev.num_blocks);
    3337           0 :                 break;
    3338             :         case SPDK_BDEV_IO_TYPE_RESET:
    3339          15 :                 nbdev_io->io_path = NULL;
    3340          15 :                 bdev_nvme_reset_io(bdev->ctxt, nbdev_io);
    3341          15 :                 return;
    3342             : 
    3343             :         case SPDK_BDEV_IO_TYPE_FLUSH:
    3344           1 :                 bdev_nvme_io_complete(nbdev_io, 0);
    3345           1 :                 return;
    3346             : 
    3347             :         case SPDK_BDEV_IO_TYPE_ZONE_APPEND:
    3348           0 :                 rc = bdev_nvme_zone_appendv(nbdev_io,
    3349           0 :                                             bdev_io->u.bdev.iovs,
    3350           0 :                                             bdev_io->u.bdev.iovcnt,
    3351           0 :                                             bdev_io->u.bdev.md_buf,
    3352           0 :                                             bdev_io->u.bdev.num_blocks,
    3353           0 :                                             bdev_io->u.bdev.offset_blocks,
    3354           0 :                                             bdev_io->u.bdev.dif_check_flags);
    3355           0 :                 break;
    3356             :         case SPDK_BDEV_IO_TYPE_GET_ZONE_INFO:
    3357           0 :                 rc = bdev_nvme_get_zone_info(nbdev_io,
    3358           0 :                                              bdev_io->u.zone_mgmt.zone_id,
    3359           0 :                                              bdev_io->u.zone_mgmt.num_zones,
    3360           0 :                                              bdev_io->u.zone_mgmt.buf);
    3361           0 :                 break;
    3362             :         case SPDK_BDEV_IO_TYPE_ZONE_MANAGEMENT:
    3363           0 :                 rc = bdev_nvme_zone_management(nbdev_io,
    3364           0 :                                                bdev_io->u.zone_mgmt.zone_id,
    3365           0 :                                                bdev_io->u.zone_mgmt.zone_action);
    3366           0 :                 break;
    3367             :         case SPDK_BDEV_IO_TYPE_NVME_ADMIN:
    3368           5 :                 nbdev_io->io_path = NULL;
    3369          10 :                 bdev_nvme_admin_passthru(nbdev_ch,
    3370           5 :                                          nbdev_io,
    3371           5 :                                          &bdev_io->u.nvme_passthru.cmd,
    3372           5 :                                          bdev_io->u.nvme_passthru.buf,
    3373           5 :                                          bdev_io->u.nvme_passthru.nbytes);
    3374           5 :                 return;
    3375             : 
    3376             :         case SPDK_BDEV_IO_TYPE_NVME_IO:
    3377           0 :                 rc = bdev_nvme_io_passthru(nbdev_io,
    3378           0 :                                            &bdev_io->u.nvme_passthru.cmd,
    3379           0 :                                            bdev_io->u.nvme_passthru.buf,
    3380           0 :                                            bdev_io->u.nvme_passthru.nbytes);
    3381           0 :                 break;
    3382             :         case SPDK_BDEV_IO_TYPE_NVME_IO_MD:
    3383           0 :                 rc = bdev_nvme_io_passthru_md(nbdev_io,
    3384           0 :                                               &bdev_io->u.nvme_passthru.cmd,
    3385           0 :                                               bdev_io->u.nvme_passthru.buf,
    3386           0 :                                               bdev_io->u.nvme_passthru.nbytes,
    3387           0 :                                               bdev_io->u.nvme_passthru.md_buf,
    3388           0 :                                               bdev_io->u.nvme_passthru.md_len);
    3389           0 :                 break;
    3390             :         case SPDK_BDEV_IO_TYPE_NVME_IOV_MD:
    3391           0 :                 rc = bdev_nvme_iov_passthru_md(nbdev_io,
    3392           0 :                                                &bdev_io->u.nvme_passthru.cmd,
    3393           0 :                                                bdev_io->u.nvme_passthru.iovs,
    3394           0 :                                                bdev_io->u.nvme_passthru.iovcnt,
    3395           0 :                                                bdev_io->u.nvme_passthru.nbytes,
    3396           0 :                                                bdev_io->u.nvme_passthru.md_buf,
    3397           0 :                                                bdev_io->u.nvme_passthru.md_len);
    3398           0 :                 break;
    3399             :         case SPDK_BDEV_IO_TYPE_ABORT:
    3400           6 :                 nbdev_io->io_path = NULL;
    3401           6 :                 nbdev_io_to_abort = (struct nvme_bdev_io *)bdev_io->u.abort.bio_to_abort->driver_ctx;
    3402          12 :                 bdev_nvme_abort(nbdev_ch,
    3403           6 :                                 nbdev_io,
    3404           6 :                                 nbdev_io_to_abort);
    3405           6 :                 return;
    3406             : 
    3407             :         case SPDK_BDEV_IO_TYPE_COPY:
    3408           0 :                 rc = bdev_nvme_copy(nbdev_io,
    3409           0 :                                     bdev_io->u.bdev.offset_blocks,
    3410           0 :                                     bdev_io->u.bdev.copy.src_offset_blocks,
    3411           0 :                                     bdev_io->u.bdev.num_blocks);
    3412           0 :                 break;
    3413             :         default:
    3414           0 :                 rc = -EINVAL;
    3415           0 :                 break;
    3416             :         }
    3417             : 
    3418          32 :         if (spdk_unlikely(rc != 0)) {
    3419           0 :                 bdev_nvme_io_complete(nbdev_io, rc);
    3420           0 :         }
    3421          59 : }
    3422             : 
    3423             : static void
    3424          68 : bdev_nvme_submit_request(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io)
    3425             : {
    3426          68 :         struct nvme_bdev_channel *nbdev_ch = spdk_io_channel_get_ctx(ch);
    3427          68 :         struct nvme_bdev_io *nbdev_io = (struct nvme_bdev_io *)bdev_io->driver_ctx;
    3428             : 
    3429          68 :         if (spdk_likely(nbdev_io->submit_tsc == 0)) {
    3430          68 :                 nbdev_io->submit_tsc = spdk_bdev_io_get_submit_tsc(bdev_io);
    3431          68 :         } else {
    3432             :                 /* There are cases where submit_tsc != 0, i.e. retry I/O.
    3433             :                  * We need to update submit_tsc here.
    3434             :                  */
    3435           0 :                 nbdev_io->submit_tsc = spdk_get_ticks();
    3436             :         }
    3437             : 
    3438          68 :         spdk_trace_record(TRACE_BDEV_NVME_IO_START, 0, 0, (uintptr_t)nbdev_io, (uintptr_t)bdev_io);
    3439          68 :         nbdev_io->io_path = bdev_nvme_find_io_path(nbdev_ch);
    3440          68 :         if (spdk_unlikely(!nbdev_io->io_path)) {
    3441          13 :                 if (!bdev_nvme_io_type_is_admin(bdev_io->type)) {
    3442          12 :                         bdev_nvme_io_complete(nbdev_io, -ENXIO);
    3443          12 :                         return;
    3444             :                 }
    3445             : 
    3446             :                 /* Admin commands do not use the optimal I/O path.
    3447             :                  * Simply fall through even if it is not found.
    3448             :                  */
    3449           1 :         }
    3450             : 
    3451          56 :         _bdev_nvme_submit_request(nbdev_ch, bdev_io);
    3452          68 : }
    3453             : 
    3454             : static bool
    3455           0 : bdev_nvme_is_supported_csi(enum spdk_nvme_csi csi)
    3456             : {
    3457           0 :         switch (csi) {
    3458             :         case SPDK_NVME_CSI_NVM:
    3459           0 :                 return true;
    3460             :         case SPDK_NVME_CSI_ZNS:
    3461           0 :                 return true;
    3462             :         default:
    3463           0 :                 return false;
    3464             :         }
    3465           0 : }
    3466             : 
    3467             : static bool
    3468           0 : bdev_nvme_io_type_supported(void *ctx, enum spdk_bdev_io_type io_type)
    3469             : {
    3470           0 :         struct nvme_bdev *nbdev = ctx;
    3471             :         struct nvme_ns *nvme_ns;
    3472             :         struct spdk_nvme_ns *ns;
    3473             :         struct spdk_nvme_ctrlr *ctrlr;
    3474             :         const struct spdk_nvme_ctrlr_data *cdata;
    3475             : 
    3476           0 :         nvme_ns = TAILQ_FIRST(&nbdev->nvme_ns_list);
    3477           0 :         assert(nvme_ns != NULL);
    3478           0 :         ns = nvme_ns->ns;
    3479           0 :         if (ns == NULL) {
    3480           0 :                 return false;
    3481             :         }
    3482             : 
    3483           0 :         if (!bdev_nvme_is_supported_csi(spdk_nvme_ns_get_csi(ns))) {
    3484           0 :                 switch (io_type) {
    3485             :                 case SPDK_BDEV_IO_TYPE_NVME_ADMIN:
    3486             :                 case SPDK_BDEV_IO_TYPE_NVME_IO:
    3487           0 :                         return true;
    3488             : 
    3489             :                 case SPDK_BDEV_IO_TYPE_NVME_IO_MD:
    3490           0 :                         return spdk_nvme_ns_get_md_size(ns) ? true : false;
    3491             : 
    3492             :                 default:
    3493           0 :                         return false;
    3494             :                 }
    3495             :         }
    3496             : 
    3497           0 :         ctrlr = spdk_nvme_ns_get_ctrlr(ns);
    3498             : 
    3499           0 :         switch (io_type) {
    3500             :         case SPDK_BDEV_IO_TYPE_READ:
    3501             :         case SPDK_BDEV_IO_TYPE_WRITE:
    3502             :         case SPDK_BDEV_IO_TYPE_RESET:
    3503             :         case SPDK_BDEV_IO_TYPE_FLUSH:
    3504             :         case SPDK_BDEV_IO_TYPE_NVME_ADMIN:
    3505             :         case SPDK_BDEV_IO_TYPE_NVME_IO:
    3506             :         case SPDK_BDEV_IO_TYPE_ABORT:
    3507           0 :                 return true;
    3508             : 
    3509             :         case SPDK_BDEV_IO_TYPE_COMPARE:
    3510           0 :                 return spdk_nvme_ns_supports_compare(ns);
    3511             : 
    3512             :         case SPDK_BDEV_IO_TYPE_NVME_IO_MD:
    3513           0 :                 return spdk_nvme_ns_get_md_size(ns) ? true : false;
    3514             : 
    3515             :         case SPDK_BDEV_IO_TYPE_UNMAP:
    3516           0 :                 cdata = spdk_nvme_ctrlr_get_data(ctrlr);
    3517           0 :                 return cdata->oncs.dsm;
    3518             : 
    3519             :         case SPDK_BDEV_IO_TYPE_WRITE_ZEROES:
    3520           0 :                 cdata = spdk_nvme_ctrlr_get_data(ctrlr);
    3521           0 :                 return cdata->oncs.write_zeroes;
    3522             : 
    3523             :         case SPDK_BDEV_IO_TYPE_COMPARE_AND_WRITE:
    3524           0 :                 if (spdk_nvme_ctrlr_get_flags(ctrlr) &
    3525             :                     SPDK_NVME_CTRLR_COMPARE_AND_WRITE_SUPPORTED) {
    3526           0 :                         return true;
    3527             :                 }
    3528           0 :                 return false;
    3529             : 
    3530             :         case SPDK_BDEV_IO_TYPE_GET_ZONE_INFO:
    3531             :         case SPDK_BDEV_IO_TYPE_ZONE_MANAGEMENT:
    3532           0 :                 return spdk_nvme_ns_get_csi(ns) == SPDK_NVME_CSI_ZNS;
    3533             : 
    3534             :         case SPDK_BDEV_IO_TYPE_ZONE_APPEND:
    3535           0 :                 return spdk_nvme_ns_get_csi(ns) == SPDK_NVME_CSI_ZNS &&
    3536           0 :                        spdk_nvme_ctrlr_get_flags(ctrlr) & SPDK_NVME_CTRLR_ZONE_APPEND_SUPPORTED;
    3537             : 
    3538             :         case SPDK_BDEV_IO_TYPE_COPY:
    3539           0 :                 cdata = spdk_nvme_ctrlr_get_data(ctrlr);
    3540           0 :                 return cdata->oncs.copy;
    3541             : 
    3542             :         default:
    3543           0 :                 return false;
    3544             :         }
    3545           0 : }
    3546             : 
    3547             : static int
    3548          61 : nvme_qpair_create(struct nvme_ctrlr *nvme_ctrlr, struct nvme_ctrlr_channel *ctrlr_ch)
    3549             : {
    3550             :         struct nvme_qpair *nvme_qpair;
    3551             :         struct spdk_io_channel *pg_ch;
    3552             :         int rc;
    3553             : 
    3554          61 :         nvme_qpair = calloc(1, sizeof(*nvme_qpair));
    3555          61 :         if (!nvme_qpair) {
    3556           0 :                 NVME_CTRLR_ERRLOG(nvme_ctrlr, "Failed to alloc nvme_qpair.\n");
    3557           0 :                 return -1;
    3558             :         }
    3559             : 
    3560          61 :         TAILQ_INIT(&nvme_qpair->io_path_list);
    3561             : 
    3562          61 :         nvme_qpair->ctrlr = nvme_ctrlr;
    3563          61 :         nvme_qpair->ctrlr_ch = ctrlr_ch;
    3564             : 
    3565          61 :         pg_ch = spdk_get_io_channel(&g_nvme_bdev_ctrlrs);
    3566          61 :         if (!pg_ch) {
    3567           0 :                 free(nvme_qpair);
    3568           0 :                 return -1;
    3569             :         }
    3570             : 
    3571          61 :         nvme_qpair->group = spdk_io_channel_get_ctx(pg_ch);
    3572             : 
    3573             : #ifdef SPDK_CONFIG_VTUNE
    3574             :         nvme_qpair->group->collect_spin_stat = true;
    3575             : #else
    3576          61 :         nvme_qpair->group->collect_spin_stat = false;
    3577             : #endif
    3578             : 
    3579          61 :         if (!nvme_ctrlr->disabled) {
    3580             :                 /* If a nvme_ctrlr is disabled, don't try to create qpair for it. Qpair will
    3581             :                  * be created when it's enabled.
    3582             :                  */
    3583          61 :                 rc = bdev_nvme_create_qpair(nvme_qpair);
    3584          61 :                 if (rc != 0) {
    3585             :                         /* nvme_ctrlr can't create IO qpair if connection is down.
    3586             :                          * If reconnect_delay_sec is non-zero, creating IO qpair is retried
    3587             :                          * after reconnect_delay_sec seconds. If bdev_retry_count is non-zero,
    3588             :                          * submitted IO will be queued until IO qpair is successfully created.
    3589             :                          *
    3590             :                          * Hence, if both are satisfied, ignore the failure.
    3591             :                          */
    3592           0 :                         if (nvme_ctrlr->opts.reconnect_delay_sec == 0 || g_opts.bdev_retry_count == 0) {
    3593           0 :                                 spdk_put_io_channel(pg_ch);
    3594           0 :                                 free(nvme_qpair);
    3595           0 :                                 return rc;
    3596             :                         }
    3597           0 :                 }
    3598          61 :         }
    3599             : 
    3600          61 :         TAILQ_INSERT_TAIL(&nvme_qpair->group->qpair_list, nvme_qpair, tailq);
    3601             : 
    3602          61 :         ctrlr_ch->qpair = nvme_qpair;
    3603             : 
    3604          61 :         nvme_ctrlr_get_ref(nvme_ctrlr);
    3605             : 
    3606          61 :         return 0;
    3607          61 : }
    3608             : 
    3609             : static int
    3610          61 : bdev_nvme_create_ctrlr_channel_cb(void *io_device, void *ctx_buf)
    3611             : {
    3612          61 :         struct nvme_ctrlr *nvme_ctrlr = io_device;
    3613          61 :         struct nvme_ctrlr_channel *ctrlr_ch = ctx_buf;
    3614             : 
    3615          61 :         return nvme_qpair_create(nvme_ctrlr, ctrlr_ch);
    3616             : }
    3617             : 
    3618             : static void
    3619          61 : nvme_qpair_delete(struct nvme_qpair *nvme_qpair)
    3620             : {
    3621             :         struct nvme_io_path *io_path, *next;
    3622             : 
    3623          61 :         assert(nvme_qpair->group != NULL);
    3624             : 
    3625         100 :         TAILQ_FOREACH_SAFE(io_path, &nvme_qpair->io_path_list, tailq, next) {
    3626          39 :                 TAILQ_REMOVE(&nvme_qpair->io_path_list, io_path, tailq);
    3627          39 :                 nvme_io_path_free(io_path);
    3628          39 :         }
    3629             : 
    3630          61 :         TAILQ_REMOVE(&nvme_qpair->group->qpair_list, nvme_qpair, tailq);
    3631             : 
    3632          61 :         spdk_put_io_channel(spdk_io_channel_from_ctx(nvme_qpair->group));
    3633             : 
    3634          61 :         nvme_ctrlr_put_ref(nvme_qpair->ctrlr);
    3635             : 
    3636          61 :         free(nvme_qpair);
    3637          61 : }
    3638             : 
    3639             : static void
    3640          61 : bdev_nvme_destroy_ctrlr_channel_cb(void *io_device, void *ctx_buf)
    3641             : {
    3642          61 :         struct nvme_ctrlr_channel *ctrlr_ch = ctx_buf;
    3643             :         struct nvme_qpair *nvme_qpair;
    3644             : 
    3645          61 :         nvme_qpair = ctrlr_ch->qpair;
    3646          61 :         assert(nvme_qpair != NULL);
    3647             : 
    3648          61 :         _bdev_nvme_clear_io_path_cache(nvme_qpair);
    3649             : 
    3650          61 :         if (nvme_qpair->qpair != NULL) {
    3651             :                 /* Always try to disconnect the qpair, even if a reset is in progress.
    3652             :                  * The qpair may have been created after the reset process started.
    3653             :                  */
    3654          45 :                 spdk_nvme_ctrlr_disconnect_io_qpair(nvme_qpair->qpair);
    3655          45 :                 if (ctrlr_ch->reset_iter) {
    3656             :                         /* Skip current ctrlr_channel in a full reset sequence because
    3657             :                          * it is being deleted now.
    3658             :                          */
    3659           0 :                         nvme_ctrlr_for_each_channel_continue(ctrlr_ch->reset_iter, 0);
    3660           0 :                 }
    3661             : 
    3662             :                 /* We cannot release a reference to the poll group now.
    3663             :                  * The qpair may be disconnected asynchronously later.
    3664             :                  * We need to poll it until it is actually disconnected.
    3665             :                  * Just detach the qpair from the deleting ctrlr_channel.
    3666             :                  */
    3667          45 :                 nvme_qpair->ctrlr_ch = NULL;
    3668          45 :         } else {
    3669          16 :                 assert(ctrlr_ch->reset_iter == NULL);
    3670             : 
    3671          16 :                 nvme_qpair_delete(nvme_qpair);
    3672             :         }
    3673          61 : }
    3674             : 
    3675             : static inline struct spdk_io_channel *
    3676           0 : bdev_nvme_get_accel_channel(struct nvme_poll_group *group)
    3677             : {
    3678           0 :         if (spdk_unlikely(!group->accel_channel)) {
    3679           0 :                 group->accel_channel = spdk_accel_get_io_channel();
    3680           0 :                 if (!group->accel_channel) {
    3681           0 :                         SPDK_ERRLOG("Cannot get the accel_channel for bdev nvme polling group=%p\n",
    3682             :                                     group);
    3683           0 :                         return NULL;
    3684             :                 }
    3685           0 :         }
    3686             : 
    3687           0 :         return group->accel_channel;
    3688           0 : }
    3689             : 
    3690             : static void
    3691           0 : bdev_nvme_finish_sequence(void *seq, spdk_nvme_accel_completion_cb cb_fn, void *cb_arg)
    3692             : {
    3693           0 :         spdk_accel_sequence_finish(seq, cb_fn, cb_arg);
    3694           0 : }
    3695             : 
    3696             : static void
    3697           0 : bdev_nvme_abort_sequence(void *seq)
    3698             : {
    3699           0 :         spdk_accel_sequence_abort(seq);
    3700           0 : }
    3701             : 
    3702             : static void
    3703           0 : bdev_nvme_reverse_sequence(void *seq)
    3704             : {
    3705           0 :         spdk_accel_sequence_reverse(seq);
    3706           0 : }
    3707             : 
    3708             : static int
    3709           0 : bdev_nvme_append_crc32c(void *ctx, void **seq, uint32_t *dst, struct iovec *iovs, uint32_t iovcnt,
    3710             :                         struct spdk_memory_domain *domain, void *domain_ctx, uint32_t seed,
    3711             :                         spdk_nvme_accel_step_cb cb_fn, void *cb_arg)
    3712             : {
    3713             :         struct spdk_io_channel *ch;
    3714           0 :         struct nvme_poll_group *group = ctx;
    3715             : 
    3716           0 :         ch = bdev_nvme_get_accel_channel(group);
    3717           0 :         if (spdk_unlikely(ch == NULL)) {
    3718           0 :                 return -ENOMEM;
    3719             :         }
    3720             : 
    3721           0 :         return spdk_accel_append_crc32c((struct spdk_accel_sequence **)seq, ch, dst, iovs, iovcnt,
    3722           0 :                                         domain, domain_ctx, seed, cb_fn, cb_arg);
    3723           0 : }
    3724             : 
    3725             : static int
    3726           0 : bdev_nvme_append_copy(void *ctx, void **seq, struct iovec *dst_iovs, uint32_t dst_iovcnt,
    3727             :                       struct spdk_memory_domain *dst_domain, void *dst_domain_ctx,
    3728             :                       struct iovec *src_iovs, uint32_t src_iovcnt,
    3729             :                       struct spdk_memory_domain *src_domain, void *src_domain_ctx,
    3730             :                       spdk_nvme_accel_step_cb cb_fn, void *cb_arg)
    3731             : {
    3732             :         struct spdk_io_channel *ch;
    3733           0 :         struct nvme_poll_group *group = ctx;
    3734             : 
    3735           0 :         ch = bdev_nvme_get_accel_channel(group);
    3736           0 :         if (spdk_unlikely(ch == NULL)) {
    3737           0 :                 return -ENOMEM;
    3738             :         }
    3739             : 
    3740           0 :         return spdk_accel_append_copy((struct spdk_accel_sequence **)seq, ch,
    3741           0 :                                       dst_iovs, dst_iovcnt, dst_domain, dst_domain_ctx,
    3742           0 :                                       src_iovs, src_iovcnt, src_domain, src_domain_ctx,
    3743           0 :                                       cb_fn, cb_arg);
    3744           0 : }
    3745             : 
    3746             : static struct spdk_nvme_accel_fn_table g_bdev_nvme_accel_fn_table = {
    3747             :         .table_size             = sizeof(struct spdk_nvme_accel_fn_table),
    3748             :         .append_crc32c          = bdev_nvme_append_crc32c,
    3749             :         .append_copy            = bdev_nvme_append_copy,
    3750             :         .finish_sequence        = bdev_nvme_finish_sequence,
    3751             :         .reverse_sequence       = bdev_nvme_reverse_sequence,
    3752             :         .abort_sequence         = bdev_nvme_abort_sequence,
    3753             : };
    3754             : 
    3755             : static void
    3756           0 : bdev_nvme_poll_group_interrupt_cb(struct spdk_nvme_poll_group *group, void *ctx)
    3757             : {
    3758           0 :         bdev_nvme_poll(ctx);
    3759           0 : }
    3760             : 
    3761             : static int
    3762          46 : bdev_nvme_create_poll_group_cb(void *io_device, void *ctx_buf)
    3763             : {
    3764          46 :         struct nvme_poll_group *group = ctx_buf;
    3765             :         struct spdk_fd_group *fgrp;
    3766             :         uint64_t period;
    3767             :         int rc;
    3768             : 
    3769          46 :         TAILQ_INIT(&group->qpair_list);
    3770             : 
    3771          46 :         group->group = spdk_nvme_poll_group_create(group, &g_bdev_nvme_accel_fn_table);
    3772          46 :         if (group->group == NULL) {
    3773           0 :                 return -1;
    3774             :         }
    3775             : 
    3776          46 :         period = spdk_interrupt_mode_is_enabled() ? 0 : g_opts.nvme_ioq_poll_period_us;
    3777          46 :         group->poller = SPDK_POLLER_REGISTER(bdev_nvme_poll, group, period);
    3778             : 
    3779          46 :         if (group->poller == NULL) {
    3780           0 :                 spdk_nvme_poll_group_destroy(group->group);
    3781           0 :                 return -1;
    3782             :         }
    3783             : 
    3784          46 :         if (spdk_interrupt_mode_is_enabled()) {
    3785           0 :                 spdk_poller_register_interrupt(group->poller, NULL, NULL);
    3786             : 
    3787           0 :                 fgrp = spdk_nvme_poll_group_get_fd_group(group->group);
    3788           0 :                 if (fgrp == NULL) {
    3789           0 :                         spdk_nvme_poll_group_destroy(group->group);
    3790           0 :                         return -1;
    3791             :                 }
    3792             : 
    3793           0 :                 rc = spdk_nvme_poll_group_set_interrupt_callback(group->group,
    3794           0 :                                 bdev_nvme_poll_group_interrupt_cb, group);
    3795           0 :                 if (rc != 0) {
    3796           0 :                         spdk_nvme_poll_group_destroy(group->group);
    3797           0 :                         return -1;
    3798             :                 }
    3799             : 
    3800           0 :                 group->intr = spdk_interrupt_register_fd_group(fgrp, "bdev_nvme_interrupt");
    3801           0 :                 if (!group->intr) {
    3802           0 :                         spdk_nvme_poll_group_destroy(group->group);
    3803           0 :                         return -1;
    3804             :                 }
    3805           0 :         }
    3806             : 
    3807          46 :         return 0;
    3808          46 : }
    3809             : 
    3810             : static void
    3811          46 : bdev_nvme_destroy_poll_group_cb(void *io_device, void *ctx_buf)
    3812             : {
    3813          46 :         struct nvme_poll_group *group = ctx_buf;
    3814             : 
    3815          46 :         assert(TAILQ_EMPTY(&group->qpair_list));
    3816             : 
    3817          46 :         if (group->accel_channel) {
    3818           0 :                 spdk_put_io_channel(group->accel_channel);
    3819           0 :         }
    3820             : 
    3821          46 :         if (spdk_interrupt_mode_is_enabled()) {
    3822           0 :                 spdk_interrupt_unregister(&group->intr);
    3823           0 :         }
    3824             : 
    3825          46 :         spdk_poller_unregister(&group->poller);
    3826          46 :         if (spdk_nvme_poll_group_destroy(group->group)) {
    3827           0 :                 SPDK_ERRLOG("Unable to destroy a poll group for the NVMe bdev module.\n");
    3828           0 :                 assert(false);
    3829             :         }
    3830          46 : }
    3831             : 
    3832             : static struct spdk_io_channel *
    3833           0 : bdev_nvme_get_io_channel(void *ctx)
    3834             : {
    3835           0 :         struct nvme_bdev *nbdev = ctx;
    3836             : 
    3837           0 :         return spdk_get_io_channel(nbdev);
    3838             : }
    3839             : 
    3840             : static void *
    3841           0 : bdev_nvme_get_module_ctx(void *ctx)
    3842             : {
    3843           0 :         struct nvme_bdev *nbdev = ctx;
    3844             :         struct nvme_ns *nvme_ns;
    3845             : 
    3846           0 :         if (!nbdev || nbdev->disk.module != &nvme_if) {
    3847           0 :                 return NULL;
    3848             :         }
    3849             : 
    3850           0 :         nvme_ns = TAILQ_FIRST(&nbdev->nvme_ns_list);
    3851           0 :         if (!nvme_ns) {
    3852           0 :                 return NULL;
    3853             :         }
    3854             : 
    3855           0 :         return nvme_ns->ns;
    3856           0 : }
    3857             : 
    3858             : static const char *
    3859           0 : _nvme_ana_state_str(enum spdk_nvme_ana_state ana_state)
    3860             : {
    3861           0 :         switch (ana_state) {
    3862             :         case SPDK_NVME_ANA_OPTIMIZED_STATE:
    3863           0 :                 return "optimized";
    3864             :         case SPDK_NVME_ANA_NON_OPTIMIZED_STATE:
    3865           0 :                 return "non_optimized";
    3866             :         case SPDK_NVME_ANA_INACCESSIBLE_STATE:
    3867           0 :                 return "inaccessible";
    3868             :         case SPDK_NVME_ANA_PERSISTENT_LOSS_STATE:
    3869           0 :                 return "persistent_loss";
    3870             :         case SPDK_NVME_ANA_CHANGE_STATE:
    3871           0 :                 return "change";
    3872             :         default:
    3873           0 :                 return NULL;
    3874             :         }
    3875           0 : }
    3876             : 
    3877             : static int
    3878           8 : bdev_nvme_get_memory_domains(void *ctx, struct spdk_memory_domain **domains, int array_size)
    3879             : {
    3880           8 :         struct spdk_memory_domain **_domains = NULL;
    3881           8 :         struct nvme_bdev *nbdev = ctx;
    3882             :         struct nvme_ns *nvme_ns;
    3883           8 :         int i = 0, _array_size = array_size;
    3884           8 :         int rc = 0;
    3885             : 
    3886          22 :         TAILQ_FOREACH(nvme_ns, &nbdev->nvme_ns_list, tailq) {
    3887          14 :                 if (domains && array_size >= i) {
    3888          11 :                         _domains = &domains[i];
    3889          11 :                 } else {
    3890           3 :                         _domains = NULL;
    3891             :                 }
    3892          14 :                 rc = spdk_nvme_ctrlr_get_memory_domains(nvme_ns->ctrlr->ctrlr, _domains, _array_size);
    3893          14 :                 if (rc > 0) {
    3894          13 :                         i += rc;
    3895          13 :                         if (_array_size >= rc) {
    3896           9 :                                 _array_size -= rc;
    3897           9 :                         } else {
    3898           4 :                                 _array_size = 0;
    3899             :                         }
    3900          14 :                 } else if (rc < 0) {
    3901           0 :                         return rc;
    3902             :                 }
    3903          14 :         }
    3904             : 
    3905           8 :         return i;
    3906           8 : }
    3907             : 
    3908             : static const char *
    3909           0 : nvme_ctrlr_get_state_str(struct nvme_ctrlr *nvme_ctrlr)
    3910             : {
    3911           0 :         if (nvme_ctrlr->destruct) {
    3912           0 :                 return "deleting";
    3913           0 :         } else if (spdk_nvme_ctrlr_is_failed(nvme_ctrlr->ctrlr)) {
    3914           0 :                 return "failed";
    3915           0 :         } else if (nvme_ctrlr->resetting) {
    3916           0 :                 return "resetting";
    3917           0 :         } else if (nvme_ctrlr->reconnect_is_delayed > 0) {
    3918           0 :                 return "reconnect_is_delayed";
    3919           0 :         } else if (nvme_ctrlr->disabled) {
    3920           0 :                 return "disabled";
    3921             :         } else {
    3922           0 :                 return "enabled";
    3923             :         }
    3924           0 : }
    3925             : 
    3926             : void
    3927           0 : nvme_ctrlr_info_json(struct spdk_json_write_ctx *w, struct nvme_ctrlr *nvme_ctrlr)
    3928             : {
    3929             :         struct spdk_nvme_transport_id *trid;
    3930             :         const struct spdk_nvme_ctrlr_opts *opts;
    3931             :         const struct spdk_nvme_ctrlr_data *cdata;
    3932             :         struct nvme_path_id *path_id;
    3933             :         int32_t numa_id;
    3934             : 
    3935           0 :         spdk_json_write_object_begin(w);
    3936             : 
    3937           0 :         spdk_json_write_named_string(w, "state", nvme_ctrlr_get_state_str(nvme_ctrlr));
    3938             : 
    3939             : #ifdef SPDK_CONFIG_NVME_CUSE
    3940             :         size_t cuse_name_size = 128;
    3941             :         char cuse_name[cuse_name_size];
    3942             : 
    3943             :         int rc = spdk_nvme_cuse_get_ctrlr_name(nvme_ctrlr->ctrlr, cuse_name, &cuse_name_size);
    3944             :         if (rc == 0) {
    3945             :                 spdk_json_write_named_string(w, "cuse_device", cuse_name);
    3946             :         }
    3947             : #endif
    3948           0 :         trid = &nvme_ctrlr->active_path_id->trid;
    3949           0 :         spdk_json_write_named_object_begin(w, "trid");
    3950           0 :         nvme_bdev_dump_trid_json(trid, w);
    3951           0 :         spdk_json_write_object_end(w);
    3952             : 
    3953           0 :         path_id = TAILQ_NEXT(nvme_ctrlr->active_path_id, link);
    3954           0 :         if (path_id != NULL) {
    3955           0 :                 spdk_json_write_named_array_begin(w, "alternate_trids");
    3956           0 :                 do {
    3957           0 :                         trid = &path_id->trid;
    3958           0 :                         spdk_json_write_object_begin(w);
    3959           0 :                         nvme_bdev_dump_trid_json(trid, w);
    3960           0 :                         spdk_json_write_object_end(w);
    3961             : 
    3962           0 :                         path_id = TAILQ_NEXT(path_id, link);
    3963           0 :                 } while (path_id != NULL);
    3964           0 :                 spdk_json_write_array_end(w);
    3965           0 :         }
    3966             : 
    3967           0 :         cdata = spdk_nvme_ctrlr_get_data(nvme_ctrlr->ctrlr);
    3968           0 :         spdk_json_write_named_uint16(w, "cntlid", cdata->cntlid);
    3969             : 
    3970           0 :         opts = spdk_nvme_ctrlr_get_opts(nvme_ctrlr->ctrlr);
    3971           0 :         spdk_json_write_named_object_begin(w, "host");
    3972           0 :         spdk_json_write_named_string(w, "nqn", opts->hostnqn);
    3973           0 :         spdk_json_write_named_string(w, "addr", opts->src_addr);
    3974           0 :         spdk_json_write_named_string(w, "svcid", opts->src_svcid);
    3975           0 :         spdk_json_write_object_end(w);
    3976             : 
    3977           0 :         numa_id = spdk_nvme_ctrlr_get_numa_id(nvme_ctrlr->ctrlr);
    3978           0 :         if (numa_id != SPDK_ENV_NUMA_ID_ANY) {
    3979           0 :                 spdk_json_write_named_uint32(w, "numa_id", numa_id);
    3980           0 :         }
    3981           0 :         spdk_json_write_object_end(w);
    3982           0 : }
    3983             : 
    3984             : static void
    3985           0 : nvme_namespace_info_json(struct spdk_json_write_ctx *w,
    3986             :                          struct nvme_ns *nvme_ns)
    3987             : {
    3988             :         struct spdk_nvme_ns *ns;
    3989             :         struct spdk_nvme_ctrlr *ctrlr;
    3990             :         const struct spdk_nvme_ctrlr_data *cdata;
    3991             :         const struct spdk_nvme_transport_id *trid;
    3992             :         union spdk_nvme_vs_register vs;
    3993             :         const struct spdk_nvme_ns_data *nsdata;
    3994             :         char buf[128];
    3995             : 
    3996           0 :         ns = nvme_ns->ns;
    3997           0 :         if (ns == NULL) {
    3998           0 :                 return;
    3999             :         }
    4000             : 
    4001           0 :         ctrlr = spdk_nvme_ns_get_ctrlr(ns);
    4002             : 
    4003           0 :         cdata = spdk_nvme_ctrlr_get_data(ctrlr);
    4004           0 :         trid = spdk_nvme_ctrlr_get_transport_id(ctrlr);
    4005           0 :         vs = spdk_nvme_ctrlr_get_regs_vs(ctrlr);
    4006             : 
    4007           0 :         spdk_json_write_object_begin(w);
    4008             : 
    4009           0 :         if (trid->trtype == SPDK_NVME_TRANSPORT_PCIE) {
    4010           0 :                 spdk_json_write_named_string(w, "pci_address", trid->traddr);
    4011           0 :         }
    4012             : 
    4013           0 :         spdk_json_write_named_object_begin(w, "trid");
    4014             : 
    4015           0 :         nvme_bdev_dump_trid_json(trid, w);
    4016             : 
    4017           0 :         spdk_json_write_object_end(w);
    4018             : 
    4019             : #ifdef SPDK_CONFIG_NVME_CUSE
    4020             :         size_t cuse_name_size = 128;
    4021             :         char cuse_name[cuse_name_size];
    4022             : 
    4023             :         int rc = spdk_nvme_cuse_get_ns_name(ctrlr, spdk_nvme_ns_get_id(ns),
    4024             :                                             cuse_name, &cuse_name_size);
    4025             :         if (rc == 0) {
    4026             :                 spdk_json_write_named_string(w, "cuse_device", cuse_name);
    4027             :         }
    4028             : #endif
    4029             : 
    4030           0 :         spdk_json_write_named_object_begin(w, "ctrlr_data");
    4031             : 
    4032           0 :         spdk_json_write_named_uint16(w, "cntlid", cdata->cntlid);
    4033             : 
    4034           0 :         spdk_json_write_named_string_fmt(w, "vendor_id", "0x%04x", cdata->vid);
    4035             : 
    4036           0 :         snprintf(buf, sizeof(cdata->mn) + 1, "%s", cdata->mn);
    4037           0 :         spdk_str_trim(buf);
    4038           0 :         spdk_json_write_named_string(w, "model_number", buf);
    4039             : 
    4040           0 :         snprintf(buf, sizeof(cdata->sn) + 1, "%s", cdata->sn);
    4041           0 :         spdk_str_trim(buf);
    4042           0 :         spdk_json_write_named_string(w, "serial_number", buf);
    4043             : 
    4044           0 :         snprintf(buf, sizeof(cdata->fr) + 1, "%s", cdata->fr);
    4045           0 :         spdk_str_trim(buf);
    4046           0 :         spdk_json_write_named_string(w, "firmware_revision", buf);
    4047             : 
    4048           0 :         if (cdata->subnqn[0] != '\0') {
    4049           0 :                 spdk_json_write_named_string(w, "subnqn", cdata->subnqn);
    4050           0 :         }
    4051             : 
    4052           0 :         spdk_json_write_named_object_begin(w, "oacs");
    4053             : 
    4054           0 :         spdk_json_write_named_uint32(w, "security", cdata->oacs.security);
    4055           0 :         spdk_json_write_named_uint32(w, "format", cdata->oacs.format);
    4056           0 :         spdk_json_write_named_uint32(w, "firmware", cdata->oacs.firmware);
    4057           0 :         spdk_json_write_named_uint32(w, "ns_manage", cdata->oacs.ns_manage);
    4058             : 
    4059           0 :         spdk_json_write_object_end(w);
    4060             : 
    4061           0 :         spdk_json_write_named_bool(w, "multi_ctrlr", cdata->cmic.multi_ctrlr);
    4062           0 :         spdk_json_write_named_bool(w, "ana_reporting", cdata->cmic.ana_reporting);
    4063             : 
    4064           0 :         spdk_json_write_object_end(w);
    4065             : 
    4066           0 :         spdk_json_write_named_object_begin(w, "vs");
    4067             : 
    4068           0 :         spdk_json_write_name(w, "nvme_version");
    4069           0 :         if (vs.bits.ter) {
    4070           0 :                 spdk_json_write_string_fmt(w, "%u.%u.%u", vs.bits.mjr, vs.bits.mnr, vs.bits.ter);
    4071           0 :         } else {
    4072           0 :                 spdk_json_write_string_fmt(w, "%u.%u", vs.bits.mjr, vs.bits.mnr);
    4073             :         }
    4074             : 
    4075           0 :         spdk_json_write_object_end(w);
    4076             : 
    4077           0 :         nsdata = spdk_nvme_ns_get_data(ns);
    4078             : 
    4079           0 :         spdk_json_write_named_object_begin(w, "ns_data");
    4080             : 
    4081           0 :         spdk_json_write_named_uint32(w, "id", spdk_nvme_ns_get_id(ns));
    4082             : 
    4083           0 :         if (cdata->cmic.ana_reporting) {
    4084           0 :                 spdk_json_write_named_string(w, "ana_state",
    4085           0 :                                              _nvme_ana_state_str(nvme_ns->ana_state));
    4086           0 :         }
    4087             : 
    4088           0 :         spdk_json_write_named_bool(w, "can_share", nsdata->nmic.can_share);
    4089             : 
    4090           0 :         spdk_json_write_object_end(w);
    4091             : 
    4092           0 :         if (cdata->oacs.security) {
    4093           0 :                 spdk_json_write_named_object_begin(w, "security");
    4094             : 
    4095           0 :                 spdk_json_write_named_bool(w, "opal", nvme_ns->bdev->opal);
    4096             : 
    4097           0 :                 spdk_json_write_object_end(w);
    4098           0 :         }
    4099             : 
    4100           0 :         spdk_json_write_object_end(w);
    4101           0 : }
    4102             : 
    4103             : static const char *
    4104           0 : nvme_bdev_get_mp_policy_str(struct nvme_bdev *nbdev)
    4105             : {
    4106           0 :         switch (nbdev->mp_policy) {
    4107             :         case BDEV_NVME_MP_POLICY_ACTIVE_PASSIVE:
    4108           0 :                 return "active_passive";
    4109             :         case BDEV_NVME_MP_POLICY_ACTIVE_ACTIVE:
    4110           0 :                 return "active_active";
    4111             :         default:
    4112           0 :                 assert(false);
    4113             :                 return "invalid";
    4114             :         }
    4115           0 : }
    4116             : 
    4117             : static const char *
    4118           0 : nvme_bdev_get_mp_selector_str(struct nvme_bdev *nbdev)
    4119             : {
    4120           0 :         switch (nbdev->mp_selector) {
    4121             :         case BDEV_NVME_MP_SELECTOR_ROUND_ROBIN:
    4122           0 :                 return "round_robin";
    4123             :         case BDEV_NVME_MP_SELECTOR_QUEUE_DEPTH:
    4124           0 :                 return "queue_depth";
    4125             :         default:
    4126           0 :                 assert(false);
    4127             :                 return "invalid";
    4128             :         }
    4129           0 : }
    4130             : 
    4131             : static int
    4132           0 : bdev_nvme_dump_info_json(void *ctx, struct spdk_json_write_ctx *w)
    4133             : {
    4134           0 :         struct nvme_bdev *nbdev = ctx;
    4135             :         struct nvme_ns *nvme_ns;
    4136             : 
    4137           0 :         pthread_mutex_lock(&nbdev->mutex);
    4138           0 :         spdk_json_write_named_array_begin(w, "nvme");
    4139           0 :         TAILQ_FOREACH(nvme_ns, &nbdev->nvme_ns_list, tailq) {
    4140           0 :                 nvme_namespace_info_json(w, nvme_ns);
    4141           0 :         }
    4142           0 :         spdk_json_write_array_end(w);
    4143           0 :         spdk_json_write_named_string(w, "mp_policy", nvme_bdev_get_mp_policy_str(nbdev));
    4144           0 :         if (nbdev->mp_policy == BDEV_NVME_MP_POLICY_ACTIVE_ACTIVE) {
    4145           0 :                 spdk_json_write_named_string(w, "selector", nvme_bdev_get_mp_selector_str(nbdev));
    4146           0 :                 if (nbdev->mp_selector == BDEV_NVME_MP_SELECTOR_ROUND_ROBIN) {
    4147           0 :                         spdk_json_write_named_uint32(w, "rr_min_io", nbdev->rr_min_io);
    4148           0 :                 }
    4149           0 :         }
    4150           0 :         pthread_mutex_unlock(&nbdev->mutex);
    4151             : 
    4152           0 :         return 0;
    4153             : }
    4154             : 
    4155             : static void
    4156           0 : bdev_nvme_write_config_json(struct spdk_bdev *bdev, struct spdk_json_write_ctx *w)
    4157             : {
    4158             :         /* No config per bdev needed */
    4159           0 : }
    4160             : 
    4161             : static uint64_t
    4162           0 : bdev_nvme_get_spin_time(struct spdk_io_channel *ch)
    4163             : {
    4164           0 :         struct nvme_bdev_channel *nbdev_ch = spdk_io_channel_get_ctx(ch);
    4165             :         struct nvme_io_path *io_path;
    4166             :         struct nvme_poll_group *group;
    4167           0 :         uint64_t spin_time = 0;
    4168             : 
    4169           0 :         STAILQ_FOREACH(io_path, &nbdev_ch->io_path_list, stailq) {
    4170           0 :                 group = io_path->qpair->group;
    4171             : 
    4172           0 :                 if (!group || !group->collect_spin_stat) {
    4173           0 :                         continue;
    4174             :                 }
    4175             : 
    4176           0 :                 if (group->end_ticks != 0) {
    4177           0 :                         group->spin_ticks += (group->end_ticks - group->start_ticks);
    4178           0 :                         group->end_ticks = 0;
    4179           0 :                 }
    4180             : 
    4181           0 :                 spin_time += group->spin_ticks;
    4182           0 :                 group->start_ticks = 0;
    4183           0 :                 group->spin_ticks = 0;
    4184           0 :         }
    4185             : 
    4186           0 :         return (spin_time * 1000000ULL) / spdk_get_ticks_hz();
    4187             : }
    4188             : 
    4189             : static void
    4190           0 : bdev_nvme_reset_device_stat(void *ctx)
    4191             : {
    4192           0 :         struct nvme_bdev *nbdev = ctx;
    4193             : 
    4194           0 :         if (nbdev->err_stat != NULL) {
    4195           0 :                 memset(nbdev->err_stat, 0, sizeof(struct nvme_error_stat));
    4196           0 :         }
    4197           0 : }
    4198             : 
    4199             : /* JSON string should be lowercases and underscore delimited string. */
    4200             : static void
    4201           0 : bdev_nvme_format_nvme_status(char *dst, const char *src)
    4202             : {
    4203             :         char tmp[256];
    4204             : 
    4205           0 :         spdk_strcpy_replace(dst, 256, src, " - ", "_");
    4206           0 :         spdk_strcpy_replace(tmp, 256, dst, "-", "_");
    4207           0 :         spdk_strcpy_replace(dst, 256, tmp, " ", "_");
    4208           0 :         spdk_strlwr(dst);
    4209           0 : }
    4210             : 
    4211             : static void
    4212           0 : bdev_nvme_dump_device_stat_json(void *ctx, struct spdk_json_write_ctx *w)
    4213             : {
    4214           0 :         struct nvme_bdev *nbdev = ctx;
    4215           0 :         struct spdk_nvme_status status = {};
    4216             :         uint16_t sct, sc;
    4217             :         char status_json[256];
    4218             :         const char *status_str;
    4219             : 
    4220           0 :         if (nbdev->err_stat == NULL) {
    4221           0 :                 return;
    4222             :         }
    4223             : 
    4224           0 :         spdk_json_write_named_object_begin(w, "nvme_error");
    4225             : 
    4226           0 :         spdk_json_write_named_object_begin(w, "status_type");
    4227           0 :         for (sct = 0; sct < 8; sct++) {
    4228           0 :                 if (nbdev->err_stat->status_type[sct] == 0) {
    4229           0 :                         continue;
    4230             :                 }
    4231           0 :                 status.sct = sct;
    4232             : 
    4233           0 :                 status_str = spdk_nvme_cpl_get_status_type_string(&status);
    4234           0 :                 assert(status_str != NULL);
    4235           0 :                 bdev_nvme_format_nvme_status(status_json, status_str);
    4236             : 
    4237           0 :                 spdk_json_write_named_uint32(w, status_json, nbdev->err_stat->status_type[sct]);
    4238           0 :         }
    4239           0 :         spdk_json_write_object_end(w);
    4240             : 
    4241           0 :         spdk_json_write_named_object_begin(w, "status_code");
    4242           0 :         for (sct = 0; sct < 4; sct++) {
    4243           0 :                 status.sct = sct;
    4244           0 :                 for (sc = 0; sc < 256; sc++) {
    4245           0 :                         if (nbdev->err_stat->status[sct][sc] == 0) {
    4246           0 :                                 continue;
    4247             :                         }
    4248           0 :                         status.sc = sc;
    4249             : 
    4250           0 :                         status_str = spdk_nvme_cpl_get_status_string(&status);
    4251           0 :                         assert(status_str != NULL);
    4252           0 :                         bdev_nvme_format_nvme_status(status_json, status_str);
    4253             : 
    4254           0 :                         spdk_json_write_named_uint32(w, status_json, nbdev->err_stat->status[sct][sc]);
    4255           0 :                 }
    4256           0 :         }
    4257           0 :         spdk_json_write_object_end(w);
    4258             : 
    4259           0 :         spdk_json_write_object_end(w);
    4260           0 : }
    4261             : 
    4262             : static bool
    4263           0 : bdev_nvme_accel_sequence_supported(void *ctx, enum spdk_bdev_io_type type)
    4264             : {
    4265           0 :         struct nvme_bdev *nbdev = ctx;
    4266             :         struct nvme_ns *nvme_ns;
    4267             :         struct spdk_nvme_ctrlr *ctrlr;
    4268             : 
    4269           0 :         if (!g_opts.allow_accel_sequence) {
    4270           0 :                 return false;
    4271             :         }
    4272             : 
    4273           0 :         switch (type) {
    4274             :         case SPDK_BDEV_IO_TYPE_WRITE:
    4275             :         case SPDK_BDEV_IO_TYPE_READ:
    4276           0 :                 break;
    4277             :         default:
    4278           0 :                 return false;
    4279             :         }
    4280             : 
    4281           0 :         nvme_ns = TAILQ_FIRST(&nbdev->nvme_ns_list);
    4282           0 :         assert(nvme_ns != NULL);
    4283             : 
    4284           0 :         ctrlr = nvme_ns->ctrlr->ctrlr;
    4285           0 :         assert(ctrlr != NULL);
    4286             : 
    4287           0 :         return spdk_nvme_ctrlr_get_flags(ctrlr) & SPDK_NVME_CTRLR_ACCEL_SEQUENCE_SUPPORTED;
    4288           0 : }
    4289             : 
    4290             : static const struct spdk_bdev_fn_table nvmelib_fn_table = {
    4291             :         .destruct                       = bdev_nvme_destruct,
    4292             :         .submit_request                 = bdev_nvme_submit_request,
    4293             :         .io_type_supported              = bdev_nvme_io_type_supported,
    4294             :         .get_io_channel                 = bdev_nvme_get_io_channel,
    4295             :         .dump_info_json                 = bdev_nvme_dump_info_json,
    4296             :         .write_config_json              = bdev_nvme_write_config_json,
    4297             :         .get_spin_time                  = bdev_nvme_get_spin_time,
    4298             :         .get_module_ctx                 = bdev_nvme_get_module_ctx,
    4299             :         .get_memory_domains             = bdev_nvme_get_memory_domains,
    4300             :         .accel_sequence_supported       = bdev_nvme_accel_sequence_supported,
    4301             :         .reset_device_stat              = bdev_nvme_reset_device_stat,
    4302             :         .dump_device_stat_json          = bdev_nvme_dump_device_stat_json,
    4303             : };
    4304             : 
    4305             : typedef int (*bdev_nvme_parse_ana_log_page_cb)(
    4306             :         const struct spdk_nvme_ana_group_descriptor *desc, void *cb_arg);
    4307             : 
    4308             : static int
    4309          42 : bdev_nvme_parse_ana_log_page(struct nvme_ctrlr *nvme_ctrlr,
    4310             :                              bdev_nvme_parse_ana_log_page_cb cb_fn, void *cb_arg)
    4311             : {
    4312             :         struct spdk_nvme_ana_group_descriptor *copied_desc;
    4313             :         uint8_t *orig_desc;
    4314             :         uint32_t i, desc_size, copy_len;
    4315          42 :         int rc = 0;
    4316             : 
    4317          42 :         if (nvme_ctrlr->ana_log_page == NULL) {
    4318           0 :                 return -EINVAL;
    4319             :         }
    4320             : 
    4321          42 :         copied_desc = nvme_ctrlr->copied_ana_desc;
    4322             : 
    4323          42 :         orig_desc = (uint8_t *)nvme_ctrlr->ana_log_page + sizeof(struct spdk_nvme_ana_page);
    4324          42 :         copy_len = nvme_ctrlr->max_ana_log_page_size - sizeof(struct spdk_nvme_ana_page);
    4325             : 
    4326          72 :         for (i = 0; i < nvme_ctrlr->ana_log_page->num_ana_group_desc; i++) {
    4327          67 :                 memcpy(copied_desc, orig_desc, copy_len);
    4328             : 
    4329          67 :                 rc = cb_fn(copied_desc, cb_arg);
    4330          67 :                 if (rc != 0) {
    4331          37 :                         break;
    4332             :                 }
    4333             : 
    4334          30 :                 desc_size = sizeof(struct spdk_nvme_ana_group_descriptor) +
    4335          30 :                             copied_desc->num_of_nsid * sizeof(uint32_t);
    4336          30 :                 orig_desc += desc_size;
    4337          30 :                 copy_len -= desc_size;
    4338          30 :         }
    4339             : 
    4340          42 :         return rc;
    4341          42 : }
    4342             : 
    4343             : static int
    4344           5 : nvme_ns_ana_transition_timedout(void *ctx)
    4345             : {
    4346           5 :         struct nvme_ns *nvme_ns = ctx;
    4347             : 
    4348           5 :         spdk_poller_unregister(&nvme_ns->anatt_timer);
    4349           5 :         nvme_ns->ana_transition_timedout = true;
    4350             : 
    4351           5 :         return SPDK_POLLER_BUSY;
    4352             : }
    4353             : 
    4354             : static void
    4355          46 : _nvme_ns_set_ana_state(struct nvme_ns *nvme_ns,
    4356             :                        const struct spdk_nvme_ana_group_descriptor *desc)
    4357             : {
    4358             :         const struct spdk_nvme_ctrlr_data *cdata;
    4359             : 
    4360          46 :         nvme_ns->ana_group_id = desc->ana_group_id;
    4361          46 :         nvme_ns->ana_state = desc->ana_state;
    4362          46 :         nvme_ns->ana_state_updating = false;
    4363             : 
    4364          46 :         switch (nvme_ns->ana_state) {
    4365             :         case SPDK_NVME_ANA_OPTIMIZED_STATE:
    4366             :         case SPDK_NVME_ANA_NON_OPTIMIZED_STATE:
    4367          39 :                 nvme_ns->ana_transition_timedout = false;
    4368          39 :                 spdk_poller_unregister(&nvme_ns->anatt_timer);
    4369          39 :                 break;
    4370             : 
    4371             :         case SPDK_NVME_ANA_INACCESSIBLE_STATE:
    4372             :         case SPDK_NVME_ANA_CHANGE_STATE:
    4373           6 :                 if (nvme_ns->anatt_timer != NULL) {
    4374           1 :                         break;
    4375             :                 }
    4376             : 
    4377           5 :                 cdata = spdk_nvme_ctrlr_get_data(nvme_ns->ctrlr->ctrlr);
    4378           5 :                 nvme_ns->anatt_timer = SPDK_POLLER_REGISTER(nvme_ns_ana_transition_timedout,
    4379             :                                        nvme_ns,
    4380             :                                        cdata->anatt * SPDK_SEC_TO_USEC);
    4381           5 :                 break;
    4382             :         default:
    4383           1 :                 break;
    4384             :         }
    4385          46 : }
    4386             : 
    4387             : static int
    4388          60 : nvme_ns_set_ana_state(const struct spdk_nvme_ana_group_descriptor *desc, void *cb_arg)
    4389             : {
    4390          60 :         struct nvme_ns *nvme_ns = cb_arg;
    4391             :         uint32_t i;
    4392             : 
    4393          60 :         assert(nvme_ns->ns != NULL);
    4394             : 
    4395          82 :         for (i = 0; i < desc->num_of_nsid; i++) {
    4396          59 :                 if (desc->nsid[i] != spdk_nvme_ns_get_id(nvme_ns->ns)) {
    4397          22 :                         continue;
    4398             :                 }
    4399             : 
    4400          37 :                 _nvme_ns_set_ana_state(nvme_ns, desc);
    4401          37 :                 return 1;
    4402             :         }
    4403             : 
    4404          23 :         return 0;
    4405          60 : }
    4406             : 
    4407             : static int
    4408           5 : nvme_generate_uuid(const char *sn, uint32_t nsid, struct spdk_uuid *uuid)
    4409             : {
    4410           5 :         int rc = 0;
    4411             :         struct spdk_uuid new_uuid, namespace_uuid;
    4412           5 :         char merged_str[SPDK_NVME_CTRLR_SN_LEN + NSID_STR_LEN + 1] = {'\0'};
    4413             :         /* This namespace UUID was generated using uuid_generate() method. */
    4414           5 :         const char *namespace_str = {"edaed2de-24bc-4b07-b559-f47ecbe730fd"};
    4415             :         int size;
    4416             : 
    4417           5 :         assert(strlen(sn) <= SPDK_NVME_CTRLR_SN_LEN);
    4418             : 
    4419           5 :         spdk_uuid_set_null(&new_uuid);
    4420           5 :         spdk_uuid_set_null(&namespace_uuid);
    4421             : 
    4422           5 :         size = snprintf(merged_str, sizeof(merged_str), "%s%"PRIu32, sn, nsid);
    4423           5 :         if (size <= 0 || (unsigned long)size >= sizeof(merged_str)) {
    4424           0 :                 return -EINVAL;
    4425             :         }
    4426             : 
    4427           5 :         spdk_uuid_parse(&namespace_uuid, namespace_str);
    4428             : 
    4429           5 :         rc = spdk_uuid_generate_sha1(&new_uuid, &namespace_uuid, merged_str, size);
    4430           5 :         if (rc == 0) {
    4431           5 :                 memcpy(uuid, &new_uuid, sizeof(struct spdk_uuid));
    4432           5 :         }
    4433             : 
    4434           5 :         return rc;
    4435           5 : }
    4436             : 
    4437             : static int
    4438          39 : nbdev_create(struct spdk_bdev *disk, const char *base_name,
    4439             :              struct spdk_nvme_ctrlr *ctrlr, struct spdk_nvme_ns *ns,
    4440             :              struct spdk_bdev_nvme_ctrlr_opts *bdev_opts, void *ctx)
    4441             : {
    4442             :         const struct spdk_uuid          *uuid;
    4443             :         const uint8_t *nguid;
    4444             :         const struct spdk_nvme_ctrlr_data *cdata;
    4445             :         const struct spdk_nvme_ns_data  *nsdata;
    4446             :         const struct spdk_nvme_ctrlr_opts *opts;
    4447             :         enum spdk_nvme_csi              csi;
    4448             :         uint32_t atomic_bs, phys_bs, bs;
    4449          39 :         char sn_tmp[SPDK_NVME_CTRLR_SN_LEN + 1] = {'\0'};
    4450             :         int rc;
    4451             : 
    4452          39 :         cdata = spdk_nvme_ctrlr_get_data(ctrlr);
    4453          39 :         csi = spdk_nvme_ns_get_csi(ns);
    4454          39 :         opts = spdk_nvme_ctrlr_get_opts(ctrlr);
    4455             : 
    4456          39 :         switch (csi) {
    4457             :         case SPDK_NVME_CSI_NVM:
    4458          39 :                 disk->product_name = "NVMe disk";
    4459          39 :                 break;
    4460             :         case SPDK_NVME_CSI_ZNS:
    4461           0 :                 disk->product_name = "NVMe ZNS disk";
    4462           0 :                 disk->zoned = true;
    4463           0 :                 disk->zone_size = spdk_nvme_zns_ns_get_zone_size_sectors(ns);
    4464           0 :                 disk->max_zone_append_size = spdk_nvme_zns_ctrlr_get_max_zone_append_size(ctrlr) /
    4465           0 :                                              spdk_nvme_ns_get_extended_sector_size(ns);
    4466           0 :                 disk->max_open_zones = spdk_nvme_zns_ns_get_max_open_zones(ns);
    4467           0 :                 disk->max_active_zones = spdk_nvme_zns_ns_get_max_active_zones(ns);
    4468           0 :                 break;
    4469             :         default:
    4470           0 :                 if (bdev_opts->allow_unrecognized_csi) {
    4471           0 :                         disk->product_name = "NVMe Passthrough disk";
    4472           0 :                         break;
    4473             :                 }
    4474           0 :                 SPDK_ERRLOG("unsupported CSI: %u\n", csi);
    4475           0 :                 return -ENOTSUP;
    4476             :         }
    4477             : 
    4478          39 :         nguid = spdk_nvme_ns_get_nguid(ns);
    4479          39 :         if (!nguid) {
    4480          39 :                 uuid = spdk_nvme_ns_get_uuid(ns);
    4481          39 :                 if (uuid) {
    4482          12 :                         disk->uuid = *uuid;
    4483          39 :                 } else if (g_opts.generate_uuids) {
    4484           0 :                         spdk_strcpy_pad(sn_tmp, cdata->sn, SPDK_NVME_CTRLR_SN_LEN, '\0');
    4485           0 :                         rc = nvme_generate_uuid(sn_tmp, spdk_nvme_ns_get_id(ns), &disk->uuid);
    4486           0 :                         if (rc < 0) {
    4487           0 :                                 SPDK_ERRLOG("UUID generation failed (%s)\n", spdk_strerror(-rc));
    4488           0 :                                 return rc;
    4489             :                         }
    4490           0 :                 }
    4491          39 :         } else {
    4492           0 :                 memcpy(&disk->uuid, nguid, sizeof(disk->uuid));
    4493             :         }
    4494             : 
    4495          39 :         disk->name = spdk_sprintf_alloc("%sn%d", base_name, spdk_nvme_ns_get_id(ns));
    4496          39 :         if (!disk->name) {
    4497           0 :                 return -ENOMEM;
    4498             :         }
    4499             : 
    4500          39 :         disk->write_cache = 0;
    4501          39 :         if (cdata->vwc.present) {
    4502             :                 /* Enable if the Volatile Write Cache exists */
    4503           0 :                 disk->write_cache = 1;
    4504           0 :         }
    4505          39 :         if (cdata->oncs.write_zeroes) {
    4506           0 :                 disk->max_write_zeroes = UINT16_MAX + 1;
    4507           0 :         }
    4508          39 :         disk->blocklen = spdk_nvme_ns_get_extended_sector_size(ns);
    4509          39 :         disk->blockcnt = spdk_nvme_ns_get_num_sectors(ns);
    4510          39 :         disk->max_segment_size = spdk_nvme_ctrlr_get_max_xfer_size(ctrlr);
    4511          39 :         disk->ctratt.raw = cdata->ctratt.raw;
    4512          39 :         disk->nsid = spdk_nvme_ns_get_id(ns);
    4513             :         /* NVMe driver will split one request into multiple requests
    4514             :          * based on MDTS and stripe boundary, the bdev layer will use
    4515             :          * max_segment_size and max_num_segments to split one big IO
    4516             :          * into multiple requests, then small request can't run out
    4517             :          * of NVMe internal requests data structure.
    4518             :          */
    4519          39 :         if (opts && opts->io_queue_requests) {
    4520           0 :                 disk->max_num_segments = opts->io_queue_requests / 2;
    4521           0 :         }
    4522          39 :         if (spdk_nvme_ctrlr_get_flags(ctrlr) & SPDK_NVME_CTRLR_SGL_SUPPORTED) {
    4523             :                 /* The nvme driver will try to split I/O that have too many
    4524             :                  * SGEs, but it doesn't work if that last SGE doesn't end on
    4525             :                  * an aggregate total that is block aligned. The bdev layer has
    4526             :                  * a more robust splitting framework, so use that instead for
    4527             :                  * this case. (See issue #3269.)
    4528             :                  */
    4529           0 :                 uint16_t max_sges = spdk_nvme_ctrlr_get_max_sges(ctrlr);
    4530             : 
    4531           0 :                 if (disk->max_num_segments == 0) {
    4532           0 :                         disk->max_num_segments = max_sges;
    4533           0 :                 } else {
    4534           0 :                         disk->max_num_segments = spdk_min(disk->max_num_segments, max_sges);
    4535             :                 }
    4536           0 :         }
    4537          39 :         disk->optimal_io_boundary = spdk_nvme_ns_get_optimal_io_boundary(ns);
    4538             : 
    4539          39 :         nsdata = spdk_nvme_ns_get_data(ns);
    4540          39 :         bs = spdk_nvme_ns_get_sector_size(ns);
    4541          39 :         atomic_bs = bs;
    4542          39 :         phys_bs = bs;
    4543          39 :         if (nsdata->nabo == 0) {
    4544          39 :                 if (nsdata->nsfeat.ns_atomic_write_unit && nsdata->nawupf) {
    4545           0 :                         atomic_bs = bs * (1 + nsdata->nawupf);
    4546           0 :                 } else {
    4547          39 :                         atomic_bs = bs * (1 + cdata->awupf);
    4548             :                 }
    4549          39 :         }
    4550          39 :         if (nsdata->nsfeat.optperf) {
    4551           0 :                 phys_bs = bs * (1 + nsdata->npwg);
    4552           0 :         }
    4553          39 :         disk->phys_blocklen = spdk_min(phys_bs, atomic_bs);
    4554             : 
    4555          39 :         disk->md_len = spdk_nvme_ns_get_md_size(ns);
    4556          39 :         if (disk->md_len != 0) {
    4557           0 :                 disk->md_interleave = nsdata->flbas.extended;
    4558           0 :                 disk->dif_type = (enum spdk_dif_type)spdk_nvme_ns_get_pi_type(ns);
    4559           0 :                 if (disk->dif_type != SPDK_DIF_DISABLE) {
    4560           0 :                         disk->dif_is_head_of_md = nsdata->dps.md_start;
    4561           0 :                         disk->dif_check_flags = bdev_opts->prchk_flags;
    4562           0 :                         disk->dif_pi_format = (enum spdk_dif_pi_format)spdk_nvme_ns_get_pi_format(ns);
    4563           0 :                 }
    4564           0 :         }
    4565             : 
    4566          39 :         if (!(spdk_nvme_ctrlr_get_flags(ctrlr) &
    4567             :               SPDK_NVME_CTRLR_COMPARE_AND_WRITE_SUPPORTED)) {
    4568          39 :                 disk->acwu = 0;
    4569          39 :         } else if (nsdata->nsfeat.ns_atomic_write_unit) {
    4570           0 :                 disk->acwu = nsdata->nacwu + 1; /* 0-based */
    4571           0 :         } else {
    4572           0 :                 disk->acwu = cdata->acwu + 1; /* 0-based */
    4573             :         }
    4574             : 
    4575          39 :         if (cdata->oncs.copy) {
    4576             :                 /* For now bdev interface allows only single segment copy */
    4577           0 :                 disk->max_copy = nsdata->mssrl;
    4578           0 :         }
    4579             : 
    4580          39 :         disk->ctxt = ctx;
    4581          39 :         disk->fn_table = &nvmelib_fn_table;
    4582          39 :         disk->module = &nvme_if;
    4583             : 
    4584          39 :         disk->numa.id_valid = 1;
    4585          39 :         disk->numa.id = spdk_nvme_ctrlr_get_numa_id(ctrlr);
    4586             : 
    4587          39 :         return 0;
    4588          39 : }
    4589             : 
    4590             : static struct nvme_bdev *
    4591          39 : nvme_bdev_alloc(void)
    4592             : {
    4593             :         struct nvme_bdev *nbdev;
    4594             :         int rc;
    4595             : 
    4596          39 :         nbdev = calloc(1, sizeof(*nbdev));
    4597          39 :         if (!nbdev) {
    4598           0 :                 SPDK_ERRLOG("nbdev calloc() failed\n");
    4599           0 :                 return NULL;
    4600             :         }
    4601             : 
    4602          39 :         if (g_opts.nvme_error_stat) {
    4603           0 :                 nbdev->err_stat = calloc(1, sizeof(struct nvme_error_stat));
    4604           0 :                 if (!nbdev->err_stat) {
    4605           0 :                         SPDK_ERRLOG("err_stat calloc() failed\n");
    4606           0 :                         free(nbdev);
    4607           0 :                         return NULL;
    4608             :                 }
    4609           0 :         }
    4610             : 
    4611          39 :         rc = pthread_mutex_init(&nbdev->mutex, NULL);
    4612          39 :         if (rc != 0) {
    4613           0 :                 free(nbdev->err_stat);
    4614           0 :                 free(nbdev);
    4615           0 :                 return NULL;
    4616             :         }
    4617             : 
    4618          39 :         nbdev->ref = 1;
    4619          39 :         nbdev->mp_policy = BDEV_NVME_MP_POLICY_ACTIVE_PASSIVE;
    4620          39 :         nbdev->mp_selector = BDEV_NVME_MP_SELECTOR_ROUND_ROBIN;
    4621          39 :         nbdev->rr_min_io = UINT32_MAX;
    4622          39 :         TAILQ_INIT(&nbdev->nvme_ns_list);
    4623             : 
    4624          39 :         return nbdev;
    4625          39 : }
    4626             : 
    4627             : static int
    4628          39 : nvme_bdev_create(struct nvme_ctrlr *nvme_ctrlr, struct nvme_ns *nvme_ns)
    4629             : {
    4630             :         struct nvme_bdev *nbdev;
    4631          39 :         struct nvme_bdev_ctrlr *nbdev_ctrlr = nvme_ctrlr->nbdev_ctrlr;
    4632             :         int rc;
    4633             : 
    4634          39 :         nbdev = nvme_bdev_alloc();
    4635          39 :         if (nbdev == NULL) {
    4636           0 :                 SPDK_ERRLOG("Failed to allocate NVMe bdev\n");
    4637           0 :                 return -ENOMEM;
    4638             :         }
    4639             : 
    4640          39 :         nbdev->opal = nvme_ctrlr->opal_dev != NULL;
    4641             : 
    4642          78 :         rc = nbdev_create(&nbdev->disk, nbdev_ctrlr->name, nvme_ctrlr->ctrlr,
    4643          39 :                           nvme_ns->ns, &nvme_ctrlr->opts, nbdev);
    4644          39 :         if (rc != 0) {
    4645           0 :                 SPDK_ERRLOG("Failed to create NVMe disk\n");
    4646           0 :                 nvme_bdev_free(nbdev);
    4647           0 :                 return rc;
    4648             :         }
    4649             : 
    4650          78 :         spdk_io_device_register(nbdev,
    4651             :                                 bdev_nvme_create_bdev_channel_cb,
    4652             :                                 bdev_nvme_destroy_bdev_channel_cb,
    4653             :                                 sizeof(struct nvme_bdev_channel),
    4654          39 :                                 nbdev->disk.name);
    4655             : 
    4656          39 :         nvme_ns->bdev = nbdev;
    4657          39 :         nbdev->nsid = nvme_ns->id;
    4658          39 :         TAILQ_INSERT_TAIL(&nbdev->nvme_ns_list, nvme_ns, tailq);
    4659             : 
    4660          39 :         pthread_mutex_lock(&g_bdev_nvme_mutex);
    4661             : 
    4662          39 :         nbdev->nbdev_ctrlr = nbdev_ctrlr;
    4663          39 :         TAILQ_INSERT_TAIL(&nbdev_ctrlr->bdevs, nbdev, tailq);
    4664             : 
    4665          39 :         rc = spdk_bdev_register(&nbdev->disk);
    4666          39 :         if (rc != 0) {
    4667           1 :                 SPDK_ERRLOG("spdk_bdev_register() failed\n");
    4668           1 :                 spdk_io_device_unregister(nbdev, NULL);
    4669           1 :                 nvme_ns->bdev = NULL;
    4670             : 
    4671           1 :                 TAILQ_REMOVE(&nbdev_ctrlr->bdevs, nbdev, tailq);
    4672             : 
    4673           1 :                 pthread_mutex_unlock(&g_bdev_nvme_mutex);
    4674             : 
    4675           1 :                 nvme_bdev_free(nbdev);
    4676           1 :                 return rc;
    4677             :         }
    4678             : 
    4679          38 :         pthread_mutex_unlock(&g_bdev_nvme_mutex);
    4680             : 
    4681          38 :         return 0;
    4682          39 : }
    4683             : 
    4684             : static bool
    4685          23 : bdev_nvme_compare_ns(struct spdk_nvme_ns *ns1, struct spdk_nvme_ns *ns2)
    4686             : {
    4687             :         const struct spdk_nvme_ns_data *nsdata1, *nsdata2;
    4688             :         const struct spdk_uuid *uuid1, *uuid2;
    4689             : 
    4690          23 :         nsdata1 = spdk_nvme_ns_get_data(ns1);
    4691          23 :         nsdata2 = spdk_nvme_ns_get_data(ns2);
    4692          23 :         uuid1 = spdk_nvme_ns_get_uuid(ns1);
    4693          23 :         uuid2 = spdk_nvme_ns_get_uuid(ns2);
    4694             : 
    4695          71 :         return memcmp(nsdata1->nguid, nsdata2->nguid, sizeof(nsdata1->nguid)) == 0 &&
    4696          22 :                nsdata1->eui64 == nsdata2->eui64 &&
    4697          21 :                ((uuid1 == NULL && uuid2 == NULL) ||
    4698          29 :                 (uuid1 != NULL && uuid2 != NULL && spdk_uuid_compare(uuid1, uuid2) == 0)) &&
    4699          18 :                spdk_nvme_ns_get_csi(ns1) == spdk_nvme_ns_get_csi(ns2);
    4700             : }
    4701             : 
    4702             : static bool
    4703           0 : hotplug_probe_cb(void *cb_ctx, const struct spdk_nvme_transport_id *trid,
    4704             :                  struct spdk_nvme_ctrlr_opts *opts)
    4705             : {
    4706             :         struct nvme_probe_skip_entry *entry;
    4707             : 
    4708           0 :         TAILQ_FOREACH(entry, &g_skipped_nvme_ctrlrs, tailq) {
    4709           0 :                 if (spdk_nvme_transport_id_compare(trid, &entry->trid) == 0) {
    4710           0 :                         return false;
    4711             :                 }
    4712           0 :         }
    4713             : 
    4714           0 :         opts->arbitration_burst = (uint8_t)g_opts.arbitration_burst;
    4715           0 :         opts->low_priority_weight = (uint8_t)g_opts.low_priority_weight;
    4716           0 :         opts->medium_priority_weight = (uint8_t)g_opts.medium_priority_weight;
    4717           0 :         opts->high_priority_weight = (uint8_t)g_opts.high_priority_weight;
    4718           0 :         opts->disable_read_ana_log_page = true;
    4719             : 
    4720           0 :         SPDK_DEBUGLOG(bdev_nvme, "Attaching to %s\n", trid->traddr);
    4721             : 
    4722           0 :         return true;
    4723           0 : }
    4724             : 
    4725             : static void
    4726           0 : nvme_abort_cpl(void *ctx, const struct spdk_nvme_cpl *cpl)
    4727             : {
    4728           0 :         struct nvme_ctrlr *nvme_ctrlr = ctx;
    4729             : 
    4730           0 :         if (spdk_nvme_cpl_is_error(cpl)) {
    4731           0 :                 NVME_CTRLR_WARNLOG(nvme_ctrlr, "Abort failed. Resetting controller. sc is %u, sct is %u.\n",
    4732             :                                    cpl->status.sc, cpl->status.sct);
    4733           0 :                 bdev_nvme_reset_ctrlr(nvme_ctrlr);
    4734           0 :         } else if (cpl->cdw0 & 0x1) {
    4735           0 :                 NVME_CTRLR_WARNLOG(nvme_ctrlr, "Specified command could not be aborted.\n");
    4736           0 :                 bdev_nvme_reset_ctrlr(nvme_ctrlr);
    4737           0 :         }
    4738           0 : }
    4739             : 
    4740             : static void
    4741           0 : timeout_cb(void *cb_arg, struct spdk_nvme_ctrlr *ctrlr,
    4742             :            struct spdk_nvme_qpair *qpair, uint16_t cid)
    4743             : {
    4744           0 :         struct nvme_ctrlr *nvme_ctrlr = cb_arg;
    4745             :         union spdk_nvme_csts_register csts;
    4746             :         int rc;
    4747             : 
    4748           0 :         assert(nvme_ctrlr->ctrlr == ctrlr);
    4749             : 
    4750           0 :         NVME_CTRLR_WARNLOG(nvme_ctrlr, "Warning: Detected a timeout. ctrlr=%p qpair=%p cid=%u\n",
    4751             :                            ctrlr, qpair, cid);
    4752             : 
    4753             :         /* Only try to read CSTS if it's a PCIe controller or we have a timeout on an I/O
    4754             :          * queue.  (Note: qpair == NULL when there's an admin cmd timeout.)  Otherwise we
    4755             :          * would submit another fabrics cmd on the admin queue to read CSTS and check for its
    4756             :          * completion recursively.
    4757             :          */
    4758           0 :         if (nvme_ctrlr->active_path_id->trid.trtype == SPDK_NVME_TRANSPORT_PCIE || qpair != NULL) {
    4759           0 :                 csts = spdk_nvme_ctrlr_get_regs_csts(ctrlr);
    4760           0 :                 if (csts.bits.cfs) {
    4761           0 :                         NVME_CTRLR_ERRLOG(nvme_ctrlr, "Controller Fatal Status, reset required\n");
    4762           0 :                         bdev_nvme_reset_ctrlr(nvme_ctrlr);
    4763           0 :                         return;
    4764             :                 }
    4765           0 :         }
    4766             : 
    4767           0 :         switch (g_opts.action_on_timeout) {
    4768             :         case SPDK_BDEV_NVME_TIMEOUT_ACTION_ABORT:
    4769           0 :                 if (qpair) {
    4770             :                         /* Don't send abort to ctrlr when ctrlr is not available. */
    4771           0 :                         pthread_mutex_lock(&nvme_ctrlr->mutex);
    4772           0 :                         if (!nvme_ctrlr_is_available(nvme_ctrlr)) {
    4773           0 :                                 pthread_mutex_unlock(&nvme_ctrlr->mutex);
    4774           0 :                                 NVME_CTRLR_NOTICELOG(nvme_ctrlr, "Quit abort. Ctrlr is not available.\n");
    4775           0 :                                 return;
    4776             :                         }
    4777           0 :                         pthread_mutex_unlock(&nvme_ctrlr->mutex);
    4778             : 
    4779           0 :                         rc = spdk_nvme_ctrlr_cmd_abort(ctrlr, qpair, cid,
    4780           0 :                                                        nvme_abort_cpl, nvme_ctrlr);
    4781           0 :                         if (rc == 0) {
    4782           0 :                                 return;
    4783             :                         }
    4784             : 
    4785           0 :                         NVME_CTRLR_ERRLOG(nvme_ctrlr, "Unable to send abort. Resetting, rc is %d.\n", rc);
    4786           0 :                 }
    4787             : 
    4788             :         /* FALLTHROUGH */
    4789             :         case SPDK_BDEV_NVME_TIMEOUT_ACTION_RESET:
    4790           0 :                 bdev_nvme_reset_ctrlr(nvme_ctrlr);
    4791           0 :                 break;
    4792             :         case SPDK_BDEV_NVME_TIMEOUT_ACTION_NONE:
    4793           0 :                 NVME_CTRLR_DEBUGLOG(nvme_ctrlr, "No action for nvme controller timeout.\n");
    4794           0 :                 break;
    4795             :         default:
    4796           0 :                 NVME_CTRLR_ERRLOG(nvme_ctrlr, "An invalid timeout action value is found.\n");
    4797           0 :                 break;
    4798             :         }
    4799           0 : }
    4800             : 
    4801             : static struct nvme_ns *
    4802          52 : nvme_ns_alloc(void)
    4803             : {
    4804             :         struct nvme_ns *nvme_ns;
    4805             : 
    4806          52 :         nvme_ns = calloc(1, sizeof(struct nvme_ns));
    4807          52 :         if (nvme_ns == NULL) {
    4808           0 :                 return NULL;
    4809             :         }
    4810             : 
    4811          52 :         if (g_opts.io_path_stat) {
    4812           0 :                 nvme_ns->stat = calloc(1, sizeof(struct spdk_bdev_io_stat));
    4813           0 :                 if (nvme_ns->stat == NULL) {
    4814           0 :                         free(nvme_ns);
    4815           0 :                         return NULL;
    4816             :                 }
    4817           0 :                 spdk_bdev_reset_io_stat(nvme_ns->stat, SPDK_BDEV_RESET_STAT_MAXMIN);
    4818           0 :         }
    4819             : 
    4820          52 :         return nvme_ns;
    4821          52 : }
    4822             : 
    4823             : static void
    4824          52 : nvme_ns_free(struct nvme_ns *nvme_ns)
    4825             : {
    4826          52 :         free(nvme_ns->stat);
    4827          52 :         free(nvme_ns);
    4828          52 : }
    4829             : 
    4830             : static void
    4831          52 : nvme_ctrlr_populate_namespace_done(struct nvme_ns *nvme_ns, int rc)
    4832             : {
    4833          52 :         struct nvme_ctrlr *nvme_ctrlr = nvme_ns->ctrlr;
    4834          52 :         struct nvme_async_probe_ctx *ctx = nvme_ns->probe_ctx;
    4835             : 
    4836          52 :         if (rc == 0) {
    4837          50 :                 nvme_ns->probe_ctx = NULL;
    4838          50 :                 nvme_ctrlr_get_ref(nvme_ctrlr);
    4839          50 :         } else {
    4840           2 :                 pthread_mutex_lock(&nvme_ctrlr->mutex);
    4841           2 :                 RB_REMOVE(nvme_ns_tree, &nvme_ctrlr->namespaces, nvme_ns);
    4842           2 :                 pthread_mutex_unlock(&nvme_ctrlr->mutex);
    4843             : 
    4844           2 :                 nvme_ns_free(nvme_ns);
    4845             :         }
    4846             : 
    4847          52 :         if (ctx) {
    4848          51 :                 ctx->populates_in_progress--;
    4849          51 :                 if (ctx->populates_in_progress == 0) {
    4850          12 :                         nvme_ctrlr_populate_namespaces_done(nvme_ctrlr, ctx);
    4851          12 :                 }
    4852          51 :         }
    4853          52 : }
    4854             : 
    4855             : static void
    4856           2 : bdev_nvme_add_io_path(struct nvme_bdev_channel_iter *i,
    4857             :                       struct nvme_bdev *nbdev,
    4858             :                       struct nvme_bdev_channel *nbdev_ch, void *ctx)
    4859             : {
    4860           2 :         struct nvme_ns *nvme_ns = ctx;
    4861             :         int rc;
    4862             : 
    4863           2 :         rc = _bdev_nvme_add_io_path(nbdev_ch, nvme_ns);
    4864           2 :         if (rc != 0) {
    4865           0 :                 SPDK_ERRLOG("Failed to add I/O path to bdev_channel dynamically.\n");
    4866           0 :         }
    4867             : 
    4868           2 :         nvme_bdev_for_each_channel_continue(i, rc);
    4869           2 : }
    4870             : 
    4871             : static void
    4872           2 : bdev_nvme_delete_io_path(struct nvme_bdev_channel_iter *i,
    4873             :                          struct nvme_bdev *nbdev,
    4874             :                          struct nvme_bdev_channel *nbdev_ch, void *ctx)
    4875             : {
    4876           2 :         struct nvme_ns *nvme_ns = ctx;
    4877             :         struct nvme_io_path *io_path;
    4878             : 
    4879           2 :         io_path = _bdev_nvme_get_io_path(nbdev_ch, nvme_ns);
    4880           2 :         if (io_path != NULL) {
    4881           2 :                 _bdev_nvme_delete_io_path(nbdev_ch, io_path);
    4882           2 :         }
    4883             : 
    4884           2 :         nvme_bdev_for_each_channel_continue(i, 0);
    4885           2 : }
    4886             : 
    4887             : static void
    4888           0 : bdev_nvme_add_io_path_failed(struct nvme_bdev *nbdev, void *ctx, int status)
    4889             : {
    4890           0 :         struct nvme_ns *nvme_ns = ctx;
    4891             : 
    4892           0 :         nvme_ctrlr_populate_namespace_done(nvme_ns, -1);
    4893           0 : }
    4894             : 
    4895             : static void
    4896          12 : bdev_nvme_add_io_path_done(struct nvme_bdev *nbdev, void *ctx, int status)
    4897             : {
    4898          12 :         struct nvme_ns *nvme_ns = ctx;
    4899             : 
    4900          12 :         if (status == 0) {
    4901          12 :                 nvme_ctrlr_populate_namespace_done(nvme_ns, 0);
    4902          12 :         } else {
    4903             :                 /* Delete the added io_paths and fail populating the namespace. */
    4904           0 :                 nvme_bdev_for_each_channel(nbdev,
    4905             :                                            bdev_nvme_delete_io_path,
    4906           0 :                                            nvme_ns,
    4907             :                                            bdev_nvme_add_io_path_failed);
    4908             :         }
    4909          12 : }
    4910             : 
    4911             : static int
    4912          13 : nvme_bdev_add_ns(struct nvme_bdev *nbdev, struct nvme_ns *nvme_ns)
    4913             : {
    4914             :         struct nvme_ns *tmp_ns;
    4915             :         const struct spdk_nvme_ns_data *nsdata;
    4916             : 
    4917          13 :         nsdata = spdk_nvme_ns_get_data(nvme_ns->ns);
    4918          13 :         if (!nsdata->nmic.can_share) {
    4919           0 :                 SPDK_ERRLOG("Namespace cannot be shared.\n");
    4920           0 :                 return -EINVAL;
    4921             :         }
    4922             : 
    4923          13 :         pthread_mutex_lock(&nbdev->mutex);
    4924             : 
    4925          13 :         tmp_ns = TAILQ_FIRST(&nbdev->nvme_ns_list);
    4926          13 :         assert(tmp_ns != NULL);
    4927             : 
    4928          13 :         if (tmp_ns->ns != NULL && !bdev_nvme_compare_ns(nvme_ns->ns, tmp_ns->ns)) {
    4929           1 :                 pthread_mutex_unlock(&nbdev->mutex);
    4930           1 :                 SPDK_ERRLOG("Namespaces are not identical.\n");
    4931           1 :                 return -EINVAL;
    4932             :         }
    4933             : 
    4934          12 :         nbdev->ref++;
    4935          12 :         TAILQ_INSERT_TAIL(&nbdev->nvme_ns_list, nvme_ns, tailq);
    4936          12 :         nvme_ns->bdev = nbdev;
    4937             : 
    4938          12 :         pthread_mutex_unlock(&nbdev->mutex);
    4939             : 
    4940             :         /* Add nvme_io_path to nvme_bdev_channels dynamically. */
    4941          24 :         nvme_bdev_for_each_channel(nbdev,
    4942             :                                    bdev_nvme_add_io_path,
    4943          12 :                                    nvme_ns,
    4944             :                                    bdev_nvme_add_io_path_done);
    4945             : 
    4946          12 :         return 0;
    4947          13 : }
    4948             : 
    4949             : static void
    4950          52 : nvme_ctrlr_populate_namespace(struct nvme_ctrlr *nvme_ctrlr, struct nvme_ns *nvme_ns)
    4951             : {
    4952             :         struct spdk_nvme_ns     *ns;
    4953             :         struct nvme_bdev        *bdev;
    4954          52 :         int                     rc = 0;
    4955             : 
    4956          52 :         ns = spdk_nvme_ctrlr_get_ns(nvme_ctrlr->ctrlr, nvme_ns->id);
    4957          52 :         if (!ns) {
    4958           0 :                 NVME_CTRLR_DEBUGLOG(nvme_ctrlr, "Invalid NS %d\n", nvme_ns->id);
    4959           0 :                 rc = -EINVAL;
    4960           0 :                 goto done;
    4961             :         }
    4962             : 
    4963          52 :         nvme_ns->ns = ns;
    4964          52 :         nvme_ns->ana_state = SPDK_NVME_ANA_OPTIMIZED_STATE;
    4965             : 
    4966          52 :         if (nvme_ctrlr->ana_log_page != NULL) {
    4967          38 :                 bdev_nvme_parse_ana_log_page(nvme_ctrlr, nvme_ns_set_ana_state, nvme_ns);
    4968          38 :         }
    4969             : 
    4970          52 :         bdev = nvme_bdev_ctrlr_get_bdev(nvme_ctrlr->nbdev_ctrlr, nvme_ns->id);
    4971          92 :         if (bdev == NULL) {
    4972          39 :                 rc = nvme_bdev_create(nvme_ctrlr, nvme_ns);
    4973          39 :         } else {
    4974          13 :                 rc = nvme_bdev_add_ns(bdev, nvme_ns);
    4975          13 :                 if (rc == 0) {
    4976          12 :                         return;
    4977             :                 }
    4978             :         }
    4979             : done:
    4980          40 :         nvme_ctrlr_populate_namespace_done(nvme_ns, rc);
    4981          52 : }
    4982             : 
    4983             : static void
    4984          50 : nvme_ctrlr_depopulate_namespace_done(struct nvme_ns *nvme_ns)
    4985             : {
    4986          50 :         struct nvme_ctrlr *nvme_ctrlr = nvme_ns->ctrlr;
    4987             : 
    4988          50 :         assert(nvme_ctrlr != NULL);
    4989             : 
    4990          50 :         pthread_mutex_lock(&nvme_ctrlr->mutex);
    4991             : 
    4992          50 :         RB_REMOVE(nvme_ns_tree, &nvme_ctrlr->namespaces, nvme_ns);
    4993             : 
    4994          50 :         if (nvme_ns->bdev != NULL) {
    4995           0 :                 pthread_mutex_unlock(&nvme_ctrlr->mutex);
    4996           0 :                 return;
    4997             :         }
    4998             : 
    4999          50 :         nvme_ns_free(nvme_ns);
    5000          50 :         pthread_mutex_unlock(&nvme_ctrlr->mutex);
    5001             : 
    5002          50 :         nvme_ctrlr_put_ref(nvme_ctrlr);
    5003          50 : }
    5004             : 
    5005             : static void
    5006          11 : bdev_nvme_delete_io_path_done(struct nvme_bdev *nbdev, void *ctx, int status)
    5007             : {
    5008          11 :         struct nvme_ns *nvme_ns = ctx;
    5009             : 
    5010          11 :         nvme_ctrlr_depopulate_namespace_done(nvme_ns);
    5011          11 : }
    5012             : 
    5013             : static void
    5014          50 : nvme_ctrlr_depopulate_namespace(struct nvme_ctrlr *nvme_ctrlr, struct nvme_ns *nvme_ns)
    5015             : {
    5016             :         struct nvme_bdev *nbdev;
    5017             : 
    5018          50 :         if (nvme_ns->depopulating) {
    5019             :                 /* Maybe we received 2 AENs in a row */
    5020           0 :                 return;
    5021             :         }
    5022          50 :         nvme_ns->depopulating = true;
    5023             : 
    5024          50 :         spdk_poller_unregister(&nvme_ns->anatt_timer);
    5025             : 
    5026          50 :         nbdev = nvme_ns->bdev;
    5027          50 :         if (nbdev != NULL) {
    5028          46 :                 pthread_mutex_lock(&nbdev->mutex);
    5029             : 
    5030          46 :                 assert(nbdev->ref > 0);
    5031          46 :                 nbdev->ref--;
    5032          46 :                 if (nbdev->ref == 0) {
    5033          35 :                         pthread_mutex_unlock(&nbdev->mutex);
    5034             : 
    5035          35 :                         spdk_bdev_unregister(&nbdev->disk, NULL, NULL);
    5036          35 :                 } else {
    5037             :                         /* spdk_bdev_unregister() is not called until the last nvme_ns is
    5038             :                          * depopulated. Hence we need to remove nvme_ns from bdev->nvme_ns_list
    5039             :                          * and clear nvme_ns->bdev here.
    5040             :                          */
    5041          11 :                         TAILQ_REMOVE(&nbdev->nvme_ns_list, nvme_ns, tailq);
    5042             : 
    5043          11 :                         pthread_mutex_lock(&nvme_ns->ctrlr->mutex);
    5044          11 :                         nvme_ns->bdev = NULL;
    5045          11 :                         pthread_mutex_unlock(&nvme_ns->ctrlr->mutex);
    5046             : 
    5047          11 :                         pthread_mutex_unlock(&nbdev->mutex);
    5048             : 
    5049             :                         /* Delete nvme_io_paths from nvme_bdev_channels dynamically. After that,
    5050             :                          * we call depopulate_namespace_done() to avoid use-after-free.
    5051             :                          */
    5052          22 :                         nvme_bdev_for_each_channel(nbdev,
    5053             :                                                    bdev_nvme_delete_io_path,
    5054          11 :                                                    nvme_ns,
    5055             :                                                    bdev_nvme_delete_io_path_done);
    5056          11 :                         return;
    5057             :                 }
    5058          35 :         }
    5059             : 
    5060          39 :         nvme_ctrlr_depopulate_namespace_done(nvme_ns);
    5061          50 : }
    5062             : 
    5063             : static void
    5064          63 : nvme_ctrlr_populate_namespaces(struct nvme_ctrlr *nvme_ctrlr,
    5065             :                                struct nvme_async_probe_ctx *ctx)
    5066             : {
    5067          63 :         struct spdk_nvme_ctrlr  *ctrlr = nvme_ctrlr->ctrlr;
    5068             :         struct nvme_ns  *nvme_ns, *next;
    5069             :         struct spdk_nvme_ns     *ns;
    5070             :         struct nvme_bdev        *nbdev;
    5071             :         uint32_t                nsid;
    5072             :         int                     rc;
    5073             :         uint64_t                num_sectors;
    5074             : 
    5075          63 :         if (ctx) {
    5076             :                 /* Initialize this count to 1 to handle the populate functions
    5077             :                  * calling nvme_ctrlr_populate_namespace_done() immediately.
    5078             :                  */
    5079          47 :                 ctx->populates_in_progress = 1;
    5080          47 :         }
    5081             : 
    5082             :         /* First loop over our existing namespaces and see if they have been
    5083             :          * removed. */
    5084          63 :         nvme_ns = nvme_ctrlr_get_first_active_ns(nvme_ctrlr);
    5085          67 :         while (nvme_ns != NULL) {
    5086           4 :                 next = nvme_ctrlr_get_next_active_ns(nvme_ctrlr, nvme_ns);
    5087             : 
    5088           4 :                 if (spdk_nvme_ctrlr_is_active_ns(ctrlr, nvme_ns->id)) {
    5089             :                         /* NS is still there or added again. Its attributes may have changed. */
    5090           3 :                         ns = spdk_nvme_ctrlr_get_ns(ctrlr, nvme_ns->id);
    5091           3 :                         if (nvme_ns->ns != ns) {
    5092           1 :                                 assert(nvme_ns->ns == NULL);
    5093           1 :                                 nvme_ns->ns = ns;
    5094           1 :                                 NVME_CTRLR_DEBUGLOG(nvme_ctrlr, "NSID %u was added\n", nvme_ns->id);
    5095           1 :                         }
    5096             : 
    5097           3 :                         num_sectors = spdk_nvme_ns_get_num_sectors(ns);
    5098           3 :                         nbdev = nvme_ns->bdev;
    5099           3 :                         assert(nbdev != NULL);
    5100           3 :                         if (nbdev->disk.blockcnt != num_sectors) {
    5101           1 :                                 NVME_CTRLR_NOTICELOG(nvme_ctrlr,
    5102             :                                                      "NSID %u is resized: bdev name %s, old size %" PRIu64 ", new size %" PRIu64 "\n",
    5103             :                                                      nvme_ns->id,
    5104             :                                                      nbdev->disk.name,
    5105             :                                                      nbdev->disk.blockcnt,
    5106             :                                                      num_sectors);
    5107           1 :                                 rc = spdk_bdev_notify_blockcnt_change(&nbdev->disk, num_sectors);
    5108           1 :                                 if (rc != 0) {
    5109           0 :                                         NVME_CTRLR_ERRLOG(nvme_ctrlr,
    5110             :                                                           "Could not change num blocks for nvme bdev: name %s, errno: %d.\n",
    5111             :                                                           nbdev->disk.name, rc);
    5112           0 :                                 }
    5113           1 :                         }
    5114           3 :                 } else {
    5115             :                         /* Namespace was removed */
    5116           1 :                         nvme_ctrlr_depopulate_namespace(nvme_ctrlr, nvme_ns);
    5117             :                 }
    5118             : 
    5119           4 :                 nvme_ns = next;
    5120             :         }
    5121             : 
    5122             :         /* Loop through all of the namespaces at the nvme level and see if any of them are new */
    5123          63 :         nsid = spdk_nvme_ctrlr_get_first_active_ns(ctrlr);
    5124         118 :         while (nsid != 0) {
    5125          55 :                 nvme_ns = nvme_ctrlr_get_ns(nvme_ctrlr, nsid);
    5126             : 
    5127          55 :                 if (nvme_ns == NULL) {
    5128             :                         /* Found a new one */
    5129          52 :                         nvme_ns = nvme_ns_alloc();
    5130          52 :                         if (nvme_ns == NULL) {
    5131           0 :                                 NVME_CTRLR_ERRLOG(nvme_ctrlr, "Failed to allocate namespace\n");
    5132             :                                 /* This just fails to attach the namespace. It may work on a future attempt. */
    5133           0 :                                 continue;
    5134             :                         }
    5135             : 
    5136          52 :                         nvme_ns->id = nsid;
    5137          52 :                         nvme_ns->ctrlr = nvme_ctrlr;
    5138             : 
    5139          52 :                         nvme_ns->bdev = NULL;
    5140             : 
    5141          52 :                         if (ctx) {
    5142          51 :                                 ctx->populates_in_progress++;
    5143          51 :                         }
    5144          52 :                         nvme_ns->probe_ctx = ctx;
    5145             : 
    5146          52 :                         pthread_mutex_lock(&nvme_ctrlr->mutex);
    5147          52 :                         RB_INSERT(nvme_ns_tree, &nvme_ctrlr->namespaces, nvme_ns);
    5148          52 :                         pthread_mutex_unlock(&nvme_ctrlr->mutex);
    5149             : 
    5150          52 :                         nvme_ctrlr_populate_namespace(nvme_ctrlr, nvme_ns);
    5151          52 :                 }
    5152             : 
    5153          55 :                 nsid = spdk_nvme_ctrlr_get_next_active_ns(ctrlr, nsid);
    5154             :         }
    5155             : 
    5156          63 :         if (ctx) {
    5157             :                 /* Decrement this count now that the loop is over to account
    5158             :                  * for the one we started with.  If the count is then 0, we
    5159             :                  * know any populate_namespace functions completed immediately,
    5160             :                  * so we'll kick the callback here.
    5161             :                  */
    5162          47 :                 ctx->populates_in_progress--;
    5163          47 :                 if (ctx->populates_in_progress == 0) {
    5164          35 :                         nvme_ctrlr_populate_namespaces_done(nvme_ctrlr, ctx);
    5165          35 :                 }
    5166          47 :         }
    5167             : 
    5168          63 : }
    5169             : 
    5170             : static void
    5171          62 : nvme_ctrlr_depopulate_namespaces(struct nvme_ctrlr *nvme_ctrlr)
    5172             : {
    5173             :         struct nvme_ns *nvme_ns, *tmp;
    5174             : 
    5175         111 :         RB_FOREACH_SAFE(nvme_ns, nvme_ns_tree, &nvme_ctrlr->namespaces, tmp) {
    5176          49 :                 nvme_ctrlr_depopulate_namespace(nvme_ctrlr, nvme_ns);
    5177          49 :         }
    5178          62 : }
    5179             : 
    5180             : static uint32_t
    5181          37 : nvme_ctrlr_get_ana_log_page_size(struct nvme_ctrlr *nvme_ctrlr)
    5182             : {
    5183          37 :         struct spdk_nvme_ctrlr *ctrlr = nvme_ctrlr->ctrlr;
    5184             :         const struct spdk_nvme_ctrlr_data *cdata;
    5185          37 :         uint32_t nsid, ns_count = 0;
    5186             : 
    5187          37 :         cdata = spdk_nvme_ctrlr_get_data(ctrlr);
    5188             : 
    5189          82 :         for (nsid = spdk_nvme_ctrlr_get_first_active_ns(ctrlr);
    5190          82 :              nsid != 0; nsid = spdk_nvme_ctrlr_get_next_active_ns(ctrlr, nsid)) {
    5191          45 :                 ns_count++;
    5192          45 :         }
    5193             : 
    5194          74 :         return sizeof(struct spdk_nvme_ana_page) + cdata->nanagrpid *
    5195          37 :                sizeof(struct spdk_nvme_ana_group_descriptor) + ns_count *
    5196             :                sizeof(uint32_t);
    5197             : }
    5198             : 
    5199             : static int
    5200           7 : nvme_ctrlr_set_ana_states(const struct spdk_nvme_ana_group_descriptor *desc,
    5201             :                           void *cb_arg)
    5202             : {
    5203           7 :         struct nvme_ctrlr *nvme_ctrlr = cb_arg;
    5204             :         struct nvme_ns *nvme_ns;
    5205             :         uint32_t i, nsid;
    5206             : 
    5207          13 :         for (i = 0; i < desc->num_of_nsid; i++) {
    5208           6 :                 nsid = desc->nsid[i];
    5209           6 :                 if (nsid == 0) {
    5210           0 :                         continue;
    5211             :                 }
    5212             : 
    5213           6 :                 nvme_ns = nvme_ctrlr_get_ns(nvme_ctrlr, nsid);
    5214             : 
    5215           6 :                 if (nvme_ns == NULL) {
    5216             :                         /* Target told us that an inactive namespace had an ANA change */
    5217           1 :                         continue;
    5218             :                 }
    5219             : 
    5220           5 :                 _nvme_ns_set_ana_state(nvme_ns, desc);
    5221           5 :         }
    5222             : 
    5223           7 :         return 0;
    5224             : }
    5225             : 
    5226             : static void
    5227           0 : bdev_nvme_disable_read_ana_log_page(struct nvme_ctrlr *nvme_ctrlr)
    5228             : {
    5229             :         struct nvme_ns *nvme_ns;
    5230             : 
    5231           0 :         spdk_free(nvme_ctrlr->ana_log_page);
    5232           0 :         nvme_ctrlr->ana_log_page = NULL;
    5233             : 
    5234           0 :         for (nvme_ns = nvme_ctrlr_get_first_active_ns(nvme_ctrlr);
    5235           0 :              nvme_ns != NULL;
    5236           0 :              nvme_ns = nvme_ctrlr_get_next_active_ns(nvme_ctrlr, nvme_ns)) {
    5237           0 :                 nvme_ns->ana_state_updating = false;
    5238           0 :                 nvme_ns->ana_state = SPDK_NVME_ANA_OPTIMIZED_STATE;
    5239           0 :         }
    5240           0 : }
    5241             : 
    5242             : static void
    5243           3 : nvme_ctrlr_read_ana_log_page_done(void *ctx, const struct spdk_nvme_cpl *cpl)
    5244             : {
    5245           3 :         struct nvme_ctrlr *nvme_ctrlr = ctx;
    5246             : 
    5247           3 :         if (cpl != NULL && spdk_nvme_cpl_is_success(cpl)) {
    5248           6 :                 bdev_nvme_parse_ana_log_page(nvme_ctrlr, nvme_ctrlr_set_ana_states,
    5249           3 :                                              nvme_ctrlr);
    5250           3 :         } else {
    5251           0 :                 bdev_nvme_disable_read_ana_log_page(nvme_ctrlr);
    5252             :         }
    5253             : 
    5254           3 :         pthread_mutex_lock(&nvme_ctrlr->mutex);
    5255             : 
    5256           3 :         assert(nvme_ctrlr->ana_log_page_updating == true);
    5257           3 :         nvme_ctrlr->ana_log_page_updating = false;
    5258             : 
    5259           3 :         if (nvme_ctrlr_can_be_unregistered(nvme_ctrlr)) {
    5260           0 :                 pthread_mutex_unlock(&nvme_ctrlr->mutex);
    5261             : 
    5262           0 :                 nvme_ctrlr_unregister(nvme_ctrlr);
    5263           0 :         } else {
    5264           3 :                 pthread_mutex_unlock(&nvme_ctrlr->mutex);
    5265             : 
    5266           3 :                 bdev_nvme_clear_io_path_caches(nvme_ctrlr);
    5267             :         }
    5268           3 : }
    5269             : 
    5270             : static int
    5271           6 : nvme_ctrlr_read_ana_log_page(struct nvme_ctrlr *nvme_ctrlr)
    5272             : {
    5273             :         uint32_t ana_log_page_size;
    5274             :         int rc;
    5275             : 
    5276           6 :         if (nvme_ctrlr->ana_log_page == NULL) {
    5277           0 :                 return -EINVAL;
    5278             :         }
    5279             : 
    5280           6 :         ana_log_page_size = nvme_ctrlr_get_ana_log_page_size(nvme_ctrlr);
    5281             : 
    5282           6 :         if (ana_log_page_size > nvme_ctrlr->max_ana_log_page_size) {
    5283           0 :                 NVME_CTRLR_ERRLOG(nvme_ctrlr,
    5284             :                                   "ANA log page size %" PRIu32 " is larger than allowed %" PRIu32 "\n",
    5285             :                                   ana_log_page_size, nvme_ctrlr->max_ana_log_page_size);
    5286           0 :                 return -EINVAL;
    5287             :         }
    5288             : 
    5289           6 :         pthread_mutex_lock(&nvme_ctrlr->mutex);
    5290           6 :         if (!nvme_ctrlr_is_available(nvme_ctrlr) ||
    5291           5 :             nvme_ctrlr->ana_log_page_updating) {
    5292           3 :                 pthread_mutex_unlock(&nvme_ctrlr->mutex);
    5293           3 :                 return -EBUSY;
    5294             :         }
    5295             : 
    5296           3 :         nvme_ctrlr->ana_log_page_updating = true;
    5297           3 :         pthread_mutex_unlock(&nvme_ctrlr->mutex);
    5298             : 
    5299           6 :         rc = spdk_nvme_ctrlr_cmd_get_log_page(nvme_ctrlr->ctrlr,
    5300             :                                               SPDK_NVME_LOG_ASYMMETRIC_NAMESPACE_ACCESS,
    5301             :                                               SPDK_NVME_GLOBAL_NS_TAG,
    5302           3 :                                               nvme_ctrlr->ana_log_page,
    5303           3 :                                               ana_log_page_size, 0,
    5304             :                                               nvme_ctrlr_read_ana_log_page_done,
    5305           3 :                                               nvme_ctrlr);
    5306           3 :         if (rc != 0) {
    5307           0 :                 nvme_ctrlr_read_ana_log_page_done(nvme_ctrlr, NULL);
    5308           0 :         }
    5309             : 
    5310           3 :         return rc;
    5311           6 : }
    5312             : 
    5313             : static void
    5314           0 : dummy_bdev_event_cb(enum spdk_bdev_event_type type, struct spdk_bdev *bdev, void *ctx)
    5315             : {
    5316           0 : }
    5317             : 
    5318             : struct bdev_nvme_set_preferred_path_ctx {
    5319             :         struct spdk_bdev_desc *desc;
    5320             :         struct nvme_ns *nvme_ns;
    5321             :         bdev_nvme_set_preferred_path_cb cb_fn;
    5322             :         void *cb_arg;
    5323             : };
    5324             : 
    5325             : static void
    5326           3 : bdev_nvme_set_preferred_path_done(struct nvme_bdev *nbdev, void *_ctx, int status)
    5327             : {
    5328           3 :         struct bdev_nvme_set_preferred_path_ctx *ctx = _ctx;
    5329             : 
    5330           3 :         assert(ctx != NULL);
    5331           3 :         assert(ctx->desc != NULL);
    5332           3 :         assert(ctx->cb_fn != NULL);
    5333             : 
    5334           3 :         spdk_bdev_close(ctx->desc);
    5335             : 
    5336           3 :         ctx->cb_fn(ctx->cb_arg, status);
    5337             : 
    5338           3 :         free(ctx);
    5339           3 : }
    5340             : 
    5341             : static void
    5342           2 : _bdev_nvme_set_preferred_path(struct nvme_bdev_channel_iter *i,
    5343             :                               struct nvme_bdev *nbdev,
    5344             :                               struct nvme_bdev_channel *nbdev_ch, void *_ctx)
    5345             : {
    5346           2 :         struct bdev_nvme_set_preferred_path_ctx *ctx = _ctx;
    5347             :         struct nvme_io_path *io_path, *prev;
    5348             : 
    5349           2 :         prev = NULL;
    5350           3 :         STAILQ_FOREACH(io_path, &nbdev_ch->io_path_list, stailq) {
    5351           3 :                 if (io_path->nvme_ns == ctx->nvme_ns) {
    5352           2 :                         break;
    5353             :                 }
    5354           1 :                 prev = io_path;
    5355           1 :         }
    5356             : 
    5357           2 :         if (io_path != NULL) {
    5358           2 :                 if (prev != NULL) {
    5359           1 :                         STAILQ_REMOVE_AFTER(&nbdev_ch->io_path_list, prev, stailq);
    5360           1 :                         STAILQ_INSERT_HEAD(&nbdev_ch->io_path_list, io_path, stailq);
    5361           1 :                 }
    5362             : 
    5363             :                 /* We can set io_path to nbdev_ch->current_io_path directly here.
    5364             :                  * However, it needs to be conditional. To simplify the code,
    5365             :                  * just clear nbdev_ch->current_io_path and let find_io_path()
    5366             :                  * fill it.
    5367             :                  *
    5368             :                  * Automatic failback may be disabled. Hence even if the io_path is
    5369             :                  * already at the head, clear nbdev_ch->current_io_path.
    5370             :                  */
    5371           2 :                 bdev_nvme_clear_current_io_path(nbdev_ch);
    5372           2 :         }
    5373             : 
    5374           2 :         nvme_bdev_for_each_channel_continue(i, 0);
    5375           2 : }
    5376             : 
    5377             : static struct nvme_ns *
    5378           3 : bdev_nvme_set_preferred_ns(struct nvme_bdev *nbdev, uint16_t cntlid)
    5379             : {
    5380             :         struct nvme_ns *nvme_ns, *prev;
    5381             :         const struct spdk_nvme_ctrlr_data *cdata;
    5382             : 
    5383           3 :         prev = NULL;
    5384           6 :         TAILQ_FOREACH(nvme_ns, &nbdev->nvme_ns_list, tailq) {
    5385           6 :                 cdata = spdk_nvme_ctrlr_get_data(nvme_ns->ctrlr->ctrlr);
    5386             : 
    5387           6 :                 if (cdata->cntlid == cntlid) {
    5388           3 :                         break;
    5389             :                 }
    5390           3 :                 prev = nvme_ns;
    5391           3 :         }
    5392             : 
    5393           3 :         if (nvme_ns != NULL && prev != NULL) {
    5394           2 :                 TAILQ_REMOVE(&nbdev->nvme_ns_list, nvme_ns, tailq);
    5395           2 :                 TAILQ_INSERT_HEAD(&nbdev->nvme_ns_list, nvme_ns, tailq);
    5396           2 :         }
    5397             : 
    5398           3 :         return nvme_ns;
    5399             : }
    5400             : 
    5401             : /* This function supports only multipath mode. There is only a single I/O path
    5402             :  * for each NVMe-oF controller. Hence, just move the matched I/O path to the
    5403             :  * head of the I/O path list for each NVMe bdev channel.
    5404             :  *
    5405             :  * NVMe bdev channel may be acquired after completing this function. move the
    5406             :  * matched namespace to the head of the namespace list for the NVMe bdev too.
    5407             :  */
    5408             : void
    5409           3 : bdev_nvme_set_preferred_path(const char *name, uint16_t cntlid,
    5410             :                              bdev_nvme_set_preferred_path_cb cb_fn, void *cb_arg)
    5411             : {
    5412             :         struct bdev_nvme_set_preferred_path_ctx *ctx;
    5413             :         struct spdk_bdev *bdev;
    5414             :         struct nvme_bdev *nbdev;
    5415           3 :         int rc = 0;
    5416             : 
    5417           3 :         assert(cb_fn != NULL);
    5418             : 
    5419           3 :         ctx = calloc(1, sizeof(*ctx));
    5420           3 :         if (ctx == NULL) {
    5421           0 :                 SPDK_ERRLOG("Failed to alloc context.\n");
    5422           0 :                 rc = -ENOMEM;
    5423           0 :                 goto err_alloc;
    5424             :         }
    5425             : 
    5426           3 :         ctx->cb_fn = cb_fn;
    5427           3 :         ctx->cb_arg = cb_arg;
    5428             : 
    5429           3 :         rc = spdk_bdev_open_ext(name, false, dummy_bdev_event_cb, NULL, &ctx->desc);
    5430           3 :         if (rc != 0) {
    5431           0 :                 SPDK_ERRLOG("Failed to open bdev %s.\n", name);
    5432           0 :                 goto err_open;
    5433             :         }
    5434             : 
    5435           3 :         bdev = spdk_bdev_desc_get_bdev(ctx->desc);
    5436             : 
    5437           3 :         if (bdev->module != &nvme_if) {
    5438           0 :                 SPDK_ERRLOG("bdev %s is not registered in this module.\n", name);
    5439           0 :                 rc = -ENODEV;
    5440           0 :                 goto err_bdev;
    5441             :         }
    5442             : 
    5443           3 :         nbdev = SPDK_CONTAINEROF(bdev, struct nvme_bdev, disk);
    5444             : 
    5445           3 :         pthread_mutex_lock(&nbdev->mutex);
    5446             : 
    5447           3 :         ctx->nvme_ns = bdev_nvme_set_preferred_ns(nbdev, cntlid);
    5448           3 :         if (ctx->nvme_ns == NULL) {
    5449           0 :                 pthread_mutex_unlock(&nbdev->mutex);
    5450             : 
    5451           0 :                 SPDK_ERRLOG("bdev %s does not have namespace to controller %u.\n", name, cntlid);
    5452           0 :                 rc = -ENODEV;
    5453           0 :                 goto err_bdev;
    5454             :         }
    5455             : 
    5456           3 :         pthread_mutex_unlock(&nbdev->mutex);
    5457             : 
    5458           6 :         nvme_bdev_for_each_channel(nbdev,
    5459             :                                    _bdev_nvme_set_preferred_path,
    5460           3 :                                    ctx,
    5461             :                                    bdev_nvme_set_preferred_path_done);
    5462           3 :         return;
    5463             : 
    5464             : err_bdev:
    5465           0 :         spdk_bdev_close(ctx->desc);
    5466             : err_open:
    5467           0 :         free(ctx);
    5468             : err_alloc:
    5469           0 :         cb_fn(cb_arg, rc);
    5470           3 : }
    5471             : 
    5472             : struct bdev_nvme_set_multipath_policy_ctx {
    5473             :         struct spdk_bdev_desc *desc;
    5474             :         spdk_bdev_nvme_set_multipath_policy_cb cb_fn;
    5475             :         void *cb_arg;
    5476             : };
    5477             : 
    5478             : static void
    5479           3 : bdev_nvme_set_multipath_policy_done(struct nvme_bdev *nbdev, void *_ctx, int status)
    5480             : {
    5481           3 :         struct bdev_nvme_set_multipath_policy_ctx *ctx = _ctx;
    5482             : 
    5483           3 :         assert(ctx != NULL);
    5484           3 :         assert(ctx->desc != NULL);
    5485           3 :         assert(ctx->cb_fn != NULL);
    5486             : 
    5487           3 :         spdk_bdev_close(ctx->desc);
    5488             : 
    5489           3 :         ctx->cb_fn(ctx->cb_arg, status);
    5490             : 
    5491           3 :         free(ctx);
    5492           3 : }
    5493             : 
    5494             : static void
    5495           1 : _bdev_nvme_set_multipath_policy(struct nvme_bdev_channel_iter *i,
    5496             :                                 struct nvme_bdev *nbdev,
    5497             :                                 struct nvme_bdev_channel *nbdev_ch, void *ctx)
    5498             : {
    5499           1 :         nbdev_ch->mp_policy = nbdev->mp_policy;
    5500           1 :         nbdev_ch->mp_selector = nbdev->mp_selector;
    5501           1 :         nbdev_ch->rr_min_io = nbdev->rr_min_io;
    5502           1 :         bdev_nvme_clear_current_io_path(nbdev_ch);
    5503             : 
    5504           1 :         nvme_bdev_for_each_channel_continue(i, 0);
    5505           1 : }
    5506             : 
    5507             : void
    5508           3 : spdk_bdev_nvme_set_multipath_policy(const char *name, enum spdk_bdev_nvme_multipath_policy policy,
    5509             :                                     enum spdk_bdev_nvme_multipath_selector selector, uint32_t rr_min_io,
    5510             :                                     spdk_bdev_nvme_set_multipath_policy_cb cb_fn, void *cb_arg)
    5511             : {
    5512             :         struct bdev_nvme_set_multipath_policy_ctx *ctx;
    5513             :         struct spdk_bdev *bdev;
    5514             :         struct nvme_bdev *nbdev;
    5515             :         int rc;
    5516             : 
    5517           3 :         assert(cb_fn != NULL);
    5518             : 
    5519           3 :         switch (policy) {
    5520             :         case BDEV_NVME_MP_POLICY_ACTIVE_PASSIVE:
    5521           1 :                 break;
    5522             :         case BDEV_NVME_MP_POLICY_ACTIVE_ACTIVE:
    5523           2 :                 switch (selector) {
    5524             :                 case BDEV_NVME_MP_SELECTOR_ROUND_ROBIN:
    5525           1 :                         if (rr_min_io == UINT32_MAX) {
    5526           0 :                                 rr_min_io = 1;
    5527           1 :                         } else if (rr_min_io == 0) {
    5528           0 :                                 rc = -EINVAL;
    5529           0 :                                 goto exit;
    5530             :                         }
    5531           1 :                         break;
    5532             :                 case BDEV_NVME_MP_SELECTOR_QUEUE_DEPTH:
    5533           1 :                         break;
    5534             :                 default:
    5535           0 :                         rc = -EINVAL;
    5536           0 :                         goto exit;
    5537             :                 }
    5538           2 :                 break;
    5539             :         default:
    5540           0 :                 rc = -EINVAL;
    5541           0 :                 goto exit;
    5542             :         }
    5543             : 
    5544           3 :         ctx = calloc(1, sizeof(*ctx));
    5545           3 :         if (ctx == NULL) {
    5546           0 :                 SPDK_ERRLOG("Failed to alloc context.\n");
    5547           0 :                 rc = -ENOMEM;
    5548           0 :                 goto exit;
    5549             :         }
    5550             : 
    5551           3 :         ctx->cb_fn = cb_fn;
    5552           3 :         ctx->cb_arg = cb_arg;
    5553             : 
    5554           3 :         rc = spdk_bdev_open_ext(name, false, dummy_bdev_event_cb, NULL, &ctx->desc);
    5555           3 :         if (rc != 0) {
    5556           0 :                 SPDK_ERRLOG("Failed to open bdev %s.\n", name);
    5557           0 :                 rc = -ENODEV;
    5558           0 :                 goto err_open;
    5559             :         }
    5560             : 
    5561           3 :         bdev = spdk_bdev_desc_get_bdev(ctx->desc);
    5562           3 :         if (bdev->module != &nvme_if) {
    5563           0 :                 SPDK_ERRLOG("bdev %s is not registered in this module.\n", name);
    5564           0 :                 rc = -ENODEV;
    5565           0 :                 goto err_module;
    5566             :         }
    5567           3 :         nbdev = SPDK_CONTAINEROF(bdev, struct nvme_bdev, disk);
    5568             : 
    5569           3 :         pthread_mutex_lock(&nbdev->mutex);
    5570           3 :         nbdev->mp_policy = policy;
    5571           3 :         nbdev->mp_selector = selector;
    5572           3 :         nbdev->rr_min_io = rr_min_io;
    5573           3 :         pthread_mutex_unlock(&nbdev->mutex);
    5574             : 
    5575           6 :         nvme_bdev_for_each_channel(nbdev,
    5576             :                                    _bdev_nvme_set_multipath_policy,
    5577           3 :                                    ctx,
    5578             :                                    bdev_nvme_set_multipath_policy_done);
    5579           3 :         return;
    5580             : 
    5581             : err_module:
    5582           0 :         spdk_bdev_close(ctx->desc);
    5583             : err_open:
    5584           0 :         free(ctx);
    5585             : exit:
    5586           0 :         cb_fn(cb_arg, rc);
    5587           3 : }
    5588             : 
    5589             : static void
    5590           3 : aer_cb(void *arg, const struct spdk_nvme_cpl *cpl)
    5591             : {
    5592           3 :         struct nvme_ctrlr *nvme_ctrlr           = arg;
    5593             :         union spdk_nvme_async_event_completion  event;
    5594             : 
    5595           3 :         if (spdk_nvme_cpl_is_error(cpl)) {
    5596           0 :                 SPDK_WARNLOG("AER request execute failed\n");
    5597           0 :                 return;
    5598             :         }
    5599             : 
    5600           3 :         event.raw = cpl->cdw0;
    5601           3 :         if ((event.bits.async_event_type == SPDK_NVME_ASYNC_EVENT_TYPE_NOTICE) &&
    5602           3 :             (event.bits.async_event_info == SPDK_NVME_ASYNC_EVENT_NS_ATTR_CHANGED)) {
    5603           2 :                 nvme_ctrlr_populate_namespaces(nvme_ctrlr, NULL);
    5604           3 :         } else if ((event.bits.async_event_type == SPDK_NVME_ASYNC_EVENT_TYPE_NOTICE) &&
    5605           1 :                    (event.bits.async_event_info == SPDK_NVME_ASYNC_EVENT_ANA_CHANGE)) {
    5606           1 :                 nvme_ctrlr_read_ana_log_page(nvme_ctrlr);
    5607           1 :         }
    5608           3 : }
    5609             : 
    5610             : static void
    5611          53 : free_nvme_async_probe_ctx(struct nvme_async_probe_ctx *ctx)
    5612             : {
    5613          53 :         spdk_keyring_put_key(ctx->drv_opts.tls_psk);
    5614          53 :         spdk_keyring_put_key(ctx->drv_opts.dhchap_key);
    5615          53 :         spdk_keyring_put_key(ctx->drv_opts.dhchap_ctrlr_key);
    5616          53 :         free(ctx->base_name);
    5617          53 :         free(ctx);
    5618          53 : }
    5619             : 
    5620             : static void
    5621          53 : populate_namespaces_cb(struct nvme_async_probe_ctx *ctx, int rc)
    5622             : {
    5623          53 :         if (ctx->cb_fn) {
    5624          53 :                 ctx->cb_fn(ctx->cb_ctx, ctx->reported_bdevs, rc);
    5625          53 :         }
    5626             : 
    5627          53 :         ctx->namespaces_populated = true;
    5628          53 :         if (ctx->probe_done) {
    5629             :                 /* The probe was already completed, so we need to free the context
    5630             :                  * here.  This can happen for cases like OCSSD, where we need to
    5631             :                  * send additional commands to the SSD after attach.
    5632             :                  */
    5633          32 :                 free_nvme_async_probe_ctx(ctx);
    5634          32 :         }
    5635          53 : }
    5636             : 
    5637             : static int
    5638          20 : bdev_nvme_remove_poller(void *ctx)
    5639             : {
    5640             :         struct spdk_nvme_transport_id trid_pcie;
    5641             : 
    5642          20 :         if (TAILQ_EMPTY(&g_nvme_bdev_ctrlrs)) {
    5643           2 :                 spdk_poller_unregister(&g_hotplug_poller);
    5644           2 :                 return SPDK_POLLER_IDLE;
    5645             :         }
    5646             : 
    5647          18 :         memset(&trid_pcie, 0, sizeof(trid_pcie));
    5648          18 :         spdk_nvme_trid_populate_transport(&trid_pcie, SPDK_NVME_TRANSPORT_PCIE);
    5649             : 
    5650          18 :         if (spdk_nvme_scan_attached(&trid_pcie)) {
    5651           0 :                 SPDK_ERRLOG_RATELIMIT("spdk_nvme_scan_attached() failed\n");
    5652           0 :         }
    5653             : 
    5654          18 :         return SPDK_POLLER_BUSY;
    5655          20 : }
    5656             : 
    5657             : static void set_nvme_hotplug_period_cb(void *_ctx);
    5658             : 
    5659             : static void
    5660          61 : nvme_ctrlr_create_done(struct nvme_ctrlr *nvme_ctrlr,
    5661             :                        struct nvme_async_probe_ctx *ctx)
    5662             : {
    5663          61 :         struct spdk_nvme_transport_id *trid = &nvme_ctrlr->active_path_id->trid;
    5664             : 
    5665          61 :         if (spdk_nvme_trtype_is_fabrics(trid->trtype)) {
    5666          61 :                 NVME_CTRLR_INFOLOG(nvme_ctrlr, "ctrlr was created to %s:%s\n",
    5667             :                                    trid->traddr, trid->trsvcid);
    5668          61 :         } else {
    5669           0 :                 NVME_CTRLR_INFOLOG(nvme_ctrlr, "ctrlr was created\n");
    5670             :         }
    5671             : 
    5672         122 :         spdk_io_device_register(nvme_ctrlr,
    5673             :                                 bdev_nvme_create_ctrlr_channel_cb,
    5674             :                                 bdev_nvme_destroy_ctrlr_channel_cb,
    5675             :                                 sizeof(struct nvme_ctrlr_channel),
    5676          61 :                                 nvme_ctrlr->nbdev_ctrlr->name);
    5677             : 
    5678          61 :         nvme_ctrlr_populate_namespaces(nvme_ctrlr, ctx);
    5679             : 
    5680          61 :         if (g_hotplug_poller == NULL) {
    5681           3 :                 spdk_thread_send_msg(g_bdev_nvme_init_thread, set_nvme_hotplug_period_cb, NULL);
    5682           3 :         }
    5683          61 : }
    5684             : 
    5685             : static void
    5686          31 : nvme_ctrlr_init_ana_log_page_done(void *_ctx, const struct spdk_nvme_cpl *cpl)
    5687             : {
    5688          31 :         struct nvme_ctrlr *nvme_ctrlr = _ctx;
    5689          31 :         struct nvme_async_probe_ctx *ctx = nvme_ctrlr->probe_ctx;
    5690             : 
    5691          31 :         nvme_ctrlr->probe_ctx = NULL;
    5692             : 
    5693          31 :         if (spdk_nvme_cpl_is_error(cpl)) {
    5694           0 :                 nvme_ctrlr_delete(nvme_ctrlr);
    5695             : 
    5696           0 :                 if (ctx != NULL) {
    5697           0 :                         ctx->reported_bdevs = 0;
    5698           0 :                         populate_namespaces_cb(ctx, -1);
    5699           0 :                 }
    5700           0 :                 return;
    5701             :         }
    5702             : 
    5703          31 :         nvme_ctrlr_create_done(nvme_ctrlr, ctx);
    5704          31 : }
    5705             : 
    5706             : static int
    5707          31 : nvme_ctrlr_init_ana_log_page(struct nvme_ctrlr *nvme_ctrlr,
    5708             :                              struct nvme_async_probe_ctx *ctx)
    5709             : {
    5710          31 :         struct spdk_nvme_ctrlr *ctrlr = nvme_ctrlr->ctrlr;
    5711             :         const struct spdk_nvme_ctrlr_data *cdata;
    5712             :         uint32_t ana_log_page_size;
    5713             : 
    5714          31 :         cdata = spdk_nvme_ctrlr_get_data(ctrlr);
    5715             : 
    5716             :         /* Set buffer size enough to include maximum number of allowed namespaces. */
    5717          62 :         ana_log_page_size = sizeof(struct spdk_nvme_ana_page) + cdata->nanagrpid *
    5718          31 :                             sizeof(struct spdk_nvme_ana_group_descriptor) + cdata->mnan *
    5719             :                             sizeof(uint32_t);
    5720             : 
    5721          31 :         nvme_ctrlr->ana_log_page = spdk_zmalloc(ana_log_page_size, 64, NULL,
    5722             :                                                 SPDK_ENV_NUMA_ID_ANY, SPDK_MALLOC_DMA);
    5723          31 :         if (nvme_ctrlr->ana_log_page == NULL) {
    5724           0 :                 NVME_CTRLR_ERRLOG(nvme_ctrlr, "could not allocate ANA log page buffer\n");
    5725           0 :                 return -ENXIO;
    5726             :         }
    5727             : 
    5728             :         /* Each descriptor in a ANA log page is not ensured to be 8-bytes aligned.
    5729             :          * Hence copy each descriptor to a temporary area when parsing it.
    5730             :          *
    5731             :          * Allocate a buffer whose size is as large as ANA log page buffer because
    5732             :          * we do not know the size of a descriptor until actually reading it.
    5733             :          */
    5734          31 :         nvme_ctrlr->copied_ana_desc = calloc(1, ana_log_page_size);
    5735          31 :         if (nvme_ctrlr->copied_ana_desc == NULL) {
    5736           0 :                 NVME_CTRLR_ERRLOG(nvme_ctrlr, "could not allocate a buffer to parse ANA descriptor\n");
    5737           0 :                 return -ENOMEM;
    5738             :         }
    5739             : 
    5740          31 :         nvme_ctrlr->max_ana_log_page_size = ana_log_page_size;
    5741             : 
    5742          31 :         nvme_ctrlr->probe_ctx = ctx;
    5743             : 
    5744             :         /* Then, set the read size only to include the current active namespaces. */
    5745          31 :         ana_log_page_size = nvme_ctrlr_get_ana_log_page_size(nvme_ctrlr);
    5746             : 
    5747          31 :         if (ana_log_page_size > nvme_ctrlr->max_ana_log_page_size) {
    5748           0 :                 NVME_CTRLR_ERRLOG(nvme_ctrlr, "ANA log page size %" PRIu32 " is larger than allowed %" PRIu32 "\n",
    5749             :                                   ana_log_page_size, nvme_ctrlr->max_ana_log_page_size);
    5750           0 :                 return -EINVAL;
    5751             :         }
    5752             : 
    5753          62 :         return spdk_nvme_ctrlr_cmd_get_log_page(ctrlr,
    5754             :                                                 SPDK_NVME_LOG_ASYMMETRIC_NAMESPACE_ACCESS,
    5755             :                                                 SPDK_NVME_GLOBAL_NS_TAG,
    5756          31 :                                                 nvme_ctrlr->ana_log_page,
    5757          31 :                                                 ana_log_page_size, 0,
    5758             :                                                 nvme_ctrlr_init_ana_log_page_done,
    5759          31 :                                                 nvme_ctrlr);
    5760          31 : }
    5761             : 
    5762             : /* hostnqn and subnqn were already verified before attaching a controller.
    5763             :  * Hence check only the multipath capability and cntlid here.
    5764             :  */
    5765             : static bool
    5766          16 : bdev_nvme_check_multipath(struct nvme_bdev_ctrlr *nbdev_ctrlr, struct spdk_nvme_ctrlr *ctrlr)
    5767             : {
    5768             :         struct nvme_ctrlr *tmp;
    5769             :         const struct spdk_nvme_ctrlr_data *cdata, *tmp_cdata;
    5770             : 
    5771          16 :         cdata = spdk_nvme_ctrlr_get_data(ctrlr);
    5772             : 
    5773          16 :         if (!cdata->cmic.multi_ctrlr) {
    5774           0 :                 SPDK_ERRLOG("Ctrlr%u does not support multipath.\n", cdata->cntlid);
    5775           0 :                 return false;
    5776             :         }
    5777             : 
    5778          33 :         TAILQ_FOREACH(tmp, &nbdev_ctrlr->ctrlrs, tailq) {
    5779          18 :                 tmp_cdata = spdk_nvme_ctrlr_get_data(tmp->ctrlr);
    5780             : 
    5781          18 :                 if (!tmp_cdata->cmic.multi_ctrlr) {
    5782           0 :                         NVME_CTRLR_ERRLOG(tmp, "Ctrlr%u does not support multipath.\n", cdata->cntlid);
    5783           0 :                         return false;
    5784             :                 }
    5785          18 :                 if (cdata->cntlid == tmp_cdata->cntlid) {
    5786           1 :                         NVME_CTRLR_ERRLOG(tmp, "cntlid %u are duplicated.\n", tmp_cdata->cntlid);
    5787           1 :                         return false;
    5788             :                 }
    5789          17 :         }
    5790             : 
    5791          15 :         return true;
    5792          16 : }
    5793             : 
    5794             : 
    5795             : static int
    5796          62 : nvme_bdev_ctrlr_create(const char *name, struct nvme_ctrlr *nvme_ctrlr)
    5797             : {
    5798             :         struct nvme_bdev_ctrlr *nbdev_ctrlr;
    5799          62 :         struct spdk_nvme_ctrlr *ctrlr = nvme_ctrlr->ctrlr;
    5800             :         struct nvme_ctrlr      *nctrlr;
    5801          62 :         int rc = 0;
    5802             : 
    5803          62 :         pthread_mutex_lock(&g_bdev_nvme_mutex);
    5804             : 
    5805          62 :         nbdev_ctrlr = nvme_bdev_ctrlr_get_by_name(name);
    5806          62 :         if (nbdev_ctrlr != NULL) {
    5807          16 :                 if (!bdev_nvme_check_multipath(nbdev_ctrlr, ctrlr)) {
    5808           1 :                         rc = -EINVAL;
    5809           1 :                         goto exit;
    5810             :                 }
    5811          32 :                 TAILQ_FOREACH(nctrlr, &nbdev_ctrlr->ctrlrs, tailq) {
    5812          17 :                         if (nctrlr->opts.multipath != nvme_ctrlr->opts.multipath) {
    5813             :                                 /* All controllers with the same name must be configured the same
    5814             :                                  * way, either for multipath or failover. If the configuration doesn't
    5815             :                                  * match - report error.
    5816             :                                  */
    5817           0 :                                 rc = -EINVAL;
    5818           0 :                                 goto exit;
    5819             :                         }
    5820          17 :                 }
    5821          15 :         } else {
    5822          46 :                 nbdev_ctrlr = calloc(1, sizeof(*nbdev_ctrlr));
    5823          46 :                 if (nbdev_ctrlr == NULL) {
    5824           0 :                         NVME_CTRLR_ERRLOG(nvme_ctrlr, "Failed to allocate nvme_bdev_ctrlr.\n");
    5825           0 :                         rc = -ENOMEM;
    5826           0 :                         goto exit;
    5827             :                 }
    5828          46 :                 nbdev_ctrlr->name = strdup(name);
    5829          46 :                 if (nbdev_ctrlr->name == NULL) {
    5830           0 :                         NVME_CTRLR_ERRLOG(nvme_ctrlr, "Failed to allocate name of nvme_bdev_ctrlr.\n");
    5831           0 :                         free(nbdev_ctrlr);
    5832           0 :                         goto exit;
    5833             :                 }
    5834          46 :                 TAILQ_INIT(&nbdev_ctrlr->ctrlrs);
    5835          46 :                 TAILQ_INIT(&nbdev_ctrlr->bdevs);
    5836          46 :                 TAILQ_INSERT_TAIL(&g_nvme_bdev_ctrlrs, nbdev_ctrlr, tailq);
    5837             :         }
    5838          61 :         nvme_ctrlr->nbdev_ctrlr = nbdev_ctrlr;
    5839          61 :         TAILQ_INSERT_TAIL(&nbdev_ctrlr->ctrlrs, nvme_ctrlr, tailq);
    5840             : exit:
    5841          62 :         pthread_mutex_unlock(&g_bdev_nvme_mutex);
    5842          62 :         return rc;
    5843             : }
    5844             : 
    5845             : static int
    5846          62 : nvme_ctrlr_create(struct spdk_nvme_ctrlr *ctrlr,
    5847             :                   const char *name,
    5848             :                   const struct spdk_nvme_transport_id *trid,
    5849             :                   struct nvme_async_probe_ctx *ctx)
    5850             : {
    5851             :         struct nvme_ctrlr *nvme_ctrlr;
    5852             :         struct nvme_path_id *path_id;
    5853             :         const struct spdk_nvme_ctrlr_data *cdata;
    5854          62 :         struct spdk_event_handler_opts opts = {
    5855             :                 .opts_size = SPDK_SIZEOF(&opts, fd_type),
    5856             :         };
    5857             :         uint64_t period;
    5858             :         int fd, rc;
    5859             : 
    5860          62 :         nvme_ctrlr = calloc(1, sizeof(*nvme_ctrlr));
    5861          62 :         if (nvme_ctrlr == NULL) {
    5862           0 :                 SPDK_ERRLOG("Failed to allocate device struct\n");
    5863           0 :                 return -ENOMEM;
    5864             :         }
    5865             : 
    5866          62 :         rc = pthread_mutex_init(&nvme_ctrlr->mutex, NULL);
    5867          62 :         if (rc != 0) {
    5868           0 :                 free(nvme_ctrlr);
    5869           0 :                 return rc;
    5870             :         }
    5871             : 
    5872          62 :         TAILQ_INIT(&nvme_ctrlr->trids);
    5873          62 :         TAILQ_INIT(&nvme_ctrlr->pending_resets);
    5874          62 :         RB_INIT(&nvme_ctrlr->namespaces);
    5875             : 
    5876             :         /* Get another reference to the key, so the first one can be released from probe_ctx */
    5877          62 :         if (ctx != NULL) {
    5878          48 :                 if (ctx->drv_opts.tls_psk != NULL) {
    5879           0 :                         nvme_ctrlr->psk = spdk_keyring_get_key(
    5880           0 :                                                   spdk_key_get_name(ctx->drv_opts.tls_psk));
    5881           0 :                         if (nvme_ctrlr->psk == NULL) {
    5882             :                                 /* Could only happen if the key was removed in the meantime */
    5883           0 :                                 SPDK_ERRLOG("Couldn't get a reference to the key '%s'\n",
    5884             :                                             spdk_key_get_name(ctx->drv_opts.tls_psk));
    5885           0 :                                 rc = -ENOKEY;
    5886           0 :                                 goto err;
    5887             :                         }
    5888           0 :                 }
    5889             : 
    5890          48 :                 if (ctx->drv_opts.dhchap_key != NULL) {
    5891           0 :                         nvme_ctrlr->dhchap_key = spdk_keyring_get_key(
    5892           0 :                                                          spdk_key_get_name(ctx->drv_opts.dhchap_key));
    5893           0 :                         if (nvme_ctrlr->dhchap_key == NULL) {
    5894           0 :                                 SPDK_ERRLOG("Couldn't get a reference to the key '%s'\n",
    5895             :                                             spdk_key_get_name(ctx->drv_opts.dhchap_key));
    5896           0 :                                 rc = -ENOKEY;
    5897           0 :                                 goto err;
    5898             :                         }
    5899           0 :                 }
    5900             : 
    5901          48 :                 if (ctx->drv_opts.dhchap_ctrlr_key != NULL) {
    5902           0 :                         nvme_ctrlr->dhchap_ctrlr_key =
    5903           0 :                                 spdk_keyring_get_key(
    5904           0 :                                         spdk_key_get_name(ctx->drv_opts.dhchap_ctrlr_key));
    5905           0 :                         if (nvme_ctrlr->dhchap_ctrlr_key == NULL) {
    5906           0 :                                 SPDK_ERRLOG("Couldn't get a reference to the key '%s'\n",
    5907             :                                             spdk_key_get_name(ctx->drv_opts.dhchap_ctrlr_key));
    5908           0 :                                 rc = -ENOKEY;
    5909           0 :                                 goto err;
    5910             :                         }
    5911           0 :                 }
    5912          48 :         }
    5913             : 
    5914             :         /* Check if we manage to enable interrupts on the controller. */
    5915          62 :         if (spdk_interrupt_mode_is_enabled() && ctx != NULL && !ctx->drv_opts.enable_interrupts) {
    5916           0 :                 SPDK_ERRLOG("Failed to enable interrupts on the controller\n");
    5917           0 :                 rc = -ENOTSUP;
    5918           0 :                 goto err;
    5919             :         }
    5920             : 
    5921          62 :         path_id = calloc(1, sizeof(*path_id));
    5922          62 :         if (path_id == NULL) {
    5923           0 :                 SPDK_ERRLOG("Failed to allocate trid entry pointer\n");
    5924           0 :                 rc = -ENOMEM;
    5925           0 :                 goto err;
    5926             :         }
    5927             : 
    5928          62 :         path_id->trid = *trid;
    5929          62 :         if (ctx != NULL) {
    5930          48 :                 memcpy(path_id->hostid.hostaddr, ctx->drv_opts.src_addr, sizeof(path_id->hostid.hostaddr));
    5931          48 :                 memcpy(path_id->hostid.hostsvcid, ctx->drv_opts.src_svcid, sizeof(path_id->hostid.hostsvcid));
    5932          48 :         }
    5933          62 :         nvme_ctrlr->active_path_id = path_id;
    5934          62 :         TAILQ_INSERT_HEAD(&nvme_ctrlr->trids, path_id, link);
    5935             : 
    5936          62 :         nvme_ctrlr->thread = spdk_get_thread();
    5937          62 :         nvme_ctrlr->ctrlr = ctrlr;
    5938          62 :         nvme_ctrlr->ref = 1;
    5939             : 
    5940          62 :         if (spdk_nvme_ctrlr_is_ocssd_supported(ctrlr)) {
    5941           0 :                 SPDK_ERRLOG("OCSSDs are not supported");
    5942           0 :                 rc = -ENOTSUP;
    5943           0 :                 goto err;
    5944             :         }
    5945             : 
    5946          62 :         if (ctx != NULL) {
    5947          48 :                 memcpy(&nvme_ctrlr->opts, &ctx->bdev_opts, sizeof(ctx->bdev_opts));
    5948          48 :         } else {
    5949          14 :                 spdk_bdev_nvme_get_default_ctrlr_opts(&nvme_ctrlr->opts);
    5950             :         }
    5951             : 
    5952          62 :         period = spdk_interrupt_mode_is_enabled() ? 0 : g_opts.nvme_adminq_poll_period_us;
    5953             : 
    5954          62 :         nvme_ctrlr->adminq_timer_poller = SPDK_POLLER_REGISTER(bdev_nvme_poll_adminq, nvme_ctrlr,
    5955             :                                           period);
    5956             : 
    5957          62 :         if (spdk_interrupt_mode_is_enabled()) {
    5958           0 :                 spdk_poller_register_interrupt(nvme_ctrlr->adminq_timer_poller, NULL, NULL);
    5959             : 
    5960           0 :                 fd = spdk_nvme_ctrlr_get_admin_qp_fd(nvme_ctrlr->ctrlr, &opts);
    5961           0 :                 if (fd < 0) {
    5962           0 :                         rc = fd;
    5963           0 :                         goto err;
    5964             :                 }
    5965             : 
    5966           0 :                 nvme_ctrlr->intr = SPDK_INTERRUPT_REGISTER_EXT(fd, bdev_nvme_poll_adminq,
    5967             :                                    nvme_ctrlr, &opts);
    5968           0 :                 if (!nvme_ctrlr->intr) {
    5969           0 :                         rc = -EINVAL;
    5970           0 :                         goto err;
    5971             :                 }
    5972           0 :         }
    5973             : 
    5974          62 :         if (g_opts.timeout_us > 0) {
    5975             :                 /* Register timeout callback. Timeout values for IO vs. admin reqs can be different. */
    5976             :                 /* If timeout_admin_us is 0 (not specified), admin uses same timeout as IO. */
    5977           0 :                 uint64_t adm_timeout_us = (g_opts.timeout_admin_us == 0) ?
    5978           0 :                                           g_opts.timeout_us : g_opts.timeout_admin_us;
    5979           0 :                 spdk_nvme_ctrlr_register_timeout_callback(ctrlr, g_opts.timeout_us,
    5980           0 :                                 adm_timeout_us, timeout_cb, nvme_ctrlr);
    5981           0 :         }
    5982             : 
    5983          62 :         spdk_nvme_ctrlr_register_aer_callback(ctrlr, aer_cb, nvme_ctrlr);
    5984          62 :         spdk_nvme_ctrlr_set_remove_cb(ctrlr, remove_cb, nvme_ctrlr);
    5985             : 
    5986          62 :         if (spdk_nvme_ctrlr_get_flags(ctrlr) &
    5987             :             SPDK_NVME_CTRLR_SECURITY_SEND_RECV_SUPPORTED) {
    5988           0 :                 nvme_ctrlr->opal_dev = spdk_opal_dev_construct(ctrlr);
    5989           0 :         }
    5990             : 
    5991          62 :         rc = nvme_bdev_ctrlr_create(name, nvme_ctrlr);
    5992          62 :         if (rc != 0) {
    5993           1 :                 goto err;
    5994             :         }
    5995             : 
    5996          61 :         cdata = spdk_nvme_ctrlr_get_data(ctrlr);
    5997             : 
    5998          61 :         if (cdata->cmic.ana_reporting) {
    5999          31 :                 rc = nvme_ctrlr_init_ana_log_page(nvme_ctrlr, ctx);
    6000          31 :                 if (rc == 0) {
    6001          31 :                         return 0;
    6002             :                 }
    6003           0 :         } else {
    6004          30 :                 nvme_ctrlr_create_done(nvme_ctrlr, ctx);
    6005          30 :                 return 0;
    6006             :         }
    6007             : 
    6008             : err:
    6009           1 :         nvme_ctrlr_delete(nvme_ctrlr);
    6010           1 :         return rc;
    6011          62 : }
    6012             : 
    6013             : void
    6014          34 : spdk_bdev_nvme_get_default_ctrlr_opts(struct spdk_bdev_nvme_ctrlr_opts *opts)
    6015             : {
    6016          34 :         opts->prchk_flags = 0;
    6017          34 :         opts->ctrlr_loss_timeout_sec = g_opts.ctrlr_loss_timeout_sec;
    6018          34 :         opts->reconnect_delay_sec = g_opts.reconnect_delay_sec;
    6019          34 :         opts->fast_io_fail_timeout_sec = g_opts.fast_io_fail_timeout_sec;
    6020          34 :         opts->multipath = true;
    6021          34 : }
    6022             : 
    6023             : static void
    6024           0 : attach_cb(void *cb_ctx, const struct spdk_nvme_transport_id *trid,
    6025             :           struct spdk_nvme_ctrlr *ctrlr, const struct spdk_nvme_ctrlr_opts *drv_opts)
    6026             : {
    6027             :         char *name;
    6028             : 
    6029           0 :         name = spdk_sprintf_alloc("HotInNvme%d", g_hot_insert_nvme_controller_index++);
    6030           0 :         if (!name) {
    6031           0 :                 SPDK_ERRLOG("Failed to assign name to NVMe device\n");
    6032           0 :                 return;
    6033             :         }
    6034             : 
    6035           0 :         if (nvme_ctrlr_create(ctrlr, name, trid, NULL) == 0) {
    6036           0 :                 SPDK_DEBUGLOG(bdev_nvme, "Attached to %s (%s)\n", trid->traddr, name);
    6037           0 :         } else {
    6038           0 :                 SPDK_ERRLOG("Failed to attach to %s (%s)\n", trid->traddr, name);
    6039             :         }
    6040             : 
    6041           0 :         free(name);
    6042           0 : }
    6043             : 
    6044             : static void
    6045          61 : _nvme_ctrlr_destruct(void *ctx)
    6046             : {
    6047          61 :         struct nvme_ctrlr *nvme_ctrlr = ctx;
    6048             : 
    6049          61 :         nvme_ctrlr_depopulate_namespaces(nvme_ctrlr);
    6050          61 :         nvme_ctrlr_put_ref(nvme_ctrlr);
    6051          61 : }
    6052             : 
    6053             : static int
    6054          58 : bdev_nvme_delete_ctrlr_unsafe(struct nvme_ctrlr *nvme_ctrlr, bool hotplug)
    6055             : {
    6056             :         struct nvme_probe_skip_entry *entry;
    6057             : 
    6058             :         /* The controller's destruction was already started */
    6059          58 :         if (nvme_ctrlr->destruct) {
    6060           0 :                 return -EALREADY;
    6061             :         }
    6062             : 
    6063          58 :         if (!hotplug &&
    6064          58 :             nvme_ctrlr->active_path_id->trid.trtype == SPDK_NVME_TRANSPORT_PCIE) {
    6065           0 :                 entry = calloc(1, sizeof(*entry));
    6066           0 :                 if (!entry) {
    6067           0 :                         return -ENOMEM;
    6068             :                 }
    6069           0 :                 entry->trid = nvme_ctrlr->active_path_id->trid;
    6070           0 :                 TAILQ_INSERT_TAIL(&g_skipped_nvme_ctrlrs, entry, tailq);
    6071           0 :         }
    6072             : 
    6073          58 :         nvme_ctrlr->destruct = true;
    6074          58 :         return 0;
    6075          58 : }
    6076             : 
    6077             : static int
    6078           2 : bdev_nvme_delete_ctrlr(struct nvme_ctrlr *nvme_ctrlr, bool hotplug)
    6079             : {
    6080             :         int rc;
    6081             : 
    6082           2 :         pthread_mutex_lock(&nvme_ctrlr->mutex);
    6083           2 :         rc = bdev_nvme_delete_ctrlr_unsafe(nvme_ctrlr, hotplug);
    6084           2 :         pthread_mutex_unlock(&nvme_ctrlr->mutex);
    6085             : 
    6086           2 :         if (rc == 0) {
    6087           2 :                 _nvme_ctrlr_destruct(nvme_ctrlr);
    6088           2 :         } else if (rc == -EALREADY) {
    6089           0 :                 rc = 0;
    6090           0 :         }
    6091             : 
    6092           2 :         return rc;
    6093             : }
    6094             : 
    6095             : static void
    6096           0 : remove_cb(void *cb_ctx, struct spdk_nvme_ctrlr *ctrlr)
    6097             : {
    6098           0 :         struct nvme_ctrlr *nvme_ctrlr = cb_ctx;
    6099             : 
    6100           0 :         bdev_nvme_delete_ctrlr(nvme_ctrlr, true);
    6101           0 : }
    6102             : 
    6103             : static int
    6104           0 : bdev_nvme_hotplug_probe(void *arg)
    6105             : {
    6106           0 :         if (g_hotplug_probe_ctx == NULL) {
    6107           0 :                 spdk_poller_unregister(&g_hotplug_probe_poller);
    6108           0 :                 return SPDK_POLLER_IDLE;
    6109             :         }
    6110             : 
    6111           0 :         if (spdk_nvme_probe_poll_async(g_hotplug_probe_ctx) != -EAGAIN) {
    6112           0 :                 g_hotplug_probe_ctx = NULL;
    6113           0 :                 spdk_poller_unregister(&g_hotplug_probe_poller);
    6114           0 :         }
    6115             : 
    6116           0 :         return SPDK_POLLER_BUSY;
    6117           0 : }
    6118             : 
    6119             : static int
    6120           0 : bdev_nvme_hotplug(void *arg)
    6121             : {
    6122             :         struct spdk_nvme_transport_id trid_pcie;
    6123             : 
    6124           0 :         if (g_hotplug_probe_ctx) {
    6125           0 :                 return SPDK_POLLER_BUSY;
    6126             :         }
    6127             : 
    6128           0 :         memset(&trid_pcie, 0, sizeof(trid_pcie));
    6129           0 :         spdk_nvme_trid_populate_transport(&trid_pcie, SPDK_NVME_TRANSPORT_PCIE);
    6130             : 
    6131           0 :         g_hotplug_probe_ctx = spdk_nvme_probe_async(&trid_pcie, NULL,
    6132             :                               hotplug_probe_cb, attach_cb, NULL);
    6133             : 
    6134           0 :         if (g_hotplug_probe_ctx) {
    6135           0 :                 assert(g_hotplug_probe_poller == NULL);
    6136           0 :                 g_hotplug_probe_poller = SPDK_POLLER_REGISTER(bdev_nvme_hotplug_probe, NULL, 1000);
    6137           0 :         }
    6138             : 
    6139           0 :         return SPDK_POLLER_BUSY;
    6140           0 : }
    6141             : 
    6142             : void
    6143           0 : spdk_bdev_nvme_get_opts(struct spdk_bdev_nvme_opts *opts, size_t opts_size)
    6144             : {
    6145           0 :         if (!opts) {
    6146           0 :                 SPDK_ERRLOG("opts should not be NULL\n");
    6147           0 :                 return;
    6148             :         }
    6149             : 
    6150           0 :         if (!opts_size) {
    6151           0 :                 SPDK_ERRLOG("opts_size should not be zero value\n");
    6152           0 :                 return;
    6153             :         }
    6154             : 
    6155           0 :         opts->opts_size = opts_size;
    6156             : 
    6157             : #define SET_FIELD(field, defval) \
    6158             :                 opts->field = SPDK_GET_FIELD(&g_opts, field, defval, opts_size); \
    6159             : 
    6160           0 :         SET_FIELD(action_on_timeout, 0);
    6161           0 :         SET_FIELD(keep_alive_timeout_ms, 0);
    6162           0 :         SET_FIELD(timeout_us, 0);
    6163           0 :         SET_FIELD(timeout_admin_us, 0);
    6164           0 :         SET_FIELD(transport_retry_count, 0);
    6165           0 :         SET_FIELD(arbitration_burst, 0);
    6166           0 :         SET_FIELD(low_priority_weight, 0);
    6167           0 :         SET_FIELD(medium_priority_weight, 0);
    6168           0 :         SET_FIELD(high_priority_weight, 0);
    6169           0 :         SET_FIELD(io_queue_requests, 0);
    6170           0 :         SET_FIELD(nvme_adminq_poll_period_us, 0);
    6171           0 :         SET_FIELD(nvme_ioq_poll_period_us, 0);
    6172           0 :         SET_FIELD(delay_cmd_submit, 0);
    6173           0 :         SET_FIELD(bdev_retry_count, 0);
    6174           0 :         SET_FIELD(ctrlr_loss_timeout_sec, 0);
    6175           0 :         SET_FIELD(reconnect_delay_sec, 0);
    6176           0 :         SET_FIELD(fast_io_fail_timeout_sec, 0);
    6177           0 :         SET_FIELD(transport_ack_timeout, 0);
    6178           0 :         SET_FIELD(disable_auto_failback, false);
    6179           0 :         SET_FIELD(generate_uuids, false);
    6180           0 :         SET_FIELD(transport_tos, 0);
    6181           0 :         SET_FIELD(nvme_error_stat, false);
    6182           0 :         SET_FIELD(io_path_stat, false);
    6183           0 :         SET_FIELD(allow_accel_sequence, false);
    6184           0 :         SET_FIELD(rdma_srq_size, 0);
    6185           0 :         SET_FIELD(rdma_max_cq_size, 0);
    6186           0 :         SET_FIELD(rdma_cm_event_timeout_ms, 0);
    6187           0 :         SET_FIELD(dhchap_digests, 0);
    6188           0 :         SET_FIELD(dhchap_dhgroups, 0);
    6189           0 :         SET_FIELD(rdma_umr_per_io, false);
    6190             : 
    6191             : #undef SET_FIELD
    6192             : 
    6193             :         /* Do not remove this statement, you should always update this statement when you adding a new field,
    6194             :          * and do not forget to add the SET_FIELD statement for your added field. */
    6195             :         SPDK_STATIC_ASSERT(sizeof(struct spdk_bdev_nvme_opts) == 128, "Incorrect size");
    6196           0 : }
    6197             : 
    6198             : static bool bdev_nvme_check_io_error_resiliency_params(int32_t ctrlr_loss_timeout_sec,
    6199             :                 uint32_t reconnect_delay_sec,
    6200             :                 uint32_t fast_io_fail_timeout_sec);
    6201             : 
    6202             : static int
    6203           0 : bdev_nvme_validate_opts(const struct spdk_bdev_nvme_opts *opts)
    6204             : {
    6205           0 :         if ((opts->timeout_us == 0) && (opts->timeout_admin_us != 0)) {
    6206             :                 /* Can't set timeout_admin_us without also setting timeout_us */
    6207           0 :                 SPDK_WARNLOG("Invalid options: Can't have (timeout_us == 0) with (timeout_admin_us > 0)\n");
    6208           0 :                 return -EINVAL;
    6209             :         }
    6210             : 
    6211           0 :         if (opts->bdev_retry_count < -1) {
    6212           0 :                 SPDK_WARNLOG("Invalid option: bdev_retry_count can't be less than -1.\n");
    6213           0 :                 return -EINVAL;
    6214             :         }
    6215             : 
    6216           0 :         if (!bdev_nvme_check_io_error_resiliency_params(opts->ctrlr_loss_timeout_sec,
    6217           0 :                         opts->reconnect_delay_sec,
    6218           0 :                         opts->fast_io_fail_timeout_sec)) {
    6219           0 :                 return -EINVAL;
    6220             :         }
    6221             : 
    6222           0 :         return 0;
    6223           0 : }
    6224             : 
    6225             : int
    6226           0 : spdk_bdev_nvme_set_opts(const struct spdk_bdev_nvme_opts *opts)
    6227             : {
    6228             :         struct spdk_nvme_transport_opts drv_opts;
    6229             :         int ret;
    6230             : 
    6231           0 :         if (!opts) {
    6232           0 :                 SPDK_ERRLOG("opts cannot be NULL\n");
    6233           0 :                 return -1;
    6234             :         }
    6235             : 
    6236           0 :         if (!opts->opts_size) {
    6237           0 :                 SPDK_ERRLOG("opts_size inside opts cannot be zero value\n");
    6238           0 :                 return -1;
    6239             :         }
    6240             : 
    6241           0 :         ret = bdev_nvme_validate_opts(opts);
    6242           0 :         if (ret) {
    6243           0 :                 SPDK_WARNLOG("Failed to set nvme opts.\n");
    6244           0 :                 return ret;
    6245             :         }
    6246             : 
    6247           0 :         if (g_bdev_nvme_init_thread != NULL) {
    6248           0 :                 if (!TAILQ_EMPTY(&g_nvme_bdev_ctrlrs)) {
    6249           0 :                         return -EPERM;
    6250             :                 }
    6251           0 :         }
    6252             : 
    6253           0 :         spdk_nvme_transport_get_opts(&drv_opts, sizeof(drv_opts));
    6254           0 :         if (opts->rdma_srq_size != 0) {
    6255           0 :                 drv_opts.rdma_srq_size = opts->rdma_srq_size;
    6256           0 :         }
    6257           0 :         if (opts->rdma_max_cq_size != 0) {
    6258           0 :                 drv_opts.rdma_max_cq_size = opts->rdma_max_cq_size;
    6259           0 :         }
    6260           0 :         if (opts->rdma_cm_event_timeout_ms != 0) {
    6261           0 :                 drv_opts.rdma_cm_event_timeout_ms = opts->rdma_cm_event_timeout_ms;
    6262           0 :         }
    6263           0 :         if (drv_opts.rdma_umr_per_io != opts->rdma_umr_per_io) {
    6264           0 :                 drv_opts.rdma_umr_per_io = opts->rdma_umr_per_io;
    6265           0 :         }
    6266           0 :         ret = spdk_nvme_transport_set_opts(&drv_opts, sizeof(drv_opts));
    6267           0 :         if (ret) {
    6268           0 :                 SPDK_ERRLOG("Failed to set NVMe transport opts.\n");
    6269           0 :                 return ret;
    6270             :         }
    6271             : 
    6272             : #define SET_FIELD(field, defval) \
    6273             :                 g_opts.field = SPDK_GET_FIELD(opts, field, defval, opts->opts_size); \
    6274             : 
    6275           0 :         SET_FIELD(action_on_timeout, 0);
    6276           0 :         SET_FIELD(keep_alive_timeout_ms, 0);
    6277           0 :         SET_FIELD(timeout_us, 0);
    6278           0 :         SET_FIELD(timeout_admin_us, 0);
    6279           0 :         SET_FIELD(transport_retry_count, 0);
    6280           0 :         SET_FIELD(arbitration_burst, 0);
    6281           0 :         SET_FIELD(low_priority_weight, 0);
    6282           0 :         SET_FIELD(medium_priority_weight, 0);
    6283           0 :         SET_FIELD(high_priority_weight, 0);
    6284           0 :         SET_FIELD(io_queue_requests, 0);
    6285           0 :         SET_FIELD(nvme_adminq_poll_period_us, 0);
    6286           0 :         SET_FIELD(nvme_ioq_poll_period_us, 0);
    6287           0 :         SET_FIELD(delay_cmd_submit, 0);
    6288           0 :         SET_FIELD(bdev_retry_count, 0);
    6289           0 :         SET_FIELD(ctrlr_loss_timeout_sec, 0);
    6290           0 :         SET_FIELD(reconnect_delay_sec, 0);
    6291           0 :         SET_FIELD(fast_io_fail_timeout_sec, 0);
    6292           0 :         SET_FIELD(transport_ack_timeout, 0);
    6293           0 :         SET_FIELD(disable_auto_failback, false);
    6294           0 :         SET_FIELD(generate_uuids, false);
    6295           0 :         SET_FIELD(transport_tos, 0);
    6296           0 :         SET_FIELD(nvme_error_stat, false);
    6297           0 :         SET_FIELD(io_path_stat, false);
    6298           0 :         SET_FIELD(allow_accel_sequence, false);
    6299           0 :         SET_FIELD(rdma_srq_size, 0);
    6300           0 :         SET_FIELD(rdma_max_cq_size, 0);
    6301           0 :         SET_FIELD(rdma_cm_event_timeout_ms, 0);
    6302           0 :         SET_FIELD(dhchap_digests, 0);
    6303           0 :         SET_FIELD(dhchap_dhgroups, 0);
    6304             : 
    6305           0 :         g_opts.opts_size = opts->opts_size;
    6306             : 
    6307             : #undef SET_FIELD
    6308             : 
    6309           0 :         return 0;
    6310           0 : }
    6311             : 
    6312             : struct set_nvme_hotplug_ctx {
    6313             :         uint64_t period_us;
    6314             :         bool enabled;
    6315             :         spdk_msg_fn fn;
    6316             :         void *fn_ctx;
    6317             : };
    6318             : 
    6319             : static void
    6320           3 : set_nvme_hotplug_period_cb(void *_ctx)
    6321             : {
    6322           3 :         struct set_nvme_hotplug_ctx *ctx = _ctx;
    6323             : 
    6324           3 :         spdk_poller_unregister(&g_hotplug_poller);
    6325           3 :         if (ctx && ctx->enabled) {
    6326           0 :                 g_hotplug_poller = SPDK_POLLER_REGISTER(bdev_nvme_hotplug, NULL, ctx->period_us);
    6327           0 :         } else {
    6328           3 :                 g_hotplug_poller = SPDK_POLLER_REGISTER(bdev_nvme_remove_poller, NULL,
    6329             :                                                         NVME_HOTPLUG_POLL_PERIOD_DEFAULT);
    6330             :         }
    6331             : 
    6332           3 :         if (!ctx) {
    6333           3 :                 return;
    6334             :         }
    6335             : 
    6336           0 :         g_nvme_hotplug_poll_period_us = ctx->period_us;
    6337           0 :         g_nvme_hotplug_enabled = ctx->enabled;
    6338           0 :         if (ctx->fn) {
    6339           0 :                 ctx->fn(ctx->fn_ctx);
    6340           0 :         }
    6341             : 
    6342           0 :         free(ctx);
    6343           3 : }
    6344             : 
    6345             : int
    6346           0 : bdev_nvme_set_hotplug(bool enabled, uint64_t period_us, spdk_msg_fn cb, void *cb_ctx)
    6347             : {
    6348             :         struct set_nvme_hotplug_ctx *ctx;
    6349             : 
    6350           0 :         if (enabled == true && !spdk_process_is_primary()) {
    6351           0 :                 return -EPERM;
    6352             :         }
    6353             : 
    6354           0 :         ctx = calloc(1, sizeof(*ctx));
    6355           0 :         if (ctx == NULL) {
    6356           0 :                 return -ENOMEM;
    6357             :         }
    6358             : 
    6359           0 :         period_us = period_us == 0 ? NVME_HOTPLUG_POLL_PERIOD_DEFAULT : period_us;
    6360           0 :         ctx->period_us = spdk_min(period_us, NVME_HOTPLUG_POLL_PERIOD_MAX);
    6361           0 :         ctx->enabled = enabled;
    6362           0 :         ctx->fn = cb;
    6363           0 :         ctx->fn_ctx = cb_ctx;
    6364             : 
    6365           0 :         spdk_thread_send_msg(g_bdev_nvme_init_thread, set_nvme_hotplug_period_cb, ctx);
    6366           0 :         return 0;
    6367           0 : }
    6368             : 
    6369             : static void
    6370          47 : nvme_ctrlr_populate_namespaces_done(struct nvme_ctrlr *nvme_ctrlr,
    6371             :                                     struct nvme_async_probe_ctx *ctx)
    6372             : {
    6373             :         struct nvme_ns  *nvme_ns;
    6374             :         struct nvme_bdev        *nvme_bdev;
    6375             :         size_t                  j;
    6376             : 
    6377          47 :         assert(nvme_ctrlr != NULL);
    6378             : 
    6379          47 :         if (ctx->names == NULL) {
    6380           0 :                 ctx->reported_bdevs = 0;
    6381           0 :                 populate_namespaces_cb(ctx, 0);
    6382           0 :                 return;
    6383             :         }
    6384             : 
    6385             :         /*
    6386             :          * Report the new bdevs that were created in this call.
    6387             :          * There can be more than one bdev per NVMe controller.
    6388             :          */
    6389          47 :         j = 0;
    6390             : 
    6391          47 :         pthread_mutex_lock(&nvme_ctrlr->mutex);
    6392             : 
    6393          47 :         nvme_ns = nvme_ctrlr_get_first_active_ns(nvme_ctrlr);
    6394          96 :         while (nvme_ns != NULL) {
    6395          49 :                 nvme_bdev = nvme_ns->bdev;
    6396          49 :                 if (j < ctx->max_bdevs) {
    6397          49 :                         ctx->names[j] = nvme_bdev->disk.name;
    6398          49 :                         j++;
    6399          49 :                 } else {
    6400           0 :                         pthread_mutex_unlock(&nvme_ctrlr->mutex);
    6401             : 
    6402           0 :                         NVME_CTRLR_ERRLOG(nvme_ctrlr,
    6403             :                                           "Maximum number of namespaces supported per NVMe controller is %du. "
    6404             :                                           "Unable to return all names of created bdevs\n",
    6405             :                                           ctx->max_bdevs);
    6406           0 :                         ctx->reported_bdevs = 0;
    6407           0 :                         populate_namespaces_cb(ctx, -ERANGE);
    6408           0 :                         return;
    6409             :                 }
    6410             : 
    6411          49 :                 nvme_ns = nvme_ctrlr_get_next_active_ns(nvme_ctrlr, nvme_ns);
    6412             :         }
    6413             : 
    6414          47 :         pthread_mutex_unlock(&nvme_ctrlr->mutex);
    6415             : 
    6416          47 :         ctx->reported_bdevs = j;
    6417          47 :         populate_namespaces_cb(ctx, 0);
    6418          47 : }
    6419             : 
    6420             : static int
    6421           9 : bdev_nvme_check_secondary_trid(struct nvme_ctrlr *nvme_ctrlr,
    6422             :                                struct spdk_nvme_ctrlr *new_ctrlr,
    6423             :                                struct spdk_nvme_transport_id *trid)
    6424             : {
    6425             :         struct nvme_path_id *tmp_trid;
    6426             : 
    6427           9 :         if (trid->trtype == SPDK_NVME_TRANSPORT_PCIE) {
    6428           0 :                 NVME_CTRLR_ERRLOG(nvme_ctrlr, "PCIe failover is not supported.\n");
    6429           0 :                 return -ENOTSUP;
    6430             :         }
    6431             : 
    6432             :         /* Currently we only support failover to the same transport type. */
    6433           9 :         if (nvme_ctrlr->active_path_id->trid.trtype != trid->trtype) {
    6434           0 :                 NVME_CTRLR_WARNLOG(nvme_ctrlr,
    6435             :                                    "Failover from trtype: %s to a different trtype: %s is not supported currently\n",
    6436             :                                    spdk_nvme_transport_id_trtype_str(nvme_ctrlr->active_path_id->trid.trtype),
    6437             :                                    spdk_nvme_transport_id_trtype_str(trid->trtype));
    6438           0 :                 return -EINVAL;
    6439             :         }
    6440             : 
    6441             : 
    6442             :         /* Currently we only support failover to the same NQN. */
    6443           9 :         if (strncmp(trid->subnqn, nvme_ctrlr->active_path_id->trid.subnqn, SPDK_NVMF_NQN_MAX_LEN)) {
    6444           0 :                 NVME_CTRLR_WARNLOG(nvme_ctrlr,
    6445             :                                    "Failover from subnqn: %s to a different subnqn: %s is not supported currently\n",
    6446             :                                    nvme_ctrlr->active_path_id->trid.subnqn, trid->subnqn);
    6447           0 :                 return -EINVAL;
    6448             :         }
    6449             : 
    6450             :         /* Skip all the other checks if we've already registered this path. */
    6451          21 :         TAILQ_FOREACH(tmp_trid, &nvme_ctrlr->trids, link) {
    6452          12 :                 if (!spdk_nvme_transport_id_compare(&tmp_trid->trid, trid)) {
    6453           0 :                         NVME_CTRLR_WARNLOG(nvme_ctrlr, "This path (traddr: %s subnqn: %s) is already registered\n",
    6454             :                                            trid->traddr, trid->subnqn);
    6455           0 :                         return -EALREADY;
    6456             :                 }
    6457          12 :         }
    6458             : 
    6459           9 :         return 0;
    6460           9 : }
    6461             : 
    6462             : static int
    6463           9 : bdev_nvme_check_secondary_namespace(struct nvme_ctrlr *nvme_ctrlr,
    6464             :                                     struct spdk_nvme_ctrlr *new_ctrlr)
    6465             : {
    6466             :         struct nvme_ns *nvme_ns;
    6467             :         struct spdk_nvme_ns *new_ns;
    6468             : 
    6469           9 :         nvme_ns = nvme_ctrlr_get_first_active_ns(nvme_ctrlr);
    6470           9 :         while (nvme_ns != NULL) {
    6471           0 :                 new_ns = spdk_nvme_ctrlr_get_ns(new_ctrlr, nvme_ns->id);
    6472           0 :                 assert(new_ns != NULL);
    6473             : 
    6474           0 :                 if (!bdev_nvme_compare_ns(nvme_ns->ns, new_ns)) {
    6475           0 :                         return -EINVAL;
    6476             :                 }
    6477             : 
    6478           0 :                 nvme_ns = nvme_ctrlr_get_next_active_ns(nvme_ctrlr, nvme_ns);
    6479             :         }
    6480             : 
    6481           9 :         return 0;
    6482           9 : }
    6483             : 
    6484             : static int
    6485           9 : _bdev_nvme_add_secondary_trid(struct nvme_ctrlr *nvme_ctrlr,
    6486             :                               struct spdk_nvme_transport_id *trid)
    6487             : {
    6488             :         struct nvme_path_id *active_id, *new_trid, *tmp_trid;
    6489             : 
    6490           9 :         new_trid = calloc(1, sizeof(*new_trid));
    6491           9 :         if (new_trid == NULL) {
    6492           0 :                 return -ENOMEM;
    6493             :         }
    6494           9 :         new_trid->trid = *trid;
    6495             : 
    6496           9 :         active_id = nvme_ctrlr->active_path_id;
    6497           9 :         assert(active_id != NULL);
    6498           9 :         assert(active_id == TAILQ_FIRST(&nvme_ctrlr->trids));
    6499             : 
    6500             :         /* Skip the active trid not to replace it until it is failed. */
    6501           9 :         tmp_trid = TAILQ_NEXT(active_id, link);
    6502           9 :         if (tmp_trid == NULL) {
    6503           6 :                 goto add_tail;
    6504             :         }
    6505             : 
    6506             :         /* It means the trid is faled if its last failed time is non-zero.
    6507             :          * Insert the new alternate trid before any failed trid.
    6508             :          */
    6509           5 :         TAILQ_FOREACH_FROM(tmp_trid, &nvme_ctrlr->trids, link) {
    6510           3 :                 if (tmp_trid->last_failed_tsc != 0) {
    6511           1 :                         TAILQ_INSERT_BEFORE(tmp_trid, new_trid, link);
    6512           1 :                         return 0;
    6513             :                 }
    6514           4 :         }
    6515             : 
    6516             : add_tail:
    6517           8 :         TAILQ_INSERT_TAIL(&nvme_ctrlr->trids, new_trid, link);
    6518           8 :         return 0;
    6519           9 : }
    6520             : 
    6521             : /* This is the case that a secondary path is added to an existing
    6522             :  * nvme_ctrlr for failover. After checking if it can access the same
    6523             :  * namespaces as the primary path, it is disconnected until failover occurs.
    6524             :  */
    6525             : static int
    6526           9 : bdev_nvme_add_secondary_trid(struct nvme_ctrlr *nvme_ctrlr,
    6527             :                              struct spdk_nvme_ctrlr *new_ctrlr,
    6528             :                              struct spdk_nvme_transport_id *trid)
    6529             : {
    6530             :         int rc;
    6531             : 
    6532           9 :         assert(nvme_ctrlr != NULL);
    6533             : 
    6534           9 :         pthread_mutex_lock(&nvme_ctrlr->mutex);
    6535             : 
    6536           9 :         rc = bdev_nvme_check_secondary_trid(nvme_ctrlr, new_ctrlr, trid);
    6537           9 :         if (rc != 0) {
    6538           0 :                 goto exit;
    6539             :         }
    6540             : 
    6541           9 :         rc = bdev_nvme_check_secondary_namespace(nvme_ctrlr, new_ctrlr);
    6542           9 :         if (rc != 0) {
    6543           0 :                 goto exit;
    6544             :         }
    6545             : 
    6546           9 :         rc = _bdev_nvme_add_secondary_trid(nvme_ctrlr, trid);
    6547             : 
    6548             : exit:
    6549           9 :         pthread_mutex_unlock(&nvme_ctrlr->mutex);
    6550             : 
    6551           9 :         spdk_nvme_detach(new_ctrlr);
    6552             : 
    6553           9 :         return rc;
    6554             : }
    6555             : 
    6556             : static void
    6557          48 : connect_attach_cb(void *cb_ctx, const struct spdk_nvme_transport_id *trid,
    6558             :                   struct spdk_nvme_ctrlr *ctrlr, const struct spdk_nvme_ctrlr_opts *opts)
    6559             : {
    6560          48 :         struct spdk_nvme_ctrlr_opts *user_opts = cb_ctx;
    6561             :         struct nvme_async_probe_ctx *ctx;
    6562             :         int rc;
    6563             : 
    6564          48 :         ctx = SPDK_CONTAINEROF(user_opts, struct nvme_async_probe_ctx, drv_opts);
    6565          48 :         ctx->ctrlr_attached = true;
    6566             : 
    6567          48 :         rc = nvme_ctrlr_create(ctrlr, ctx->base_name, &ctx->trid, ctx);
    6568          48 :         if (rc != 0) {
    6569           1 :                 ctx->reported_bdevs = 0;
    6570           1 :                 populate_namespaces_cb(ctx, rc);
    6571           1 :         }
    6572          48 : }
    6573             : 
    6574             : 
    6575             : static void
    6576           4 : connect_set_failover_cb(void *cb_ctx, const struct spdk_nvme_transport_id *trid,
    6577             :                         struct spdk_nvme_ctrlr *ctrlr,
    6578             :                         const struct spdk_nvme_ctrlr_opts *opts)
    6579             : {
    6580           4 :         struct spdk_nvme_ctrlr_opts *user_opts = cb_ctx;
    6581             :         struct nvme_ctrlr *nvme_ctrlr;
    6582             :         struct nvme_async_probe_ctx *ctx;
    6583             :         int rc;
    6584             : 
    6585           4 :         ctx = SPDK_CONTAINEROF(user_opts, struct nvme_async_probe_ctx, drv_opts);
    6586           4 :         ctx->ctrlr_attached = true;
    6587             : 
    6588           4 :         nvme_ctrlr = nvme_ctrlr_get_by_name(ctx->base_name);
    6589           4 :         if (nvme_ctrlr) {
    6590           4 :                 rc = bdev_nvme_add_secondary_trid(nvme_ctrlr, ctrlr, &ctx->trid);
    6591           4 :         } else {
    6592           0 :                 rc = -ENODEV;
    6593             :         }
    6594             : 
    6595           4 :         ctx->reported_bdevs = 0;
    6596           4 :         populate_namespaces_cb(ctx, rc);
    6597           4 : }
    6598             : 
    6599             : static int
    6600          53 : bdev_nvme_async_poll(void *arg)
    6601             : {
    6602          53 :         struct nvme_async_probe_ctx     *ctx = arg;
    6603             :         int                             rc;
    6604             : 
    6605          53 :         rc = spdk_nvme_probe_poll_async(ctx->probe_ctx);
    6606          53 :         if (spdk_unlikely(rc != -EAGAIN)) {
    6607          53 :                 ctx->probe_done = true;
    6608          53 :                 spdk_poller_unregister(&ctx->poller);
    6609          53 :                 if (!ctx->ctrlr_attached) {
    6610             :                         /* The probe is done, but no controller was attached.
    6611             :                          * That means we had a failure, so report -EIO back to
    6612             :                          * the caller (usually the RPC). populate_namespaces_cb()
    6613             :                          * will take care of freeing the nvme_async_probe_ctx.
    6614             :                          */
    6615           1 :                         ctx->reported_bdevs = 0;
    6616           1 :                         populate_namespaces_cb(ctx, -EIO);
    6617          53 :                 } else if (ctx->namespaces_populated) {
    6618             :                         /* The namespaces for the attached controller were all
    6619             :                          * populated and the response was already sent to the
    6620             :                          * caller (usually the RPC).  So free the context here.
    6621             :                          */
    6622          21 :                         free_nvme_async_probe_ctx(ctx);
    6623          21 :                 }
    6624          53 :         }
    6625             : 
    6626          53 :         return SPDK_POLLER_BUSY;
    6627             : }
    6628             : 
    6629             : static bool
    6630          72 : bdev_nvme_check_io_error_resiliency_params(int32_t ctrlr_loss_timeout_sec,
    6631             :                 uint32_t reconnect_delay_sec,
    6632             :                 uint32_t fast_io_fail_timeout_sec)
    6633             : {
    6634          72 :         if (ctrlr_loss_timeout_sec < -1) {
    6635           1 :                 SPDK_ERRLOG("ctrlr_loss_timeout_sec can't be less than -1.\n");
    6636           1 :                 return false;
    6637          71 :         } else if (ctrlr_loss_timeout_sec == -1) {
    6638          14 :                 if (reconnect_delay_sec == 0) {
    6639           1 :                         SPDK_ERRLOG("reconnect_delay_sec can't be 0 if ctrlr_loss_timeout_sec is not 0.\n");
    6640           1 :                         return false;
    6641          13 :                 } else if (fast_io_fail_timeout_sec != 0 &&
    6642           3 :                            fast_io_fail_timeout_sec < reconnect_delay_sec) {
    6643           1 :                         SPDK_ERRLOG("reconnect_delay_sec can't be more than fast_io-fail_timeout_sec.\n");
    6644           1 :                         return false;
    6645             :                 }
    6646          69 :         } else if (ctrlr_loss_timeout_sec != 0) {
    6647          11 :                 if (reconnect_delay_sec == 0) {
    6648           1 :                         SPDK_ERRLOG("reconnect_delay_sec can't be 0 if ctrlr_loss_timeout_sec is not 0.\n");
    6649           1 :                         return false;
    6650          10 :                 } else if (reconnect_delay_sec > (uint32_t)ctrlr_loss_timeout_sec) {
    6651           1 :                         SPDK_ERRLOG("reconnect_delay_sec can't be more than ctrlr_loss_timeout_sec.\n");
    6652           1 :                         return false;
    6653           9 :                 } else if (fast_io_fail_timeout_sec != 0) {
    6654           6 :                         if (fast_io_fail_timeout_sec < reconnect_delay_sec) {
    6655           1 :                                 SPDK_ERRLOG("reconnect_delay_sec can't be more than fast_io_fail_timeout_sec.\n");
    6656           1 :                                 return false;
    6657           5 :                         } else if (fast_io_fail_timeout_sec > (uint32_t)ctrlr_loss_timeout_sec) {
    6658           1 :                                 SPDK_ERRLOG("fast_io_fail_timeout_sec can't be more than ctrlr_loss_timeout_sec.\n");
    6659           1 :                                 return false;
    6660             :                         }
    6661           4 :                 }
    6662          53 :         } else if (reconnect_delay_sec != 0 || fast_io_fail_timeout_sec != 0) {
    6663           2 :                 SPDK_ERRLOG("Both reconnect_delay_sec and fast_io_fail_timeout_sec must be 0 if ctrlr_loss_timeout_sec is 0.\n");
    6664           2 :                 return false;
    6665             :         }
    6666             : 
    6667          63 :         return true;
    6668          72 : }
    6669             : 
    6670             : int
    6671          53 : spdk_bdev_nvme_create(struct spdk_nvme_transport_id *trid,
    6672             :                       const char *base_name,
    6673             :                       const char **names,
    6674             :                       uint32_t count,
    6675             :                       spdk_bdev_nvme_create_cb cb_fn,
    6676             :                       void *cb_ctx,
    6677             :                       struct spdk_nvme_ctrlr_opts *drv_opts,
    6678             :                       struct spdk_bdev_nvme_ctrlr_opts *bdev_opts)
    6679             : {
    6680             :         struct nvme_probe_skip_entry *entry, *tmp;
    6681             :         struct nvme_async_probe_ctx *ctx;
    6682             :         spdk_nvme_attach_cb attach_cb;
    6683             :         struct nvme_ctrlr *nvme_ctrlr;
    6684             :         int len;
    6685             : 
    6686             :         /* TODO expand this check to include both the host and target TRIDs.
    6687             :          * Only if both are the same should we fail.
    6688             :          */
    6689          53 :         if (nvme_ctrlr_get(trid, drv_opts->hostnqn) != NULL) {
    6690           0 :                 SPDK_ERRLOG("A controller with the provided trid (traddr: %s, hostnqn: %s) "
    6691             :                             "already exists.\n", trid->traddr, drv_opts->hostnqn);
    6692           0 :                 return -EEXIST;
    6693             :         }
    6694             : 
    6695          53 :         len = strnlen(base_name, SPDK_CONTROLLER_NAME_MAX);
    6696             : 
    6697          53 :         if (len == 0 || len == SPDK_CONTROLLER_NAME_MAX) {
    6698           0 :                 SPDK_ERRLOG("controller name must be between 1 and %d characters\n", SPDK_CONTROLLER_NAME_MAX - 1);
    6699           0 :                 return -EINVAL;
    6700             :         }
    6701             : 
    6702          53 :         if (bdev_opts != NULL &&
    6703         106 :             !bdev_nvme_check_io_error_resiliency_params(bdev_opts->ctrlr_loss_timeout_sec,
    6704          53 :                             bdev_opts->reconnect_delay_sec,
    6705          53 :                             bdev_opts->fast_io_fail_timeout_sec)) {
    6706           0 :                 return -EINVAL;
    6707             :         }
    6708             : 
    6709          53 :         ctx = calloc(1, sizeof(*ctx));
    6710          53 :         if (!ctx) {
    6711           0 :                 return -ENOMEM;
    6712             :         }
    6713          53 :         ctx->base_name = strdup(base_name);
    6714          53 :         if (!ctx->base_name) {
    6715           0 :                 free(ctx);
    6716           0 :                 return -ENOMEM;
    6717             :         }
    6718          53 :         ctx->names = names;
    6719          53 :         ctx->max_bdevs = count;
    6720          53 :         ctx->cb_fn = cb_fn;
    6721          53 :         ctx->cb_ctx = cb_ctx;
    6722          53 :         ctx->trid = *trid;
    6723             : 
    6724          53 :         if (bdev_opts) {
    6725          53 :                 memcpy(&ctx->bdev_opts, bdev_opts, sizeof(*bdev_opts));
    6726          53 :         } else {
    6727           0 :                 spdk_bdev_nvme_get_default_ctrlr_opts(&ctx->bdev_opts);
    6728             :         }
    6729             : 
    6730          53 :         if (trid->trtype == SPDK_NVME_TRANSPORT_PCIE) {
    6731           0 :                 TAILQ_FOREACH_SAFE(entry, &g_skipped_nvme_ctrlrs, tailq, tmp) {
    6732           0 :                         if (spdk_nvme_transport_id_compare(trid, &entry->trid) == 0) {
    6733           0 :                                 TAILQ_REMOVE(&g_skipped_nvme_ctrlrs, entry, tailq);
    6734           0 :                                 free(entry);
    6735           0 :                                 break;
    6736             :                         }
    6737           0 :                 }
    6738           0 :         }
    6739             : 
    6740          53 :         memcpy(&ctx->drv_opts, drv_opts, sizeof(*drv_opts));
    6741          53 :         ctx->drv_opts.transport_retry_count = g_opts.transport_retry_count;
    6742          53 :         ctx->drv_opts.transport_ack_timeout = g_opts.transport_ack_timeout;
    6743          53 :         ctx->drv_opts.keep_alive_timeout_ms = g_opts.keep_alive_timeout_ms;
    6744          53 :         ctx->drv_opts.disable_read_ana_log_page = true;
    6745          53 :         ctx->drv_opts.transport_tos = g_opts.transport_tos;
    6746             : 
    6747          53 :         if (spdk_interrupt_mode_is_enabled()) {
    6748           0 :                 if (trid->trtype == SPDK_NVME_TRANSPORT_PCIE) {
    6749           0 :                         ctx->drv_opts.enable_interrupts = true;
    6750           0 :                 } else {
    6751           0 :                         SPDK_ERRLOG("Interrupt mode is only supported with PCIe transport\n");
    6752           0 :                         free_nvme_async_probe_ctx(ctx);
    6753           0 :                         return -ENOTSUP;
    6754             :                 }
    6755           0 :         }
    6756             : 
    6757          53 :         if (ctx->bdev_opts.psk != NULL) {
    6758           0 :                 ctx->drv_opts.tls_psk = spdk_keyring_get_key(ctx->bdev_opts.psk);
    6759           0 :                 if (ctx->drv_opts.tls_psk == NULL) {
    6760           0 :                         SPDK_ERRLOG("Could not load PSK: %s\n", ctx->bdev_opts.psk);
    6761           0 :                         free_nvme_async_probe_ctx(ctx);
    6762           0 :                         return -ENOKEY;
    6763             :                 }
    6764           0 :         }
    6765             : 
    6766          53 :         if (ctx->bdev_opts.dhchap_key != NULL) {
    6767           0 :                 ctx->drv_opts.dhchap_key = spdk_keyring_get_key(ctx->bdev_opts.dhchap_key);
    6768           0 :                 if (ctx->drv_opts.dhchap_key == NULL) {
    6769           0 :                         SPDK_ERRLOG("Could not load DH-HMAC-CHAP key: %s\n",
    6770             :                                     ctx->bdev_opts.dhchap_key);
    6771           0 :                         free_nvme_async_probe_ctx(ctx);
    6772           0 :                         return -ENOKEY;
    6773             :                 }
    6774             : 
    6775           0 :                 ctx->drv_opts.dhchap_digests = g_opts.dhchap_digests;
    6776           0 :                 ctx->drv_opts.dhchap_dhgroups = g_opts.dhchap_dhgroups;
    6777           0 :         }
    6778          53 :         if (ctx->bdev_opts.dhchap_ctrlr_key != NULL) {
    6779           0 :                 ctx->drv_opts.dhchap_ctrlr_key =
    6780           0 :                         spdk_keyring_get_key(ctx->bdev_opts.dhchap_ctrlr_key);
    6781           0 :                 if (ctx->drv_opts.dhchap_ctrlr_key == NULL) {
    6782           0 :                         SPDK_ERRLOG("Could not load DH-HMAC-CHAP controller key: %s\n",
    6783             :                                     ctx->bdev_opts.dhchap_ctrlr_key);
    6784           0 :                         free_nvme_async_probe_ctx(ctx);
    6785           0 :                         return -ENOKEY;
    6786             :                 }
    6787           0 :         }
    6788             : 
    6789          53 :         if (nvme_bdev_ctrlr_get_by_name(base_name) == NULL || ctx->bdev_opts.multipath) {
    6790          49 :                 attach_cb = connect_attach_cb;
    6791          49 :         } else {
    6792           4 :                 attach_cb = connect_set_failover_cb;
    6793             :         }
    6794             : 
    6795          53 :         nvme_ctrlr = nvme_ctrlr_get_by_name(ctx->base_name);
    6796          53 :         if (nvme_ctrlr  && nvme_ctrlr->opts.multipath != ctx->bdev_opts.multipath) {
    6797             :                 /* All controllers with the same name must be configured the same
    6798             :                  * way, either for multipath or failover. If the configuration doesn't
    6799             :                  * match - report error.
    6800             :                  */
    6801           0 :                 free_nvme_async_probe_ctx(ctx);
    6802           0 :                 return -EINVAL;
    6803             :         }
    6804             : 
    6805          53 :         ctx->probe_ctx = spdk_nvme_connect_async(trid, &ctx->drv_opts, attach_cb);
    6806          53 :         if (ctx->probe_ctx == NULL) {
    6807           0 :                 SPDK_ERRLOG("No controller was found with provided trid (traddr: %s)\n", trid->traddr);
    6808           0 :                 free_nvme_async_probe_ctx(ctx);
    6809           0 :                 return -ENODEV;
    6810             :         }
    6811          53 :         ctx->poller = SPDK_POLLER_REGISTER(bdev_nvme_async_poll, ctx, 1000);
    6812             : 
    6813          53 :         return 0;
    6814          53 : }
    6815             : 
    6816             : struct bdev_nvme_delete_ctx {
    6817             :         char                        *name;
    6818             :         struct nvme_path_id         path_id;
    6819             :         bdev_nvme_delete_done_fn    delete_done;
    6820             :         void                        *delete_done_ctx;
    6821             :         uint64_t                    timeout_ticks;
    6822             :         struct spdk_poller          *poller;
    6823             : };
    6824             : 
    6825             : static void
    6826           2 : free_bdev_nvme_delete_ctx(struct bdev_nvme_delete_ctx *ctx)
    6827             : {
    6828           2 :         if (ctx != NULL) {
    6829           1 :                 free(ctx->name);
    6830           1 :                 free(ctx);
    6831           1 :         }
    6832           2 : }
    6833             : 
    6834             : static bool
    6835          76 : nvme_path_id_compare(struct nvme_path_id *p, const struct nvme_path_id *path_id)
    6836             : {
    6837          76 :         if (path_id->trid.trtype != 0) {
    6838          21 :                 if (path_id->trid.trtype == SPDK_NVME_TRANSPORT_CUSTOM) {
    6839           0 :                         if (strcasecmp(path_id->trid.trstring, p->trid.trstring) != 0) {
    6840           0 :                                 return false;
    6841             :                         }
    6842           0 :                 } else {
    6843          21 :                         if (path_id->trid.trtype != p->trid.trtype) {
    6844           0 :                                 return false;
    6845             :                         }
    6846             :                 }
    6847          21 :         }
    6848             : 
    6849          76 :         if (!spdk_mem_all_zero(path_id->trid.traddr, sizeof(path_id->trid.traddr))) {
    6850          21 :                 if (strcasecmp(path_id->trid.traddr, p->trid.traddr) != 0) {
    6851          11 :                         return false;
    6852             :                 }
    6853          10 :         }
    6854             : 
    6855          65 :         if (path_id->trid.adrfam != 0) {
    6856           0 :                 if (path_id->trid.adrfam != p->trid.adrfam) {
    6857           0 :                         return false;
    6858             :                 }
    6859           0 :         }
    6860             : 
    6861          65 :         if (!spdk_mem_all_zero(path_id->trid.trsvcid, sizeof(path_id->trid.trsvcid))) {
    6862          10 :                 if (strcasecmp(path_id->trid.trsvcid, p->trid.trsvcid) != 0) {
    6863           0 :                         return false;
    6864             :                 }
    6865          10 :         }
    6866             : 
    6867          65 :         if (!spdk_mem_all_zero(path_id->trid.subnqn, sizeof(path_id->trid.subnqn))) {
    6868          10 :                 if (strcmp(path_id->trid.subnqn, p->trid.subnqn) != 0) {
    6869           0 :                         return false;
    6870             :                 }
    6871          10 :         }
    6872             : 
    6873          65 :         if (!spdk_mem_all_zero(path_id->hostid.hostaddr, sizeof(path_id->hostid.hostaddr))) {
    6874           0 :                 if (strcmp(path_id->hostid.hostaddr, p->hostid.hostaddr) != 0) {
    6875           0 :                         return false;
    6876             :                 }
    6877           0 :         }
    6878             : 
    6879          65 :         if (!spdk_mem_all_zero(path_id->hostid.hostsvcid, sizeof(path_id->hostid.hostsvcid))) {
    6880           0 :                 if (strcmp(path_id->hostid.hostsvcid, p->hostid.hostsvcid) != 0) {
    6881           0 :                         return false;
    6882             :                 }
    6883           0 :         }
    6884             : 
    6885          65 :         return true;
    6886          76 : }
    6887             : 
    6888             : static bool
    6889           2 : nvme_path_id_exists(const char *name, const struct nvme_path_id *path_id)
    6890             : {
    6891             :         struct nvme_bdev_ctrlr  *nbdev_ctrlr;
    6892             :         struct nvme_ctrlr       *ctrlr;
    6893             :         struct nvme_path_id     *p;
    6894             : 
    6895           2 :         pthread_mutex_lock(&g_bdev_nvme_mutex);
    6896           2 :         nbdev_ctrlr = nvme_bdev_ctrlr_get_by_name(name);
    6897           2 :         if (!nbdev_ctrlr) {
    6898           1 :                 pthread_mutex_unlock(&g_bdev_nvme_mutex);
    6899           1 :                 return false;
    6900             :         }
    6901             : 
    6902           1 :         TAILQ_FOREACH(ctrlr, &nbdev_ctrlr->ctrlrs, tailq) {
    6903           1 :                 pthread_mutex_lock(&ctrlr->mutex);
    6904           1 :                 TAILQ_FOREACH(p, &ctrlr->trids, link) {
    6905           1 :                         if (nvme_path_id_compare(p, path_id)) {
    6906           1 :                                 pthread_mutex_unlock(&ctrlr->mutex);
    6907           1 :                                 pthread_mutex_unlock(&g_bdev_nvme_mutex);
    6908           1 :                                 return true;
    6909             :                         }
    6910           0 :                 }
    6911           0 :                 pthread_mutex_unlock(&ctrlr->mutex);
    6912           0 :         }
    6913           0 :         pthread_mutex_unlock(&g_bdev_nvme_mutex);
    6914             : 
    6915           0 :         return false;
    6916           2 : }
    6917             : 
    6918             : static int
    6919           2 : bdev_nvme_delete_complete_poll(void *arg)
    6920             : {
    6921           2 :         struct bdev_nvme_delete_ctx     *ctx = arg;
    6922           2 :         int                             rc = 0;
    6923             : 
    6924           2 :         if (nvme_path_id_exists(ctx->name, &ctx->path_id)) {
    6925           1 :                 if (ctx->timeout_ticks > spdk_get_ticks()) {
    6926           1 :                         return SPDK_POLLER_BUSY;
    6927             :                 }
    6928             : 
    6929           0 :                 SPDK_ERRLOG("NVMe path '%s' still exists after delete\n", ctx->name);
    6930           0 :                 rc = -ETIMEDOUT;
    6931           0 :         }
    6932             : 
    6933           1 :         spdk_poller_unregister(&ctx->poller);
    6934             : 
    6935           1 :         ctx->delete_done(ctx->delete_done_ctx, rc);
    6936           1 :         free_bdev_nvme_delete_ctx(ctx);
    6937             : 
    6938           1 :         return SPDK_POLLER_BUSY;
    6939           2 : }
    6940             : 
    6941             : static int
    6942          65 : _bdev_nvme_delete(struct nvme_ctrlr *nvme_ctrlr, const struct nvme_path_id *path_id)
    6943             : {
    6944             :         struct nvme_path_id     *p, *t;
    6945             :         spdk_msg_fn             msg_fn;
    6946          65 :         int                     rc = -ENXIO;
    6947             : 
    6948          65 :         pthread_mutex_lock(&nvme_ctrlr->mutex);
    6949             : 
    6950          75 :         TAILQ_FOREACH_REVERSE_SAFE(p, &nvme_ctrlr->trids, nvme_paths, link, t) {
    6951          75 :                 if (p == TAILQ_FIRST(&nvme_ctrlr->trids)) {
    6952          65 :                         break;
    6953             :                 }
    6954             : 
    6955          10 :                 if (!nvme_path_id_compare(p, path_id)) {
    6956           3 :                         continue;
    6957             :                 }
    6958             : 
    6959             :                 /* We are not using the specified path. */
    6960           7 :                 TAILQ_REMOVE(&nvme_ctrlr->trids, p, link);
    6961           7 :                 free(p);
    6962           7 :                 rc = 0;
    6963           7 :         }
    6964             : 
    6965          65 :         if (p == NULL || !nvme_path_id_compare(p, path_id)) {
    6966           8 :                 pthread_mutex_unlock(&nvme_ctrlr->mutex);
    6967           8 :                 return rc;
    6968             :         }
    6969             : 
    6970             :         /* If we made it here, then this path is a match! Now we need to remove it. */
    6971             : 
    6972             :         /* This is the active path in use right now. The active path is always the first in the list. */
    6973          57 :         assert(p == nvme_ctrlr->active_path_id);
    6974             : 
    6975          57 :         if (!TAILQ_NEXT(p, link)) {
    6976             :                 /* The current path is the only path. */
    6977          56 :                 msg_fn = _nvme_ctrlr_destruct;
    6978          56 :                 rc = bdev_nvme_delete_ctrlr_unsafe(nvme_ctrlr, false);
    6979          56 :         } else {
    6980             :                 /* There is an alternative path. */
    6981           1 :                 msg_fn = _bdev_nvme_reset_ctrlr;
    6982           1 :                 rc = bdev_nvme_failover_ctrlr_unsafe(nvme_ctrlr, true);
    6983             :         }
    6984             : 
    6985          57 :         pthread_mutex_unlock(&nvme_ctrlr->mutex);
    6986             : 
    6987          57 :         if (rc == 0) {
    6988          57 :                 spdk_thread_send_msg(nvme_ctrlr->thread, msg_fn, nvme_ctrlr);
    6989          57 :         } else if (rc == -EALREADY) {
    6990           0 :                 rc = 0;
    6991           0 :         }
    6992             : 
    6993          57 :         return rc;
    6994          65 : }
    6995             : 
    6996             : int
    6997          50 : bdev_nvme_delete(const char *name, const struct nvme_path_id *path_id,
    6998             :                  bdev_nvme_delete_done_fn delete_done, void *delete_done_ctx)
    6999             : {
    7000             :         struct nvme_bdev_ctrlr          *nbdev_ctrlr;
    7001             :         struct nvme_ctrlr               *nvme_ctrlr, *tmp_nvme_ctrlr;
    7002          50 :         struct bdev_nvme_delete_ctx     *ctx = NULL;
    7003          50 :         int                             rc = -ENXIO, _rc;
    7004             : 
    7005          50 :         if (name == NULL || path_id == NULL) {
    7006           0 :                 rc = -EINVAL;
    7007           0 :                 goto exit;
    7008             :         }
    7009             : 
    7010          50 :         pthread_mutex_lock(&g_bdev_nvme_mutex);
    7011             : 
    7012          50 :         nbdev_ctrlr = nvme_bdev_ctrlr_get_by_name(name);
    7013          50 :         if (nbdev_ctrlr == NULL) {
    7014           0 :                 pthread_mutex_unlock(&g_bdev_nvme_mutex);
    7015             : 
    7016           0 :                 SPDK_ERRLOG("Failed to find NVMe bdev controller\n");
    7017           0 :                 rc = -ENODEV;
    7018           0 :                 goto exit;
    7019             :         }
    7020             : 
    7021         115 :         TAILQ_FOREACH_SAFE(nvme_ctrlr, &nbdev_ctrlr->ctrlrs, tailq, tmp_nvme_ctrlr) {
    7022          65 :                 _rc = _bdev_nvme_delete(nvme_ctrlr, path_id);
    7023          65 :                 if (_rc < 0 && _rc != -ENXIO) {
    7024           0 :                         pthread_mutex_unlock(&g_bdev_nvme_mutex);
    7025           0 :                         rc = _rc;
    7026           0 :                         goto exit;
    7027          65 :                 } else if (_rc == 0) {
    7028             :                         /* We traverse all remaining nvme_ctrlrs even if one nvme_ctrlr
    7029             :                          * was deleted successfully. To remember the successful deletion,
    7030             :                          * overwrite rc only if _rc is zero.
    7031             :                          */
    7032          59 :                         rc = 0;
    7033          59 :                 }
    7034          65 :         }
    7035             : 
    7036          50 :         pthread_mutex_unlock(&g_bdev_nvme_mutex);
    7037             : 
    7038          50 :         if (rc != 0 || delete_done == NULL) {
    7039          49 :                 goto exit;
    7040             :         }
    7041             : 
    7042           1 :         ctx = calloc(1, sizeof(*ctx));
    7043           1 :         if (ctx == NULL) {
    7044           0 :                 SPDK_ERRLOG("Failed to allocate context for bdev_nvme_delete\n");
    7045           0 :                 rc = -ENOMEM;
    7046           0 :                 goto exit;
    7047             :         }
    7048             : 
    7049           1 :         ctx->name = strdup(name);
    7050           1 :         if (ctx->name == NULL) {
    7051           0 :                 SPDK_ERRLOG("Failed to copy controller name for deletion\n");
    7052           0 :                 rc = -ENOMEM;
    7053           0 :                 goto exit;
    7054             :         }
    7055             : 
    7056           1 :         ctx->delete_done = delete_done;
    7057           1 :         ctx->delete_done_ctx = delete_done_ctx;
    7058           1 :         ctx->path_id = *path_id;
    7059           1 :         ctx->timeout_ticks = spdk_get_ticks() + 10 * spdk_get_ticks_hz();
    7060           1 :         ctx->poller = SPDK_POLLER_REGISTER(bdev_nvme_delete_complete_poll, ctx, 1000);
    7061           1 :         if (ctx->poller == NULL) {
    7062           0 :                 SPDK_ERRLOG("Failed to register bdev_nvme_delete poller\n");
    7063           0 :                 rc = -ENOMEM;
    7064           0 :                 goto exit;
    7065             :         }
    7066             : 
    7067             : exit:
    7068          50 :         if (rc != 0) {
    7069           1 :                 free_bdev_nvme_delete_ctx(ctx);
    7070           1 :         }
    7071             : 
    7072          50 :         return rc;
    7073             : }
    7074             : 
    7075             : #define DISCOVERY_INFOLOG(ctx, format, ...) \
    7076             :         SPDK_INFOLOG(bdev_nvme, "Discovery[%s:%s] " format, ctx->trid.traddr, ctx->trid.trsvcid, ##__VA_ARGS__);
    7077             : 
    7078             : #define DISCOVERY_ERRLOG(ctx, format, ...) \
    7079             :         SPDK_ERRLOG("Discovery[%s:%s] " format, ctx->trid.traddr, ctx->trid.trsvcid, ##__VA_ARGS__);
    7080             : 
    7081             : struct discovery_entry_ctx {
    7082             :         char                                            name[128];
    7083             :         struct spdk_nvme_transport_id                   trid;
    7084             :         struct spdk_nvme_ctrlr_opts                     drv_opts;
    7085             :         struct spdk_nvmf_discovery_log_page_entry       entry;
    7086             :         TAILQ_ENTRY(discovery_entry_ctx)                tailq;
    7087             :         struct discovery_ctx                            *ctx;
    7088             : };
    7089             : 
    7090             : struct discovery_ctx {
    7091             :         char                                    *name;
    7092             :         spdk_bdev_nvme_start_discovery_fn       start_cb_fn;
    7093             :         spdk_bdev_nvme_stop_discovery_fn        stop_cb_fn;
    7094             :         void                                    *cb_ctx;
    7095             :         struct spdk_nvme_probe_ctx              *probe_ctx;
    7096             :         struct spdk_nvme_detach_ctx             *detach_ctx;
    7097             :         struct spdk_nvme_ctrlr                  *ctrlr;
    7098             :         struct spdk_nvme_transport_id           trid;
    7099             :         struct discovery_entry_ctx              *entry_ctx_in_use;
    7100             :         struct spdk_poller                      *poller;
    7101             :         struct spdk_nvme_ctrlr_opts             drv_opts;
    7102             :         struct spdk_bdev_nvme_ctrlr_opts        bdev_opts;
    7103             :         struct spdk_nvmf_discovery_log_page     *log_page;
    7104             :         TAILQ_ENTRY(discovery_ctx)              tailq;
    7105             :         TAILQ_HEAD(, discovery_entry_ctx)       nvm_entry_ctxs;
    7106             :         TAILQ_HEAD(, discovery_entry_ctx)       discovery_entry_ctxs;
    7107             :         int                                     rc;
    7108             :         bool                                    wait_for_attach;
    7109             :         uint64_t                                timeout_ticks;
    7110             :         /* Denotes that the discovery service is being started. We're waiting
    7111             :          * for the initial connection to the discovery controller to be
    7112             :          * established and attach discovered NVM ctrlrs.
    7113             :          */
    7114             :         bool                                    initializing;
    7115             :         /* Denotes if a discovery is currently in progress for this context.
    7116             :          * That includes connecting to newly discovered subsystems.  Used to
    7117             :          * ensure we do not start a new discovery until an existing one is
    7118             :          * complete.
    7119             :          */
    7120             :         bool                                    in_progress;
    7121             : 
    7122             :         /* Denotes if another discovery is needed after the one in progress
    7123             :          * completes.  Set when we receive an AER completion while a discovery
    7124             :          * is already in progress.
    7125             :          */
    7126             :         bool                                    pending;
    7127             : 
    7128             :         /* Signal to the discovery context poller that it should stop the
    7129             :          * discovery service, including detaching from the current discovery
    7130             :          * controller.
    7131             :          */
    7132             :         bool                                    stop;
    7133             : 
    7134             :         struct spdk_thread                      *calling_thread;
    7135             :         uint32_t                                index;
    7136             :         uint32_t                                attach_in_progress;
    7137             :         char                                    *hostnqn;
    7138             : 
    7139             :         /* Denotes if the discovery service was started by the mdns discovery.
    7140             :          */
    7141             :         bool                                    from_mdns_discovery_service;
    7142             : };
    7143             : 
    7144             : TAILQ_HEAD(discovery_ctxs, discovery_ctx);
    7145             : static struct discovery_ctxs g_discovery_ctxs = TAILQ_HEAD_INITIALIZER(g_discovery_ctxs);
    7146             : 
    7147             : static void get_discovery_log_page(struct discovery_ctx *ctx);
    7148             : 
    7149             : static void
    7150           0 : free_discovery_ctx(struct discovery_ctx *ctx)
    7151             : {
    7152           0 :         free(ctx->log_page);
    7153           0 :         free(ctx->hostnqn);
    7154           0 :         free(ctx->name);
    7155           0 :         free(ctx);
    7156           0 : }
    7157             : 
    7158             : static void
    7159           0 : discovery_complete(struct discovery_ctx *ctx)
    7160             : {
    7161           0 :         ctx->initializing = false;
    7162           0 :         ctx->in_progress = false;
    7163           0 :         if (ctx->pending) {
    7164           0 :                 ctx->pending = false;
    7165           0 :                 get_discovery_log_page(ctx);
    7166           0 :         }
    7167           0 : }
    7168             : 
    7169             : static void
    7170           0 : build_trid_from_log_page_entry(struct spdk_nvme_transport_id *trid,
    7171             :                                struct spdk_nvmf_discovery_log_page_entry *entry)
    7172             : {
    7173             :         char *space;
    7174             : 
    7175           0 :         trid->trtype = entry->trtype;
    7176           0 :         trid->adrfam = entry->adrfam;
    7177           0 :         memcpy(trid->traddr, entry->traddr, sizeof(entry->traddr));
    7178           0 :         memcpy(trid->trsvcid, entry->trsvcid, sizeof(entry->trsvcid));
    7179             :         /* Because the source buffer (entry->subnqn) is longer than trid->subnqn, and
    7180             :          * before call to this function trid->subnqn is zeroed out, we need
    7181             :          * to copy sizeof(trid->subnqn) minus one byte to make sure the last character
    7182             :          * remains 0. Then we can shorten the string (replace ' ' with 0) if required
    7183             :          */
    7184           0 :         memcpy(trid->subnqn, entry->subnqn, sizeof(trid->subnqn) - 1);
    7185             : 
    7186             :         /* We want the traddr, trsvcid and subnqn fields to be NULL-terminated.
    7187             :          * But the log page entries typically pad them with spaces, not zeroes.
    7188             :          * So add a NULL terminator to each of these fields at the appropriate
    7189             :          * location.
    7190             :          */
    7191           0 :         space = strchr(trid->traddr, ' ');
    7192           0 :         if (space) {
    7193           0 :                 *space = 0;
    7194           0 :         }
    7195           0 :         space = strchr(trid->trsvcid, ' ');
    7196           0 :         if (space) {
    7197           0 :                 *space = 0;
    7198           0 :         }
    7199           0 :         space = strchr(trid->subnqn, ' ');
    7200           0 :         if (space) {
    7201           0 :                 *space = 0;
    7202           0 :         }
    7203           0 : }
    7204             : 
    7205             : static void
    7206           0 : _stop_discovery(void *_ctx)
    7207             : {
    7208           0 :         struct discovery_ctx *ctx = _ctx;
    7209             : 
    7210           0 :         if (ctx->attach_in_progress > 0) {
    7211           0 :                 spdk_thread_send_msg(spdk_get_thread(), _stop_discovery, ctx);
    7212           0 :                 return;
    7213             :         }
    7214             : 
    7215           0 :         ctx->stop = true;
    7216             : 
    7217           0 :         while (!TAILQ_EMPTY(&ctx->nvm_entry_ctxs)) {
    7218             :                 struct discovery_entry_ctx *entry_ctx;
    7219           0 :                 struct nvme_path_id path = {};
    7220             : 
    7221           0 :                 entry_ctx = TAILQ_FIRST(&ctx->nvm_entry_ctxs);
    7222           0 :                 path.trid = entry_ctx->trid;
    7223           0 :                 bdev_nvme_delete(entry_ctx->name, &path, NULL, NULL);
    7224           0 :                 TAILQ_REMOVE(&ctx->nvm_entry_ctxs, entry_ctx, tailq);
    7225           0 :                 free(entry_ctx);
    7226             :         }
    7227             : 
    7228           0 :         while (!TAILQ_EMPTY(&ctx->discovery_entry_ctxs)) {
    7229             :                 struct discovery_entry_ctx *entry_ctx;
    7230             : 
    7231           0 :                 entry_ctx = TAILQ_FIRST(&ctx->discovery_entry_ctxs);
    7232           0 :                 TAILQ_REMOVE(&ctx->discovery_entry_ctxs, entry_ctx, tailq);
    7233           0 :                 free(entry_ctx);
    7234             :         }
    7235             : 
    7236           0 :         free(ctx->entry_ctx_in_use);
    7237           0 :         ctx->entry_ctx_in_use = NULL;
    7238           0 : }
    7239             : 
    7240             : static void
    7241           0 : stop_discovery(struct discovery_ctx *ctx, spdk_bdev_nvme_stop_discovery_fn cb_fn, void *cb_ctx)
    7242             : {
    7243           0 :         ctx->stop_cb_fn = cb_fn;
    7244           0 :         ctx->cb_ctx = cb_ctx;
    7245             : 
    7246           0 :         if (ctx->attach_in_progress > 0) {
    7247           0 :                 DISCOVERY_INFOLOG(ctx, "stopping discovery with attach_in_progress: %"PRIu32"\n",
    7248             :                                   ctx->attach_in_progress);
    7249           0 :         }
    7250             : 
    7251           0 :         _stop_discovery(ctx);
    7252           0 : }
    7253             : 
    7254             : static void
    7255           2 : remove_discovery_entry(struct nvme_ctrlr *nvme_ctrlr)
    7256             : {
    7257             :         struct discovery_ctx *d_ctx;
    7258             :         struct nvme_path_id *path_id;
    7259           2 :         struct spdk_nvme_transport_id trid = {};
    7260             :         struct discovery_entry_ctx *entry_ctx, *tmp;
    7261             : 
    7262           2 :         path_id = TAILQ_FIRST(&nvme_ctrlr->trids);
    7263             : 
    7264           2 :         TAILQ_FOREACH(d_ctx, &g_discovery_ctxs, tailq) {
    7265           0 :                 TAILQ_FOREACH_SAFE(entry_ctx, &d_ctx->nvm_entry_ctxs, tailq, tmp) {
    7266           0 :                         build_trid_from_log_page_entry(&trid, &entry_ctx->entry);
    7267           0 :                         if (spdk_nvme_transport_id_compare(&trid, &path_id->trid) != 0) {
    7268           0 :                                 continue;
    7269             :                         }
    7270             : 
    7271           0 :                         TAILQ_REMOVE(&d_ctx->nvm_entry_ctxs, entry_ctx, tailq);
    7272           0 :                         free(entry_ctx);
    7273           0 :                         DISCOVERY_INFOLOG(d_ctx, "Remove discovery entry: %s:%s:%s\n",
    7274             :                                           trid.subnqn, trid.traddr, trid.trsvcid);
    7275             : 
    7276             :                         /* Fail discovery ctrlr to force reattach attempt */
    7277           0 :                         spdk_nvme_ctrlr_fail(d_ctx->ctrlr);
    7278           0 :                 }
    7279           0 :         }
    7280           2 : }
    7281             : 
    7282             : static void
    7283           0 : discovery_remove_controllers(struct discovery_ctx *ctx)
    7284             : {
    7285           0 :         struct spdk_nvmf_discovery_log_page *log_page = ctx->log_page;
    7286             :         struct discovery_entry_ctx *entry_ctx, *tmp;
    7287             :         struct spdk_nvmf_discovery_log_page_entry *new_entry, *old_entry;
    7288           0 :         struct spdk_nvme_transport_id old_trid = {};
    7289             :         uint64_t numrec, i;
    7290             :         bool found;
    7291             : 
    7292           0 :         numrec = from_le64(&log_page->numrec);
    7293           0 :         TAILQ_FOREACH_SAFE(entry_ctx, &ctx->nvm_entry_ctxs, tailq, tmp) {
    7294           0 :                 found = false;
    7295           0 :                 old_entry = &entry_ctx->entry;
    7296           0 :                 build_trid_from_log_page_entry(&old_trid, old_entry);
    7297           0 :                 for (i = 0; i < numrec; i++) {
    7298           0 :                         new_entry = &log_page->entries[i];
    7299           0 :                         if (!memcmp(old_entry, new_entry, sizeof(*old_entry))) {
    7300           0 :                                 DISCOVERY_INFOLOG(ctx, "NVM %s:%s:%s found again\n",
    7301             :                                                   old_trid.subnqn, old_trid.traddr, old_trid.trsvcid);
    7302           0 :                                 found = true;
    7303           0 :                                 break;
    7304             :                         }
    7305           0 :                 }
    7306           0 :                 if (!found) {
    7307           0 :                         struct nvme_path_id path = {};
    7308             : 
    7309           0 :                         DISCOVERY_INFOLOG(ctx, "NVM %s:%s:%s not found\n",
    7310             :                                           old_trid.subnqn, old_trid.traddr, old_trid.trsvcid);
    7311             : 
    7312           0 :                         path.trid = entry_ctx->trid;
    7313           0 :                         bdev_nvme_delete(entry_ctx->name, &path, NULL, NULL);
    7314           0 :                         TAILQ_REMOVE(&ctx->nvm_entry_ctxs, entry_ctx, tailq);
    7315           0 :                         free(entry_ctx);
    7316           0 :                 }
    7317           0 :         }
    7318           0 :         free(log_page);
    7319           0 :         ctx->log_page = NULL;
    7320           0 :         discovery_complete(ctx);
    7321           0 : }
    7322             : 
    7323             : static void
    7324           0 : complete_discovery_start(struct discovery_ctx *ctx, int status)
    7325             : {
    7326           0 :         ctx->timeout_ticks = 0;
    7327           0 :         ctx->rc = status;
    7328           0 :         if (ctx->start_cb_fn) {
    7329           0 :                 ctx->start_cb_fn(ctx->cb_ctx, status);
    7330           0 :                 ctx->start_cb_fn = NULL;
    7331           0 :                 ctx->cb_ctx = NULL;
    7332           0 :         }
    7333           0 : }
    7334             : 
    7335             : static void
    7336           0 : discovery_attach_controller_done(void *cb_ctx, size_t bdev_count, int rc)
    7337             : {
    7338           0 :         struct discovery_entry_ctx *entry_ctx = cb_ctx;
    7339           0 :         struct discovery_ctx *ctx = entry_ctx->ctx;
    7340             : 
    7341           0 :         DISCOVERY_INFOLOG(ctx, "attach %s done\n", entry_ctx->name);
    7342           0 :         ctx->attach_in_progress--;
    7343           0 :         if (ctx->attach_in_progress == 0) {
    7344           0 :                 complete_discovery_start(ctx, ctx->rc);
    7345           0 :                 if (ctx->initializing && ctx->rc != 0) {
    7346           0 :                         DISCOVERY_ERRLOG(ctx, "stopping discovery due to errors: %d\n", ctx->rc);
    7347           0 :                         stop_discovery(ctx, NULL, ctx->cb_ctx);
    7348           0 :                 } else {
    7349           0 :                         discovery_remove_controllers(ctx);
    7350             :                 }
    7351           0 :         }
    7352           0 : }
    7353             : 
    7354             : static struct discovery_entry_ctx *
    7355           0 : create_discovery_entry_ctx(struct discovery_ctx *ctx, struct spdk_nvme_transport_id *trid)
    7356             : {
    7357             :         struct discovery_entry_ctx *new_ctx;
    7358             : 
    7359           0 :         new_ctx = calloc(1, sizeof(*new_ctx));
    7360           0 :         if (new_ctx == NULL) {
    7361           0 :                 DISCOVERY_ERRLOG(ctx, "could not allocate new entry_ctx\n");
    7362           0 :                 return NULL;
    7363             :         }
    7364             : 
    7365           0 :         new_ctx->ctx = ctx;
    7366           0 :         memcpy(&new_ctx->trid, trid, sizeof(*trid));
    7367           0 :         spdk_nvme_ctrlr_get_default_ctrlr_opts(&new_ctx->drv_opts, sizeof(new_ctx->drv_opts));
    7368           0 :         snprintf(new_ctx->drv_opts.hostnqn, sizeof(new_ctx->drv_opts.hostnqn), "%s", ctx->hostnqn);
    7369           0 :         return new_ctx;
    7370           0 : }
    7371             : 
    7372             : static void
    7373           0 : discovery_log_page_cb(void *cb_arg, int rc, const struct spdk_nvme_cpl *cpl,
    7374             :                       struct spdk_nvmf_discovery_log_page *log_page)
    7375             : {
    7376           0 :         struct discovery_ctx *ctx = cb_arg;
    7377             :         struct discovery_entry_ctx *entry_ctx, *tmp;
    7378             :         struct spdk_nvmf_discovery_log_page_entry *new_entry, *old_entry;
    7379             :         uint64_t numrec, i;
    7380             :         bool found;
    7381             : 
    7382           0 :         if (rc || spdk_nvme_cpl_is_error(cpl)) {
    7383           0 :                 DISCOVERY_ERRLOG(ctx, "could not get discovery log page\n");
    7384           0 :                 return;
    7385             :         }
    7386             : 
    7387           0 :         ctx->log_page = log_page;
    7388           0 :         assert(ctx->attach_in_progress == 0);
    7389           0 :         numrec = from_le64(&log_page->numrec);
    7390           0 :         TAILQ_FOREACH_SAFE(entry_ctx, &ctx->discovery_entry_ctxs, tailq, tmp) {
    7391           0 :                 TAILQ_REMOVE(&ctx->discovery_entry_ctxs, entry_ctx, tailq);
    7392           0 :                 free(entry_ctx);
    7393           0 :         }
    7394           0 :         for (i = 0; i < numrec; i++) {
    7395           0 :                 found = false;
    7396           0 :                 new_entry = &log_page->entries[i];
    7397           0 :                 if (new_entry->subtype == SPDK_NVMF_SUBTYPE_DISCOVERY_CURRENT ||
    7398           0 :                     new_entry->subtype == SPDK_NVMF_SUBTYPE_DISCOVERY) {
    7399             :                         struct discovery_entry_ctx *new_ctx;
    7400           0 :                         struct spdk_nvme_transport_id trid = {};
    7401             : 
    7402           0 :                         build_trid_from_log_page_entry(&trid, new_entry);
    7403           0 :                         new_ctx = create_discovery_entry_ctx(ctx, &trid);
    7404           0 :                         if (new_ctx == NULL) {
    7405           0 :                                 DISCOVERY_ERRLOG(ctx, "could not allocate new entry_ctx\n");
    7406           0 :                                 break;
    7407             :                         }
    7408             : 
    7409           0 :                         TAILQ_INSERT_TAIL(&ctx->discovery_entry_ctxs, new_ctx, tailq);
    7410           0 :                         continue;
    7411             :                 }
    7412           0 :                 TAILQ_FOREACH(entry_ctx, &ctx->nvm_entry_ctxs, tailq) {
    7413           0 :                         old_entry = &entry_ctx->entry;
    7414           0 :                         if (!memcmp(new_entry, old_entry, sizeof(*new_entry))) {
    7415           0 :                                 found = true;
    7416           0 :                                 break;
    7417             :                         }
    7418           0 :                 }
    7419           0 :                 if (!found) {
    7420           0 :                         struct discovery_entry_ctx *subnqn_ctx = NULL, *new_ctx;
    7421             :                         struct discovery_ctx *d_ctx;
    7422             : 
    7423           0 :                         TAILQ_FOREACH(d_ctx, &g_discovery_ctxs, tailq) {
    7424           0 :                                 TAILQ_FOREACH(subnqn_ctx, &d_ctx->nvm_entry_ctxs, tailq) {
    7425           0 :                                         if (!memcmp(subnqn_ctx->entry.subnqn, new_entry->subnqn,
    7426             :                                                     sizeof(new_entry->subnqn))) {
    7427           0 :                                                 break;
    7428             :                                         }
    7429           0 :                                 }
    7430           0 :                                 if (subnqn_ctx) {
    7431           0 :                                         break;
    7432             :                                 }
    7433           0 :                         }
    7434             : 
    7435           0 :                         new_ctx = calloc(1, sizeof(*new_ctx));
    7436           0 :                         if (new_ctx == NULL) {
    7437           0 :                                 DISCOVERY_ERRLOG(ctx, "could not allocate new entry_ctx\n");
    7438           0 :                                 break;
    7439             :                         }
    7440             : 
    7441           0 :                         new_ctx->ctx = ctx;
    7442           0 :                         memcpy(&new_ctx->entry, new_entry, sizeof(*new_entry));
    7443           0 :                         build_trid_from_log_page_entry(&new_ctx->trid, new_entry);
    7444           0 :                         if (subnqn_ctx) {
    7445           0 :                                 snprintf(new_ctx->name, sizeof(new_ctx->name), "%s", subnqn_ctx->name);
    7446           0 :                                 DISCOVERY_INFOLOG(ctx, "NVM %s:%s:%s new path for %s\n",
    7447             :                                                   new_ctx->trid.subnqn, new_ctx->trid.traddr, new_ctx->trid.trsvcid,
    7448             :                                                   new_ctx->name);
    7449           0 :                         } else {
    7450           0 :                                 snprintf(new_ctx->name, sizeof(new_ctx->name), "%s%d", ctx->name, ctx->index++);
    7451           0 :                                 DISCOVERY_INFOLOG(ctx, "NVM %s:%s:%s new subsystem %s\n",
    7452             :                                                   new_ctx->trid.subnqn, new_ctx->trid.traddr, new_ctx->trid.trsvcid,
    7453             :                                                   new_ctx->name);
    7454             :                         }
    7455           0 :                         spdk_nvme_ctrlr_get_default_ctrlr_opts(&new_ctx->drv_opts, sizeof(new_ctx->drv_opts));
    7456           0 :                         snprintf(new_ctx->drv_opts.hostnqn, sizeof(new_ctx->drv_opts.hostnqn), "%s", ctx->hostnqn);
    7457           0 :                         rc = spdk_bdev_nvme_create(&new_ctx->trid, new_ctx->name, NULL, 0,
    7458           0 :                                                    discovery_attach_controller_done, new_ctx,
    7459           0 :                                                    &new_ctx->drv_opts, &ctx->bdev_opts);
    7460           0 :                         if (rc == 0) {
    7461           0 :                                 TAILQ_INSERT_TAIL(&ctx->nvm_entry_ctxs, new_ctx, tailq);
    7462           0 :                                 ctx->attach_in_progress++;
    7463           0 :                         } else {
    7464           0 :                                 DISCOVERY_ERRLOG(ctx, "spdk_bdev_nvme_create failed (%s)\n", spdk_strerror(-rc));
    7465             :                         }
    7466           0 :                 }
    7467           0 :         }
    7468             : 
    7469           0 :         if (ctx->attach_in_progress == 0) {
    7470           0 :                 discovery_remove_controllers(ctx);
    7471           0 :         }
    7472           0 : }
    7473             : 
    7474             : static void
    7475           0 : get_discovery_log_page(struct discovery_ctx *ctx)
    7476             : {
    7477             :         int rc;
    7478             : 
    7479           0 :         assert(ctx->in_progress == false);
    7480           0 :         ctx->in_progress = true;
    7481           0 :         rc = spdk_nvme_ctrlr_get_discovery_log_page(ctx->ctrlr, discovery_log_page_cb, ctx);
    7482           0 :         if (rc != 0) {
    7483           0 :                 DISCOVERY_ERRLOG(ctx, "could not get discovery log page\n");
    7484           0 :         }
    7485           0 :         DISCOVERY_INFOLOG(ctx, "sent discovery log page command\n");
    7486           0 : }
    7487             : 
    7488             : static void
    7489           0 : discovery_aer_cb(void *arg, const struct spdk_nvme_cpl *cpl)
    7490             : {
    7491           0 :         struct discovery_ctx *ctx = arg;
    7492           0 :         uint32_t log_page_id = (cpl->cdw0 & 0xFF0000) >> 16;
    7493             : 
    7494           0 :         if (spdk_nvme_cpl_is_error(cpl)) {
    7495           0 :                 DISCOVERY_ERRLOG(ctx, "aer failed\n");
    7496           0 :                 return;
    7497             :         }
    7498             : 
    7499           0 :         if (log_page_id != SPDK_NVME_LOG_DISCOVERY) {
    7500           0 :                 DISCOVERY_ERRLOG(ctx, "unexpected log page 0x%x\n", log_page_id);
    7501           0 :                 return;
    7502             :         }
    7503             : 
    7504           0 :         DISCOVERY_INFOLOG(ctx, "got aer\n");
    7505           0 :         if (ctx->in_progress) {
    7506           0 :                 ctx->pending = true;
    7507           0 :                 return;
    7508             :         }
    7509             : 
    7510           0 :         get_discovery_log_page(ctx);
    7511           0 : }
    7512             : 
    7513             : static void
    7514           0 : discovery_attach_cb(void *cb_ctx, const struct spdk_nvme_transport_id *trid,
    7515             :                     struct spdk_nvme_ctrlr *ctrlr, const struct spdk_nvme_ctrlr_opts *opts)
    7516             : {
    7517           0 :         struct spdk_nvme_ctrlr_opts *user_opts = cb_ctx;
    7518             :         struct discovery_ctx *ctx;
    7519             : 
    7520           0 :         ctx = SPDK_CONTAINEROF(user_opts, struct discovery_ctx, drv_opts);
    7521             : 
    7522           0 :         DISCOVERY_INFOLOG(ctx, "discovery ctrlr attached\n");
    7523           0 :         ctx->probe_ctx = NULL;
    7524           0 :         ctx->ctrlr = ctrlr;
    7525             : 
    7526           0 :         if (ctx->rc != 0) {
    7527           0 :                 DISCOVERY_ERRLOG(ctx, "encountered error while attaching discovery ctrlr: %d\n",
    7528             :                                  ctx->rc);
    7529           0 :                 return;
    7530             :         }
    7531             : 
    7532           0 :         spdk_nvme_ctrlr_register_aer_callback(ctx->ctrlr, discovery_aer_cb, ctx);
    7533           0 : }
    7534             : 
    7535             : static int
    7536           0 : discovery_poller(void *arg)
    7537             : {
    7538           0 :         struct discovery_ctx *ctx = arg;
    7539             :         struct spdk_nvme_transport_id *trid;
    7540             :         int rc;
    7541             : 
    7542           0 :         if (ctx->detach_ctx) {
    7543           0 :                 rc = spdk_nvme_detach_poll_async(ctx->detach_ctx);
    7544           0 :                 if (rc != -EAGAIN) {
    7545           0 :                         ctx->detach_ctx = NULL;
    7546           0 :                         ctx->ctrlr = NULL;
    7547           0 :                 }
    7548           0 :         } else if (ctx->stop) {
    7549           0 :                 if (ctx->ctrlr != NULL) {
    7550           0 :                         rc = spdk_nvme_detach_async(ctx->ctrlr, &ctx->detach_ctx);
    7551           0 :                         if (rc == 0) {
    7552           0 :                                 return SPDK_POLLER_BUSY;
    7553             :                         }
    7554           0 :                         DISCOVERY_ERRLOG(ctx, "could not detach discovery ctrlr\n");
    7555           0 :                 }
    7556           0 :                 spdk_poller_unregister(&ctx->poller);
    7557           0 :                 TAILQ_REMOVE(&g_discovery_ctxs, ctx, tailq);
    7558           0 :                 assert(ctx->start_cb_fn == NULL);
    7559           0 :                 if (ctx->stop_cb_fn != NULL) {
    7560           0 :                         ctx->stop_cb_fn(ctx->cb_ctx);
    7561           0 :                 }
    7562           0 :                 free_discovery_ctx(ctx);
    7563           0 :         } else if (ctx->probe_ctx == NULL && ctx->ctrlr == NULL) {
    7564           0 :                 if (ctx->timeout_ticks != 0 && ctx->timeout_ticks < spdk_get_ticks()) {
    7565           0 :                         DISCOVERY_ERRLOG(ctx, "timed out while attaching discovery ctrlr\n");
    7566           0 :                         assert(ctx->initializing);
    7567           0 :                         spdk_poller_unregister(&ctx->poller);
    7568           0 :                         TAILQ_REMOVE(&g_discovery_ctxs, ctx, tailq);
    7569           0 :                         complete_discovery_start(ctx, -ETIMEDOUT);
    7570           0 :                         stop_discovery(ctx, NULL, NULL);
    7571           0 :                         free_discovery_ctx(ctx);
    7572           0 :                         return SPDK_POLLER_BUSY;
    7573             :                 }
    7574             : 
    7575           0 :                 assert(ctx->entry_ctx_in_use == NULL);
    7576           0 :                 ctx->entry_ctx_in_use = TAILQ_FIRST(&ctx->discovery_entry_ctxs);
    7577           0 :                 TAILQ_REMOVE(&ctx->discovery_entry_ctxs, ctx->entry_ctx_in_use, tailq);
    7578           0 :                 trid = &ctx->entry_ctx_in_use->trid;
    7579             : 
    7580             :                 /* All controllers must be configured explicitely either for multipath or failover.
    7581             :                  * While discovery use multipath mode, we need to set this in bdev options as well.
    7582             :                  */
    7583           0 :                 ctx->bdev_opts.multipath = true;
    7584             : 
    7585           0 :                 ctx->probe_ctx = spdk_nvme_connect_async(trid, &ctx->drv_opts, discovery_attach_cb);
    7586           0 :                 if (ctx->probe_ctx) {
    7587           0 :                         spdk_poller_unregister(&ctx->poller);
    7588           0 :                         ctx->poller = SPDK_POLLER_REGISTER(discovery_poller, ctx, 1000);
    7589           0 :                 } else {
    7590           0 :                         DISCOVERY_ERRLOG(ctx, "could not start discovery connect\n");
    7591           0 :                         TAILQ_INSERT_TAIL(&ctx->discovery_entry_ctxs, ctx->entry_ctx_in_use, tailq);
    7592           0 :                         ctx->entry_ctx_in_use = NULL;
    7593             :                 }
    7594           0 :         } else if (ctx->probe_ctx) {
    7595           0 :                 if (ctx->timeout_ticks != 0 && ctx->timeout_ticks < spdk_get_ticks()) {
    7596           0 :                         DISCOVERY_ERRLOG(ctx, "timed out while attaching discovery ctrlr\n");
    7597           0 :                         complete_discovery_start(ctx, -ETIMEDOUT);
    7598           0 :                         return SPDK_POLLER_BUSY;
    7599             :                 }
    7600             : 
    7601           0 :                 rc = spdk_nvme_probe_poll_async(ctx->probe_ctx);
    7602           0 :                 if (rc != -EAGAIN) {
    7603           0 :                         if (ctx->rc != 0) {
    7604           0 :                                 assert(ctx->initializing);
    7605           0 :                                 stop_discovery(ctx, NULL, ctx->cb_ctx);
    7606           0 :                         } else {
    7607           0 :                                 assert(rc == 0);
    7608           0 :                                 DISCOVERY_INFOLOG(ctx, "discovery ctrlr connected\n");
    7609           0 :                                 ctx->rc = rc;
    7610           0 :                                 get_discovery_log_page(ctx);
    7611             :                         }
    7612           0 :                 }
    7613           0 :         } else {
    7614           0 :                 if (ctx->timeout_ticks != 0 && ctx->timeout_ticks < spdk_get_ticks()) {
    7615           0 :                         DISCOVERY_ERRLOG(ctx, "timed out while attaching NVM ctrlrs\n");
    7616           0 :                         complete_discovery_start(ctx, -ETIMEDOUT);
    7617             :                         /* We need to wait until all NVM ctrlrs are attached before we stop the
    7618             :                          * discovery service to make sure we don't detach a ctrlr that is still
    7619             :                          * being attached.
    7620             :                          */
    7621           0 :                         if (ctx->attach_in_progress == 0) {
    7622           0 :                                 stop_discovery(ctx, NULL, ctx->cb_ctx);
    7623           0 :                                 return SPDK_POLLER_BUSY;
    7624             :                         }
    7625           0 :                 }
    7626             : 
    7627           0 :                 rc = spdk_nvme_ctrlr_process_admin_completions(ctx->ctrlr);
    7628           0 :                 if (rc < 0) {
    7629           0 :                         spdk_poller_unregister(&ctx->poller);
    7630           0 :                         ctx->poller = SPDK_POLLER_REGISTER(discovery_poller, ctx, 1000 * 1000);
    7631           0 :                         TAILQ_INSERT_TAIL(&ctx->discovery_entry_ctxs, ctx->entry_ctx_in_use, tailq);
    7632           0 :                         ctx->entry_ctx_in_use = NULL;
    7633             : 
    7634           0 :                         rc = spdk_nvme_detach_async(ctx->ctrlr, &ctx->detach_ctx);
    7635           0 :                         if (rc != 0) {
    7636           0 :                                 DISCOVERY_ERRLOG(ctx, "could not detach discovery ctrlr\n");
    7637           0 :                                 ctx->ctrlr = NULL;
    7638           0 :                         }
    7639           0 :                 }
    7640             :         }
    7641             : 
    7642           0 :         return SPDK_POLLER_BUSY;
    7643           0 : }
    7644             : 
    7645             : static void
    7646           0 : start_discovery_poller(void *arg)
    7647             : {
    7648           0 :         struct discovery_ctx *ctx = arg;
    7649             : 
    7650           0 :         TAILQ_INSERT_TAIL(&g_discovery_ctxs, ctx, tailq);
    7651           0 :         ctx->poller = SPDK_POLLER_REGISTER(discovery_poller, ctx, 1000 * 1000);
    7652           0 : }
    7653             : 
    7654             : int
    7655           0 : bdev_nvme_start_discovery(struct spdk_nvme_transport_id *trid,
    7656             :                           const char *base_name,
    7657             :                           struct spdk_nvme_ctrlr_opts *drv_opts,
    7658             :                           struct spdk_bdev_nvme_ctrlr_opts *bdev_opts,
    7659             :                           uint64_t attach_timeout,
    7660             :                           bool from_mdns,
    7661             :                           spdk_bdev_nvme_start_discovery_fn cb_fn, void *cb_ctx)
    7662             : {
    7663             :         struct discovery_ctx *ctx;
    7664             :         struct discovery_entry_ctx *discovery_entry_ctx;
    7665             : 
    7666           0 :         snprintf(trid->subnqn, sizeof(trid->subnqn), "%s", SPDK_NVMF_DISCOVERY_NQN);
    7667           0 :         TAILQ_FOREACH(ctx, &g_discovery_ctxs, tailq) {
    7668           0 :                 if (strcmp(ctx->name, base_name) == 0) {
    7669           0 :                         return -EEXIST;
    7670             :                 }
    7671             : 
    7672           0 :                 if (ctx->entry_ctx_in_use != NULL) {
    7673           0 :                         if (!spdk_nvme_transport_id_compare(trid, &ctx->entry_ctx_in_use->trid)) {
    7674           0 :                                 return -EEXIST;
    7675             :                         }
    7676           0 :                 }
    7677             : 
    7678           0 :                 TAILQ_FOREACH(discovery_entry_ctx, &ctx->discovery_entry_ctxs, tailq) {
    7679           0 :                         if (!spdk_nvme_transport_id_compare(trid, &discovery_entry_ctx->trid)) {
    7680           0 :                                 return -EEXIST;
    7681             :                         }
    7682           0 :                 }
    7683           0 :         }
    7684             : 
    7685           0 :         ctx = calloc(1, sizeof(*ctx));
    7686           0 :         if (ctx == NULL) {
    7687           0 :                 return -ENOMEM;
    7688             :         }
    7689             : 
    7690           0 :         ctx->name = strdup(base_name);
    7691           0 :         if (ctx->name == NULL) {
    7692           0 :                 free_discovery_ctx(ctx);
    7693           0 :                 return -ENOMEM;
    7694             :         }
    7695           0 :         memcpy(&ctx->drv_opts, drv_opts, sizeof(*drv_opts));
    7696           0 :         memcpy(&ctx->bdev_opts, bdev_opts, sizeof(*bdev_opts));
    7697           0 :         ctx->from_mdns_discovery_service = from_mdns;
    7698           0 :         ctx->bdev_opts.from_discovery_service = true;
    7699           0 :         ctx->calling_thread = spdk_get_thread();
    7700           0 :         ctx->start_cb_fn = cb_fn;
    7701           0 :         ctx->cb_ctx = cb_ctx;
    7702           0 :         ctx->initializing = true;
    7703           0 :         if (ctx->start_cb_fn) {
    7704             :                 /* We can use this when dumping json to denote if this RPC parameter
    7705             :                  * was specified or not.
    7706             :                  */
    7707           0 :                 ctx->wait_for_attach = true;
    7708           0 :         }
    7709           0 :         if (attach_timeout != 0) {
    7710           0 :                 ctx->timeout_ticks = spdk_get_ticks() + attach_timeout *
    7711           0 :                                      spdk_get_ticks_hz() / 1000ull;
    7712           0 :         }
    7713           0 :         TAILQ_INIT(&ctx->nvm_entry_ctxs);
    7714           0 :         TAILQ_INIT(&ctx->discovery_entry_ctxs);
    7715           0 :         memcpy(&ctx->trid, trid, sizeof(*trid));
    7716             :         /* Even if user did not specify hostnqn, we can still strdup("\0"); */
    7717           0 :         ctx->hostnqn = strdup(ctx->drv_opts.hostnqn);
    7718           0 :         if (ctx->hostnqn == NULL) {
    7719           0 :                 free_discovery_ctx(ctx);
    7720           0 :                 return -ENOMEM;
    7721             :         }
    7722           0 :         discovery_entry_ctx = create_discovery_entry_ctx(ctx, trid);
    7723           0 :         if (discovery_entry_ctx == NULL) {
    7724           0 :                 DISCOVERY_ERRLOG(ctx, "could not allocate new entry_ctx\n");
    7725           0 :                 free_discovery_ctx(ctx);
    7726           0 :                 return -ENOMEM;
    7727             :         }
    7728             : 
    7729           0 :         TAILQ_INSERT_TAIL(&ctx->discovery_entry_ctxs, discovery_entry_ctx, tailq);
    7730           0 :         spdk_thread_send_msg(g_bdev_nvme_init_thread, start_discovery_poller, ctx);
    7731           0 :         return 0;
    7732           0 : }
    7733             : 
    7734             : int
    7735           0 : bdev_nvme_stop_discovery(const char *name, spdk_bdev_nvme_stop_discovery_fn cb_fn, void *cb_ctx)
    7736             : {
    7737             :         struct discovery_ctx *ctx;
    7738             : 
    7739           0 :         TAILQ_FOREACH(ctx, &g_discovery_ctxs, tailq) {
    7740           0 :                 if (strcmp(name, ctx->name) == 0) {
    7741           0 :                         if (ctx->stop) {
    7742           0 :                                 return -EALREADY;
    7743             :                         }
    7744             :                         /* If we're still starting the discovery service and ->rc is non-zero, we're
    7745             :                          * going to stop it as soon as we can
    7746             :                          */
    7747           0 :                         if (ctx->initializing && ctx->rc != 0) {
    7748           0 :                                 return -EALREADY;
    7749             :                         }
    7750           0 :                         stop_discovery(ctx, cb_fn, cb_ctx);
    7751           0 :                         return 0;
    7752             :                 }
    7753           0 :         }
    7754             : 
    7755           0 :         return -ENOENT;
    7756           0 : }
    7757             : 
    7758             : static int
    7759           1 : bdev_nvme_library_init(void)
    7760             : {
    7761           1 :         g_bdev_nvme_init_thread = spdk_get_thread();
    7762             : 
    7763           1 :         spdk_io_device_register(&g_nvme_bdev_ctrlrs, bdev_nvme_create_poll_group_cb,
    7764             :                                 bdev_nvme_destroy_poll_group_cb,
    7765             :                                 sizeof(struct nvme_poll_group),  "nvme_poll_groups");
    7766             : 
    7767           1 :         return 0;
    7768             : }
    7769             : 
    7770             : static void
    7771           1 : bdev_nvme_fini_destruct_ctrlrs(void)
    7772             : {
    7773             :         struct nvme_bdev_ctrlr *nbdev_ctrlr;
    7774             :         struct nvme_ctrlr *nvme_ctrlr;
    7775             : 
    7776           1 :         pthread_mutex_lock(&g_bdev_nvme_mutex);
    7777           1 :         TAILQ_FOREACH(nbdev_ctrlr, &g_nvme_bdev_ctrlrs, tailq) {
    7778           0 :                 TAILQ_FOREACH(nvme_ctrlr, &nbdev_ctrlr->ctrlrs, tailq) {
    7779           0 :                         pthread_mutex_lock(&nvme_ctrlr->mutex);
    7780           0 :                         if (nvme_ctrlr->destruct) {
    7781             :                                 /* This controller's destruction was already started
    7782             :                                  * before the application started shutting down
    7783             :                                  */
    7784           0 :                                 pthread_mutex_unlock(&nvme_ctrlr->mutex);
    7785           0 :                                 continue;
    7786             :                         }
    7787           0 :                         nvme_ctrlr->destruct = true;
    7788           0 :                         pthread_mutex_unlock(&nvme_ctrlr->mutex);
    7789             : 
    7790           0 :                         spdk_thread_send_msg(nvme_ctrlr->thread, _nvme_ctrlr_destruct,
    7791           0 :                                              nvme_ctrlr);
    7792           0 :                 }
    7793           0 :         }
    7794             : 
    7795           1 :         g_bdev_nvme_module_finish = true;
    7796           1 :         if (TAILQ_EMPTY(&g_nvme_bdev_ctrlrs)) {
    7797           1 :                 pthread_mutex_unlock(&g_bdev_nvme_mutex);
    7798           1 :                 spdk_io_device_unregister(&g_nvme_bdev_ctrlrs, NULL);
    7799           1 :                 spdk_bdev_module_fini_done();
    7800           1 :                 return;
    7801             :         }
    7802             : 
    7803           0 :         pthread_mutex_unlock(&g_bdev_nvme_mutex);
    7804           1 : }
    7805             : 
    7806             : static void
    7807           0 : check_discovery_fini(void *arg)
    7808             : {
    7809           0 :         if (TAILQ_EMPTY(&g_discovery_ctxs)) {
    7810           0 :                 bdev_nvme_fini_destruct_ctrlrs();
    7811           0 :         }
    7812           0 : }
    7813             : 
    7814             : static void
    7815           1 : bdev_nvme_library_fini(void)
    7816             : {
    7817             :         struct nvme_probe_skip_entry *entry, *entry_tmp;
    7818             :         struct discovery_ctx *ctx;
    7819             : 
    7820           1 :         spdk_poller_unregister(&g_hotplug_poller);
    7821           1 :         free(g_hotplug_probe_ctx);
    7822           1 :         g_hotplug_probe_ctx = NULL;
    7823             : 
    7824           1 :         TAILQ_FOREACH_SAFE(entry, &g_skipped_nvme_ctrlrs, tailq, entry_tmp) {
    7825           0 :                 TAILQ_REMOVE(&g_skipped_nvme_ctrlrs, entry, tailq);
    7826           0 :                 free(entry);
    7827           0 :         }
    7828             : 
    7829           1 :         assert(spdk_get_thread() == g_bdev_nvme_init_thread);
    7830           1 :         if (TAILQ_EMPTY(&g_discovery_ctxs)) {
    7831           1 :                 bdev_nvme_fini_destruct_ctrlrs();
    7832           1 :         } else {
    7833           0 :                 TAILQ_FOREACH(ctx, &g_discovery_ctxs, tailq) {
    7834           0 :                         stop_discovery(ctx, check_discovery_fini, NULL);
    7835           0 :                 }
    7836             :         }
    7837           1 : }
    7838             : 
    7839             : static void
    7840           0 : bdev_nvme_verify_pi_error(struct nvme_bdev_io *bio)
    7841             : {
    7842           0 :         struct spdk_bdev_io *bdev_io = spdk_bdev_io_from_ctx(bio);
    7843           0 :         struct spdk_bdev *bdev = bdev_io->bdev;
    7844             :         struct spdk_dif_ctx dif_ctx;
    7845           0 :         struct spdk_dif_error err_blk = {};
    7846             :         int rc;
    7847             :         struct spdk_dif_ctx_init_ext_opts dif_opts;
    7848             : 
    7849           0 :         dif_opts.size = SPDK_SIZEOF(&dif_opts, dif_pi_format);
    7850           0 :         dif_opts.dif_pi_format = bdev->dif_pi_format;
    7851           0 :         rc = spdk_dif_ctx_init(&dif_ctx,
    7852           0 :                                bdev->blocklen, bdev->md_len, bdev->md_interleave,
    7853           0 :                                bdev->dif_is_head_of_md, bdev->dif_type,
    7854           0 :                                bdev_io->u.bdev.dif_check_flags,
    7855           0 :                                bdev_io->u.bdev.offset_blocks, 0, 0, 0, 0, &dif_opts);
    7856           0 :         if (rc != 0) {
    7857           0 :                 SPDK_ERRLOG("Initialization of DIF context failed\n");
    7858           0 :                 return;
    7859             :         }
    7860             : 
    7861           0 :         if (bdev->md_interleave) {
    7862           0 :                 rc = spdk_dif_verify(bdev_io->u.bdev.iovs, bdev_io->u.bdev.iovcnt,
    7863           0 :                                      bdev_io->u.bdev.num_blocks, &dif_ctx, &err_blk);
    7864           0 :         } else {
    7865           0 :                 struct iovec md_iov = {
    7866           0 :                         .iov_base       = bdev_io->u.bdev.md_buf,
    7867           0 :                         .iov_len        = bdev_io->u.bdev.num_blocks * bdev->md_len,
    7868             :                 };
    7869             : 
    7870           0 :                 rc = spdk_dix_verify(bdev_io->u.bdev.iovs, bdev_io->u.bdev.iovcnt,
    7871           0 :                                      &md_iov, bdev_io->u.bdev.num_blocks, &dif_ctx, &err_blk);
    7872             :         }
    7873             : 
    7874           0 :         if (rc != 0) {
    7875           0 :                 SPDK_ERRLOG("DIF error detected. type=%d, offset=%" PRIu32 "\n",
    7876             :                             err_blk.err_type, err_blk.err_offset);
    7877           0 :         } else {
    7878           0 :                 SPDK_ERRLOG("Hardware reported PI error but SPDK could not find any.\n");
    7879             :         }
    7880           0 : }
    7881             : 
    7882             : static void
    7883           0 : bdev_nvme_no_pi_readv_done(void *ref, const struct spdk_nvme_cpl *cpl)
    7884             : {
    7885           0 :         struct nvme_bdev_io *bio = ref;
    7886             : 
    7887           0 :         if (spdk_nvme_cpl_is_success(cpl)) {
    7888             :                 /* Run PI verification for read data buffer. */
    7889           0 :                 bdev_nvme_verify_pi_error(bio);
    7890           0 :         }
    7891             : 
    7892             :         /* Return original completion status */
    7893           0 :         bdev_nvme_io_complete_nvme_status(bio, &bio->cpl);
    7894           0 : }
    7895             : 
    7896             : static void
    7897           3 : bdev_nvme_readv_done(void *ref, const struct spdk_nvme_cpl *cpl)
    7898             : {
    7899           3 :         struct nvme_bdev_io *bio = ref;
    7900           3 :         struct spdk_bdev_io *bdev_io = spdk_bdev_io_from_ctx(bio);
    7901             :         int ret;
    7902             : 
    7903           3 :         if (spdk_unlikely(spdk_nvme_cpl_is_pi_error(cpl))) {
    7904           0 :                 SPDK_ERRLOG("readv completed with PI error (sct=%d, sc=%d)\n",
    7905             :                             cpl->status.sct, cpl->status.sc);
    7906             : 
    7907             :                 /* Save completion status to use after verifying PI error. */
    7908           0 :                 bio->cpl = *cpl;
    7909             : 
    7910           0 :                 if (spdk_likely(nvme_io_path_is_available(bio->io_path))) {
    7911             :                         /* Read without PI checking to verify PI error. */
    7912           0 :                         ret = bdev_nvme_no_pi_readv(bio,
    7913           0 :                                                     bdev_io->u.bdev.iovs,
    7914           0 :                                                     bdev_io->u.bdev.iovcnt,
    7915           0 :                                                     bdev_io->u.bdev.md_buf,
    7916           0 :                                                     bdev_io->u.bdev.num_blocks,
    7917           0 :                                                     bdev_io->u.bdev.offset_blocks);
    7918           0 :                         if (ret == 0) {
    7919           0 :                                 return;
    7920             :                         }
    7921           0 :                 }
    7922           0 :         }
    7923             : 
    7924           3 :         bdev_nvme_io_complete_nvme_status(bio, cpl);
    7925           3 : }
    7926             : 
    7927             : static void
    7928          25 : bdev_nvme_writev_done(void *ref, const struct spdk_nvme_cpl *cpl)
    7929             : {
    7930          25 :         struct nvme_bdev_io *bio = ref;
    7931             : 
    7932          25 :         if (spdk_unlikely(spdk_nvme_cpl_is_pi_error(cpl))) {
    7933           0 :                 SPDK_ERRLOG("writev completed with PI error (sct=%d, sc=%d)\n",
    7934             :                             cpl->status.sct, cpl->status.sc);
    7935             :                 /* Run PI verification for write data buffer if PI error is detected. */
    7936           0 :                 bdev_nvme_verify_pi_error(bio);
    7937           0 :         }
    7938             : 
    7939          25 :         bdev_nvme_io_complete_nvme_status(bio, cpl);
    7940          25 : }
    7941             : 
    7942             : static void
    7943           0 : bdev_nvme_zone_appendv_done(void *ref, const struct spdk_nvme_cpl *cpl)
    7944             : {
    7945           0 :         struct nvme_bdev_io *bio = ref;
    7946           0 :         struct spdk_bdev_io *bdev_io = spdk_bdev_io_from_ctx(bio);
    7947             : 
    7948             :         /* spdk_bdev_io_get_append_location() requires that the ALBA is stored in offset_blocks.
    7949             :          * Additionally, offset_blocks has to be set before calling bdev_nvme_verify_pi_error().
    7950             :          */
    7951           0 :         bdev_io->u.bdev.offset_blocks = *(uint64_t *)&cpl->cdw0;
    7952             : 
    7953           0 :         if (spdk_nvme_cpl_is_pi_error(cpl)) {
    7954           0 :                 SPDK_ERRLOG("zone append completed with PI error (sct=%d, sc=%d)\n",
    7955             :                             cpl->status.sct, cpl->status.sc);
    7956             :                 /* Run PI verification for zone append data buffer if PI error is detected. */
    7957           0 :                 bdev_nvme_verify_pi_error(bio);
    7958           0 :         }
    7959             : 
    7960           0 :         bdev_nvme_io_complete_nvme_status(bio, cpl);
    7961           0 : }
    7962             : 
    7963             : static void
    7964           1 : bdev_nvme_comparev_done(void *ref, const struct spdk_nvme_cpl *cpl)
    7965             : {
    7966           1 :         struct nvme_bdev_io *bio = ref;
    7967             : 
    7968           1 :         if (spdk_nvme_cpl_is_pi_error(cpl)) {
    7969           0 :                 SPDK_ERRLOG("comparev completed with PI error (sct=%d, sc=%d)\n",
    7970             :                             cpl->status.sct, cpl->status.sc);
    7971             :                 /* Run PI verification for compare data buffer if PI error is detected. */
    7972           0 :                 bdev_nvme_verify_pi_error(bio);
    7973           0 :         }
    7974             : 
    7975           1 :         bdev_nvme_io_complete_nvme_status(bio, cpl);
    7976           1 : }
    7977             : 
    7978             : static void
    7979           4 : bdev_nvme_comparev_and_writev_done(void *ref, const struct spdk_nvme_cpl *cpl)
    7980             : {
    7981           4 :         struct nvme_bdev_io *bio = ref;
    7982             : 
    7983             :         /* Compare operation completion */
    7984           4 :         if (!bio->first_fused_completed) {
    7985             :                 /* Save compare result for write callback */
    7986           2 :                 bio->cpl = *cpl;
    7987           2 :                 bio->first_fused_completed = true;
    7988           2 :                 return;
    7989             :         }
    7990             : 
    7991             :         /* Write operation completion */
    7992           2 :         if (spdk_nvme_cpl_is_error(&bio->cpl)) {
    7993             :                 /* If bio->cpl is already an error, it means the compare operation failed.  In that case,
    7994             :                  * complete the IO with the compare operation's status.
    7995             :                  */
    7996           1 :                 if (!spdk_nvme_cpl_is_error(cpl)) {
    7997           1 :                         SPDK_ERRLOG("Unexpected write success after compare failure.\n");
    7998           1 :                 }
    7999             : 
    8000           1 :                 bdev_nvme_io_complete_nvme_status(bio, &bio->cpl);
    8001           1 :         } else {
    8002           1 :                 bdev_nvme_io_complete_nvme_status(bio, cpl);
    8003             :         }
    8004           4 : }
    8005             : 
    8006             : static void
    8007           1 : bdev_nvme_queued_done(void *ref, const struct spdk_nvme_cpl *cpl)
    8008             : {
    8009           1 :         struct nvme_bdev_io *bio = ref;
    8010             : 
    8011           1 :         bdev_nvme_io_complete_nvme_status(bio, cpl);
    8012           1 : }
    8013             : 
    8014             : static int
    8015           0 : fill_zone_from_report(struct spdk_bdev_zone_info *info, struct spdk_nvme_zns_zone_desc *desc)
    8016             : {
    8017           0 :         switch (desc->zt) {
    8018             :         case SPDK_NVME_ZONE_TYPE_SEQWR:
    8019           0 :                 info->type = SPDK_BDEV_ZONE_TYPE_SEQWR;
    8020           0 :                 break;
    8021             :         default:
    8022           0 :                 SPDK_ERRLOG("Invalid zone type: %#x in zone report\n", desc->zt);
    8023           0 :                 return -EIO;
    8024             :         }
    8025             : 
    8026           0 :         switch (desc->zs) {
    8027             :         case SPDK_NVME_ZONE_STATE_EMPTY:
    8028           0 :                 info->state = SPDK_BDEV_ZONE_STATE_EMPTY;
    8029           0 :                 break;
    8030             :         case SPDK_NVME_ZONE_STATE_IOPEN:
    8031           0 :                 info->state = SPDK_BDEV_ZONE_STATE_IMP_OPEN;
    8032           0 :                 break;
    8033             :         case SPDK_NVME_ZONE_STATE_EOPEN:
    8034           0 :                 info->state = SPDK_BDEV_ZONE_STATE_EXP_OPEN;
    8035           0 :                 break;
    8036             :         case SPDK_NVME_ZONE_STATE_CLOSED:
    8037           0 :                 info->state = SPDK_BDEV_ZONE_STATE_CLOSED;
    8038           0 :                 break;
    8039             :         case SPDK_NVME_ZONE_STATE_RONLY:
    8040           0 :                 info->state = SPDK_BDEV_ZONE_STATE_READ_ONLY;
    8041           0 :                 break;
    8042             :         case SPDK_NVME_ZONE_STATE_FULL:
    8043           0 :                 info->state = SPDK_BDEV_ZONE_STATE_FULL;
    8044           0 :                 break;
    8045             :         case SPDK_NVME_ZONE_STATE_OFFLINE:
    8046           0 :                 info->state = SPDK_BDEV_ZONE_STATE_OFFLINE;
    8047           0 :                 break;
    8048             :         default:
    8049           0 :                 SPDK_ERRLOG("Invalid zone state: %#x in zone report\n", desc->zs);
    8050           0 :                 return -EIO;
    8051             :         }
    8052             : 
    8053           0 :         info->zone_id = desc->zslba;
    8054           0 :         info->write_pointer = desc->wp;
    8055           0 :         info->capacity = desc->zcap;
    8056             : 
    8057           0 :         return 0;
    8058           0 : }
    8059             : 
    8060             : static void
    8061           0 : bdev_nvme_get_zone_info_done(void *ref, const struct spdk_nvme_cpl *cpl)
    8062             : {
    8063           0 :         struct nvme_bdev_io *bio = ref;
    8064           0 :         struct spdk_bdev_io *bdev_io = spdk_bdev_io_from_ctx(bio);
    8065           0 :         uint64_t zone_id = bdev_io->u.zone_mgmt.zone_id;
    8066           0 :         uint32_t zones_to_copy = bdev_io->u.zone_mgmt.num_zones;
    8067           0 :         struct spdk_bdev_zone_info *info = bdev_io->u.zone_mgmt.buf;
    8068             :         uint64_t max_zones_per_buf, i;
    8069             :         uint32_t zone_report_bufsize;
    8070             :         struct spdk_nvme_ns *ns;
    8071             :         struct spdk_nvme_qpair *qpair;
    8072             :         int ret;
    8073             : 
    8074           0 :         if (spdk_nvme_cpl_is_error(cpl)) {
    8075           0 :                 goto out_complete_io_nvme_cpl;
    8076             :         }
    8077             : 
    8078           0 :         if (spdk_unlikely(!nvme_io_path_is_available(bio->io_path))) {
    8079           0 :                 ret = -ENXIO;
    8080           0 :                 goto out_complete_io_ret;
    8081             :         }
    8082             : 
    8083           0 :         ns = bio->io_path->nvme_ns->ns;
    8084           0 :         qpair = bio->io_path->qpair->qpair;
    8085             : 
    8086           0 :         zone_report_bufsize = spdk_nvme_ns_get_max_io_xfer_size(ns);
    8087           0 :         max_zones_per_buf = (zone_report_bufsize - sizeof(*bio->zone_report_buf)) /
    8088             :                             sizeof(bio->zone_report_buf->descs[0]);
    8089             : 
    8090           0 :         if (bio->zone_report_buf->nr_zones > max_zones_per_buf) {
    8091           0 :                 ret = -EINVAL;
    8092           0 :                 goto out_complete_io_ret;
    8093             :         }
    8094             : 
    8095           0 :         if (!bio->zone_report_buf->nr_zones) {
    8096           0 :                 ret = -EINVAL;
    8097           0 :                 goto out_complete_io_ret;
    8098             :         }
    8099             : 
    8100           0 :         for (i = 0; i < bio->zone_report_buf->nr_zones && bio->handled_zones < zones_to_copy; i++) {
    8101           0 :                 ret = fill_zone_from_report(&info[bio->handled_zones],
    8102           0 :                                             &bio->zone_report_buf->descs[i]);
    8103           0 :                 if (ret) {
    8104           0 :                         goto out_complete_io_ret;
    8105             :                 }
    8106           0 :                 bio->handled_zones++;
    8107           0 :         }
    8108             : 
    8109           0 :         if (bio->handled_zones < zones_to_copy) {
    8110           0 :                 uint64_t zone_size_lba = spdk_nvme_zns_ns_get_zone_size_sectors(ns);
    8111           0 :                 uint64_t slba = zone_id + (zone_size_lba * bio->handled_zones);
    8112             : 
    8113           0 :                 memset(bio->zone_report_buf, 0, zone_report_bufsize);
    8114           0 :                 ret = spdk_nvme_zns_report_zones(ns, qpair,
    8115           0 :                                                  bio->zone_report_buf, zone_report_bufsize,
    8116           0 :                                                  slba, SPDK_NVME_ZRA_LIST_ALL, true,
    8117           0 :                                                  bdev_nvme_get_zone_info_done, bio);
    8118           0 :                 if (!ret) {
    8119           0 :                         return;
    8120             :                 } else {
    8121           0 :                         goto out_complete_io_ret;
    8122             :                 }
    8123             :         }
    8124             : 
    8125             : out_complete_io_nvme_cpl:
    8126           0 :         free(bio->zone_report_buf);
    8127           0 :         bio->zone_report_buf = NULL;
    8128           0 :         bdev_nvme_io_complete_nvme_status(bio, cpl);
    8129           0 :         return;
    8130             : 
    8131             : out_complete_io_ret:
    8132           0 :         free(bio->zone_report_buf);
    8133           0 :         bio->zone_report_buf = NULL;
    8134           0 :         bdev_nvme_io_complete(bio, ret);
    8135           0 : }
    8136             : 
    8137             : static void
    8138           0 : bdev_nvme_zone_management_done(void *ref, const struct spdk_nvme_cpl *cpl)
    8139             : {
    8140           0 :         struct nvme_bdev_io *bio = ref;
    8141             : 
    8142           0 :         bdev_nvme_io_complete_nvme_status(bio, cpl);
    8143           0 : }
    8144             : 
    8145             : static void
    8146           4 : bdev_nvme_admin_passthru_complete_nvme_status(void *ctx)
    8147             : {
    8148           4 :         struct nvme_bdev_io *bio = ctx;
    8149           4 :         struct spdk_bdev_io *bdev_io = spdk_bdev_io_from_ctx(bio);
    8150           4 :         const struct spdk_nvme_cpl *cpl = &bio->cpl;
    8151             : 
    8152           4 :         assert(bdev_nvme_io_type_is_admin(bdev_io->type));
    8153             : 
    8154           4 :         __bdev_nvme_io_complete(bdev_io, 0, cpl);
    8155           4 : }
    8156             : 
    8157             : static void
    8158           3 : bdev_nvme_abort_complete(void *ctx)
    8159             : {
    8160           3 :         struct nvme_bdev_io *bio = ctx;
    8161           3 :         struct spdk_bdev_io *bdev_io = spdk_bdev_io_from_ctx(bio);
    8162             : 
    8163           3 :         if (spdk_nvme_cpl_is_abort_success(&bio->cpl)) {
    8164           3 :                 __bdev_nvme_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_SUCCESS, NULL);
    8165           3 :         } else {
    8166           0 :                 __bdev_nvme_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED, NULL);
    8167             :         }
    8168           3 : }
    8169             : 
    8170             : static void
    8171           3 : bdev_nvme_abort_done(void *ref, const struct spdk_nvme_cpl *cpl)
    8172             : {
    8173           3 :         struct nvme_bdev_io *bio = ref;
    8174           3 :         struct spdk_bdev_io *bdev_io = spdk_bdev_io_from_ctx(bio);
    8175             : 
    8176           3 :         bio->cpl = *cpl;
    8177           3 :         spdk_thread_send_msg(spdk_bdev_io_get_thread(bdev_io), bdev_nvme_abort_complete, bio);
    8178           3 : }
    8179             : 
    8180             : static void
    8181           4 : bdev_nvme_admin_passthru_done(void *ref, const struct spdk_nvme_cpl *cpl)
    8182             : {
    8183           4 :         struct nvme_bdev_io *bio = ref;
    8184           4 :         struct spdk_bdev_io *bdev_io = spdk_bdev_io_from_ctx(bio);
    8185             : 
    8186           4 :         bio->cpl = *cpl;
    8187           8 :         spdk_thread_send_msg(spdk_bdev_io_get_thread(bdev_io),
    8188           4 :                              bdev_nvme_admin_passthru_complete_nvme_status, bio);
    8189           4 : }
    8190             : 
    8191             : static void
    8192           0 : bdev_nvme_queued_reset_sgl(void *ref, uint32_t sgl_offset)
    8193             : {
    8194           0 :         struct nvme_bdev_io *bio = ref;
    8195             :         struct iovec *iov;
    8196             : 
    8197           0 :         bio->iov_offset = sgl_offset;
    8198           0 :         for (bio->iovpos = 0; bio->iovpos < bio->iovcnt; bio->iovpos++) {
    8199           0 :                 iov = &bio->iovs[bio->iovpos];
    8200           0 :                 if (bio->iov_offset < iov->iov_len) {
    8201           0 :                         break;
    8202             :                 }
    8203             : 
    8204           0 :                 bio->iov_offset -= iov->iov_len;
    8205           0 :         }
    8206           0 : }
    8207             : 
    8208             : static int
    8209           0 : bdev_nvme_queued_next_sge(void *ref, void **address, uint32_t *length)
    8210             : {
    8211           0 :         struct nvme_bdev_io *bio = ref;
    8212             :         struct iovec *iov;
    8213             : 
    8214           0 :         assert(bio->iovpos < bio->iovcnt);
    8215             : 
    8216           0 :         iov = &bio->iovs[bio->iovpos];
    8217             : 
    8218           0 :         *address = iov->iov_base;
    8219           0 :         *length = iov->iov_len;
    8220             : 
    8221           0 :         if (bio->iov_offset) {
    8222           0 :                 assert(bio->iov_offset <= iov->iov_len);
    8223           0 :                 *address += bio->iov_offset;
    8224           0 :                 *length -= bio->iov_offset;
    8225           0 :         }
    8226             : 
    8227           0 :         bio->iov_offset += *length;
    8228           0 :         if (bio->iov_offset == iov->iov_len) {
    8229           0 :                 bio->iovpos++;
    8230           0 :                 bio->iov_offset = 0;
    8231           0 :         }
    8232             : 
    8233           0 :         return 0;
    8234             : }
    8235             : 
    8236             : static void
    8237           0 : bdev_nvme_queued_reset_fused_sgl(void *ref, uint32_t sgl_offset)
    8238             : {
    8239           0 :         struct nvme_bdev_io *bio = ref;
    8240             :         struct iovec *iov;
    8241             : 
    8242           0 :         bio->fused_iov_offset = sgl_offset;
    8243           0 :         for (bio->fused_iovpos = 0; bio->fused_iovpos < bio->fused_iovcnt; bio->fused_iovpos++) {
    8244           0 :                 iov = &bio->fused_iovs[bio->fused_iovpos];
    8245           0 :                 if (bio->fused_iov_offset < iov->iov_len) {
    8246           0 :                         break;
    8247             :                 }
    8248             : 
    8249           0 :                 bio->fused_iov_offset -= iov->iov_len;
    8250           0 :         }
    8251           0 : }
    8252             : 
    8253             : static int
    8254           0 : bdev_nvme_queued_next_fused_sge(void *ref, void **address, uint32_t *length)
    8255             : {
    8256           0 :         struct nvme_bdev_io *bio = ref;
    8257             :         struct iovec *iov;
    8258             : 
    8259           0 :         assert(bio->fused_iovpos < bio->fused_iovcnt);
    8260             : 
    8261           0 :         iov = &bio->fused_iovs[bio->fused_iovpos];
    8262             : 
    8263           0 :         *address = iov->iov_base;
    8264           0 :         *length = iov->iov_len;
    8265             : 
    8266           0 :         if (bio->fused_iov_offset) {
    8267           0 :                 assert(bio->fused_iov_offset <= iov->iov_len);
    8268           0 :                 *address += bio->fused_iov_offset;
    8269           0 :                 *length -= bio->fused_iov_offset;
    8270           0 :         }
    8271             : 
    8272           0 :         bio->fused_iov_offset += *length;
    8273           0 :         if (bio->fused_iov_offset == iov->iov_len) {
    8274           0 :                 bio->fused_iovpos++;
    8275           0 :                 bio->fused_iov_offset = 0;
    8276           0 :         }
    8277             : 
    8278           0 :         return 0;
    8279             : }
    8280             : 
    8281             : static int
    8282           0 : bdev_nvme_no_pi_readv(struct nvme_bdev_io *bio, struct iovec *iov, int iovcnt,
    8283             :                       void *md, uint64_t lba_count, uint64_t lba)
    8284             : {
    8285             :         int rc;
    8286             : 
    8287           0 :         SPDK_DEBUGLOG(bdev_nvme, "read %" PRIu64 " blocks with offset %#" PRIx64 " without PI check\n",
    8288             :                       lba_count, lba);
    8289             : 
    8290           0 :         bio->iovs = iov;
    8291           0 :         bio->iovcnt = iovcnt;
    8292           0 :         bio->iovpos = 0;
    8293           0 :         bio->iov_offset = 0;
    8294             : 
    8295           0 :         rc = spdk_nvme_ns_cmd_readv_with_md(bio->io_path->nvme_ns->ns,
    8296           0 :                                             bio->io_path->qpair->qpair,
    8297           0 :                                             lba, lba_count,
    8298           0 :                                             bdev_nvme_no_pi_readv_done, bio, 0,
    8299             :                                             bdev_nvme_queued_reset_sgl, bdev_nvme_queued_next_sge,
    8300           0 :                                             md, 0, 0);
    8301             : 
    8302           0 :         if (rc != 0 && rc != -ENOMEM) {
    8303           0 :                 SPDK_ERRLOG("no_pi_readv failed: rc = %d\n", rc);
    8304           0 :         }
    8305           0 :         return rc;
    8306             : }
    8307             : 
    8308             : static int
    8309           3 : bdev_nvme_readv(struct nvme_bdev_io *bio, struct iovec *iov, int iovcnt,
    8310             :                 void *md, uint64_t lba_count, uint64_t lba, uint32_t flags,
    8311             :                 struct spdk_memory_domain *domain, void *domain_ctx,
    8312             :                 struct spdk_accel_sequence *seq)
    8313             : {
    8314           3 :         struct spdk_nvme_ns *ns = bio->io_path->nvme_ns->ns;
    8315           3 :         struct spdk_nvme_qpair *qpair = bio->io_path->qpair->qpair;
    8316             :         int rc;
    8317             : 
    8318           3 :         SPDK_DEBUGLOG(bdev_nvme, "read %" PRIu64 " blocks with offset %#" PRIx64 "\n",
    8319             :                       lba_count, lba);
    8320             : 
    8321           3 :         bio->iovs = iov;
    8322           3 :         bio->iovcnt = iovcnt;
    8323           3 :         bio->iovpos = 0;
    8324           3 :         bio->iov_offset = 0;
    8325             : 
    8326           3 :         if (domain != NULL || seq != NULL) {
    8327           1 :                 bio->ext_opts.size = SPDK_SIZEOF(&bio->ext_opts, accel_sequence);
    8328           1 :                 bio->ext_opts.memory_domain = domain;
    8329           1 :                 bio->ext_opts.memory_domain_ctx = domain_ctx;
    8330           1 :                 bio->ext_opts.io_flags = flags;
    8331           1 :                 bio->ext_opts.metadata = md;
    8332           1 :                 bio->ext_opts.accel_sequence = seq;
    8333             : 
    8334           1 :                 if (iovcnt == 1) {
    8335           2 :                         rc = spdk_nvme_ns_cmd_read_ext(ns, qpair, iov[0].iov_base, lba, lba_count, bdev_nvme_readv_done,
    8336           1 :                                                        bio, &bio->ext_opts);
    8337           1 :                 } else {
    8338           0 :                         rc = spdk_nvme_ns_cmd_readv_ext(ns, qpair, lba, lba_count,
    8339           0 :                                                         bdev_nvme_readv_done, bio,
    8340             :                                                         bdev_nvme_queued_reset_sgl,
    8341             :                                                         bdev_nvme_queued_next_sge,
    8342           0 :                                                         &bio->ext_opts);
    8343             :                 }
    8344           3 :         } else if (iovcnt == 1) {
    8345           4 :                 rc = spdk_nvme_ns_cmd_read_with_md(ns, qpair, iov[0].iov_base,
    8346           2 :                                                    md, lba, lba_count, bdev_nvme_readv_done,
    8347           2 :                                                    bio, flags, 0, 0);
    8348           2 :         } else {
    8349           0 :                 rc = spdk_nvme_ns_cmd_readv_with_md(ns, qpair, lba, lba_count,
    8350           0 :                                                     bdev_nvme_readv_done, bio, flags,
    8351             :                                                     bdev_nvme_queued_reset_sgl,
    8352           0 :                                                     bdev_nvme_queued_next_sge, md, 0, 0);
    8353             :         }
    8354             : 
    8355           3 :         if (spdk_unlikely(rc != 0 && rc != -ENOMEM)) {
    8356           0 :                 SPDK_ERRLOG("readv failed: rc = %d\n", rc);
    8357           0 :         }
    8358           3 :         return rc;
    8359             : }
    8360             : 
    8361             : static int
    8362          25 : bdev_nvme_writev(struct nvme_bdev_io *bio, struct iovec *iov, int iovcnt,
    8363             :                  void *md, uint64_t lba_count, uint64_t lba, uint32_t flags,
    8364             :                  struct spdk_memory_domain *domain, void *domain_ctx,
    8365             :                  struct spdk_accel_sequence *seq,
    8366             :                  union spdk_bdev_nvme_cdw12 cdw12, union spdk_bdev_nvme_cdw13 cdw13)
    8367             : {
    8368          25 :         struct spdk_nvme_ns *ns = bio->io_path->nvme_ns->ns;
    8369          25 :         struct spdk_nvme_qpair *qpair = bio->io_path->qpair->qpair;
    8370             :         int rc;
    8371             : 
    8372          25 :         SPDK_DEBUGLOG(bdev_nvme, "write %" PRIu64 " blocks with offset %#" PRIx64 "\n",
    8373             :                       lba_count, lba);
    8374             : 
    8375          25 :         bio->iovs = iov;
    8376          25 :         bio->iovcnt = iovcnt;
    8377          25 :         bio->iovpos = 0;
    8378          25 :         bio->iov_offset = 0;
    8379             : 
    8380          25 :         if (domain != NULL || seq != NULL) {
    8381           0 :                 bio->ext_opts.size = SPDK_SIZEOF(&bio->ext_opts, accel_sequence);
    8382           0 :                 bio->ext_opts.memory_domain = domain;
    8383           0 :                 bio->ext_opts.memory_domain_ctx = domain_ctx;
    8384           0 :                 bio->ext_opts.io_flags = flags | SPDK_NVME_IO_FLAGS_DIRECTIVE(cdw12.write.dtype);
    8385           0 :                 bio->ext_opts.cdw13 = cdw13.raw;
    8386           0 :                 bio->ext_opts.metadata = md;
    8387           0 :                 bio->ext_opts.accel_sequence = seq;
    8388             : 
    8389           0 :                 if (iovcnt == 1) {
    8390           0 :                         rc = spdk_nvme_ns_cmd_write_ext(ns, qpair, iov[0].iov_base, lba, lba_count, bdev_nvme_writev_done,
    8391           0 :                                                         bio, &bio->ext_opts);
    8392           0 :                 } else {
    8393           0 :                         rc = spdk_nvme_ns_cmd_writev_ext(ns, qpair, lba, lba_count,
    8394           0 :                                                          bdev_nvme_writev_done, bio,
    8395             :                                                          bdev_nvme_queued_reset_sgl,
    8396             :                                                          bdev_nvme_queued_next_sge,
    8397           0 :                                                          &bio->ext_opts);
    8398             :                 }
    8399          25 :         } else if (iovcnt == 1) {
    8400          50 :                 rc = spdk_nvme_ns_cmd_write_with_md(ns, qpair, iov[0].iov_base,
    8401          25 :                                                     md, lba, lba_count, bdev_nvme_writev_done,
    8402          25 :                                                     bio, flags, 0, 0);
    8403          25 :         } else {
    8404           0 :                 rc = spdk_nvme_ns_cmd_writev_with_md(ns, qpair, lba, lba_count,
    8405           0 :                                                      bdev_nvme_writev_done, bio, flags,
    8406             :                                                      bdev_nvme_queued_reset_sgl,
    8407           0 :                                                      bdev_nvme_queued_next_sge, md, 0, 0);
    8408             :         }
    8409             : 
    8410          25 :         if (spdk_unlikely(rc != 0 && rc != -ENOMEM)) {
    8411           0 :                 SPDK_ERRLOG("writev failed: rc = %d\n", rc);
    8412           0 :         }
    8413          25 :         return rc;
    8414             : }
    8415             : 
    8416             : static int
    8417           0 : bdev_nvme_zone_appendv(struct nvme_bdev_io *bio, struct iovec *iov, int iovcnt,
    8418             :                        void *md, uint64_t lba_count, uint64_t zslba,
    8419             :                        uint32_t flags)
    8420             : {
    8421           0 :         struct spdk_nvme_ns *ns = bio->io_path->nvme_ns->ns;
    8422           0 :         struct spdk_nvme_qpair *qpair = bio->io_path->qpair->qpair;
    8423             :         int rc;
    8424             : 
    8425           0 :         SPDK_DEBUGLOG(bdev_nvme, "zone append %" PRIu64 " blocks to zone start lba %#" PRIx64 "\n",
    8426             :                       lba_count, zslba);
    8427             : 
    8428           0 :         bio->iovs = iov;
    8429           0 :         bio->iovcnt = iovcnt;
    8430           0 :         bio->iovpos = 0;
    8431           0 :         bio->iov_offset = 0;
    8432             : 
    8433           0 :         if (iovcnt == 1) {
    8434           0 :                 rc = spdk_nvme_zns_zone_append_with_md(ns, qpair, iov[0].iov_base, md, zslba,
    8435           0 :                                                        lba_count,
    8436           0 :                                                        bdev_nvme_zone_appendv_done, bio,
    8437           0 :                                                        flags,
    8438             :                                                        0, 0);
    8439           0 :         } else {
    8440           0 :                 rc = spdk_nvme_zns_zone_appendv_with_md(ns, qpair, zslba, lba_count,
    8441           0 :                                                         bdev_nvme_zone_appendv_done, bio, flags,
    8442             :                                                         bdev_nvme_queued_reset_sgl, bdev_nvme_queued_next_sge,
    8443           0 :                                                         md, 0, 0);
    8444             :         }
    8445             : 
    8446           0 :         if (rc != 0 && rc != -ENOMEM) {
    8447           0 :                 SPDK_ERRLOG("zone append failed: rc = %d\n", rc);
    8448           0 :         }
    8449           0 :         return rc;
    8450             : }
    8451             : 
    8452             : static int
    8453           1 : bdev_nvme_comparev(struct nvme_bdev_io *bio, struct iovec *iov, int iovcnt,
    8454             :                    void *md, uint64_t lba_count, uint64_t lba,
    8455             :                    uint32_t flags)
    8456             : {
    8457             :         int rc;
    8458             : 
    8459           1 :         SPDK_DEBUGLOG(bdev_nvme, "compare %" PRIu64 " blocks with offset %#" PRIx64 "\n",
    8460             :                       lba_count, lba);
    8461             : 
    8462           1 :         bio->iovs = iov;
    8463           1 :         bio->iovcnt = iovcnt;
    8464           1 :         bio->iovpos = 0;
    8465           1 :         bio->iov_offset = 0;
    8466             : 
    8467           2 :         rc = spdk_nvme_ns_cmd_comparev_with_md(bio->io_path->nvme_ns->ns,
    8468           1 :                                                bio->io_path->qpair->qpair,
    8469           1 :                                                lba, lba_count,
    8470           1 :                                                bdev_nvme_comparev_done, bio, flags,
    8471             :                                                bdev_nvme_queued_reset_sgl, bdev_nvme_queued_next_sge,
    8472           1 :                                                md, 0, 0);
    8473             : 
    8474           1 :         if (rc != 0 && rc != -ENOMEM) {
    8475           0 :                 SPDK_ERRLOG("comparev failed: rc = %d\n", rc);
    8476           0 :         }
    8477           1 :         return rc;
    8478             : }
    8479             : 
    8480             : static int
    8481           2 : bdev_nvme_comparev_and_writev(struct nvme_bdev_io *bio, struct iovec *cmp_iov, int cmp_iovcnt,
    8482             :                               struct iovec *write_iov, int write_iovcnt,
    8483             :                               void *md, uint64_t lba_count, uint64_t lba, uint32_t flags)
    8484             : {
    8485           2 :         struct spdk_nvme_ns *ns = bio->io_path->nvme_ns->ns;
    8486           2 :         struct spdk_nvme_qpair *qpair = bio->io_path->qpair->qpair;
    8487           2 :         struct spdk_bdev_io *bdev_io = spdk_bdev_io_from_ctx(bio);
    8488             :         int rc;
    8489             : 
    8490           2 :         SPDK_DEBUGLOG(bdev_nvme, "compare and write %" PRIu64 " blocks with offset %#" PRIx64 "\n",
    8491             :                       lba_count, lba);
    8492             : 
    8493           2 :         bio->iovs = cmp_iov;
    8494           2 :         bio->iovcnt = cmp_iovcnt;
    8495           2 :         bio->iovpos = 0;
    8496           2 :         bio->iov_offset = 0;
    8497           2 :         bio->fused_iovs = write_iov;
    8498           2 :         bio->fused_iovcnt = write_iovcnt;
    8499           2 :         bio->fused_iovpos = 0;
    8500           2 :         bio->fused_iov_offset = 0;
    8501             : 
    8502           2 :         if (bdev_io->num_retries == 0) {
    8503           2 :                 bio->first_fused_submitted = false;
    8504           2 :                 bio->first_fused_completed = false;
    8505           2 :         }
    8506             : 
    8507           2 :         if (!bio->first_fused_submitted) {
    8508           2 :                 flags |= SPDK_NVME_IO_FLAGS_FUSE_FIRST;
    8509           2 :                 memset(&bio->cpl, 0, sizeof(bio->cpl));
    8510             : 
    8511           4 :                 rc = spdk_nvme_ns_cmd_comparev_with_md(ns, qpair, lba, lba_count,
    8512           2 :                                                        bdev_nvme_comparev_and_writev_done, bio, flags,
    8513           2 :                                                        bdev_nvme_queued_reset_sgl, bdev_nvme_queued_next_sge, md, 0, 0);
    8514           2 :                 if (rc == 0) {
    8515           2 :                         bio->first_fused_submitted = true;
    8516           2 :                         flags &= ~SPDK_NVME_IO_FLAGS_FUSE_FIRST;
    8517           2 :                 } else {
    8518           0 :                         if (rc != -ENOMEM) {
    8519           0 :                                 SPDK_ERRLOG("compare failed: rc = %d\n", rc);
    8520           0 :                         }
    8521           0 :                         return rc;
    8522             :                 }
    8523           2 :         }
    8524             : 
    8525           2 :         flags |= SPDK_NVME_IO_FLAGS_FUSE_SECOND;
    8526             : 
    8527           4 :         rc = spdk_nvme_ns_cmd_writev_with_md(ns, qpair, lba, lba_count,
    8528           2 :                                              bdev_nvme_comparev_and_writev_done, bio, flags,
    8529           2 :                                              bdev_nvme_queued_reset_fused_sgl, bdev_nvme_queued_next_fused_sge, md, 0, 0);
    8530           2 :         if (rc != 0 && rc != -ENOMEM) {
    8531           0 :                 SPDK_ERRLOG("write failed: rc = %d\n", rc);
    8532           0 :                 rc = 0;
    8533           0 :         }
    8534             : 
    8535           2 :         return rc;
    8536           2 : }
    8537             : 
    8538             : static int
    8539           1 : bdev_nvme_unmap(struct nvme_bdev_io *bio, uint64_t offset_blocks, uint64_t num_blocks)
    8540             : {
    8541             :         struct spdk_nvme_dsm_range dsm_ranges[SPDK_NVME_DATASET_MANAGEMENT_MAX_RANGES];
    8542             :         struct spdk_nvme_dsm_range *range;
    8543             :         uint64_t offset, remaining;
    8544             :         uint64_t num_ranges_u64;
    8545             :         uint16_t num_ranges;
    8546             :         int rc;
    8547             : 
    8548           1 :         num_ranges_u64 = (num_blocks + SPDK_NVME_DATASET_MANAGEMENT_RANGE_MAX_BLOCKS - 1) /
    8549             :                          SPDK_NVME_DATASET_MANAGEMENT_RANGE_MAX_BLOCKS;
    8550           1 :         if (num_ranges_u64 > SPDK_COUNTOF(dsm_ranges)) {
    8551           0 :                 SPDK_ERRLOG("Unmap request for %" PRIu64 " blocks is too large\n", num_blocks);
    8552           0 :                 return -EINVAL;
    8553             :         }
    8554           1 :         num_ranges = (uint16_t)num_ranges_u64;
    8555             : 
    8556           1 :         offset = offset_blocks;
    8557           1 :         remaining = num_blocks;
    8558           1 :         range = &dsm_ranges[0];
    8559             : 
    8560             :         /* Fill max-size ranges until the remaining blocks fit into one range */
    8561           1 :         while (remaining > SPDK_NVME_DATASET_MANAGEMENT_RANGE_MAX_BLOCKS) {
    8562           0 :                 range->attributes.raw = 0;
    8563           0 :                 range->length = SPDK_NVME_DATASET_MANAGEMENT_RANGE_MAX_BLOCKS;
    8564           0 :                 range->starting_lba = offset;
    8565             : 
    8566           0 :                 offset += SPDK_NVME_DATASET_MANAGEMENT_RANGE_MAX_BLOCKS;
    8567           0 :                 remaining -= SPDK_NVME_DATASET_MANAGEMENT_RANGE_MAX_BLOCKS;
    8568           0 :                 range++;
    8569             :         }
    8570             : 
    8571             :         /* Final range describes the remaining blocks */
    8572           1 :         range->attributes.raw = 0;
    8573           1 :         range->length = remaining;
    8574           1 :         range->starting_lba = offset;
    8575             : 
    8576           2 :         rc = spdk_nvme_ns_cmd_dataset_management(bio->io_path->nvme_ns->ns,
    8577           1 :                         bio->io_path->qpair->qpair,
    8578             :                         SPDK_NVME_DSM_ATTR_DEALLOCATE,
    8579           1 :                         dsm_ranges, num_ranges,
    8580           1 :                         bdev_nvme_queued_done, bio);
    8581             : 
    8582           1 :         return rc;
    8583           1 : }
    8584             : 
    8585             : static int
    8586           0 : bdev_nvme_write_zeroes(struct nvme_bdev_io *bio, uint64_t offset_blocks, uint64_t num_blocks)
    8587             : {
    8588           0 :         if (num_blocks > UINT16_MAX + 1) {
    8589           0 :                 SPDK_ERRLOG("NVMe write zeroes is limited to 16-bit block count\n");
    8590           0 :                 return -EINVAL;
    8591             :         }
    8592             : 
    8593           0 :         return spdk_nvme_ns_cmd_write_zeroes(bio->io_path->nvme_ns->ns,
    8594           0 :                                              bio->io_path->qpair->qpair,
    8595           0 :                                              offset_blocks, num_blocks,
    8596           0 :                                              bdev_nvme_queued_done, bio,
    8597             :                                              0);
    8598           0 : }
    8599             : 
    8600             : static int
    8601           0 : bdev_nvme_get_zone_info(struct nvme_bdev_io *bio, uint64_t zone_id, uint32_t num_zones,
    8602             :                         struct spdk_bdev_zone_info *info)
    8603             : {
    8604           0 :         struct spdk_nvme_ns *ns = bio->io_path->nvme_ns->ns;
    8605           0 :         struct spdk_nvme_qpair *qpair = bio->io_path->qpair->qpair;
    8606           0 :         uint32_t zone_report_bufsize = spdk_nvme_ns_get_max_io_xfer_size(ns);
    8607           0 :         uint64_t zone_size = spdk_nvme_zns_ns_get_zone_size_sectors(ns);
    8608           0 :         uint64_t total_zones = spdk_nvme_zns_ns_get_num_zones(ns);
    8609             : 
    8610           0 :         if (zone_id % zone_size != 0) {
    8611           0 :                 return -EINVAL;
    8612             :         }
    8613             : 
    8614           0 :         if (num_zones > total_zones || !num_zones) {
    8615           0 :                 return -EINVAL;
    8616             :         }
    8617             : 
    8618           0 :         assert(!bio->zone_report_buf);
    8619           0 :         bio->zone_report_buf = calloc(1, zone_report_bufsize);
    8620           0 :         if (!bio->zone_report_buf) {
    8621           0 :                 return -ENOMEM;
    8622             :         }
    8623             : 
    8624           0 :         bio->handled_zones = 0;
    8625             : 
    8626           0 :         return spdk_nvme_zns_report_zones(ns, qpair, bio->zone_report_buf, zone_report_bufsize,
    8627           0 :                                           zone_id, SPDK_NVME_ZRA_LIST_ALL, true,
    8628           0 :                                           bdev_nvme_get_zone_info_done, bio);
    8629           0 : }
    8630             : 
    8631             : static int
    8632           0 : bdev_nvme_zone_management(struct nvme_bdev_io *bio, uint64_t zone_id,
    8633             :                           enum spdk_bdev_zone_action action)
    8634             : {
    8635           0 :         struct spdk_nvme_ns *ns = bio->io_path->nvme_ns->ns;
    8636           0 :         struct spdk_nvme_qpair *qpair = bio->io_path->qpair->qpair;
    8637             : 
    8638           0 :         switch (action) {
    8639             :         case SPDK_BDEV_ZONE_CLOSE:
    8640           0 :                 return spdk_nvme_zns_close_zone(ns, qpair, zone_id, false,
    8641           0 :                                                 bdev_nvme_zone_management_done, bio);
    8642             :         case SPDK_BDEV_ZONE_FINISH:
    8643           0 :                 return spdk_nvme_zns_finish_zone(ns, qpair, zone_id, false,
    8644           0 :                                                  bdev_nvme_zone_management_done, bio);
    8645             :         case SPDK_BDEV_ZONE_OPEN:
    8646           0 :                 return spdk_nvme_zns_open_zone(ns, qpair, zone_id, false,
    8647           0 :                                                bdev_nvme_zone_management_done, bio);
    8648             :         case SPDK_BDEV_ZONE_RESET:
    8649           0 :                 return spdk_nvme_zns_reset_zone(ns, qpair, zone_id, false,
    8650           0 :                                                 bdev_nvme_zone_management_done, bio);
    8651             :         case SPDK_BDEV_ZONE_OFFLINE:
    8652           0 :                 return spdk_nvme_zns_offline_zone(ns, qpair, zone_id, false,
    8653           0 :                                                   bdev_nvme_zone_management_done, bio);
    8654             :         default:
    8655           0 :                 return -EINVAL;
    8656             :         }
    8657           0 : }
    8658             : 
    8659             : static void
    8660           5 : bdev_nvme_admin_passthru(struct nvme_bdev_channel *nbdev_ch, struct nvme_bdev_io *bio,
    8661             :                          struct spdk_nvme_cmd *cmd, void *buf, size_t nbytes)
    8662             : {
    8663             :         struct nvme_io_path *io_path;
    8664             :         struct nvme_ctrlr *nvme_ctrlr;
    8665             :         uint32_t max_xfer_size;
    8666           5 :         int rc = -ENXIO;
    8667             : 
    8668             :         /* Choose the first ctrlr which is not failed. */
    8669           8 :         STAILQ_FOREACH(io_path, &nbdev_ch->io_path_list, stailq) {
    8670           7 :                 nvme_ctrlr = io_path->qpair->ctrlr;
    8671             : 
    8672             :                 /* We should skip any unavailable nvme_ctrlr rather than checking
    8673             :                  * if the return value of spdk_nvme_ctrlr_cmd_admin_raw() is -ENXIO.
    8674             :                  */
    8675           7 :                 if (!nvme_ctrlr_is_available(nvme_ctrlr)) {
    8676           3 :                         continue;
    8677             :                 }
    8678             : 
    8679           4 :                 max_xfer_size = spdk_nvme_ctrlr_get_max_xfer_size(nvme_ctrlr->ctrlr);
    8680             : 
    8681           4 :                 if (nbytes > max_xfer_size) {
    8682           0 :                         SPDK_ERRLOG("nbytes is greater than MDTS %" PRIu32 ".\n", max_xfer_size);
    8683           0 :                         rc = -EINVAL;
    8684           0 :                         goto err;
    8685             :                 }
    8686             : 
    8687           8 :                 rc = spdk_nvme_ctrlr_cmd_admin_raw(nvme_ctrlr->ctrlr, cmd, buf, (uint32_t)nbytes,
    8688           4 :                                                    bdev_nvme_admin_passthru_done, bio);
    8689           4 :                 if (rc == 0) {
    8690           4 :                         return;
    8691             :                 }
    8692           1 :         }
    8693             : 
    8694             : err:
    8695           1 :         bdev_nvme_admin_complete(bio, rc);
    8696           5 : }
    8697             : 
    8698             : static int
    8699           0 : bdev_nvme_io_passthru(struct nvme_bdev_io *bio, struct spdk_nvme_cmd *cmd,
    8700             :                       void *buf, size_t nbytes)
    8701             : {
    8702           0 :         struct spdk_nvme_ns *ns = bio->io_path->nvme_ns->ns;
    8703           0 :         struct spdk_nvme_qpair *qpair = bio->io_path->qpair->qpair;
    8704           0 :         uint32_t max_xfer_size = spdk_nvme_ns_get_max_io_xfer_size(ns);
    8705           0 :         struct spdk_nvme_ctrlr *ctrlr = spdk_nvme_ns_get_ctrlr(ns);
    8706             : 
    8707           0 :         if (nbytes > max_xfer_size) {
    8708           0 :                 SPDK_ERRLOG("nbytes is greater than MDTS %" PRIu32 ".\n", max_xfer_size);
    8709           0 :                 return -EINVAL;
    8710             :         }
    8711             : 
    8712             :         /*
    8713             :          * Each NVMe bdev is a specific namespace, and all NVMe I/O commands require a nsid,
    8714             :          * so fill it out automatically.
    8715             :          */
    8716           0 :         cmd->nsid = spdk_nvme_ns_get_id(ns);
    8717             : 
    8718           0 :         return spdk_nvme_ctrlr_cmd_io_raw(ctrlr, qpair, cmd, buf,
    8719           0 :                                           (uint32_t)nbytes, bdev_nvme_queued_done, bio);
    8720           0 : }
    8721             : 
    8722             : static int
    8723           0 : bdev_nvme_io_passthru_md(struct nvme_bdev_io *bio, struct spdk_nvme_cmd *cmd,
    8724             :                          void *buf, size_t nbytes, void *md_buf, size_t md_len)
    8725             : {
    8726           0 :         struct spdk_nvme_ns *ns = bio->io_path->nvme_ns->ns;
    8727           0 :         struct spdk_nvme_qpair *qpair = bio->io_path->qpair->qpair;
    8728           0 :         size_t nr_sectors = nbytes / spdk_nvme_ns_get_extended_sector_size(ns);
    8729           0 :         uint32_t max_xfer_size = spdk_nvme_ns_get_max_io_xfer_size(ns);
    8730           0 :         struct spdk_nvme_ctrlr *ctrlr = spdk_nvme_ns_get_ctrlr(ns);
    8731             : 
    8732           0 :         if (nbytes > max_xfer_size) {
    8733           0 :                 SPDK_ERRLOG("nbytes is greater than MDTS %" PRIu32 ".\n", max_xfer_size);
    8734           0 :                 return -EINVAL;
    8735             :         }
    8736             : 
    8737           0 :         if (md_len != nr_sectors * spdk_nvme_ns_get_md_size(ns)) {
    8738           0 :                 SPDK_ERRLOG("invalid meta data buffer size\n");
    8739           0 :                 return -EINVAL;
    8740             :         }
    8741             : 
    8742             :         /*
    8743             :          * Each NVMe bdev is a specific namespace, and all NVMe I/O commands require a nsid,
    8744             :          * so fill it out automatically.
    8745             :          */
    8746           0 :         cmd->nsid = spdk_nvme_ns_get_id(ns);
    8747             : 
    8748           0 :         return spdk_nvme_ctrlr_cmd_io_raw_with_md(ctrlr, qpair, cmd, buf,
    8749           0 :                         (uint32_t)nbytes, md_buf, bdev_nvme_queued_done, bio);
    8750           0 : }
    8751             : 
    8752             : static int
    8753           0 : bdev_nvme_iov_passthru_md(struct nvme_bdev_io *bio,
    8754             :                           struct spdk_nvme_cmd *cmd, struct iovec *iov, int iovcnt,
    8755             :                           size_t nbytes, void *md_buf, size_t md_len)
    8756             : {
    8757           0 :         struct spdk_nvme_ns *ns = bio->io_path->nvme_ns->ns;
    8758           0 :         struct spdk_nvme_qpair *qpair = bio->io_path->qpair->qpair;
    8759           0 :         size_t nr_sectors = nbytes / spdk_nvme_ns_get_extended_sector_size(ns);
    8760           0 :         uint32_t max_xfer_size = spdk_nvme_ns_get_max_io_xfer_size(ns);
    8761           0 :         struct spdk_nvme_ctrlr *ctrlr = spdk_nvme_ns_get_ctrlr(ns);
    8762             : 
    8763           0 :         bio->iovs = iov;
    8764           0 :         bio->iovcnt = iovcnt;
    8765           0 :         bio->iovpos = 0;
    8766           0 :         bio->iov_offset = 0;
    8767             : 
    8768           0 :         if (nbytes > max_xfer_size) {
    8769           0 :                 SPDK_ERRLOG("nbytes is greater than MDTS %" PRIu32 ".\n", max_xfer_size);
    8770           0 :                 return -EINVAL;
    8771             :         }
    8772             : 
    8773           0 :         if (md_len != nr_sectors * spdk_nvme_ns_get_md_size(ns)) {
    8774           0 :                 SPDK_ERRLOG("invalid meta data buffer size\n");
    8775           0 :                 return -EINVAL;
    8776             :         }
    8777             : 
    8778             :         /*
    8779             :          * Each NVMe bdev is a specific namespace, and all NVMe I/O commands
    8780             :          * require a nsid, so fill it out automatically.
    8781             :          */
    8782           0 :         cmd->nsid = spdk_nvme_ns_get_id(ns);
    8783             : 
    8784           0 :         return spdk_nvme_ctrlr_cmd_iov_raw_with_md(
    8785           0 :                        ctrlr, qpair, cmd, (uint32_t)nbytes, md_buf, bdev_nvme_queued_done, bio,
    8786             :                        bdev_nvme_queued_reset_sgl, bdev_nvme_queued_next_sge);
    8787           0 : }
    8788             : 
    8789             : static void
    8790           6 : bdev_nvme_abort(struct nvme_bdev_channel *nbdev_ch, struct nvme_bdev_io *bio,
    8791             :                 struct nvme_bdev_io *bio_to_abort)
    8792             : {
    8793             :         struct nvme_io_path *io_path;
    8794           6 :         int rc = 0;
    8795             : 
    8796           6 :         rc = bdev_nvme_abort_retry_io(nbdev_ch, bio_to_abort);
    8797           6 :         if (rc == 0) {
    8798           1 :                 bdev_nvme_admin_complete(bio, 0);
    8799           1 :                 return;
    8800             :         }
    8801             : 
    8802           5 :         io_path = bio_to_abort->io_path;
    8803           5 :         if (io_path != NULL) {
    8804           6 :                 rc = spdk_nvme_ctrlr_cmd_abort_ext(io_path->qpair->ctrlr->ctrlr,
    8805           3 :                                                    io_path->qpair->qpair,
    8806           3 :                                                    bio_to_abort,
    8807           3 :                                                    bdev_nvme_abort_done, bio);
    8808           3 :         } else {
    8809           3 :                 STAILQ_FOREACH(io_path, &nbdev_ch->io_path_list, stailq) {
    8810           4 :                         rc = spdk_nvme_ctrlr_cmd_abort_ext(io_path->qpair->ctrlr->ctrlr,
    8811             :                                                            NULL,
    8812           2 :                                                            bio_to_abort,
    8813           2 :                                                            bdev_nvme_abort_done, bio);
    8814             : 
    8815           2 :                         if (rc != -ENOENT) {
    8816           1 :                                 break;
    8817             :                         }
    8818           1 :                 }
    8819             :         }
    8820             : 
    8821           5 :         if (rc != 0) {
    8822             :                 /* If no command was found or there was any error, complete the abort
    8823             :                  * request with failure.
    8824             :                  */
    8825           2 :                 bdev_nvme_admin_complete(bio, rc);
    8826           2 :         }
    8827           6 : }
    8828             : 
    8829             : static int
    8830           0 : bdev_nvme_copy(struct nvme_bdev_io *bio, uint64_t dst_offset_blocks, uint64_t src_offset_blocks,
    8831             :                uint64_t num_blocks)
    8832             : {
    8833           0 :         struct spdk_nvme_scc_source_range range = {
    8834           0 :                 .slba = src_offset_blocks,
    8835           0 :                 .nlb = num_blocks - 1
    8836             :         };
    8837             : 
    8838           0 :         return spdk_nvme_ns_cmd_copy(bio->io_path->nvme_ns->ns,
    8839           0 :                                      bio->io_path->qpair->qpair,
    8840           0 :                                      &range, 1, dst_offset_blocks,
    8841           0 :                                      bdev_nvme_queued_done, bio);
    8842             : }
    8843             : 
    8844             : static void
    8845           0 : bdev_nvme_opts_config_json(struct spdk_json_write_ctx *w)
    8846             : {
    8847             :         const char *action;
    8848             :         uint32_t i;
    8849             : 
    8850           0 :         if (g_opts.action_on_timeout == SPDK_BDEV_NVME_TIMEOUT_ACTION_RESET) {
    8851           0 :                 action = "reset";
    8852           0 :         } else if (g_opts.action_on_timeout == SPDK_BDEV_NVME_TIMEOUT_ACTION_ABORT) {
    8853           0 :                 action = "abort";
    8854           0 :         } else {
    8855           0 :                 action = "none";
    8856             :         }
    8857             : 
    8858           0 :         spdk_json_write_object_begin(w);
    8859             : 
    8860           0 :         spdk_json_write_named_string(w, "method", "bdev_nvme_set_options");
    8861             : 
    8862           0 :         spdk_json_write_named_object_begin(w, "params");
    8863           0 :         spdk_json_write_named_string(w, "action_on_timeout", action);
    8864           0 :         spdk_json_write_named_uint64(w, "timeout_us", g_opts.timeout_us);
    8865           0 :         spdk_json_write_named_uint64(w, "timeout_admin_us", g_opts.timeout_admin_us);
    8866           0 :         spdk_json_write_named_uint32(w, "keep_alive_timeout_ms", g_opts.keep_alive_timeout_ms);
    8867           0 :         spdk_json_write_named_uint32(w, "arbitration_burst", g_opts.arbitration_burst);
    8868           0 :         spdk_json_write_named_uint32(w, "low_priority_weight", g_opts.low_priority_weight);
    8869           0 :         spdk_json_write_named_uint32(w, "medium_priority_weight", g_opts.medium_priority_weight);
    8870           0 :         spdk_json_write_named_uint32(w, "high_priority_weight", g_opts.high_priority_weight);
    8871           0 :         spdk_json_write_named_uint64(w, "nvme_adminq_poll_period_us", g_opts.nvme_adminq_poll_period_us);
    8872           0 :         spdk_json_write_named_uint64(w, "nvme_ioq_poll_period_us", g_opts.nvme_ioq_poll_period_us);
    8873           0 :         spdk_json_write_named_uint32(w, "io_queue_requests", g_opts.io_queue_requests);
    8874           0 :         spdk_json_write_named_bool(w, "delay_cmd_submit", g_opts.delay_cmd_submit);
    8875           0 :         spdk_json_write_named_uint32(w, "transport_retry_count", g_opts.transport_retry_count);
    8876           0 :         spdk_json_write_named_int32(w, "bdev_retry_count", g_opts.bdev_retry_count);
    8877           0 :         spdk_json_write_named_uint8(w, "transport_ack_timeout", g_opts.transport_ack_timeout);
    8878           0 :         spdk_json_write_named_int32(w, "ctrlr_loss_timeout_sec", g_opts.ctrlr_loss_timeout_sec);
    8879           0 :         spdk_json_write_named_uint32(w, "reconnect_delay_sec", g_opts.reconnect_delay_sec);
    8880           0 :         spdk_json_write_named_uint32(w, "fast_io_fail_timeout_sec", g_opts.fast_io_fail_timeout_sec);
    8881           0 :         spdk_json_write_named_bool(w, "disable_auto_failback", g_opts.disable_auto_failback);
    8882           0 :         spdk_json_write_named_bool(w, "generate_uuids", g_opts.generate_uuids);
    8883           0 :         spdk_json_write_named_uint8(w, "transport_tos", g_opts.transport_tos);
    8884           0 :         spdk_json_write_named_bool(w, "nvme_error_stat", g_opts.nvme_error_stat);
    8885           0 :         spdk_json_write_named_uint32(w, "rdma_srq_size", g_opts.rdma_srq_size);
    8886           0 :         spdk_json_write_named_bool(w, "io_path_stat", g_opts.io_path_stat);
    8887           0 :         spdk_json_write_named_bool(w, "allow_accel_sequence", g_opts.allow_accel_sequence);
    8888           0 :         spdk_json_write_named_uint32(w, "rdma_max_cq_size", g_opts.rdma_max_cq_size);
    8889           0 :         spdk_json_write_named_uint16(w, "rdma_cm_event_timeout_ms", g_opts.rdma_cm_event_timeout_ms);
    8890           0 :         spdk_json_write_named_array_begin(w, "dhchap_digests");
    8891           0 :         for (i = 0; i < 32; ++i) {
    8892           0 :                 if (g_opts.dhchap_digests & SPDK_BIT(i)) {
    8893           0 :                         spdk_json_write_string(w, spdk_nvme_dhchap_get_digest_name(i));
    8894           0 :                 }
    8895           0 :         }
    8896           0 :         spdk_json_write_array_end(w);
    8897           0 :         spdk_json_write_named_array_begin(w, "dhchap_dhgroups");
    8898           0 :         for (i = 0; i < 32; ++i) {
    8899           0 :                 if (g_opts.dhchap_dhgroups & SPDK_BIT(i)) {
    8900           0 :                         spdk_json_write_string(w, spdk_nvme_dhchap_get_dhgroup_name(i));
    8901           0 :                 }
    8902           0 :         }
    8903             : 
    8904           0 :         spdk_json_write_array_end(w);
    8905           0 :         spdk_json_write_named_bool(w, "rdma_umr_per_io", g_opts.rdma_umr_per_io);
    8906           0 :         spdk_json_write_object_end(w);
    8907             : 
    8908           0 :         spdk_json_write_object_end(w);
    8909           0 : }
    8910             : 
    8911             : static void
    8912           0 : bdev_nvme_discovery_config_json(struct spdk_json_write_ctx *w, struct discovery_ctx *ctx)
    8913             : {
    8914             :         struct spdk_nvme_transport_id trid;
    8915             : 
    8916           0 :         spdk_json_write_object_begin(w);
    8917             : 
    8918           0 :         spdk_json_write_named_string(w, "method", "bdev_nvme_start_discovery");
    8919             : 
    8920           0 :         spdk_json_write_named_object_begin(w, "params");
    8921           0 :         spdk_json_write_named_string(w, "name", ctx->name);
    8922           0 :         spdk_json_write_named_string(w, "hostnqn", ctx->hostnqn);
    8923             : 
    8924           0 :         trid = ctx->trid;
    8925           0 :         memset(trid.subnqn, 0, sizeof(trid.subnqn));
    8926           0 :         nvme_bdev_dump_trid_json(&trid, w);
    8927             : 
    8928           0 :         spdk_json_write_named_bool(w, "wait_for_attach", ctx->wait_for_attach);
    8929           0 :         spdk_json_write_named_int32(w, "ctrlr_loss_timeout_sec", ctx->bdev_opts.ctrlr_loss_timeout_sec);
    8930           0 :         spdk_json_write_named_uint32(w, "reconnect_delay_sec", ctx->bdev_opts.reconnect_delay_sec);
    8931           0 :         spdk_json_write_named_uint32(w, "fast_io_fail_timeout_sec",
    8932           0 :                                      ctx->bdev_opts.fast_io_fail_timeout_sec);
    8933           0 :         spdk_json_write_object_end(w);
    8934             : 
    8935           0 :         spdk_json_write_object_end(w);
    8936           0 : }
    8937             : 
    8938             : #ifdef SPDK_CONFIG_NVME_CUSE
    8939             : static void
    8940             : nvme_ctrlr_cuse_config_json(struct spdk_json_write_ctx *w,
    8941             :                             struct nvme_ctrlr *nvme_ctrlr)
    8942             : {
    8943             :         size_t cuse_name_size = 128;
    8944             :         char cuse_name[cuse_name_size];
    8945             : 
    8946             :         if (spdk_nvme_cuse_get_ctrlr_name(nvme_ctrlr->ctrlr,
    8947             :                                           cuse_name, &cuse_name_size) != 0) {
    8948             :                 return;
    8949             :         }
    8950             : 
    8951             :         spdk_json_write_object_begin(w);
    8952             : 
    8953             :         spdk_json_write_named_string(w, "method", "bdev_nvme_cuse_register");
    8954             : 
    8955             :         spdk_json_write_named_object_begin(w, "params");
    8956             :         spdk_json_write_named_string(w, "name", nvme_ctrlr->nbdev_ctrlr->name);
    8957             :         spdk_json_write_object_end(w);
    8958             : 
    8959             :         spdk_json_write_object_end(w);
    8960             : }
    8961             : #endif
    8962             : 
    8963             : static void
    8964           0 : nvme_ctrlr_config_json(struct spdk_json_write_ctx *w,
    8965             :                        struct nvme_ctrlr *nvme_ctrlr,
    8966             :                        struct nvme_path_id *path_id)
    8967             : {
    8968             :         struct spdk_nvme_transport_id   *trid;
    8969             :         const struct spdk_nvme_ctrlr_opts *opts;
    8970             : 
    8971           0 :         if (nvme_ctrlr->opts.from_discovery_service) {
    8972             :                 /* Do not emit an RPC for this - it will be implicitly
    8973             :                  * covered by a separate bdev_nvme_start_discovery or
    8974             :                  * bdev_nvme_start_mdns_discovery RPC.
    8975             :                  */
    8976           0 :                 return;
    8977             :         }
    8978             : 
    8979           0 :         trid = &path_id->trid;
    8980             : 
    8981           0 :         spdk_json_write_object_begin(w);
    8982             : 
    8983           0 :         spdk_json_write_named_string(w, "method", "bdev_nvme_attach_controller");
    8984             : 
    8985           0 :         spdk_json_write_named_object_begin(w, "params");
    8986           0 :         spdk_json_write_named_string(w, "name", nvme_ctrlr->nbdev_ctrlr->name);
    8987           0 :         nvme_bdev_dump_trid_json(trid, w);
    8988           0 :         spdk_json_write_named_bool(w, "prchk_reftag",
    8989           0 :                                    (nvme_ctrlr->opts.prchk_flags & SPDK_NVME_IO_FLAGS_PRCHK_REFTAG) != 0);
    8990           0 :         spdk_json_write_named_bool(w, "prchk_guard",
    8991           0 :                                    (nvme_ctrlr->opts.prchk_flags & SPDK_NVME_IO_FLAGS_PRCHK_GUARD) != 0);
    8992           0 :         spdk_json_write_named_int32(w, "ctrlr_loss_timeout_sec", nvme_ctrlr->opts.ctrlr_loss_timeout_sec);
    8993           0 :         spdk_json_write_named_uint32(w, "reconnect_delay_sec", nvme_ctrlr->opts.reconnect_delay_sec);
    8994           0 :         spdk_json_write_named_uint32(w, "fast_io_fail_timeout_sec",
    8995           0 :                                      nvme_ctrlr->opts.fast_io_fail_timeout_sec);
    8996           0 :         if (nvme_ctrlr->psk != NULL) {
    8997           0 :                 spdk_json_write_named_string(w, "psk", spdk_key_get_name(nvme_ctrlr->psk));
    8998           0 :         }
    8999           0 :         if (nvme_ctrlr->dhchap_key != NULL) {
    9000           0 :                 spdk_json_write_named_string(w, "dhchap_key",
    9001           0 :                                              spdk_key_get_name(nvme_ctrlr->dhchap_key));
    9002           0 :         }
    9003           0 :         if (nvme_ctrlr->dhchap_ctrlr_key != NULL) {
    9004           0 :                 spdk_json_write_named_string(w, "dhchap_ctrlr_key",
    9005           0 :                                              spdk_key_get_name(nvme_ctrlr->dhchap_ctrlr_key));
    9006           0 :         }
    9007           0 :         opts = spdk_nvme_ctrlr_get_opts(nvme_ctrlr->ctrlr);
    9008           0 :         spdk_json_write_named_string(w, "hostnqn", opts->hostnqn);
    9009           0 :         spdk_json_write_named_bool(w, "hdgst", opts->header_digest);
    9010           0 :         spdk_json_write_named_bool(w, "ddgst", opts->data_digest);
    9011           0 :         if (opts->src_addr[0] != '\0') {
    9012           0 :                 spdk_json_write_named_string(w, "hostaddr", opts->src_addr);
    9013           0 :         }
    9014           0 :         if (opts->src_svcid[0] != '\0') {
    9015           0 :                 spdk_json_write_named_string(w, "hostsvcid", opts->src_svcid);
    9016           0 :         }
    9017             : 
    9018           0 :         if (nvme_ctrlr->opts.multipath) {
    9019           0 :                 spdk_json_write_named_string(w, "multipath", "multipath");
    9020           0 :         }
    9021           0 :         spdk_json_write_object_end(w);
    9022             : 
    9023           0 :         spdk_json_write_object_end(w);
    9024           0 : }
    9025             : 
    9026             : static void
    9027           0 : bdev_nvme_hotplug_config_json(struct spdk_json_write_ctx *w)
    9028             : {
    9029           0 :         spdk_json_write_object_begin(w);
    9030           0 :         spdk_json_write_named_string(w, "method", "bdev_nvme_set_hotplug");
    9031             : 
    9032           0 :         spdk_json_write_named_object_begin(w, "params");
    9033           0 :         spdk_json_write_named_uint64(w, "period_us", g_nvme_hotplug_poll_period_us);
    9034           0 :         spdk_json_write_named_bool(w, "enable", g_nvme_hotplug_enabled);
    9035           0 :         spdk_json_write_object_end(w);
    9036             : 
    9037           0 :         spdk_json_write_object_end(w);
    9038           0 : }
    9039             : 
    9040             : static int
    9041           0 : bdev_nvme_config_json(struct spdk_json_write_ctx *w)
    9042             : {
    9043             :         struct nvme_bdev_ctrlr  *nbdev_ctrlr;
    9044             :         struct nvme_ctrlr       *nvme_ctrlr;
    9045             :         struct discovery_ctx    *ctx;
    9046             :         struct nvme_path_id     *path_id;
    9047             : 
    9048           0 :         bdev_nvme_opts_config_json(w);
    9049             : 
    9050           0 :         pthread_mutex_lock(&g_bdev_nvme_mutex);
    9051             : 
    9052           0 :         TAILQ_FOREACH(nbdev_ctrlr, &g_nvme_bdev_ctrlrs, tailq) {
    9053           0 :                 TAILQ_FOREACH(nvme_ctrlr, &nbdev_ctrlr->ctrlrs, tailq) {
    9054           0 :                         path_id = nvme_ctrlr->active_path_id;
    9055           0 :                         assert(path_id == TAILQ_FIRST(&nvme_ctrlr->trids));
    9056           0 :                         nvme_ctrlr_config_json(w, nvme_ctrlr, path_id);
    9057             : 
    9058           0 :                         path_id = TAILQ_NEXT(path_id, link);
    9059           0 :                         while (path_id != NULL) {
    9060           0 :                                 nvme_ctrlr_config_json(w, nvme_ctrlr, path_id);
    9061           0 :                                 path_id = TAILQ_NEXT(path_id, link);
    9062             :                         }
    9063             : 
    9064             : #ifdef SPDK_CONFIG_NVME_CUSE
    9065             :                         nvme_ctrlr_cuse_config_json(w, nvme_ctrlr);
    9066             : #endif
    9067           0 :                 }
    9068           0 :         }
    9069             : 
    9070           0 :         TAILQ_FOREACH(ctx, &g_discovery_ctxs, tailq) {
    9071           0 :                 if (!ctx->from_mdns_discovery_service) {
    9072           0 :                         bdev_nvme_discovery_config_json(w, ctx);
    9073           0 :                 }
    9074           0 :         }
    9075             : 
    9076           0 :         bdev_nvme_mdns_discovery_config_json(w);
    9077             : 
    9078             :         /* Dump as last parameter to give all NVMe bdevs chance to be constructed
    9079             :          * before enabling hotplug poller.
    9080             :          */
    9081           0 :         bdev_nvme_hotplug_config_json(w);
    9082             : 
    9083           0 :         pthread_mutex_unlock(&g_bdev_nvme_mutex);
    9084           0 :         return 0;
    9085             : }
    9086             : 
    9087             : struct spdk_nvme_ctrlr *
    9088           1 : bdev_nvme_get_ctrlr(struct spdk_bdev *bdev)
    9089             : {
    9090             :         struct nvme_bdev *nbdev;
    9091             :         struct nvme_ns *nvme_ns;
    9092             : 
    9093           1 :         if (!bdev || bdev->module != &nvme_if) {
    9094           0 :                 return NULL;
    9095             :         }
    9096             : 
    9097           1 :         nbdev = SPDK_CONTAINEROF(bdev, struct nvme_bdev, disk);
    9098           1 :         nvme_ns = TAILQ_FIRST(&nbdev->nvme_ns_list);
    9099           1 :         assert(nvme_ns != NULL);
    9100             : 
    9101           1 :         return nvme_ns->ctrlr->ctrlr;
    9102           1 : }
    9103             : 
    9104             : static bool
    9105          12 : nvme_io_path_is_current(struct nvme_io_path *io_path)
    9106             : {
    9107             :         const struct nvme_bdev_channel *nbdev_ch;
    9108             :         bool current;
    9109             : 
    9110          12 :         if (!nvme_io_path_is_available(io_path)) {
    9111           4 :                 return false;
    9112             :         }
    9113             : 
    9114           8 :         nbdev_ch = io_path->nbdev_ch;
    9115           8 :         if (nbdev_ch == NULL) {
    9116           1 :                 current = false;
    9117           8 :         } else if (nbdev_ch->mp_policy == BDEV_NVME_MP_POLICY_ACTIVE_ACTIVE) {
    9118           3 :                 struct nvme_io_path *optimized_io_path = NULL;
    9119             : 
    9120           6 :                 STAILQ_FOREACH(optimized_io_path, &nbdev_ch->io_path_list, stailq) {
    9121           5 :                         if (optimized_io_path->nvme_ns->ana_state == SPDK_NVME_ANA_OPTIMIZED_STATE) {
    9122           2 :                                 break;
    9123             :                         }
    9124           3 :                 }
    9125             : 
    9126             :                 /* A non-optimized path is only current if there are no optimized paths. */
    9127           3 :                 current = (io_path->nvme_ns->ana_state == SPDK_NVME_ANA_OPTIMIZED_STATE) ||
    9128           2 :                           (optimized_io_path == NULL);
    9129           3 :         } else {
    9130           4 :                 if (nbdev_ch->current_io_path) {
    9131           1 :                         current = (io_path == nbdev_ch->current_io_path);
    9132           1 :                 } else {
    9133             :                         struct nvme_io_path *first_path;
    9134             : 
    9135             :                         /* We arrived here as there are no optimized paths for active-passive
    9136             :                          * mode. Check if this io_path is the first one available on the list.
    9137             :                          */
    9138           3 :                         current = false;
    9139           3 :                         STAILQ_FOREACH(first_path, &nbdev_ch->io_path_list, stailq) {
    9140           3 :                                 if (nvme_io_path_is_available(first_path)) {
    9141           3 :                                         current = (io_path == first_path);
    9142           3 :                                         break;
    9143             :                                 }
    9144           0 :                         }
    9145             :                 }
    9146             :         }
    9147             : 
    9148           8 :         return current;
    9149          12 : }
    9150             : 
    9151             : static struct nvme_ctrlr *
    9152           0 : bdev_nvme_next_ctrlr_unsafe(struct nvme_bdev_ctrlr *nbdev_ctrlr, struct nvme_ctrlr *prev)
    9153             : {
    9154             :         struct nvme_ctrlr *next;
    9155             : 
    9156             :         /* Must be called under g_bdev_nvme_mutex */
    9157           0 :         next = prev != NULL ? TAILQ_NEXT(prev, tailq) : TAILQ_FIRST(&nbdev_ctrlr->ctrlrs);
    9158           0 :         while (next != NULL) {
    9159             :                 /* ref can be 0 when the ctrlr was released, but hasn't been detached yet */
    9160           0 :                 pthread_mutex_lock(&next->mutex);
    9161           0 :                 if (next->ref > 0) {
    9162           0 :                         next->ref++;
    9163           0 :                         pthread_mutex_unlock(&next->mutex);
    9164           0 :                         return next;
    9165             :                 }
    9166             : 
    9167           0 :                 pthread_mutex_unlock(&next->mutex);
    9168           0 :                 next = TAILQ_NEXT(next, tailq);
    9169             :         }
    9170             : 
    9171           0 :         return NULL;
    9172           0 : }
    9173             : 
    9174             : struct bdev_nvme_set_keys_ctx {
    9175             :         struct nvme_ctrlr       *nctrlr;
    9176             :         struct spdk_key         *dhchap_key;
    9177             :         struct spdk_key         *dhchap_ctrlr_key;
    9178             :         struct spdk_thread      *thread;
    9179             :         bdev_nvme_set_keys_cb   cb_fn;
    9180             :         void                    *cb_ctx;
    9181             :         int                     status;
    9182             : };
    9183             : 
    9184             : static void
    9185           0 : bdev_nvme_free_set_keys_ctx(struct bdev_nvme_set_keys_ctx *ctx)
    9186             : {
    9187           0 :         if (ctx == NULL) {
    9188           0 :                 return;
    9189             :         }
    9190             : 
    9191           0 :         spdk_keyring_put_key(ctx->dhchap_key);
    9192           0 :         spdk_keyring_put_key(ctx->dhchap_ctrlr_key);
    9193           0 :         free(ctx);
    9194           0 : }
    9195             : 
    9196             : static void
    9197           0 : _bdev_nvme_set_keys_done(void *_ctx)
    9198             : {
    9199           0 :         struct bdev_nvme_set_keys_ctx *ctx = _ctx;
    9200             : 
    9201           0 :         ctx->cb_fn(ctx->cb_ctx, ctx->status);
    9202             : 
    9203           0 :         if (ctx->nctrlr != NULL) {
    9204           0 :                 nvme_ctrlr_put_ref(ctx->nctrlr);
    9205           0 :         }
    9206           0 :         bdev_nvme_free_set_keys_ctx(ctx);
    9207           0 : }
    9208             : 
    9209             : static void
    9210           0 : bdev_nvme_set_keys_done(struct bdev_nvme_set_keys_ctx *ctx, int status)
    9211             : {
    9212           0 :         ctx->status = status;
    9213           0 :         spdk_thread_exec_msg(ctx->thread, _bdev_nvme_set_keys_done, ctx);
    9214           0 : }
    9215             : 
    9216             : static void bdev_nvme_authenticate_ctrlr(struct bdev_nvme_set_keys_ctx *ctx);
    9217             : 
    9218             : static void
    9219           0 : bdev_nvme_authenticate_ctrlr_continue(struct bdev_nvme_set_keys_ctx *ctx)
    9220             : {
    9221             :         struct nvme_ctrlr *next;
    9222             : 
    9223           0 :         pthread_mutex_lock(&g_bdev_nvme_mutex);
    9224           0 :         next = bdev_nvme_next_ctrlr_unsafe(NULL, ctx->nctrlr);
    9225           0 :         pthread_mutex_unlock(&g_bdev_nvme_mutex);
    9226             : 
    9227           0 :         nvme_ctrlr_put_ref(ctx->nctrlr);
    9228           0 :         ctx->nctrlr = next;
    9229             : 
    9230           0 :         if (next == NULL) {
    9231           0 :                 bdev_nvme_set_keys_done(ctx, 0);
    9232           0 :         } else {
    9233           0 :                 bdev_nvme_authenticate_ctrlr(ctx);
    9234             :         }
    9235           0 : }
    9236             : 
    9237             : static void
    9238           0 : bdev_nvme_authenticate_qpairs_done(struct spdk_io_channel_iter *i, int status)
    9239             : {
    9240           0 :         struct bdev_nvme_set_keys_ctx *ctx = spdk_io_channel_iter_get_ctx(i);
    9241             : 
    9242           0 :         if (status != 0) {
    9243           0 :                 bdev_nvme_set_keys_done(ctx, status);
    9244           0 :                 return;
    9245             :         }
    9246           0 :         bdev_nvme_authenticate_ctrlr_continue(ctx);
    9247           0 : }
    9248             : 
    9249             : static void
    9250           0 : bdev_nvme_authenticate_qpair_done(void *ctx, int status)
    9251             : {
    9252           0 :         spdk_for_each_channel_continue(ctx, status);
    9253           0 : }
    9254             : 
    9255             : static void
    9256           0 : bdev_nvme_authenticate_qpair(struct spdk_io_channel_iter *i)
    9257             : {
    9258           0 :         struct spdk_io_channel *ch = spdk_io_channel_iter_get_channel(i);
    9259           0 :         struct nvme_ctrlr_channel *ctrlr_ch = spdk_io_channel_get_ctx(ch);
    9260           0 :         struct nvme_qpair *qpair = ctrlr_ch->qpair;
    9261             :         int rc;
    9262             : 
    9263           0 :         if (!nvme_qpair_is_connected(qpair)) {
    9264           0 :                 spdk_for_each_channel_continue(i, 0);
    9265           0 :                 return;
    9266             :         }
    9267             : 
    9268           0 :         rc = spdk_nvme_qpair_authenticate(qpair->qpair, bdev_nvme_authenticate_qpair_done, i);
    9269           0 :         if (rc != 0) {
    9270           0 :                 spdk_for_each_channel_continue(i, rc);
    9271           0 :         }
    9272           0 : }
    9273             : 
    9274             : static void
    9275           0 : bdev_nvme_authenticate_ctrlr_done(void *_ctx, int status)
    9276             : {
    9277           0 :         struct bdev_nvme_set_keys_ctx *ctx = _ctx;
    9278             : 
    9279           0 :         if (status != 0) {
    9280           0 :                 bdev_nvme_set_keys_done(ctx, status);
    9281           0 :                 return;
    9282             :         }
    9283             : 
    9284           0 :         spdk_for_each_channel(ctx->nctrlr, bdev_nvme_authenticate_qpair, ctx,
    9285             :                               bdev_nvme_authenticate_qpairs_done);
    9286           0 : }
    9287             : 
    9288             : static void
    9289           0 : bdev_nvme_authenticate_ctrlr(struct bdev_nvme_set_keys_ctx *ctx)
    9290             : {
    9291           0 :         struct spdk_nvme_ctrlr_key_opts opts = {};
    9292           0 :         struct nvme_ctrlr *nctrlr = ctx->nctrlr;
    9293             :         int rc;
    9294             : 
    9295           0 :         opts.size = SPDK_SIZEOF(&opts, dhchap_ctrlr_key);
    9296           0 :         opts.dhchap_key = ctx->dhchap_key;
    9297           0 :         opts.dhchap_ctrlr_key = ctx->dhchap_ctrlr_key;
    9298           0 :         rc = spdk_nvme_ctrlr_set_keys(nctrlr->ctrlr, &opts);
    9299           0 :         if (rc != 0) {
    9300           0 :                 bdev_nvme_set_keys_done(ctx, rc);
    9301           0 :                 return;
    9302             :         }
    9303             : 
    9304           0 :         if (ctx->dhchap_key != NULL) {
    9305           0 :                 rc = spdk_nvme_ctrlr_authenticate(nctrlr->ctrlr,
    9306           0 :                                                   bdev_nvme_authenticate_ctrlr_done, ctx);
    9307           0 :                 if (rc != 0) {
    9308           0 :                         bdev_nvme_set_keys_done(ctx, rc);
    9309           0 :                 }
    9310           0 :         } else {
    9311           0 :                 bdev_nvme_authenticate_ctrlr_continue(ctx);
    9312             :         }
    9313           0 : }
    9314             : 
    9315             : int
    9316           0 : bdev_nvme_set_keys(const char *name, const char *dhchap_key, const char *dhchap_ctrlr_key,
    9317             :                    bdev_nvme_set_keys_cb cb_fn, void *cb_ctx)
    9318             : {
    9319             :         struct bdev_nvme_set_keys_ctx *ctx;
    9320             :         struct nvme_bdev_ctrlr *nbdev_ctrlr;
    9321             :         struct nvme_ctrlr *nctrlr;
    9322             : 
    9323           0 :         ctx = calloc(1, sizeof(*ctx));
    9324           0 :         if (ctx == NULL) {
    9325           0 :                 return -ENOMEM;
    9326             :         }
    9327             : 
    9328           0 :         if (dhchap_key != NULL) {
    9329           0 :                 ctx->dhchap_key = spdk_keyring_get_key(dhchap_key);
    9330           0 :                 if (ctx->dhchap_key == NULL) {
    9331           0 :                         SPDK_ERRLOG("Could not find key %s for bdev %s\n", dhchap_key, name);
    9332           0 :                         bdev_nvme_free_set_keys_ctx(ctx);
    9333           0 :                         return -ENOKEY;
    9334             :                 }
    9335           0 :         }
    9336           0 :         if (dhchap_ctrlr_key != NULL) {
    9337           0 :                 ctx->dhchap_ctrlr_key = spdk_keyring_get_key(dhchap_ctrlr_key);
    9338           0 :                 if (ctx->dhchap_ctrlr_key == NULL) {
    9339           0 :                         SPDK_ERRLOG("Could not find key %s for bdev %s\n", dhchap_ctrlr_key, name);
    9340           0 :                         bdev_nvme_free_set_keys_ctx(ctx);
    9341           0 :                         return -ENOKEY;
    9342             :                 }
    9343           0 :         }
    9344             : 
    9345           0 :         pthread_mutex_lock(&g_bdev_nvme_mutex);
    9346           0 :         nbdev_ctrlr = nvme_bdev_ctrlr_get_by_name(name);
    9347           0 :         if (nbdev_ctrlr == NULL) {
    9348           0 :                 SPDK_ERRLOG("Could not find bdev_ctrlr %s\n", name);
    9349           0 :                 pthread_mutex_unlock(&g_bdev_nvme_mutex);
    9350           0 :                 bdev_nvme_free_set_keys_ctx(ctx);
    9351           0 :                 return -ENODEV;
    9352             :         }
    9353           0 :         nctrlr = bdev_nvme_next_ctrlr_unsafe(nbdev_ctrlr, NULL);
    9354           0 :         if (nctrlr == NULL) {
    9355           0 :                 SPDK_ERRLOG("Could not find any nvme_ctrlrs on bdev_ctrlr %s\n", name);
    9356           0 :                 pthread_mutex_unlock(&g_bdev_nvme_mutex);
    9357           0 :                 bdev_nvme_free_set_keys_ctx(ctx);
    9358           0 :                 return -ENODEV;
    9359             :         }
    9360           0 :         pthread_mutex_unlock(&g_bdev_nvme_mutex);
    9361             : 
    9362           0 :         ctx->nctrlr = nctrlr;
    9363           0 :         ctx->cb_fn = cb_fn;
    9364           0 :         ctx->cb_ctx = cb_ctx;
    9365           0 :         ctx->thread = spdk_get_thread();
    9366             : 
    9367           0 :         bdev_nvme_authenticate_ctrlr(ctx);
    9368             : 
    9369           0 :         return 0;
    9370           0 : }
    9371             : 
    9372             : void
    9373           0 : nvme_io_path_info_json(struct spdk_json_write_ctx *w, struct nvme_io_path *io_path)
    9374             : {
    9375           0 :         struct nvme_ns *nvme_ns = io_path->nvme_ns;
    9376           0 :         struct nvme_ctrlr *nvme_ctrlr = io_path->qpair->ctrlr;
    9377             :         const struct spdk_nvme_ctrlr_data *cdata;
    9378             :         const struct spdk_nvme_transport_id *trid;
    9379             :         const char *adrfam_str;
    9380             : 
    9381           0 :         spdk_json_write_object_begin(w);
    9382             : 
    9383           0 :         spdk_json_write_named_string(w, "bdev_name", nvme_ns->bdev->disk.name);
    9384             : 
    9385           0 :         cdata = spdk_nvme_ctrlr_get_data(nvme_ctrlr->ctrlr);
    9386           0 :         trid = spdk_nvme_ctrlr_get_transport_id(nvme_ctrlr->ctrlr);
    9387             : 
    9388           0 :         spdk_json_write_named_uint32(w, "cntlid", cdata->cntlid);
    9389           0 :         spdk_json_write_named_bool(w, "current", nvme_io_path_is_current(io_path));
    9390           0 :         spdk_json_write_named_bool(w, "connected", nvme_qpair_is_connected(io_path->qpair));
    9391           0 :         spdk_json_write_named_bool(w, "accessible", nvme_ns_is_accessible(nvme_ns));
    9392             : 
    9393           0 :         spdk_json_write_named_object_begin(w, "transport");
    9394           0 :         spdk_json_write_named_string(w, "trtype", trid->trstring);
    9395           0 :         spdk_json_write_named_string(w, "traddr", trid->traddr);
    9396           0 :         if (trid->trsvcid[0] != '\0') {
    9397           0 :                 spdk_json_write_named_string(w, "trsvcid", trid->trsvcid);
    9398           0 :         }
    9399           0 :         adrfam_str = spdk_nvme_transport_id_adrfam_str(trid->adrfam);
    9400           0 :         if (adrfam_str) {
    9401           0 :                 spdk_json_write_named_string(w, "adrfam", adrfam_str);
    9402           0 :         }
    9403           0 :         spdk_json_write_object_end(w);
    9404             : 
    9405           0 :         spdk_json_write_object_end(w);
    9406           0 : }
    9407             : 
    9408             : void
    9409           0 : bdev_nvme_get_discovery_info(struct spdk_json_write_ctx *w)
    9410             : {
    9411             :         struct discovery_ctx *ctx;
    9412             :         struct discovery_entry_ctx *entry_ctx;
    9413             : 
    9414           0 :         spdk_json_write_array_begin(w);
    9415           0 :         TAILQ_FOREACH(ctx, &g_discovery_ctxs, tailq) {
    9416           0 :                 spdk_json_write_object_begin(w);
    9417           0 :                 spdk_json_write_named_string(w, "name", ctx->name);
    9418             : 
    9419           0 :                 spdk_json_write_named_object_begin(w, "trid");
    9420           0 :                 nvme_bdev_dump_trid_json(&ctx->trid, w);
    9421           0 :                 spdk_json_write_object_end(w);
    9422             : 
    9423           0 :                 spdk_json_write_named_array_begin(w, "referrals");
    9424           0 :                 TAILQ_FOREACH(entry_ctx, &ctx->discovery_entry_ctxs, tailq) {
    9425           0 :                         spdk_json_write_object_begin(w);
    9426           0 :                         spdk_json_write_named_object_begin(w, "trid");
    9427           0 :                         nvme_bdev_dump_trid_json(&entry_ctx->trid, w);
    9428           0 :                         spdk_json_write_object_end(w);
    9429           0 :                         spdk_json_write_object_end(w);
    9430           0 :                 }
    9431           0 :                 spdk_json_write_array_end(w);
    9432             : 
    9433           0 :                 spdk_json_write_object_end(w);
    9434           0 :         }
    9435           0 :         spdk_json_write_array_end(w);
    9436           0 : }
    9437             : 
    9438           1 : SPDK_LOG_REGISTER_COMPONENT(bdev_nvme)
    9439             : 
    9440             : static void
    9441           0 : bdev_nvme_trace(void)
    9442             : {
    9443           0 :         struct spdk_trace_tpoint_opts opts[] = {
    9444             :                 {
    9445             :                         "BDEV_NVME_IO_START", TRACE_BDEV_NVME_IO_START,
    9446             :                         OWNER_TYPE_NONE, OBJECT_BDEV_NVME_IO, 1,
    9447             :                         {{ "ctx", SPDK_TRACE_ARG_TYPE_PTR, 8 }}
    9448             :                 },
    9449             :                 {
    9450             :                         "BDEV_NVME_IO_DONE", TRACE_BDEV_NVME_IO_DONE,
    9451             :                         OWNER_TYPE_NONE, OBJECT_BDEV_NVME_IO, 0,
    9452             :                         {{ "ctx", SPDK_TRACE_ARG_TYPE_PTR, 8 }}
    9453             :                 }
    9454             :         };
    9455             : 
    9456             : 
    9457           0 :         spdk_trace_register_object(OBJECT_BDEV_NVME_IO, 'N');
    9458           0 :         spdk_trace_register_description_ext(opts, SPDK_COUNTOF(opts));
    9459           0 :         spdk_trace_tpoint_register_relation(TRACE_NVME_PCIE_SUBMIT, OBJECT_BDEV_NVME_IO, 0);
    9460           0 :         spdk_trace_tpoint_register_relation(TRACE_NVME_TCP_SUBMIT, OBJECT_BDEV_NVME_IO, 0);
    9461           0 :         spdk_trace_tpoint_register_relation(TRACE_NVME_PCIE_COMPLETE, OBJECT_BDEV_NVME_IO, 0);
    9462           0 :         spdk_trace_tpoint_register_relation(TRACE_NVME_TCP_COMPLETE, OBJECT_BDEV_NVME_IO, 0);
    9463           0 : }
    9464           1 : SPDK_TRACE_REGISTER_FN(bdev_nvme_trace, "bdev_nvme", TRACE_GROUP_BDEV_NVME)

Generated by: LCOV version 1.15