Line data Source code
1 : /* SPDX-License-Identifier: BSD-3-Clause
2 : * Copyright (C) 2016 Intel Corporation. All rights reserved.
3 : * Copyright (c) 2019 Mellanox Technologies LTD. All rights reserved.
4 : * Copyright (c) 2021-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
5 : * Copyright (c) 2022 Dell Inc, or its subsidiaries. All rights reserved.
6 : */
7 :
8 : #include "spdk/stdinc.h"
9 :
10 : #include "bdev_nvme.h"
11 :
12 : #include "spdk/accel.h"
13 : #include "spdk/config.h"
14 : #include "spdk/endian.h"
15 : #include "spdk/bdev.h"
16 : #include "spdk/json.h"
17 : #include "spdk/keyring.h"
18 : #include "spdk/likely.h"
19 : #include "spdk/nvme.h"
20 : #include "spdk/nvme_ocssd.h"
21 : #include "spdk/nvme_zns.h"
22 : #include "spdk/opal.h"
23 : #include "spdk/thread.h"
24 : #include "spdk/trace.h"
25 : #include "spdk/string.h"
26 : #include "spdk/util.h"
27 : #include "spdk/uuid.h"
28 :
29 : #include "spdk/bdev_module.h"
30 : #include "spdk/log.h"
31 :
32 : #include "spdk_internal/usdt.h"
33 : #include "spdk_internal/trace_defs.h"
34 :
35 : #define SPDK_BDEV_NVME_DEFAULT_DELAY_CMD_SUBMIT true
36 : #define SPDK_BDEV_NVME_DEFAULT_KEEP_ALIVE_TIMEOUT_IN_MS (10000)
37 :
38 : #define NSID_STR_LEN 10
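/* A uint32_t namespace ID needs at most 10 decimal digits (UINT32_MAX ==
 * 4294967295), which is presumably why NSID_STR_LEN is 10.
 */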
39 :
40 : #define SPDK_CONTROLLER_NAME_MAX 512
41 :
42 : static int bdev_nvme_config_json(struct spdk_json_write_ctx *w);
43 :
44 : struct nvme_bdev_io {
45 : /** Array of iovecs to transfer. */
46 : struct iovec *iovs;
47 :
48 : /** Number of iovecs in iovs array. */
49 : int iovcnt;
50 :
51 : /** Current iovec position. */
52 : int iovpos;
53 :
54 : /** Offset in current iovec. */
55 : uint32_t iov_offset;
56 :
57 : /** I/O path the current I/O or admin passthrough is submitted on, or the I/O path
58 : * being reset in a reset I/O.
59 : */
60 : struct nvme_io_path *io_path;
61 :
62 : /** Array of iovecs for the fused command to transfer. */
63 : struct iovec *fused_iovs;
64 :
65 : /** Number of iovecs in fused_iovs array. */
66 : int fused_iovcnt;
67 :
68 : /** Current iovec position. */
69 : int fused_iovpos;
70 :
71 : /** Offset in current iovec. */
72 : uint32_t fused_iov_offset;
73 :
74 : /** Saved status for admin passthru completion event, PI error verification, or intermediate compare-and-write status */
75 : struct spdk_nvme_cpl cpl;
76 :
77 : /** Extended I/O opts passed by the user to the bdev layer and mapped to NVMe format */
78 : struct spdk_nvme_ns_cmd_ext_io_opts ext_opts;
79 :
80 : /** Tracks whether the first of the fused commands was submitted */
81 : bool first_fused_submitted;
82 :
83 : /** Tracks whether the first of the fused commands was completed */
84 : bool first_fused_completed;
85 :
86 : /** Temporary pointer to zone report buffer */
87 : struct spdk_nvme_zns_zone_report *zone_report_buf;
88 :
89 : /** Number of zones copied so far to the spdk_bdev_zone_info struct */
90 : uint64_t handled_zones;
91 :
92 : /** Expiration value in ticks to retry the current I/O. */
93 : uint64_t retry_ticks;
94 :
95 : /* How many times the current I/O was retried. */
96 : int32_t retry_count;
97 :
98 : /* Current tsc at submit time. */
99 : uint64_t submit_tsc;
100 : };
101 :
102 : struct nvme_probe_skip_entry {
103 : struct spdk_nvme_transport_id trid;
104 : TAILQ_ENTRY(nvme_probe_skip_entry) tailq;
105 : };
106 : /* All controllers deleted by users via RPC are skipped by the hotplug monitor. */
107 : static TAILQ_HEAD(, nvme_probe_skip_entry) g_skipped_nvme_ctrlrs = TAILQ_HEAD_INITIALIZER(
108 : g_skipped_nvme_ctrlrs);
109 :
110 : #define BDEV_NVME_DEFAULT_DIGESTS (SPDK_BIT(SPDK_NVMF_DHCHAP_HASH_SHA256) | \
111 : SPDK_BIT(SPDK_NVMF_DHCHAP_HASH_SHA384) | \
112 : SPDK_BIT(SPDK_NVMF_DHCHAP_HASH_SHA512))
113 :
114 : #define BDEV_NVME_DEFAULT_DHGROUPS (SPDK_BIT(SPDK_NVMF_DHCHAP_DHGROUP_NULL) | \
115 : SPDK_BIT(SPDK_NVMF_DHCHAP_DHGROUP_2048) | \
116 : SPDK_BIT(SPDK_NVMF_DHCHAP_DHGROUP_3072) | \
117 : SPDK_BIT(SPDK_NVMF_DHCHAP_DHGROUP_4096) | \
118 : SPDK_BIT(SPDK_NVMF_DHCHAP_DHGROUP_6144) | \
119 : SPDK_BIT(SPDK_NVMF_DHCHAP_DHGROUP_8192))
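/* SPDK_BIT(n) is SPDK's bit-position helper (effectively (1u << (n))), so the
 * defaults above are OR-ed masks enabling every supported DH-HMAC-CHAP digest
 * and DH group. A hypothetical override limiting authentication to SHA-256
 * with the null DH group would set a single bit each:
 *
 *     opts.dhchap_digests  = SPDK_BIT(SPDK_NVMF_DHCHAP_HASH_SHA256);
 *     opts.dhchap_dhgroups = SPDK_BIT(SPDK_NVMF_DHCHAP_DHGROUP_NULL);
 */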
120 :
121 : static struct spdk_bdev_nvme_opts g_opts = {
122 : .action_on_timeout = SPDK_BDEV_NVME_TIMEOUT_ACTION_NONE,
123 : .timeout_us = 0,
124 : .timeout_admin_us = 0,
125 : .keep_alive_timeout_ms = SPDK_BDEV_NVME_DEFAULT_KEEP_ALIVE_TIMEOUT_IN_MS,
126 : .transport_retry_count = 4,
127 : .arbitration_burst = 0,
128 : .low_priority_weight = 0,
129 : .medium_priority_weight = 0,
130 : .high_priority_weight = 0,
131 : .nvme_adminq_poll_period_us = 10000ULL,
132 : .nvme_ioq_poll_period_us = 0,
133 : .io_queue_requests = 0,
134 : .delay_cmd_submit = SPDK_BDEV_NVME_DEFAULT_DELAY_CMD_SUBMIT,
135 : .bdev_retry_count = 3,
136 : .transport_ack_timeout = 0,
137 : .ctrlr_loss_timeout_sec = 0,
138 : .reconnect_delay_sec = 0,
139 : .fast_io_fail_timeout_sec = 0,
140 : .disable_auto_failback = false,
141 : .generate_uuids = false,
142 : .transport_tos = 0,
143 : .nvme_error_stat = false,
144 : .io_path_stat = false,
145 : .allow_accel_sequence = false,
146 : .dhchap_digests = BDEV_NVME_DEFAULT_DIGESTS,
147 : .dhchap_dhgroups = BDEV_NVME_DEFAULT_DHGROUPS,
148 : };
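/* Note on the defaults above: bdev_retry_count = 3 allows up to three
 * bdev-level retries per I/O, while -1 means retry without limit (see the
 * g_opts.bdev_retry_count == -1 checks in bdev_nvme_io_complete() and
 * bdev_nvme_io_complete_nvme_status() below).
 */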
149 :
150 : #define NVME_HOTPLUG_POLL_PERIOD_MAX 10000000ULL
151 : #define NVME_HOTPLUG_POLL_PERIOD_DEFAULT 100000ULL
152 :
153 : static int g_hot_insert_nvme_controller_index = 0;
154 : static uint64_t g_nvme_hotplug_poll_period_us = NVME_HOTPLUG_POLL_PERIOD_DEFAULT;
155 : static bool g_nvme_hotplug_enabled = false;
156 : struct spdk_thread *g_bdev_nvme_init_thread;
157 : static struct spdk_poller *g_hotplug_poller;
158 : static struct spdk_poller *g_hotplug_probe_poller;
159 : static struct spdk_nvme_probe_ctx *g_hotplug_probe_ctx;
160 :
161 : static void nvme_ctrlr_populate_namespaces(struct nvme_ctrlr *nvme_ctrlr,
162 : struct nvme_async_probe_ctx *ctx);
163 : static void nvme_ctrlr_populate_namespaces_done(struct nvme_ctrlr *nvme_ctrlr,
164 : struct nvme_async_probe_ctx *ctx);
165 : static int bdev_nvme_library_init(void);
166 : static void bdev_nvme_library_fini(void);
167 : static void _bdev_nvme_submit_request(struct nvme_bdev_channel *nbdev_ch,
168 : struct spdk_bdev_io *bdev_io);
169 : static void bdev_nvme_submit_request(struct spdk_io_channel *ch,
170 : struct spdk_bdev_io *bdev_io);
171 : static int bdev_nvme_readv(struct nvme_bdev_io *bio, struct iovec *iov, int iovcnt,
172 : void *md, uint64_t lba_count, uint64_t lba,
173 : uint32_t flags, struct spdk_memory_domain *domain, void *domain_ctx,
174 : struct spdk_accel_sequence *seq);
175 : static int bdev_nvme_no_pi_readv(struct nvme_bdev_io *bio, struct iovec *iov, int iovcnt,
176 : void *md, uint64_t lba_count, uint64_t lba);
177 : static int bdev_nvme_writev(struct nvme_bdev_io *bio, struct iovec *iov, int iovcnt,
178 : void *md, uint64_t lba_count, uint64_t lba,
179 : uint32_t flags, struct spdk_memory_domain *domain, void *domain_ctx,
180 : struct spdk_accel_sequence *seq,
181 : union spdk_bdev_nvme_cdw12 cdw12, union spdk_bdev_nvme_cdw13 cdw13);
182 : static int bdev_nvme_zone_appendv(struct nvme_bdev_io *bio, struct iovec *iov, int iovcnt,
183 : void *md, uint64_t lba_count,
184 : uint64_t zslba, uint32_t flags);
185 : static int bdev_nvme_comparev(struct nvme_bdev_io *bio, struct iovec *iov, int iovcnt,
186 : void *md, uint64_t lba_count, uint64_t lba,
187 : uint32_t flags);
188 : static int bdev_nvme_comparev_and_writev(struct nvme_bdev_io *bio,
189 : struct iovec *cmp_iov, int cmp_iovcnt, struct iovec *write_iov,
190 : int write_iovcnt, void *md, uint64_t lba_count, uint64_t lba,
191 : uint32_t flags);
192 : static int bdev_nvme_get_zone_info(struct nvme_bdev_io *bio, uint64_t zone_id,
193 : uint32_t num_zones, struct spdk_bdev_zone_info *info);
194 : static int bdev_nvme_zone_management(struct nvme_bdev_io *bio, uint64_t zone_id,
195 : enum spdk_bdev_zone_action action);
196 : static void bdev_nvme_admin_passthru(struct nvme_bdev_channel *nbdev_ch,
197 : struct nvme_bdev_io *bio,
198 : struct spdk_nvme_cmd *cmd, void *buf, size_t nbytes);
199 : static int bdev_nvme_io_passthru(struct nvme_bdev_io *bio, struct spdk_nvme_cmd *cmd,
200 : void *buf, size_t nbytes);
201 : static int bdev_nvme_io_passthru_md(struct nvme_bdev_io *bio, struct spdk_nvme_cmd *cmd,
202 : void *buf, size_t nbytes, void *md_buf, size_t md_len);
203 : static int bdev_nvme_iov_passthru_md(struct nvme_bdev_io *bio, struct spdk_nvme_cmd *cmd,
204 : struct iovec *iov, int iovcnt, size_t nbytes,
205 : void *md_buf, size_t md_len);
206 : static void bdev_nvme_abort(struct nvme_bdev_channel *nbdev_ch,
207 : struct nvme_bdev_io *bio, struct nvme_bdev_io *bio_to_abort);
208 : static void bdev_nvme_reset_io(struct nvme_bdev_channel *nbdev_ch, struct nvme_bdev_io *bio);
209 : static int bdev_nvme_reset_ctrlr(struct nvme_ctrlr *nvme_ctrlr);
210 : static int bdev_nvme_failover_ctrlr(struct nvme_ctrlr *nvme_ctrlr);
211 : static void remove_cb(void *cb_ctx, struct spdk_nvme_ctrlr *ctrlr);
212 : static int nvme_ctrlr_read_ana_log_page(struct nvme_ctrlr *nvme_ctrlr);
213 :
214 : static struct nvme_ns *nvme_ns_alloc(void);
215 : static void nvme_ns_free(struct nvme_ns *ns);
216 :
217 : static int
218 173 : nvme_ns_cmp(struct nvme_ns *ns1, struct nvme_ns *ns2)
219 : {
220 173 : return ns1->id < ns2->id ? -1 : ns1->id > ns2->id;
221 : }
222 :
223 902 : RB_GENERATE_STATIC(nvme_ns_tree, nvme_ns, node, nvme_ns_cmp);
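/* The comparator above uses the C idiom `a < b ? -1 : a > b`, which evaluates
 * to -1, 0, or 1 as the red-black tree macros expect. An equivalent, more
 * explicit sketch:
 *
 *     if (ns1->id < ns2->id) { return -1; }
 *     if (ns1->id > ns2->id) { return 1; }
 *     return 0;
 */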
224 :
225 : struct spdk_nvme_qpair *
226 1 : bdev_nvme_get_io_qpair(struct spdk_io_channel *ctrlr_io_ch)
227 : {
228 : struct nvme_ctrlr_channel *ctrlr_ch;
229 :
230 1 : assert(ctrlr_io_ch != NULL);
231 :
232 1 : ctrlr_ch = spdk_io_channel_get_ctx(ctrlr_io_ch);
233 :
234 1 : return ctrlr_ch->qpair->qpair;
235 : }
236 :
237 : static int
238 0 : bdev_nvme_get_ctx_size(void)
239 : {
240 0 : return sizeof(struct nvme_bdev_io);
241 : }
242 :
243 : static struct spdk_bdev_module nvme_if = {
244 : .name = "nvme",
245 : .async_fini = true,
246 : .module_init = bdev_nvme_library_init,
247 : .module_fini = bdev_nvme_library_fini,
248 : .config_json = bdev_nvme_config_json,
249 : .get_ctx_size = bdev_nvme_get_ctx_size,
250 :
251 : };
252 1 : SPDK_BDEV_MODULE_REGISTER(nvme, &nvme_if)
253 :
254 : struct nvme_bdev_ctrlrs g_nvme_bdev_ctrlrs = TAILQ_HEAD_INITIALIZER(g_nvme_bdev_ctrlrs);
255 : pthread_mutex_t g_bdev_nvme_mutex = PTHREAD_MUTEX_INITIALIZER;
256 : bool g_bdev_nvme_module_finish;
257 :
258 : struct nvme_bdev_ctrlr *
259 270 : nvme_bdev_ctrlr_get_by_name(const char *name)
260 : {
261 : struct nvme_bdev_ctrlr *nbdev_ctrlr;
262 :
263 270 : TAILQ_FOREACH(nbdev_ctrlr, &g_nvme_bdev_ctrlrs, tailq) {
264 148 : if (strcmp(name, nbdev_ctrlr->name) == 0) {
265 148 : break;
266 : }
267 : }
268 :
269 270 : return nbdev_ctrlr;
270 : }
271 :
272 : static struct nvme_ctrlr *
273 58 : nvme_bdev_ctrlr_get_ctrlr(struct nvme_bdev_ctrlr *nbdev_ctrlr,
274 : const struct spdk_nvme_transport_id *trid)
275 : {
276 : struct nvme_ctrlr *nvme_ctrlr;
277 :
278 99 : TAILQ_FOREACH(nvme_ctrlr, &nbdev_ctrlr->ctrlrs, tailq) {
279 74 : if (spdk_nvme_transport_id_compare(trid, &nvme_ctrlr->active_path_id->trid) == 0) {
280 33 : break;
281 : }
282 : }
283 :
284 58 : return nvme_ctrlr;
285 : }
286 :
287 : struct nvme_ctrlr *
288 0 : nvme_bdev_ctrlr_get_ctrlr_by_id(struct nvme_bdev_ctrlr *nbdev_ctrlr,
289 : uint16_t cntlid)
290 : {
291 : struct nvme_ctrlr *nvme_ctrlr;
292 : const struct spdk_nvme_ctrlr_data *cdata;
293 :
294 0 : TAILQ_FOREACH(nvme_ctrlr, &nbdev_ctrlr->ctrlrs, tailq) {
295 0 : cdata = spdk_nvme_ctrlr_get_data(nvme_ctrlr->ctrlr);
296 0 : if (cdata->cntlid == cntlid) {
297 0 : break;
298 : }
299 : }
300 :
301 0 : return nvme_ctrlr;
302 : }
303 :
304 : static struct nvme_bdev *
305 72 : nvme_bdev_ctrlr_get_bdev(struct nvme_bdev_ctrlr *nbdev_ctrlr, uint32_t nsid)
306 : {
307 : struct nvme_bdev *bdev;
308 :
309 72 : pthread_mutex_lock(&g_bdev_nvme_mutex);
310 106 : TAILQ_FOREACH(bdev, &nbdev_ctrlr->bdevs, tailq) {
311 68 : if (bdev->nsid == nsid) {
312 34 : break;
313 : }
314 : }
315 72 : pthread_mutex_unlock(&g_bdev_nvme_mutex);
316 :
317 72 : return bdev;
318 : }
319 :
320 : struct nvme_ns *
321 139 : nvme_ctrlr_get_ns(struct nvme_ctrlr *nvme_ctrlr, uint32_t nsid)
322 : {
323 139 : struct nvme_ns ns;
324 :
325 139 : assert(nsid > 0);
326 :
327 139 : ns.id = nsid;
328 139 : return RB_FIND(nvme_ns_tree, &nvme_ctrlr->namespaces, &ns);
329 : }
330 :
331 : struct nvme_ns *
332 152 : nvme_ctrlr_get_first_active_ns(struct nvme_ctrlr *nvme_ctrlr)
333 : {
334 152 : return RB_MIN(nvme_ns_tree, &nvme_ctrlr->namespaces);
335 : }
336 :
337 : struct nvme_ns *
338 63 : nvme_ctrlr_get_next_active_ns(struct nvme_ctrlr *nvme_ctrlr, struct nvme_ns *ns)
339 : {
340 63 : if (ns == NULL) {
341 0 : return NULL;
342 : }
343 :
344 63 : return RB_NEXT(nvme_ns_tree, &nvme_ctrlr->namespaces, ns);
345 : }
346 :
347 : static struct nvme_ctrlr *
348 51 : nvme_ctrlr_get(const struct spdk_nvme_transport_id *trid)
349 : {
350 : struct nvme_bdev_ctrlr *nbdev_ctrlr;
351 51 : struct nvme_ctrlr *nvme_ctrlr = NULL;
352 :
353 51 : pthread_mutex_lock(&g_bdev_nvme_mutex);
354 70 : TAILQ_FOREACH(nbdev_ctrlr, &g_nvme_bdev_ctrlrs, tailq) {
355 19 : nvme_ctrlr = nvme_bdev_ctrlr_get_ctrlr(nbdev_ctrlr, trid);
356 19 : if (nvme_ctrlr != NULL) {
357 0 : break;
358 : }
359 : }
360 51 : pthread_mutex_unlock(&g_bdev_nvme_mutex);
361 :
362 51 : return nvme_ctrlr;
363 : }
364 :
365 : struct nvme_ctrlr *
366 71 : nvme_ctrlr_get_by_name(const char *name)
367 : {
368 : struct nvme_bdev_ctrlr *nbdev_ctrlr;
369 71 : struct nvme_ctrlr *nvme_ctrlr = NULL;
370 :
371 71 : if (name == NULL) {
372 0 : return NULL;
373 : }
374 :
375 71 : pthread_mutex_lock(&g_bdev_nvme_mutex);
376 71 : nbdev_ctrlr = nvme_bdev_ctrlr_get_by_name(name);
377 71 : if (nbdev_ctrlr != NULL) {
378 40 : nvme_ctrlr = TAILQ_FIRST(&nbdev_ctrlr->ctrlrs);
379 : }
380 71 : pthread_mutex_unlock(&g_bdev_nvme_mutex);
381 :
382 71 : return nvme_ctrlr;
383 : }
384 :
385 : void
386 0 : nvme_bdev_ctrlr_for_each(nvme_bdev_ctrlr_for_each_fn fn, void *ctx)
387 : {
388 : struct nvme_bdev_ctrlr *nbdev_ctrlr;
389 :
390 0 : pthread_mutex_lock(&g_bdev_nvme_mutex);
391 0 : TAILQ_FOREACH(nbdev_ctrlr, &g_nvme_bdev_ctrlrs, tailq) {
392 0 : fn(nbdev_ctrlr, ctx);
393 : }
394 0 : pthread_mutex_unlock(&g_bdev_nvme_mutex);
395 0 : }
396 :
397 : void
398 0 : nvme_bdev_dump_trid_json(const struct spdk_nvme_transport_id *trid, struct spdk_json_write_ctx *w)
399 : {
400 : const char *trtype_str;
401 : const char *adrfam_str;
402 :
403 0 : trtype_str = spdk_nvme_transport_id_trtype_str(trid->trtype);
404 0 : if (trtype_str) {
405 0 : spdk_json_write_named_string(w, "trtype", trtype_str);
406 : }
407 :
408 0 : adrfam_str = spdk_nvme_transport_id_adrfam_str(trid->adrfam);
409 0 : if (adrfam_str) {
410 0 : spdk_json_write_named_string(w, "adrfam", adrfam_str);
411 : }
412 :
413 0 : if (trid->traddr[0] != '\0') {
414 0 : spdk_json_write_named_string(w, "traddr", trid->traddr);
415 : }
416 :
417 0 : if (trid->trsvcid[0] != '\0') {
418 0 : spdk_json_write_named_string(w, "trsvcid", trid->trsvcid);
419 : }
420 :
421 0 : if (trid->subnqn[0] != '\0') {
422 0 : spdk_json_write_named_string(w, "subnqn", trid->subnqn);
423 : }
424 0 : }
425 :
426 : static void
427 59 : nvme_bdev_ctrlr_delete(struct nvme_bdev_ctrlr *nbdev_ctrlr,
428 : struct nvme_ctrlr *nvme_ctrlr)
429 : {
430 : SPDK_DTRACE_PROBE1(bdev_nvme_ctrlr_delete, nvme_ctrlr->nbdev_ctrlr->name);
431 59 : pthread_mutex_lock(&g_bdev_nvme_mutex);
432 :
433 59 : TAILQ_REMOVE(&nbdev_ctrlr->ctrlrs, nvme_ctrlr, tailq);
434 59 : if (!TAILQ_EMPTY(&nbdev_ctrlr->ctrlrs)) {
435 15 : pthread_mutex_unlock(&g_bdev_nvme_mutex);
436 :
437 15 : return;
438 : }
439 44 : TAILQ_REMOVE(&g_nvme_bdev_ctrlrs, nbdev_ctrlr, tailq);
440 :
441 44 : pthread_mutex_unlock(&g_bdev_nvme_mutex);
442 :
443 44 : assert(TAILQ_EMPTY(&nbdev_ctrlr->bdevs));
444 :
445 44 : free(nbdev_ctrlr->name);
446 44 : free(nbdev_ctrlr);
447 : }
448 :
449 : static void
450 60 : _nvme_ctrlr_delete(struct nvme_ctrlr *nvme_ctrlr)
451 : {
452 : struct nvme_path_id *path_id, *tmp_path;
453 : struct nvme_ns *ns, *tmp_ns;
454 :
455 60 : free(nvme_ctrlr->copied_ana_desc);
456 60 : spdk_free(nvme_ctrlr->ana_log_page);
457 :
458 60 : if (nvme_ctrlr->opal_dev) {
459 0 : spdk_opal_dev_destruct(nvme_ctrlr->opal_dev);
460 0 : nvme_ctrlr->opal_dev = NULL;
461 : }
462 :
463 60 : if (nvme_ctrlr->nbdev_ctrlr) {
464 59 : nvme_bdev_ctrlr_delete(nvme_ctrlr->nbdev_ctrlr, nvme_ctrlr);
465 : }
466 :
467 60 : RB_FOREACH_SAFE(ns, nvme_ns_tree, &nvme_ctrlr->namespaces, tmp_ns) {
468 0 : RB_REMOVE(nvme_ns_tree, &nvme_ctrlr->namespaces, ns);
469 0 : nvme_ns_free(ns);
470 : }
471 :
472 120 : TAILQ_FOREACH_SAFE(path_id, &nvme_ctrlr->trids, link, tmp_path) {
473 60 : TAILQ_REMOVE(&nvme_ctrlr->trids, path_id, link);
474 60 : free(path_id);
475 : }
476 :
477 60 : pthread_mutex_destroy(&nvme_ctrlr->mutex);
478 60 : spdk_keyring_put_key(nvme_ctrlr->psk);
479 60 : spdk_keyring_put_key(nvme_ctrlr->dhchap_key);
480 60 : spdk_keyring_put_key(nvme_ctrlr->dhchap_ctrlr_key);
481 60 : free(nvme_ctrlr);
482 :
483 60 : pthread_mutex_lock(&g_bdev_nvme_mutex);
484 60 : if (g_bdev_nvme_module_finish && TAILQ_EMPTY(&g_nvme_bdev_ctrlrs)) {
485 0 : pthread_mutex_unlock(&g_bdev_nvme_mutex);
486 0 : spdk_io_device_unregister(&g_nvme_bdev_ctrlrs, NULL);
487 0 : spdk_bdev_module_fini_done();
488 0 : return;
489 : }
490 60 : pthread_mutex_unlock(&g_bdev_nvme_mutex);
491 : }
492 :
493 : static int
494 60 : nvme_detach_poller(void *arg)
495 : {
496 60 : struct nvme_ctrlr *nvme_ctrlr = arg;
497 : int rc;
498 :
499 60 : rc = spdk_nvme_detach_poll_async(nvme_ctrlr->detach_ctx);
500 60 : if (rc != -EAGAIN) {
501 60 : spdk_poller_unregister(&nvme_ctrlr->reset_detach_poller);
502 60 : _nvme_ctrlr_delete(nvme_ctrlr);
503 : }
504 :
505 60 : return SPDK_POLLER_BUSY;
506 : }
507 :
508 : static void
509 60 : nvme_ctrlr_delete(struct nvme_ctrlr *nvme_ctrlr)
510 : {
511 : int rc;
512 :
513 60 : spdk_poller_unregister(&nvme_ctrlr->reconnect_delay_timer);
514 :
515 : /* First, unregister the adminq poller, as the driver will poll adminq if necessary */
516 60 : spdk_poller_unregister(&nvme_ctrlr->adminq_timer_poller);
517 :
518 : /* If we got here, the reset/detach poller cannot be active */
519 60 : assert(nvme_ctrlr->reset_detach_poller == NULL);
520 60 : nvme_ctrlr->reset_detach_poller = SPDK_POLLER_REGISTER(nvme_detach_poller,
521 : nvme_ctrlr, 1000);
522 60 : if (nvme_ctrlr->reset_detach_poller == NULL) {
523 0 : SPDK_ERRLOG("Failed to register detach poller\n");
524 0 : goto error;
525 : }
526 :
527 60 : rc = spdk_nvme_detach_async(nvme_ctrlr->ctrlr, &nvme_ctrlr->detach_ctx);
528 60 : if (rc != 0) {
529 0 : SPDK_ERRLOG("Failed to detach the NVMe controller\n");
530 0 : goto error;
531 : }
532 :
533 60 : return;
534 0 : error:
535 : /* We don't have a good way to handle errors here, so just do what we can and delete the
536 : * controller without detaching the underlying NVMe device.
537 : */
538 0 : spdk_poller_unregister(&nvme_ctrlr->reset_detach_poller);
539 0 : _nvme_ctrlr_delete(nvme_ctrlr);
540 : }
541 :
542 : static void
543 59 : nvme_ctrlr_unregister_cb(void *io_device)
544 : {
545 59 : struct nvme_ctrlr *nvme_ctrlr = io_device;
546 :
547 59 : nvme_ctrlr_delete(nvme_ctrlr);
548 59 : }
549 :
550 : static void
551 59 : nvme_ctrlr_unregister(void *ctx)
552 : {
553 59 : struct nvme_ctrlr *nvme_ctrlr = ctx;
554 :
555 59 : spdk_io_device_unregister(nvme_ctrlr, nvme_ctrlr_unregister_cb);
556 59 : }
557 :
558 : static bool
559 220 : nvme_ctrlr_can_be_unregistered(struct nvme_ctrlr *nvme_ctrlr)
560 : {
561 220 : if (!nvme_ctrlr->destruct) {
562 105 : return false;
563 : }
564 :
565 115 : if (nvme_ctrlr->ref > 0) {
566 56 : return false;
567 : }
568 :
569 59 : if (nvme_ctrlr->resetting) {
570 0 : return false;
571 : }
572 :
573 59 : if (nvme_ctrlr->ana_log_page_updating) {
574 0 : return false;
575 : }
576 :
577 59 : if (nvme_ctrlr->io_path_cache_clearing) {
578 0 : return false;
579 : }
580 :
581 59 : return true;
582 : }
583 :
584 : static void
585 164 : nvme_ctrlr_release(struct nvme_ctrlr *nvme_ctrlr)
586 : {
587 164 : pthread_mutex_lock(&nvme_ctrlr->mutex);
588 : SPDK_DTRACE_PROBE2(bdev_nvme_ctrlr_release, nvme_ctrlr->nbdev_ctrlr->name, nvme_ctrlr->ref);
589 :
590 164 : assert(nvme_ctrlr->ref > 0);
591 164 : nvme_ctrlr->ref--;
592 :
593 164 : if (!nvme_ctrlr_can_be_unregistered(nvme_ctrlr)) {
594 105 : pthread_mutex_unlock(&nvme_ctrlr->mutex);
595 105 : return;
596 : }
597 :
598 59 : pthread_mutex_unlock(&nvme_ctrlr->mutex);
599 :
600 59 : spdk_thread_exec_msg(nvme_ctrlr->thread, nvme_ctrlr_unregister, nvme_ctrlr);
601 : }
602 :
603 : static void
604 161 : bdev_nvme_clear_current_io_path(struct nvme_bdev_channel *nbdev_ch)
605 : {
606 161 : nbdev_ch->current_io_path = NULL;
607 161 : nbdev_ch->rr_counter = 0;
608 161 : }
609 :
610 : static struct nvme_io_path *
611 8 : _bdev_nvme_get_io_path(struct nvme_bdev_channel *nbdev_ch, struct nvme_ns *nvme_ns)
612 : {
613 : struct nvme_io_path *io_path;
614 :
615 16 : STAILQ_FOREACH(io_path, &nbdev_ch->io_path_list, stailq) {
616 15 : if (io_path->nvme_ns == nvme_ns) {
617 7 : break;
618 : }
619 : }
620 :
621 8 : return io_path;
622 : }
623 :
624 : static struct nvme_io_path *
625 35 : nvme_io_path_alloc(void)
626 : {
627 : struct nvme_io_path *io_path;
628 :
629 35 : io_path = calloc(1, sizeof(*io_path));
630 35 : if (io_path == NULL) {
631 0 : SPDK_ERRLOG("Failed to alloc io_path.\n");
632 0 : return NULL;
633 : }
634 :
635 35 : if (g_opts.io_path_stat) {
636 0 : io_path->stat = calloc(1, sizeof(struct spdk_bdev_io_stat));
637 0 : if (io_path->stat == NULL) {
638 0 : free(io_path);
639 0 : SPDK_ERRLOG("Failed to alloc io_path stat.\n");
640 0 : return NULL;
641 : }
642 0 : spdk_bdev_reset_io_stat(io_path->stat, SPDK_BDEV_RESET_STAT_MAXMIN);
643 : }
644 :
645 35 : return io_path;
646 : }
647 :
648 : static void
649 35 : nvme_io_path_free(struct nvme_io_path *io_path)
650 : {
651 35 : free(io_path->stat);
652 35 : free(io_path);
653 35 : }
654 :
655 : static int
656 35 : _bdev_nvme_add_io_path(struct nvme_bdev_channel *nbdev_ch, struct nvme_ns *nvme_ns)
657 : {
658 : struct nvme_io_path *io_path;
659 : struct spdk_io_channel *ch;
660 : struct nvme_ctrlr_channel *ctrlr_ch;
661 : struct nvme_qpair *nvme_qpair;
662 :
663 35 : io_path = nvme_io_path_alloc();
664 35 : if (io_path == NULL) {
665 0 : return -ENOMEM;
666 : }
667 :
668 35 : io_path->nvme_ns = nvme_ns;
669 :
670 35 : ch = spdk_get_io_channel(nvme_ns->ctrlr);
671 35 : if (ch == NULL) {
672 0 : nvme_io_path_free(io_path);
673 0 : SPDK_ERRLOG("Failed to alloc io_channel.\n");
674 0 : return -ENOMEM;
675 : }
676 :
677 35 : ctrlr_ch = spdk_io_channel_get_ctx(ch);
678 :
679 35 : nvme_qpair = ctrlr_ch->qpair;
680 35 : assert(nvme_qpair != NULL);
681 :
682 35 : io_path->qpair = nvme_qpair;
683 35 : TAILQ_INSERT_TAIL(&nvme_qpair->io_path_list, io_path, tailq);
684 :
685 35 : io_path->nbdev_ch = nbdev_ch;
686 35 : STAILQ_INSERT_TAIL(&nbdev_ch->io_path_list, io_path, stailq);
687 :
688 35 : bdev_nvme_clear_current_io_path(nbdev_ch);
689 :
690 35 : return 0;
691 : }
692 :
693 : static void
694 35 : bdev_nvme_clear_retry_io_path(struct nvme_bdev_channel *nbdev_ch,
695 : struct nvme_io_path *io_path)
696 : {
697 : struct spdk_bdev_io *bdev_io;
698 : struct nvme_bdev_io *bio;
699 :
700 36 : TAILQ_FOREACH(bdev_io, &nbdev_ch->retry_io_list, module_link) {
701 1 : bio = (struct nvme_bdev_io *)bdev_io->driver_ctx;
702 1 : if (bio->io_path == io_path) {
703 1 : bio->io_path = NULL;
704 : }
705 : }
706 35 : }
707 :
708 : static void
709 35 : _bdev_nvme_delete_io_path(struct nvme_bdev_channel *nbdev_ch, struct nvme_io_path *io_path)
710 : {
711 : struct spdk_io_channel *ch;
712 : struct nvme_qpair *nvme_qpair;
713 : struct nvme_ctrlr_channel *ctrlr_ch;
714 : struct nvme_bdev *nbdev;
715 :
716 35 : nbdev = spdk_io_channel_get_io_device(spdk_io_channel_from_ctx(nbdev_ch));
717 :
718 : /* Add the statistics to nvme_ns before this path is destroyed. */
719 35 : pthread_mutex_lock(&nbdev->mutex);
720 35 : if (nbdev->ref != 0 && io_path->nvme_ns->stat != NULL && io_path->stat != NULL) {
721 0 : spdk_bdev_add_io_stat(io_path->nvme_ns->stat, io_path->stat);
722 : }
723 35 : pthread_mutex_unlock(&nbdev->mutex);
724 :
725 35 : bdev_nvme_clear_current_io_path(nbdev_ch);
726 35 : bdev_nvme_clear_retry_io_path(nbdev_ch, io_path);
727 :
728 35 : STAILQ_REMOVE(&nbdev_ch->io_path_list, io_path, nvme_io_path, stailq);
729 35 : io_path->nbdev_ch = NULL;
730 :
731 35 : nvme_qpair = io_path->qpair;
732 35 : assert(nvme_qpair != NULL);
733 :
734 35 : ctrlr_ch = nvme_qpair->ctrlr_ch;
735 35 : assert(ctrlr_ch != NULL);
736 :
737 35 : ch = spdk_io_channel_from_ctx(ctrlr_ch);
738 35 : spdk_put_io_channel(ch);
739 :
740 : /* After an io_path is removed, I/Os submitted to it may still complete and update the
741 : * io_path's statistics. To avoid a heap-use-after-free in that case, do not free the
742 : * io_path here; free it when the associated qpair is freed instead. All I/Os submitted
743 : * to the io_path are guaranteed to have completed by the time the qpair is freed.
744 : */
745 35 : }
746 :
747 : static void
748 22 : _bdev_nvme_delete_io_paths(struct nvme_bdev_channel *nbdev_ch)
749 : {
750 : struct nvme_io_path *io_path, *tmp_io_path;
751 :
752 55 : STAILQ_FOREACH_SAFE(io_path, &nbdev_ch->io_path_list, stailq, tmp_io_path) {
753 33 : _bdev_nvme_delete_io_path(nbdev_ch, io_path);
754 : }
755 22 : }
756 :
757 : static int
758 22 : bdev_nvme_create_bdev_channel_cb(void *io_device, void *ctx_buf)
759 : {
760 22 : struct nvme_bdev_channel *nbdev_ch = ctx_buf;
761 22 : struct nvme_bdev *nbdev = io_device;
762 : struct nvme_ns *nvme_ns;
763 : int rc;
764 :
765 22 : STAILQ_INIT(&nbdev_ch->io_path_list);
766 22 : TAILQ_INIT(&nbdev_ch->retry_io_list);
767 :
768 22 : pthread_mutex_lock(&nbdev->mutex);
769 :
770 22 : nbdev_ch->mp_policy = nbdev->mp_policy;
771 22 : nbdev_ch->mp_selector = nbdev->mp_selector;
772 22 : nbdev_ch->rr_min_io = nbdev->rr_min_io;
773 :
774 55 : TAILQ_FOREACH(nvme_ns, &nbdev->nvme_ns_list, tailq) {
775 33 : rc = _bdev_nvme_add_io_path(nbdev_ch, nvme_ns);
776 33 : if (rc != 0) {
777 0 : pthread_mutex_unlock(&nbdev->mutex);
778 :
779 0 : _bdev_nvme_delete_io_paths(nbdev_ch);
780 0 : return rc;
781 : }
782 : }
783 22 : pthread_mutex_unlock(&nbdev->mutex);
784 :
785 22 : return 0;
786 : }
787 :
788 : /* If cpl != NULL, complete the bdev_io with nvme status based on 'cpl'.
789 : * If cpl == NULL, complete the bdev_io with bdev status based on 'status'.
790 : */
791 : static inline void
792 47 : __bdev_nvme_io_complete(struct spdk_bdev_io *bdev_io, enum spdk_bdev_io_status status,
793 : const struct spdk_nvme_cpl *cpl)
794 : {
795 47 : spdk_trace_record(TRACE_BDEV_NVME_IO_DONE, 0, 0, (uintptr_t)bdev_io->driver_ctx,
796 : (uintptr_t)bdev_io);
797 47 : if (cpl) {
798 29 : spdk_bdev_io_complete_nvme_status(bdev_io, cpl->cdw0, cpl->status.sct, cpl->status.sc);
799 : } else {
800 18 : spdk_bdev_io_complete(bdev_io, status);
801 : }
802 47 : }
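/* Both completion modes are exercised later in this file, e.g.:
 *
 *     __bdev_nvme_io_complete(bdev_io, 0, cpl);                // status from 'cpl'
 *     __bdev_nvme_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_ABORTED, NULL);
 *
 * In the first form the 'status' argument is ignored; in the second there is
 * no NVMe completion and the bdev status is used directly.
 */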
803 :
804 : static void bdev_nvme_abort_retry_ios(struct nvme_bdev_channel *nbdev_ch);
805 :
806 : static void
807 22 : bdev_nvme_destroy_bdev_channel_cb(void *io_device, void *ctx_buf)
808 : {
809 22 : struct nvme_bdev_channel *nbdev_ch = ctx_buf;
810 :
811 22 : bdev_nvme_abort_retry_ios(nbdev_ch);
812 22 : _bdev_nvme_delete_io_paths(nbdev_ch);
813 22 : }
814 :
815 : static inline bool
816 58 : bdev_nvme_io_type_is_admin(enum spdk_bdev_io_type io_type)
817 : {
818 58 : switch (io_type) {
819 5 : case SPDK_BDEV_IO_TYPE_RESET:
820 : case SPDK_BDEV_IO_TYPE_NVME_ADMIN:
821 : case SPDK_BDEV_IO_TYPE_ABORT:
822 5 : return true;
823 53 : default:
824 53 : break;
825 : }
826 :
827 53 : return false;
828 : }
829 :
830 : static inline bool
831 77 : nvme_ns_is_active(struct nvme_ns *nvme_ns)
832 : {
833 77 : if (spdk_unlikely(nvme_ns->ana_state_updating)) {
834 1 : return false;
835 : }
836 :
837 76 : if (spdk_unlikely(nvme_ns->ns == NULL)) {
838 0 : return false;
839 : }
840 :
841 76 : return true;
842 : }
843 :
844 : static inline bool
845 65 : nvme_ns_is_accessible(struct nvme_ns *nvme_ns)
846 : {
847 65 : if (spdk_unlikely(!nvme_ns_is_active(nvme_ns))) {
848 1 : return false;
849 : }
850 :
851 64 : switch (nvme_ns->ana_state) {
852 57 : case SPDK_NVME_ANA_OPTIMIZED_STATE:
853 : case SPDK_NVME_ANA_NON_OPTIMIZED_STATE:
854 57 : return true;
855 7 : default:
856 7 : break;
857 : }
858 :
859 7 : return false;
860 : }
861 :
862 : static inline bool
863 102 : nvme_qpair_is_connected(struct nvme_qpair *nvme_qpair)
864 : {
865 102 : if (spdk_unlikely(nvme_qpair->qpair == NULL)) {
866 20 : return false;
867 : }
868 :
869 82 : if (spdk_unlikely(spdk_nvme_qpair_get_failure_reason(nvme_qpair->qpair) !=
870 : SPDK_NVME_QPAIR_FAILURE_NONE)) {
871 0 : return false;
872 : }
873 :
874 82 : if (spdk_unlikely(nvme_qpair->ctrlr_ch->reset_iter != NULL)) {
875 0 : return false;
876 : }
877 :
878 82 : return true;
879 : }
880 :
881 : static inline bool
882 77 : nvme_io_path_is_available(struct nvme_io_path *io_path)
883 : {
884 77 : if (spdk_unlikely(!nvme_qpair_is_connected(io_path->qpair))) {
885 12 : return false;
886 : }
887 :
888 65 : if (spdk_unlikely(!nvme_ns_is_accessible(io_path->nvme_ns))) {
889 8 : return false;
890 : }
891 :
892 57 : return true;
893 : }
894 :
895 : static inline bool
896 8 : nvme_ctrlr_is_failed(struct nvme_ctrlr *nvme_ctrlr)
897 : {
898 8 : if (nvme_ctrlr->destruct) {
899 0 : return true;
900 : }
901 :
902 8 : if (nvme_ctrlr->fast_io_fail_timedout) {
903 2 : return true;
904 : }
905 :
906 6 : if (nvme_ctrlr->resetting) {
907 4 : if (nvme_ctrlr->opts.reconnect_delay_sec != 0) {
908 4 : return false;
909 : } else {
910 0 : return true;
911 : }
912 : }
913 :
914 2 : if (nvme_ctrlr->reconnect_is_delayed) {
915 2 : return false;
916 : }
917 :
918 0 : if (nvme_ctrlr->disabled) {
919 0 : return true;
920 : }
921 :
922 0 : if (spdk_nvme_ctrlr_is_failed(nvme_ctrlr->ctrlr)) {
923 0 : return true;
924 : } else {
925 0 : return false;
926 : }
927 : }
928 :
929 : static bool
930 20 : nvme_ctrlr_is_available(struct nvme_ctrlr *nvme_ctrlr)
931 : {
932 20 : if (nvme_ctrlr->destruct) {
933 0 : return false;
934 : }
935 :
936 20 : if (spdk_nvme_ctrlr_is_failed(nvme_ctrlr->ctrlr)) {
937 3 : return false;
938 : }
939 :
940 17 : if (nvme_ctrlr->resetting || nvme_ctrlr->reconnect_is_delayed) {
941 1 : return false;
942 : }
943 :
944 16 : if (nvme_ctrlr->disabled) {
945 0 : return false;
946 : }
947 :
948 16 : return true;
949 : }
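/* Note the asymmetry with nvme_ctrlr_is_failed() above: a resetting ctrlr is
 * never "available" for new work, but it is only considered "failed" when no
 * reconnect delay is configured, i.e. when no pending recovery could bring a
 * path back.
 */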
950 :
951 : /* Simulate a circular linked list: return the entry after prev_path, wrapping to the head. */
952 : static inline struct nvme_io_path *
953 87 : nvme_io_path_get_next(struct nvme_bdev_channel *nbdev_ch, struct nvme_io_path *prev_path)
954 : {
955 : struct nvme_io_path *next_path;
956 :
957 87 : if (prev_path != NULL) {
958 37 : next_path = STAILQ_NEXT(prev_path, stailq);
959 37 : if (next_path != NULL) {
960 14 : return next_path;
961 : }
962 : }
963 :
964 73 : return STAILQ_FIRST(&nbdev_ch->io_path_list);
965 : }
966 :
967 : static struct nvme_io_path *
968 57 : _bdev_nvme_find_io_path(struct nvme_bdev_channel *nbdev_ch)
969 : {
970 57 : struct nvme_io_path *io_path, *start, *non_optimized = NULL;
971 :
972 57 : start = nvme_io_path_get_next(nbdev_ch, nbdev_ch->current_io_path);
973 :
974 57 : io_path = start;
975 : do {
976 69 : if (spdk_likely(nvme_io_path_is_available(io_path))) {
977 49 : switch (io_path->nvme_ns->ana_state) {
978 39 : case SPDK_NVME_ANA_OPTIMIZED_STATE:
979 39 : nbdev_ch->current_io_path = io_path;
980 39 : return io_path;
981 10 : case SPDK_NVME_ANA_NON_OPTIMIZED_STATE:
982 10 : if (non_optimized == NULL) {
983 7 : non_optimized = io_path;
984 : }
985 10 : break;
986 0 : default:
987 0 : assert(false);
988 : break;
989 : }
990 20 : }
991 30 : io_path = nvme_io_path_get_next(nbdev_ch, io_path);
992 30 : } while (io_path != start);
993 :
994 18 : if (nbdev_ch->mp_policy == BDEV_NVME_MP_POLICY_ACTIVE_ACTIVE) {
995 : /* We get here only if there is no optimized path. Cache even a non-optimized
996 : * path to load balance across multiple non-optimized paths.
997 : */
998 1 : nbdev_ch->current_io_path = non_optimized;
999 : }
1000 :
1001 18 : return non_optimized;
1002 : }
1003 :
1004 : static struct nvme_io_path *
1005 4 : _bdev_nvme_find_io_path_min_qd(struct nvme_bdev_channel *nbdev_ch)
1006 : {
1007 : struct nvme_io_path *io_path;
1008 4 : struct nvme_io_path *optimized = NULL, *non_optimized = NULL;
1009 4 : uint32_t opt_min_qd = UINT32_MAX, non_opt_min_qd = UINT32_MAX;
1010 : uint32_t num_outstanding_reqs;
1011 :
1012 16 : STAILQ_FOREACH(io_path, &nbdev_ch->io_path_list, stailq) {
1013 12 : if (spdk_unlikely(!nvme_qpair_is_connected(io_path->qpair))) {
1014 : /* The device is currently resetting. */
1015 0 : continue;
1016 : }
1017 :
1018 12 : if (spdk_unlikely(!nvme_ns_is_active(io_path->nvme_ns))) {
1019 0 : continue;
1020 : }
1021 :
1022 12 : num_outstanding_reqs = spdk_nvme_qpair_get_num_outstanding_reqs(io_path->qpair->qpair);
1023 12 : switch (io_path->nvme_ns->ana_state) {
1024 6 : case SPDK_NVME_ANA_OPTIMIZED_STATE:
1025 6 : if (num_outstanding_reqs < opt_min_qd) {
1026 5 : opt_min_qd = num_outstanding_reqs;
1027 5 : optimized = io_path;
1028 : }
1029 6 : break;
1030 3 : case SPDK_NVME_ANA_NON_OPTIMIZED_STATE:
1031 3 : if (num_outstanding_reqs < non_opt_min_qd) {
1032 3 : non_opt_min_qd = num_outstanding_reqs;
1033 3 : non_optimized = io_path;
1034 : }
1035 3 : break;
1036 3 : default:
1037 3 : break;
1038 : }
1039 : }
1040 :
1041 : /* Don't cache the io_path for the BDEV_NVME_MP_SELECTOR_QUEUE_DEPTH selector. */
1042 4 : if (optimized != NULL) {
1043 3 : return optimized;
1044 : }
1045 :
1046 1 : return non_optimized;
1047 : }
1048 :
1049 : static inline struct nvme_io_path *
1050 95 : bdev_nvme_find_io_path(struct nvme_bdev_channel *nbdev_ch)
1051 : {
1052 95 : if (spdk_likely(nbdev_ch->current_io_path != NULL)) {
1053 41 : if (nbdev_ch->mp_policy == BDEV_NVME_MP_POLICY_ACTIVE_PASSIVE) {
1054 31 : return nbdev_ch->current_io_path;
1055 10 : } else if (nbdev_ch->mp_selector == BDEV_NVME_MP_SELECTOR_ROUND_ROBIN) {
1056 10 : if (++nbdev_ch->rr_counter < nbdev_ch->rr_min_io) {
1057 3 : return nbdev_ch->current_io_path;
1058 : }
1059 7 : nbdev_ch->rr_counter = 0;
1060 : }
1061 : }
1062 :
1063 61 : if (nbdev_ch->mp_policy == BDEV_NVME_MP_POLICY_ACTIVE_PASSIVE ||
1064 14 : nbdev_ch->mp_selector == BDEV_NVME_MP_SELECTOR_ROUND_ROBIN) {
1065 57 : return _bdev_nvme_find_io_path(nbdev_ch);
1066 : } else {
1067 4 : return _bdev_nvme_find_io_path_min_qd(nbdev_ch);
1068 : }
1069 : }
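/* Path selection summary, as implemented above:
 *   - ACTIVE_PASSIVE: reuse the cached current_io_path if set, else scan for
 *     the first OPTIMIZED path, falling back to a NON_OPTIMIZED one.
 *   - ACTIVE_ACTIVE + ROUND_ROBIN: reuse the cached path for up to rr_min_io
 *     consecutive I/Os, then advance to the next available path.
 *   - ACTIVE_ACTIVE + QUEUE_DEPTH: pick the connected path with the fewest
 *     outstanding requests, preferring OPTIMIZED over NON_OPTIMIZED; no path
 *     is cached for this selector.
 */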
1070 :
1071 : /* Return true if any io_path has a connected qpair or an unfailed ctrlr,
1072 : * false otherwise.
1073 : *
1074 : * If an io_path has a connected qpair but find_io_path() returned NULL, its namespace
1075 : * is probably inaccessible right now but may become accessible later.
1076 : *
1077 : * If an io_path has an unfailed ctrlr but find_io_path() returned NULL, the ctrlr is
1078 : * probably resetting and the reset may succeed. A ctrlr is marked unfailed when a
1079 : * reset starts and marked failed when the reset fails. Hence, an unfailed ctrlr is
1080 : * likely either working fine or in the middle of a reset.
1081 : */
1082 : static bool
1083 13 : any_io_path_may_become_available(struct nvme_bdev_channel *nbdev_ch)
1084 : {
1085 : struct nvme_io_path *io_path;
1086 :
1087 15 : STAILQ_FOREACH(io_path, &nbdev_ch->io_path_list, stailq) {
1088 13 : if (io_path->nvme_ns->ana_transition_timedout) {
1089 0 : continue;
1090 : }
1091 :
1092 13 : if (nvme_qpair_is_connected(io_path->qpair) ||
1093 8 : !nvme_ctrlr_is_failed(io_path->qpair->ctrlr)) {
1094 11 : return true;
1095 : }
1096 : }
1097 :
1098 2 : return false;
1099 : }
1100 :
1101 : static void
1102 14 : bdev_nvme_retry_io(struct nvme_bdev_channel *nbdev_ch, struct spdk_bdev_io *bdev_io)
1103 : {
1104 14 : struct nvme_bdev_io *nbdev_io = (struct nvme_bdev_io *)bdev_io->driver_ctx;
1105 : struct spdk_io_channel *ch;
1106 :
1107 14 : if (nbdev_io->io_path != NULL && nvme_io_path_is_available(nbdev_io->io_path)) {
1108 3 : _bdev_nvme_submit_request(nbdev_ch, bdev_io);
1109 : } else {
1110 11 : ch = spdk_io_channel_from_ctx(nbdev_ch);
1111 11 : bdev_nvme_submit_request(ch, bdev_io);
1112 : }
1113 14 : }
1114 :
1115 : static int
1116 14 : bdev_nvme_retry_ios(void *arg)
1117 : {
1118 14 : struct nvme_bdev_channel *nbdev_ch = arg;
1119 : struct spdk_bdev_io *bdev_io, *tmp_bdev_io;
1120 : struct nvme_bdev_io *bio;
1121 : uint64_t now, delay_us;
1122 :
1123 14 : now = spdk_get_ticks();
1124 :
1125 28 : TAILQ_FOREACH_SAFE(bdev_io, &nbdev_ch->retry_io_list, module_link, tmp_bdev_io) {
1126 15 : bio = (struct nvme_bdev_io *)bdev_io->driver_ctx;
1127 15 : if (bio->retry_ticks > now) {
1128 1 : break;
1129 : }
1130 :
1131 14 : TAILQ_REMOVE(&nbdev_ch->retry_io_list, bdev_io, module_link);
1132 :
1133 14 : bdev_nvme_retry_io(nbdev_ch, bdev_io);
1134 : }
1135 :
1136 14 : spdk_poller_unregister(&nbdev_ch->retry_io_poller);
1137 :
1138 14 : bdev_io = TAILQ_FIRST(&nbdev_ch->retry_io_list);
1139 14 : if (bdev_io != NULL) {
1140 4 : bio = (struct nvme_bdev_io *)bdev_io->driver_ctx;
1141 :
1142 4 : delay_us = (bio->retry_ticks - now) * SPDK_SEC_TO_USEC / spdk_get_ticks_hz();
1143 :
1144 4 : nbdev_ch->retry_io_poller = SPDK_POLLER_REGISTER(bdev_nvme_retry_ios, nbdev_ch,
1145 : delay_us);
1146 : }
1147 :
1148 14 : return SPDK_POLLER_BUSY;
1149 : }
1150 :
1151 : static void
1152 15 : bdev_nvme_queue_retry_io(struct nvme_bdev_channel *nbdev_ch,
1153 : struct nvme_bdev_io *bio, uint64_t delay_ms)
1154 : {
1155 15 : struct spdk_bdev_io *bdev_io = spdk_bdev_io_from_ctx(bio);
1156 : struct spdk_bdev_io *tmp_bdev_io;
1157 : struct nvme_bdev_io *tmp_bio;
1158 :
1159 15 : bio->retry_ticks = spdk_get_ticks() + delay_ms * spdk_get_ticks_hz() / 1000ULL;
1160 :
1161 15 : TAILQ_FOREACH_REVERSE(tmp_bdev_io, &nbdev_ch->retry_io_list, retry_io_head, module_link) {
1162 1 : tmp_bio = (struct nvme_bdev_io *)tmp_bdev_io->driver_ctx;
1163 :
1164 1 : if (tmp_bio->retry_ticks <= bio->retry_ticks) {
1165 1 : TAILQ_INSERT_AFTER(&nbdev_ch->retry_io_list, tmp_bdev_io, bdev_io,
1166 : module_link);
1167 1 : return;
1168 : }
1169 : }
1170 :
1171 : /* No earlier I/Os were found. This I/O must be the new head. */
1172 14 : TAILQ_INSERT_HEAD(&nbdev_ch->retry_io_list, bdev_io, module_link);
1173 :
1174 14 : spdk_poller_unregister(&nbdev_ch->retry_io_poller);
1175 :
1176 14 : nbdev_ch->retry_io_poller = SPDK_POLLER_REGISTER(bdev_nvme_retry_ios, nbdev_ch,
1177 : delay_ms * 1000ULL);
1178 : }
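/* A worked example of the tick arithmetic above, assuming a hypothetical
 * 1 GHz tick source (spdk_get_ticks_hz() == 1000000000):
 *
 *     delay_ms = 100
 *     bio->retry_ticks = now + 100 * 1000000000 / 1000;    // now + 1e8 ticks
 *
 * and converting back in bdev_nvme_retry_ios():
 *
 *     delay_us = 1e8 * SPDK_SEC_TO_USEC / 1e9;             // == 100000 us
 */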
1179 :
1180 : static void
1181 36 : bdev_nvme_abort_retry_ios(struct nvme_bdev_channel *nbdev_ch)
1182 : {
1183 : struct spdk_bdev_io *bdev_io, *tmp_io;
1184 :
1185 36 : TAILQ_FOREACH_SAFE(bdev_io, &nbdev_ch->retry_io_list, module_link, tmp_io) {
1186 0 : TAILQ_REMOVE(&nbdev_ch->retry_io_list, bdev_io, module_link);
1187 0 : __bdev_nvme_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_ABORTED, NULL);
1188 : }
1189 :
1190 36 : spdk_poller_unregister(&nbdev_ch->retry_io_poller);
1191 36 : }
1192 :
1193 : static int
1194 6 : bdev_nvme_abort_retry_io(struct nvme_bdev_channel *nbdev_ch,
1195 : struct nvme_bdev_io *bio_to_abort)
1196 : {
1197 : struct spdk_bdev_io *bdev_io_to_abort;
1198 :
1199 6 : TAILQ_FOREACH(bdev_io_to_abort, &nbdev_ch->retry_io_list, module_link) {
1200 1 : if ((struct nvme_bdev_io *)bdev_io_to_abort->driver_ctx == bio_to_abort) {
1201 1 : TAILQ_REMOVE(&nbdev_ch->retry_io_list, bdev_io_to_abort, module_link);
1202 1 : __bdev_nvme_io_complete(bdev_io_to_abort, SPDK_BDEV_IO_STATUS_ABORTED, NULL);
1203 1 : return 0;
1204 : }
1205 : }
1206 :
1207 5 : return -ENOENT;
1208 : }
1209 :
1210 : static void
1211 12 : bdev_nvme_update_nvme_error_stat(struct spdk_bdev_io *bdev_io, const struct spdk_nvme_cpl *cpl)
1212 : {
1213 : struct nvme_bdev *nbdev;
1214 : uint16_t sct, sc;
1215 :
1216 12 : assert(spdk_nvme_cpl_is_error(cpl));
1217 :
1218 12 : nbdev = bdev_io->bdev->ctxt;
1219 :
1220 12 : if (nbdev->err_stat == NULL) {
1221 12 : return;
1222 : }
1223 :
1224 0 : sct = cpl->status.sct;
1225 0 : sc = cpl->status.sc;
1226 :
1227 0 : pthread_mutex_lock(&nbdev->mutex);
1228 :
1229 0 : nbdev->err_stat->status_type[sct]++;
1230 0 : switch (sct) {
1231 0 : case SPDK_NVME_SCT_GENERIC:
1232 : case SPDK_NVME_SCT_COMMAND_SPECIFIC:
1233 : case SPDK_NVME_SCT_MEDIA_ERROR:
1234 : case SPDK_NVME_SCT_PATH:
1235 0 : nbdev->err_stat->status[sct][sc]++;
1236 0 : break;
1237 0 : default:
1238 0 : break;
1239 : }
1240 :
1241 0 : pthread_mutex_unlock(&nbdev->mutex);
1242 : }
1243 :
1244 : static inline void
1245 20 : bdev_nvme_update_io_path_stat(struct nvme_bdev_io *bio)
1246 : {
1247 20 : struct spdk_bdev_io *bdev_io = spdk_bdev_io_from_ctx(bio);
1248 20 : uint64_t num_blocks = bdev_io->u.bdev.num_blocks;
1249 20 : uint32_t blocklen = bdev_io->bdev->blocklen;
1250 : struct spdk_bdev_io_stat *stat;
1251 : uint64_t tsc_diff;
1252 :
1253 20 : if (bio->io_path->stat == NULL) {
1254 20 : return;
1255 : }
1256 :
1257 0 : tsc_diff = spdk_get_ticks() - bio->submit_tsc;
1258 0 : stat = bio->io_path->stat;
1259 :
1260 0 : switch (bdev_io->type) {
1261 0 : case SPDK_BDEV_IO_TYPE_READ:
1262 0 : stat->bytes_read += num_blocks * blocklen;
1263 0 : stat->num_read_ops++;
1264 0 : stat->read_latency_ticks += tsc_diff;
1265 0 : if (stat->max_read_latency_ticks < tsc_diff) {
1266 0 : stat->max_read_latency_ticks = tsc_diff;
1267 : }
1268 0 : if (stat->min_read_latency_ticks > tsc_diff) {
1269 0 : stat->min_read_latency_ticks = tsc_diff;
1270 : }
1271 0 : break;
1272 0 : case SPDK_BDEV_IO_TYPE_WRITE:
1273 0 : stat->bytes_written += num_blocks * blocklen;
1274 0 : stat->num_write_ops++;
1275 0 : stat->write_latency_ticks += tsc_diff;
1276 0 : if (stat->max_write_latency_ticks < tsc_diff) {
1277 0 : stat->max_write_latency_ticks = tsc_diff;
1278 : }
1279 0 : if (stat->min_write_latency_ticks > tsc_diff) {
1280 0 : stat->min_write_latency_ticks = tsc_diff;
1281 : }
1282 0 : break;
1283 0 : case SPDK_BDEV_IO_TYPE_UNMAP:
1284 0 : stat->bytes_unmapped += num_blocks * blocklen;
1285 0 : stat->num_unmap_ops++;
1286 0 : stat->unmap_latency_ticks += tsc_diff;
1287 0 : if (stat->max_unmap_latency_ticks < tsc_diff) {
1288 0 : stat->max_unmap_latency_ticks = tsc_diff;
1289 : }
1290 0 : if (stat->min_unmap_latency_ticks > tsc_diff) {
1291 0 : stat->min_unmap_latency_ticks = tsc_diff;
1292 : }
1293 0 : break;
1294 0 : case SPDK_BDEV_IO_TYPE_ZCOPY:
1295 : /* Track the data in the start phase only */
1296 0 : if (!bdev_io->u.bdev.zcopy.start) {
1297 0 : break;
1298 : }
1299 0 : if (bdev_io->u.bdev.zcopy.populate) {
1300 0 : stat->bytes_read += num_blocks * blocklen;
1301 0 : stat->num_read_ops++;
1302 0 : stat->read_latency_ticks += tsc_diff;
1303 0 : if (stat->max_read_latency_ticks < tsc_diff) {
1304 0 : stat->max_read_latency_ticks = tsc_diff;
1305 : }
1306 0 : if (stat->min_read_latency_ticks > tsc_diff) {
1307 0 : stat->min_read_latency_ticks = tsc_diff;
1308 : }
1309 : } else {
1310 0 : stat->bytes_written += num_blocks * blocklen;
1311 0 : stat->num_write_ops++;
1312 0 : stat->write_latency_ticks += tsc_diff;
1313 0 : if (stat->max_write_latency_ticks < tsc_diff) {
1314 0 : stat->max_write_latency_ticks = tsc_diff;
1315 : }
1316 0 : if (stat->min_write_latency_ticks > tsc_diff) {
1317 0 : stat->min_write_latency_ticks = tsc_diff;
1318 : }
1319 : }
1320 0 : break;
1321 0 : case SPDK_BDEV_IO_TYPE_COPY:
1322 0 : stat->bytes_copied += num_blocks * blocklen;
1323 0 : stat->num_copy_ops++;
1324 0 : stat->copy_latency_ticks += tsc_diff;
1325 0 : if (stat->max_copy_latency_ticks < tsc_diff) {
1326 0 : stat->max_copy_latency_ticks = tsc_diff;
1327 : }
1328 0 : if (stat->min_copy_latency_ticks > tsc_diff) {
1329 0 : stat->min_copy_latency_ticks = tsc_diff;
1330 : }
1331 0 : break;
1332 0 : default:
1333 0 : break;
1334 : }
1335 : }
1336 :
1337 : static bool
1338 7 : bdev_nvme_check_retry_io(struct nvme_bdev_io *bio,
1339 : const struct spdk_nvme_cpl *cpl,
1340 : struct nvme_bdev_channel *nbdev_ch,
1341 : uint64_t *_delay_ms)
1342 : {
1343 7 : struct nvme_io_path *io_path = bio->io_path;
1344 7 : struct nvme_ctrlr *nvme_ctrlr = io_path->qpair->ctrlr;
1345 : const struct spdk_nvme_ctrlr_data *cdata;
1346 :
1347 7 : if (spdk_nvme_cpl_is_path_error(cpl) ||
1348 5 : spdk_nvme_cpl_is_aborted_sq_deletion(cpl) ||
1349 4 : !nvme_io_path_is_available(io_path) ||
1350 4 : !nvme_ctrlr_is_available(nvme_ctrlr)) {
1351 3 : bdev_nvme_clear_current_io_path(nbdev_ch);
1352 3 : bio->io_path = NULL;
1353 3 : if (spdk_nvme_cpl_is_ana_error(cpl)) {
1354 1 : if (nvme_ctrlr_read_ana_log_page(nvme_ctrlr) == 0) {
1355 1 : io_path->nvme_ns->ana_state_updating = true;
1356 : }
1357 : }
1358 3 : if (!any_io_path_may_become_available(nbdev_ch)) {
1359 0 : return false;
1360 : }
1361 3 : *_delay_ms = 0;
1362 : } else {
1363 4 : bio->retry_count++;
1364 :
1365 4 : cdata = spdk_nvme_ctrlr_get_data(nvme_ctrlr->ctrlr);
1366 :
1367 4 : if (cpl->status.crd != 0) {
1368 1 : *_delay_ms = cdata->crdt[cpl->status.crd] * 100;
1369 : } else {
1370 3 : *_delay_ms = 0;
1371 : }
1372 : }
1373 :
1374 7 : return true;
1375 : }
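/* The CRD handling above follows the NVMe spec: a completion's CRD field
 * selects one of the controller's Command Retry Delay Times, which are
 * expressed in units of 100 milliseconds. For example, cdata->crdt[1] == 5
 * with cpl->status.crd == 1 requeues the I/O with a 500 ms delay.
 */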
1376 :
1377 : static inline void
1378 32 : bdev_nvme_io_complete_nvme_status(struct nvme_bdev_io *bio,
1379 : const struct spdk_nvme_cpl *cpl)
1380 : {
1381 32 : struct spdk_bdev_io *bdev_io = spdk_bdev_io_from_ctx(bio);
1382 : struct nvme_bdev_channel *nbdev_ch;
1383 32 : uint64_t delay_ms;
1384 :
1385 32 : assert(!bdev_nvme_io_type_is_admin(bdev_io->type));
1386 :
1387 32 : if (spdk_likely(spdk_nvme_cpl_is_success(cpl))) {
1388 20 : bdev_nvme_update_io_path_stat(bio);
1389 20 : goto complete;
1390 : }
1391 :
1392 : /* Update error counts before deciding whether a retry is needed.
1393 : * Hence, the error counts may exceed the number of I/O errors.
1394 : */
1395 12 : bdev_nvme_update_nvme_error_stat(bdev_io, cpl);
1396 :
1397 12 : if (cpl->status.dnr != 0 || spdk_nvme_cpl_is_aborted_by_request(cpl) ||
1398 8 : (g_opts.bdev_retry_count != -1 && bio->retry_count >= g_opts.bdev_retry_count)) {
1399 5 : goto complete;
1400 : }
1401 :
1402 : /* At this point we don't know whether the accel sequence was executed successfully,
1403 : * so we cannot retry the I/O. */
1404 7 : if (bdev_io->u.bdev.accel_sequence != NULL) {
1405 0 : goto complete;
1406 : }
1407 :
1408 7 : nbdev_ch = spdk_io_channel_get_ctx(spdk_bdev_io_get_io_channel(bdev_io));
1409 :
1410 7 : if (bdev_nvme_check_retry_io(bio, cpl, nbdev_ch, &delay_ms)) {
1411 7 : bdev_nvme_queue_retry_io(nbdev_ch, bio, delay_ms);
1412 7 : return;
1413 : }
1414 :
1415 25 : complete:
1416 25 : bio->retry_count = 0;
1417 25 : bio->submit_tsc = 0;
1418 25 : bdev_io->u.bdev.accel_sequence = NULL;
1419 25 : __bdev_nvme_io_complete(bdev_io, 0, cpl);
1420 : }
1421 :
1422 : static inline void
1423 11 : bdev_nvme_io_complete(struct nvme_bdev_io *bio, int rc)
1424 : {
1425 11 : struct spdk_bdev_io *bdev_io = spdk_bdev_io_from_ctx(bio);
1426 : struct nvme_bdev_channel *nbdev_ch;
1427 : enum spdk_bdev_io_status io_status;
1428 :
1429 11 : assert(!bdev_nvme_io_type_is_admin(bdev_io->type));
1430 :
1431 11 : switch (rc) {
1432 1 : case 0:
1433 1 : io_status = SPDK_BDEV_IO_STATUS_SUCCESS;
1434 1 : break;
1435 0 : case -ENOMEM:
1436 0 : io_status = SPDK_BDEV_IO_STATUS_NOMEM;
1437 0 : break;
1438 10 : case -ENXIO:
1439 10 : if (g_opts.bdev_retry_count == -1 || bio->retry_count < g_opts.bdev_retry_count) {
1440 10 : nbdev_ch = spdk_io_channel_get_ctx(spdk_bdev_io_get_io_channel(bdev_io));
1441 :
1442 10 : bdev_nvme_clear_current_io_path(nbdev_ch);
1443 10 : bio->io_path = NULL;
1444 :
1445 10 : if (any_io_path_may_become_available(nbdev_ch)) {
1446 8 : bdev_nvme_queue_retry_io(nbdev_ch, bio, 1000ULL);
1447 8 : return;
1448 : }
1449 : }
1450 :
1451 : /* fallthrough */
1452 : default:
1453 2 : spdk_accel_sequence_abort(bdev_io->u.bdev.accel_sequence);
1454 2 : bdev_io->u.bdev.accel_sequence = NULL;
1455 2 : io_status = SPDK_BDEV_IO_STATUS_FAILED;
1456 2 : break;
1457 : }
1458 :
1459 3 : bio->retry_count = 0;
1460 3 : bio->submit_tsc = 0;
1461 3 : __bdev_nvme_io_complete(bdev_io, io_status, NULL);
1462 : }
1463 :
1464 : static inline void
1465 4 : bdev_nvme_admin_complete(struct nvme_bdev_io *bio, int rc)
1466 : {
1467 4 : struct spdk_bdev_io *bdev_io = spdk_bdev_io_from_ctx(bio);
1468 : enum spdk_bdev_io_status io_status;
1469 :
1470 4 : switch (rc) {
1471 1 : case 0:
1472 1 : io_status = SPDK_BDEV_IO_STATUS_SUCCESS;
1473 1 : break;
1474 0 : case -ENOMEM:
1475 0 : io_status = SPDK_BDEV_IO_STATUS_NOMEM;
1476 0 : break;
1477 3 : case -ENXIO:
1478 : /* fallthrough */
1479 : default:
1480 3 : io_status = SPDK_BDEV_IO_STATUS_FAILED;
1481 3 : break;
1482 : }
1483 :
1484 4 : __bdev_nvme_io_complete(bdev_io, io_status, NULL);
1485 4 : }
1486 :
1487 : static void
1488 3 : bdev_nvme_clear_io_path_caches_done(struct spdk_io_channel_iter *i, int status)
1489 : {
1490 3 : struct nvme_ctrlr *nvme_ctrlr = spdk_io_channel_iter_get_io_device(i);
1491 :
1492 3 : pthread_mutex_lock(&nvme_ctrlr->mutex);
1493 :
1494 3 : assert(nvme_ctrlr->io_path_cache_clearing == true);
1495 3 : nvme_ctrlr->io_path_cache_clearing = false;
1496 :
1497 3 : if (!nvme_ctrlr_can_be_unregistered(nvme_ctrlr)) {
1498 3 : pthread_mutex_unlock(&nvme_ctrlr->mutex);
1499 3 : return;
1500 : }
1501 :
1502 0 : pthread_mutex_unlock(&nvme_ctrlr->mutex);
1503 :
1504 0 : nvme_ctrlr_unregister(nvme_ctrlr);
1505 : }
1506 :
1507 : static void
1508 320 : _bdev_nvme_clear_io_path_cache(struct nvme_qpair *nvme_qpair)
1509 : {
1510 : struct nvme_io_path *io_path;
1511 :
1512 459 : TAILQ_FOREACH(io_path, &nvme_qpair->io_path_list, tailq) {
1513 139 : if (io_path->nbdev_ch == NULL) {
1514 64 : continue;
1515 : }
1516 75 : bdev_nvme_clear_current_io_path(io_path->nbdev_ch);
1517 : }
1518 320 : }
1519 :
1520 : static void
1521 1 : bdev_nvme_clear_io_path_cache(struct spdk_io_channel_iter *i)
1522 : {
1523 1 : struct spdk_io_channel *_ch = spdk_io_channel_iter_get_channel(i);
1524 1 : struct nvme_ctrlr_channel *ctrlr_ch = spdk_io_channel_get_ctx(_ch);
1525 :
1526 1 : assert(ctrlr_ch->qpair != NULL);
1527 :
1528 1 : _bdev_nvme_clear_io_path_cache(ctrlr_ch->qpair);
1529 :
1530 1 : spdk_for_each_channel_continue(i, 0);
1531 1 : }
1532 :
1533 : static void
1534 3 : bdev_nvme_clear_io_path_caches(struct nvme_ctrlr *nvme_ctrlr)
1535 : {
1536 3 : pthread_mutex_lock(&nvme_ctrlr->mutex);
1537 3 : if (!nvme_ctrlr_is_available(nvme_ctrlr) ||
1538 : nvme_ctrlr->io_path_cache_clearing) {
1539 0 : pthread_mutex_unlock(&nvme_ctrlr->mutex);
1540 0 : return;
1541 : }
1542 :
1543 3 : nvme_ctrlr->io_path_cache_clearing = true;
1544 3 : pthread_mutex_unlock(&nvme_ctrlr->mutex);
1545 :
1546 3 : spdk_for_each_channel(nvme_ctrlr,
1547 : bdev_nvme_clear_io_path_cache,
1548 : NULL,
1549 : bdev_nvme_clear_io_path_caches_done);
1550 : }
1551 :
1552 : static struct nvme_qpair *
1553 99 : nvme_poll_group_get_qpair(struct nvme_poll_group *group, struct spdk_nvme_qpair *qpair)
1554 : {
1555 : struct nvme_qpair *nvme_qpair;
1556 :
1557 108 : TAILQ_FOREACH(nvme_qpair, &group->qpair_list, tailq) {
1558 108 : if (nvme_qpair->qpair == qpair) {
1559 99 : break;
1560 : }
1561 : }
1562 :
1563 99 : return nvme_qpair;
1564 : }
1565 :
1566 : static void nvme_qpair_delete(struct nvme_qpair *nvme_qpair);
1567 :
1568 : static void
1569 99 : bdev_nvme_disconnected_qpair_cb(struct spdk_nvme_qpair *qpair, void *poll_group_ctx)
1570 : {
1571 99 : struct nvme_poll_group *group = poll_group_ctx;
1572 : struct nvme_qpair *nvme_qpair;
1573 : struct nvme_ctrlr_channel *ctrlr_ch;
1574 : int status;
1575 :
1576 99 : nvme_qpair = nvme_poll_group_get_qpair(group, qpair);
1577 99 : if (nvme_qpair == NULL) {
1578 0 : return;
1579 : }
1580 :
1581 99 : if (nvme_qpair->qpair != NULL) {
1582 99 : spdk_nvme_ctrlr_free_io_qpair(nvme_qpair->qpair);
1583 99 : nvme_qpair->qpair = NULL;
1584 : }
1585 :
1586 99 : _bdev_nvme_clear_io_path_cache(nvme_qpair);
1587 :
1588 99 : ctrlr_ch = nvme_qpair->ctrlr_ch;
1589 :
1590 99 : if (ctrlr_ch != NULL) {
1591 56 : if (ctrlr_ch->reset_iter != NULL) {
1592 : /* We are in a full reset sequence. */
1593 52 : if (ctrlr_ch->connect_poller != NULL) {
1594 : /* qpair failed to connect. Abort the reset sequence. */
1595 0 : SPDK_DEBUGLOG(bdev_nvme, "qpair %p failed to connect. Aborting the reset ctrlr sequence.\n",
1596 : qpair);
1597 0 : spdk_poller_unregister(&ctrlr_ch->connect_poller);
1598 0 : status = -1;
1599 : } else {
1600 : /* qpair finished disconnecting. Just move to the next ctrlr_channel. */
1601 52 : SPDK_DEBUGLOG(bdev_nvme, "qpair %p was disconnected and freed in a reset ctrlr sequence.\n",
1602 : qpair);
1603 52 : status = 0;
1604 : }
1605 52 : spdk_for_each_channel_continue(ctrlr_ch->reset_iter, status);
1606 52 : ctrlr_ch->reset_iter = NULL;
1607 : } else {
1608 : /* qpair was disconnected unexpectedly. Reset controller for recovery. */
1609 4 : SPDK_NOTICELOG("qpair %p was disconnected and freed. reset controller.\n", qpair);
1610 4 : bdev_nvme_failover_ctrlr(nvme_qpair->ctrlr);
1611 : }
1612 : } else {
1613 : /* In this case, ctrlr_channel is already deleted. */
1614 43 : SPDK_DEBUGLOG(bdev_nvme, "qpair %p was disconnected and freed. delete nvme_qpair.\n", qpair);
1615 43 : nvme_qpair_delete(nvme_qpair);
1616 : }
1617 : }
1618 :
1619 : static void
1620 0 : bdev_nvme_check_io_qpairs(struct nvme_poll_group *group)
1621 : {
1622 : struct nvme_qpair *nvme_qpair;
1623 :
1624 0 : TAILQ_FOREACH(nvme_qpair, &group->qpair_list, tailq) {
1625 0 : if (nvme_qpair->qpair == NULL || nvme_qpair->ctrlr_ch == NULL) {
1626 0 : continue;
1627 : }
1628 :
1629 0 : if (spdk_nvme_qpair_get_failure_reason(nvme_qpair->qpair) !=
1630 : SPDK_NVME_QPAIR_FAILURE_NONE) {
1631 0 : _bdev_nvme_clear_io_path_cache(nvme_qpair);
1632 : }
1633 : }
1634 0 : }
1635 :
1636 : static int
1637 1025 : bdev_nvme_poll(void *arg)
1638 : {
1639 1025 : struct nvme_poll_group *group = arg;
1640 : int64_t num_completions;
1641 :
1642 1025 : if (group->collect_spin_stat && group->start_ticks == 0) {
1643 0 : group->start_ticks = spdk_get_ticks();
1644 : }
1645 :
1646 1025 : num_completions = spdk_nvme_poll_group_process_completions(group->group, 0,
1647 : bdev_nvme_disconnected_qpair_cb);
1648 1025 : if (group->collect_spin_stat) {
1649 0 : if (num_completions > 0) {
1650 0 : if (group->end_ticks != 0) {
1651 0 : group->spin_ticks += (group->end_ticks - group->start_ticks);
1652 0 : group->end_ticks = 0;
1653 : }
1654 0 : group->start_ticks = 0;
1655 : } else {
1656 0 : group->end_ticks = spdk_get_ticks();
1657 : }
1658 : }
1659 :
1660 1025 : if (spdk_unlikely(num_completions < 0)) {
1661 0 : bdev_nvme_check_io_qpairs(group);
1662 : }
1663 :
1664 1025 : return num_completions > 0 ? SPDK_POLLER_BUSY : SPDK_POLLER_IDLE;
1665 : }
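/* SPDK poller convention: returning SPDK_POLLER_BUSY reports that the poller
 * did work on this iteration and SPDK_POLLER_IDLE that it did not; the
 * framework uses this for per-thread busy/idle accounting.
 */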
1666 :
1667 : static int bdev_nvme_poll_adminq(void *arg);
1668 :
1669 : static void
1670 100 : bdev_nvme_change_adminq_poll_period(struct nvme_ctrlr *nvme_ctrlr, uint64_t new_period_us)
1671 : {
1672 100 : spdk_poller_unregister(&nvme_ctrlr->adminq_timer_poller);
1673 :
1674 100 : nvme_ctrlr->adminq_timer_poller = SPDK_POLLER_REGISTER(bdev_nvme_poll_adminq,
1675 : nvme_ctrlr, new_period_us);
1676 100 : }
1677 :
1678 : static int
1679 146 : bdev_nvme_poll_adminq(void *arg)
1680 : {
1681 : int32_t rc;
1682 146 : struct nvme_ctrlr *nvme_ctrlr = arg;
1683 : nvme_ctrlr_disconnected_cb disconnected_cb;
1684 :
1685 146 : assert(nvme_ctrlr != NULL);
1686 :
1687 146 : rc = spdk_nvme_ctrlr_process_admin_completions(nvme_ctrlr->ctrlr);
1688 146 : if (rc < 0) {
1689 53 : disconnected_cb = nvme_ctrlr->disconnected_cb;
1690 53 : nvme_ctrlr->disconnected_cb = NULL;
1691 :
1692 53 : if (disconnected_cb != NULL) {
1693 50 : bdev_nvme_change_adminq_poll_period(nvme_ctrlr,
1694 : g_opts.nvme_adminq_poll_period_us);
1695 50 : disconnected_cb(nvme_ctrlr);
1696 : } else {
1697 3 : bdev_nvme_failover_ctrlr(nvme_ctrlr);
1698 : }
1699 93 : } else if (spdk_nvme_ctrlr_get_admin_qp_failure_reason(nvme_ctrlr->ctrlr) !=
1700 : SPDK_NVME_QPAIR_FAILURE_NONE) {
1701 0 : bdev_nvme_clear_io_path_caches(nvme_ctrlr);
1702 : }
1703 :
1704 146 : return rc == 0 ? SPDK_POLLER_IDLE : SPDK_POLLER_BUSY;
1705 : }
1706 :
1707 : static void
1708 37 : nvme_bdev_free(void *io_device)
1709 : {
1710 37 : struct nvme_bdev *nvme_disk = io_device;
1711 :
1712 37 : pthread_mutex_destroy(&nvme_disk->mutex);
1713 37 : free(nvme_disk->disk.name);
1714 37 : free(nvme_disk->err_stat);
1715 37 : free(nvme_disk);
1716 37 : }
1717 :
1718 : static int
1719 36 : bdev_nvme_destruct(void *ctx)
1720 : {
1721 36 : struct nvme_bdev *nvme_disk = ctx;
1722 : struct nvme_ns *nvme_ns, *tmp_nvme_ns;
1723 :
1724 : SPDK_DTRACE_PROBE2(bdev_nvme_destruct, nvme_disk->nbdev_ctrlr->name, nvme_disk->nsid);
1725 :
1726 73 : TAILQ_FOREACH_SAFE(nvme_ns, &nvme_disk->nvme_ns_list, tailq, tmp_nvme_ns) {
1727 37 : pthread_mutex_lock(&nvme_ns->ctrlr->mutex);
1728 :
1729 37 : nvme_ns->bdev = NULL;
1730 :
1731 37 : assert(nvme_ns->id > 0);
1732 :
1733 37 : if (nvme_ctrlr_get_ns(nvme_ns->ctrlr, nvme_ns->id) == NULL) {
1734 0 : pthread_mutex_unlock(&nvme_ns->ctrlr->mutex);
1735 :
1736 0 : nvme_ctrlr_release(nvme_ns->ctrlr);
1737 0 : nvme_ns_free(nvme_ns);
1738 : } else {
1739 37 : pthread_mutex_unlock(&nvme_ns->ctrlr->mutex);
1740 : }
1741 : }
1742 :
1743 36 : pthread_mutex_lock(&g_bdev_nvme_mutex);
1744 36 : TAILQ_REMOVE(&nvme_disk->nbdev_ctrlr->bdevs, nvme_disk, tailq);
1745 36 : pthread_mutex_unlock(&g_bdev_nvme_mutex);
1746 :
1747 36 : spdk_io_device_unregister(nvme_disk, nvme_bdev_free);
1748 :
1749 36 : return 0;
1750 : }
1751 :
1752 : static int
1753 100 : bdev_nvme_create_qpair(struct nvme_qpair *nvme_qpair)
1754 : {
1755 : struct nvme_ctrlr *nvme_ctrlr;
1756 100 : struct spdk_nvme_io_qpair_opts opts;
1757 : struct spdk_nvme_qpair *qpair;
1758 : int rc;
1759 :
1760 100 : nvme_ctrlr = nvme_qpair->ctrlr;
1761 :
1762 100 : spdk_nvme_ctrlr_get_default_io_qpair_opts(nvme_ctrlr->ctrlr, &opts, sizeof(opts));
1763 100 : opts.delay_cmd_submit = g_opts.delay_cmd_submit;
1764 100 : opts.create_only = true;
1765 100 : opts.async_mode = true;
1766 100 : opts.io_queue_requests = spdk_max(g_opts.io_queue_requests, opts.io_queue_requests);
1767 100 : g_opts.io_queue_requests = opts.io_queue_requests;
1768 :
1769 100 : qpair = spdk_nvme_ctrlr_alloc_io_qpair(nvme_ctrlr->ctrlr, &opts, sizeof(opts));
1770 100 : if (qpair == NULL) {
1771 0 : return -1;
1772 : }
1773 :
1774 : SPDK_DTRACE_PROBE3(bdev_nvme_create_qpair, nvme_ctrlr->nbdev_ctrlr->name,
1775 : spdk_nvme_qpair_get_id(qpair), spdk_thread_get_id(nvme_ctrlr->thread));
1776 :
1777 100 : assert(nvme_qpair->group != NULL);
1778 :
1779 100 : rc = spdk_nvme_poll_group_add(nvme_qpair->group->group, qpair);
1780 100 : if (rc != 0) {
1781 0 : SPDK_ERRLOG("Unable to begin polling on NVMe Channel.\n");
1782 0 : goto err;
1783 : }
1784 :
1785 100 : rc = spdk_nvme_ctrlr_connect_io_qpair(nvme_ctrlr->ctrlr, qpair);
1786 100 : if (rc != 0) {
1787 0 : SPDK_ERRLOG("Unable to connect I/O qpair.\n");
1788 0 : goto err;
1789 : }
1790 :
1791 100 : nvme_qpair->qpair = qpair;
1792 :
1793 100 : if (!g_opts.disable_auto_failback) {
1794 71 : _bdev_nvme_clear_io_path_cache(nvme_qpair);
1795 : }
1796 :
1797 100 : return 0;
1798 :
1799 0 : err:
1800 0 : spdk_nvme_ctrlr_free_io_qpair(qpair);
1801 :
1802 0 : return rc;
1803 : }
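/*
 * [Editor's note, added for clarity] The create_only/async_mode pair above
 * splits qpair setup into three steps: allocate the qpair without connecting
 * it, add it to the poll group, then start an asynchronous connect via
 * spdk_nvme_ctrlr_connect_io_qpair() that completes as the poll group is
 * polled. This ordering lets the poll group observe the qpair's own
 * connection progress.
 */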
1804 :
1805 : static void bdev_nvme_reset_io_continue(void *cb_arg, int rc);
1806 :
1807 : static void
1808 82 : bdev_nvme_complete_pending_resets(struct spdk_io_channel_iter *i)
1809 : {
1810 82 : struct spdk_io_channel *_ch = spdk_io_channel_iter_get_channel(i);
1811 82 : struct nvme_ctrlr_channel *ctrlr_ch = spdk_io_channel_get_ctx(_ch);
1812 82 : int rc = 0;
1813 : struct spdk_bdev_io *bdev_io;
1814 : struct nvme_bdev_io *bio;
1815 :
1816 82 : if (spdk_io_channel_iter_get_ctx(i) != NULL) {
1817 35 : rc = -1;
1818 : }
1819 :
1820 86 : while (!TAILQ_EMPTY(&ctrlr_ch->pending_resets)) {
1821 4 : bdev_io = TAILQ_FIRST(&ctrlr_ch->pending_resets);
1822 4 : TAILQ_REMOVE(&ctrlr_ch->pending_resets, bdev_io, module_link);
1823 :
1824 4 : bio = (struct nvme_bdev_io *)bdev_io->driver_ctx;
1825 4 : bdev_nvme_reset_io_continue(bio, rc);
1826 : }
1827 :
1828 82 : spdk_for_each_channel_continue(i, 0);
1829 82 : }
1830 :
1831 : /* This function marks the current trid as failed by storing the current ticks,
1832 : * then promotes the next trid within the controller to the active trid, if one exists.
1833 : *
1834 : * A true return value requests that the caller disconnect the current trid now
1835 : * in order to try connecting the next trid.
1836 : */
1837 : static bool
1838 36 : bdev_nvme_failover_trid(struct nvme_ctrlr *nvme_ctrlr, bool remove, bool start)
1839 : {
1840 : struct nvme_path_id *path_id, *next_path;
1841 : int rc __attribute__((unused));
1842 :
1843 36 : path_id = TAILQ_FIRST(&nvme_ctrlr->trids);
1844 36 : assert(path_id);
1845 36 : assert(path_id == nvme_ctrlr->active_path_id);
1846 36 : next_path = TAILQ_NEXT(path_id, link);
1847 :
1848 : /* Update the last failed time. A trid is considered failed if its last
1849 : * failed time is non-zero.
1850 : */
1851 36 : path_id->last_failed_tsc = spdk_get_ticks();
1852 :
1853 36 : if (next_path == NULL) {
1854 : /* There is no alternate trid within a controller. */
1855 25 : return false;
1856 : }
1857 :
1858 11 : if (!start && nvme_ctrlr->opts.reconnect_delay_sec == 0) {
1859 : /* Connect is not retried in a controller reset sequence. Connecting
1860 : * the next trid will be done by the next bdev_nvme_failover_ctrlr() call.
1861 : */
1862 3 : return false;
1863 : }
1864 :
1865 8 : assert(path_id->trid.trtype != SPDK_NVME_TRANSPORT_PCIE);
1866 :
1867 8 : SPDK_NOTICELOG("Start failover from %s:%s to %s:%s\n", path_id->trid.traddr,
1868 : path_id->trid.trsvcid, next_path->trid.traddr, next_path->trid.trsvcid);
1869 :
1870 8 : spdk_nvme_ctrlr_fail(nvme_ctrlr->ctrlr);
1871 8 : nvme_ctrlr->active_path_id = next_path;
1872 8 : rc = spdk_nvme_ctrlr_set_trid(nvme_ctrlr->ctrlr, &next_path->trid);
1873 8 : assert(rc == 0);
1874 8 : TAILQ_REMOVE(&nvme_ctrlr->trids, path_id, link);
1875 8 : if (!remove) {
1876 : /* Shuffle the old trid to the end of the list and use the new one.
1877 : * This allows round-robin through multiple connections.
1878 : */
1879 6 : TAILQ_INSERT_TAIL(&nvme_ctrlr->trids, path_id, link);
1880 : } else {
1881 2 : free(path_id);
1882 : }
1883 :
1884 8 : if (start || next_path->last_failed_tsc == 0) {
1885 : /* bdev_nvme_failover_ctrlr() was just called, or the next trid has not
1886 : * failed or been tried yet. Try the next trid now.
1887 : */
1888 7 : return true;
1889 : }
1890 :
1891 1 : if (spdk_get_ticks() > next_path->last_failed_tsc + spdk_get_ticks_hz() *
1892 1 : nvme_ctrlr->opts.reconnect_delay_sec) {
1893 : /* Enough backoff has passed since the next trid failed. Try the next trid now. */
1894 0 : return true;
1895 : }
1896 :
1897 : /* The next trid will be tried after reconnect_delay_sec seconds. */
1898 1 : return false;
1899 : }
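/*
 * [Editor's illustration, not part of the original source] The list
 * manipulation in bdev_nvme_failover_trid() reduces to a rotate-or-remove
 * on the trid list. A minimal sketch using plain sys/queue.h and a
 * hypothetical path type:
 */
#include <stdbool.h>
#include <stdlib.h>
#include <sys/queue.h>

struct path {
	TAILQ_ENTRY(path) link;
};
TAILQ_HEAD(path_list, path);

/* Rotate the failed head to the tail (or free it when removing) and return
 * the new head, i.e. the next path to try; NULL if there is no alternate.
 */
static struct path *
next_path_after_failover(struct path_list *list, bool remove_failed)
{
	struct path *head = TAILQ_FIRST(list);
	struct path *next = TAILQ_NEXT(head, link);

	if (next == NULL) {
		return NULL;
	}
	TAILQ_REMOVE(list, head, link);
	if (remove_failed) {
		free(head);
	} else {
		TAILQ_INSERT_TAIL(list, head, link);	/* round-robin */
	}
	return next;
}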
1900 :
1901 : static bool
1902 68 : bdev_nvme_check_ctrlr_loss_timeout(struct nvme_ctrlr *nvme_ctrlr)
1903 : {
1904 : int32_t elapsed;
1905 :
1906 68 : if (nvme_ctrlr->opts.ctrlr_loss_timeout_sec == 0 ||
1907 36 : nvme_ctrlr->opts.ctrlr_loss_timeout_sec == -1) {
1908 42 : return false;
1909 : }
1910 :
1911 26 : elapsed = (spdk_get_ticks() - nvme_ctrlr->reset_start_tsc) / spdk_get_ticks_hz();
1912 26 : if (elapsed >= nvme_ctrlr->opts.ctrlr_loss_timeout_sec) {
1913 6 : return true;
1914 : } else {
1915 20 : return false;
1916 : }
1917 : }
1918 :
1919 : static bool
1920 12 : bdev_nvme_check_fast_io_fail_timeout(struct nvme_ctrlr *nvme_ctrlr)
1921 : {
1922 : uint32_t elapsed;
1923 :
1924 12 : if (nvme_ctrlr->opts.fast_io_fail_timeout_sec == 0) {
1925 8 : return false;
1926 : }
1927 :
1928 4 : elapsed = (spdk_get_ticks() - nvme_ctrlr->reset_start_tsc) / spdk_get_ticks_hz();
1929 4 : if (elapsed >= nvme_ctrlr->opts.fast_io_fail_timeout_sec) {
1930 2 : return true;
1931 : } else {
1932 2 : return false;
1933 : }
1934 : }
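/*
 * [Editor's note, added for clarity] Both timeout checks above use the same
 * tick arithmetic: with spdk_get_ticks_hz() ticks per second,
 *
 *     elapsed_sec = (now - reset_start_tsc) / ticks_hz
 *
 * Integer division truncates, so a timeout of N seconds fires on the first
 * check at or after N full seconds have elapsed. A minimal helper capturing
 * this (hypothetical, not in the original):
 */
#include <stdint.h>

static inline uint64_t
ticks_to_elapsed_sec(uint64_t now, uint64_t start, uint64_t ticks_hz)
{
	return (now - start) / ticks_hz;
}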
1935 :
1936 : static void bdev_nvme_reset_ctrlr_complete(struct nvme_ctrlr *nvme_ctrlr, bool success);
1937 :
1938 : static void
1939 51 : nvme_ctrlr_disconnect(struct nvme_ctrlr *nvme_ctrlr, nvme_ctrlr_disconnected_cb cb_fn)
1940 : {
1941 : int rc;
1942 :
1943 51 : rc = spdk_nvme_ctrlr_disconnect(nvme_ctrlr->ctrlr);
1944 51 : if (rc != 0) {
1945 : /* Disconnect fails if ctrlr is already resetting or removed. In this case,
1946 : * fail the reset sequence immediately.
1947 : */
1948 1 : bdev_nvme_reset_ctrlr_complete(nvme_ctrlr, false);
1949 1 : return;
1950 : }
1951 :
1952 : /* spdk_nvme_ctrlr_disconnect() may complete asynchronously later by polling the adminq.
1953 : * Set the callback here to execute the specified operation after the ctrlr is actually disconnected.
1954 : */
1955 50 : assert(nvme_ctrlr->disconnected_cb == NULL);
1956 50 : nvme_ctrlr->disconnected_cb = cb_fn;
1957 :
1958 : /* During disconnection, shorten the poll period so the adminq is polled more often. */
1959 50 : bdev_nvme_change_adminq_poll_period(nvme_ctrlr, 0);
1960 : }
1961 :
1962 : enum bdev_nvme_op_after_reset {
1963 : OP_NONE,
1964 : OP_COMPLETE_PENDING_DESTRUCT,
1965 : OP_DESTRUCT,
1966 : OP_DELAYED_RECONNECT,
1967 : OP_FAILOVER,
1968 : };
1969 :
1970 : typedef enum bdev_nvme_op_after_reset _bdev_nvme_op_after_reset;
1971 :
1972 : static _bdev_nvme_op_after_reset
1973 50 : bdev_nvme_check_op_after_reset(struct nvme_ctrlr *nvme_ctrlr, bool success)
1974 : {
1975 50 : if (nvme_ctrlr_can_be_unregistered(nvme_ctrlr)) {
1976 : /* Complete pending destruct after reset completes. */
1977 0 : return OP_COMPLETE_PENDING_DESTRUCT;
1978 50 : } else if (nvme_ctrlr->pending_failover) {
1979 3 : nvme_ctrlr->pending_failover = false;
1980 3 : nvme_ctrlr->reset_start_tsc = 0;
1981 3 : return OP_FAILOVER;
1982 47 : } else if (success || nvme_ctrlr->opts.reconnect_delay_sec == 0) {
1983 33 : nvme_ctrlr->reset_start_tsc = 0;
1984 33 : return OP_NONE;
1985 14 : } else if (bdev_nvme_check_ctrlr_loss_timeout(nvme_ctrlr)) {
1986 2 : return OP_DESTRUCT;
1987 : } else {
1988 12 : if (bdev_nvme_check_fast_io_fail_timeout(nvme_ctrlr)) {
1989 2 : nvme_ctrlr->fast_io_fail_timedout = true;
1990 : }
1991 12 : return OP_DELAYED_RECONNECT;
1992 : }
1993 : }
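/*
 * [Editor's summary, added for clarity] The decision above as a table;
 * conditions are evaluated top to bottom, first match wins:
 *
 *   condition                                      op after reset
 *   ---------------------------------------------  ----------------------------
 *   ctrlr can be unregistered                      OP_COMPLETE_PENDING_DESTRUCT
 *   failover was requested during the reset        OP_FAILOVER
 *   reset succeeded, or reconnect_delay_sec == 0   OP_NONE
 *   ctrlr loss timeout has already elapsed         OP_DESTRUCT
 *   otherwise (may set fast_io_fail_timedout)      OP_DELAYED_RECONNECT
 */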
1994 :
1995 : static int bdev_nvme_delete_ctrlr(struct nvme_ctrlr *nvme_ctrlr, bool hotplug);
1996 : static void bdev_nvme_reconnect_ctrlr(struct nvme_ctrlr *nvme_ctrlr);
1997 :
1998 : static int
1999 9 : bdev_nvme_reconnect_delay_timer_expired(void *ctx)
2000 : {
2001 9 : struct nvme_ctrlr *nvme_ctrlr = ctx;
2002 :
2003 : SPDK_DTRACE_PROBE1(bdev_nvme_ctrlr_reconnect_delay, nvme_ctrlr->nbdev_ctrlr->name);
2004 9 : pthread_mutex_lock(&nvme_ctrlr->mutex);
2005 :
2006 9 : spdk_poller_unregister(&nvme_ctrlr->reconnect_delay_timer);
2007 :
2008 9 : if (!nvme_ctrlr->reconnect_is_delayed) {
2009 0 : pthread_mutex_unlock(&nvme_ctrlr->mutex);
2010 0 : return SPDK_POLLER_BUSY;
2011 : }
2012 :
2013 9 : nvme_ctrlr->reconnect_is_delayed = false;
2014 :
2015 9 : if (nvme_ctrlr->destruct) {
2016 0 : pthread_mutex_unlock(&nvme_ctrlr->mutex);
2017 0 : return SPDK_POLLER_BUSY;
2018 : }
2019 :
2020 9 : assert(nvme_ctrlr->resetting == false);
2021 9 : nvme_ctrlr->resetting = true;
2022 :
2023 9 : pthread_mutex_unlock(&nvme_ctrlr->mutex);
2024 :
2025 9 : spdk_poller_resume(nvme_ctrlr->adminq_timer_poller);
2026 :
2027 9 : bdev_nvme_reconnect_ctrlr(nvme_ctrlr);
2028 9 : return SPDK_POLLER_BUSY;
2029 : }
2030 :
2031 : static void
2032 12 : bdev_nvme_start_reconnect_delay_timer(struct nvme_ctrlr *nvme_ctrlr)
2033 : {
2034 12 : spdk_poller_pause(nvme_ctrlr->adminq_timer_poller);
2035 :
2036 12 : assert(nvme_ctrlr->reconnect_is_delayed == false);
2037 12 : nvme_ctrlr->reconnect_is_delayed = true;
2038 :
2039 12 : assert(nvme_ctrlr->reconnect_delay_timer == NULL);
2040 12 : nvme_ctrlr->reconnect_delay_timer = SPDK_POLLER_REGISTER(bdev_nvme_reconnect_delay_timer_expired,
2041 : nvme_ctrlr,
2042 : nvme_ctrlr->opts.reconnect_delay_sec * SPDK_SEC_TO_USEC);
2043 12 : }
2044 :
2045 : static void remove_discovery_entry(struct nvme_ctrlr *nvme_ctrlr);
2046 :
2047 : static void
2048 48 : _bdev_nvme_reset_ctrlr_complete(struct spdk_io_channel_iter *i, int status)
2049 : {
2050 48 : struct nvme_ctrlr *nvme_ctrlr = spdk_io_channel_iter_get_io_device(i);
2051 48 : bool success = spdk_io_channel_iter_get_ctx(i) == NULL;
2052 48 : bdev_nvme_ctrlr_op_cb ctrlr_op_cb_fn = nvme_ctrlr->ctrlr_op_cb_fn;
2053 48 : void *ctrlr_op_cb_arg = nvme_ctrlr->ctrlr_op_cb_arg;
2054 : enum bdev_nvme_op_after_reset op_after_reset;
2055 :
2056 48 : assert(nvme_ctrlr->thread == spdk_get_thread());
2057 :
2058 48 : nvme_ctrlr->ctrlr_op_cb_fn = NULL;
2059 48 : nvme_ctrlr->ctrlr_op_cb_arg = NULL;
2060 :
2061 48 : if (!success) {
2062 21 : SPDK_ERRLOG("Resetting controller failed.\n");
2063 : } else {
2064 27 : SPDK_NOTICELOG("Resetting controller successful.\n");
2065 : }
2066 :
2067 48 : pthread_mutex_lock(&nvme_ctrlr->mutex);
2068 48 : nvme_ctrlr->resetting = false;
2069 48 : nvme_ctrlr->dont_retry = false;
2070 48 : nvme_ctrlr->in_failover = false;
2071 :
2072 48 : op_after_reset = bdev_nvme_check_op_after_reset(nvme_ctrlr, success);
2073 48 : pthread_mutex_unlock(&nvme_ctrlr->mutex);
2074 :
2075 : /* Delay callbacks when the next operation is a failover. */
2076 48 : if (ctrlr_op_cb_fn && op_after_reset != OP_FAILOVER) {
2077 10 : ctrlr_op_cb_fn(ctrlr_op_cb_arg, success ? 0 : -1);
2078 : }
2079 :
2080 48 : switch (op_after_reset) {
2081 0 : case OP_COMPLETE_PENDING_DESTRUCT:
2082 0 : nvme_ctrlr_unregister(nvme_ctrlr);
2083 0 : break;
2084 2 : case OP_DESTRUCT:
2085 2 : bdev_nvme_delete_ctrlr(nvme_ctrlr, false);
2086 2 : remove_discovery_entry(nvme_ctrlr);
2087 2 : break;
2088 12 : case OP_DELAYED_RECONNECT:
2089 12 : nvme_ctrlr_disconnect(nvme_ctrlr, bdev_nvme_start_reconnect_delay_timer);
2090 12 : break;
2091 3 : case OP_FAILOVER:
2092 3 : nvme_ctrlr->ctrlr_op_cb_fn = ctrlr_op_cb_fn;
2093 3 : nvme_ctrlr->ctrlr_op_cb_arg = ctrlr_op_cb_arg;
2094 3 : bdev_nvme_failover_ctrlr(nvme_ctrlr);
2095 3 : break;
2096 31 : default:
2097 31 : break;
2098 : }
2099 48 : }
2100 :
2101 : static void
2102 50 : bdev_nvme_reset_ctrlr_complete(struct nvme_ctrlr *nvme_ctrlr, bool success)
2103 : {
2104 50 : pthread_mutex_lock(&nvme_ctrlr->mutex);
2105 50 : if (!success) {
2106 : /* Connecting the active trid failed. Promote the next alternate trid to
2107 : * the active trid, if one exists.
2108 : */
2109 23 : if (bdev_nvme_failover_trid(nvme_ctrlr, false, false)) {
2110 : /* The next alternate trid exists and is ready to try. Try it now. */
2111 2 : pthread_mutex_unlock(&nvme_ctrlr->mutex);
2112 :
2113 2 : nvme_ctrlr_disconnect(nvme_ctrlr, bdev_nvme_reconnect_ctrlr);
2114 2 : return;
2115 : }
2116 :
2117 : /* We reach here if there is no alternate trid, or if the next trid exists
2118 : * but is not yet ready to try. The active trid will be retried after
2119 : * reconnect_delay_sec seconds if that is non-zero, or at the next reset call otherwise.
2120 : */
2121 : } else {
2122 : /* Connecting the active trid succeeded. Clear the last failed time, since a
2123 : * trid is considered failed while its last failed time is non-zero.
2124 : */
2125 27 : nvme_ctrlr->active_path_id->last_failed_tsc = 0;
2126 : }
2127 48 : pthread_mutex_unlock(&nvme_ctrlr->mutex);
2128 :
2129 : /* Make sure we clear any pending resets before returning. */
2130 48 : spdk_for_each_channel(nvme_ctrlr,
2131 : bdev_nvme_complete_pending_resets,
2132 : success ? NULL : (void *)0x1,
2133 : _bdev_nvme_reset_ctrlr_complete);
2134 : }
2135 :
2136 : static void
2137 0 : bdev_nvme_reset_create_qpairs_failed(struct spdk_io_channel_iter *i, int status)
2138 : {
2139 0 : struct nvme_ctrlr *nvme_ctrlr = spdk_io_channel_iter_get_io_device(i);
2140 :
2141 0 : bdev_nvme_reset_ctrlr_complete(nvme_ctrlr, false);
2142 0 : }
2143 :
2144 : static void
2145 62 : bdev_nvme_reset_destroy_qpair(struct spdk_io_channel_iter *i)
2146 : {
2147 62 : struct spdk_io_channel *ch = spdk_io_channel_iter_get_channel(i);
2148 62 : struct nvme_ctrlr_channel *ctrlr_ch = spdk_io_channel_get_ctx(ch);
2149 : struct nvme_qpair *nvme_qpair;
2150 :
2151 62 : nvme_qpair = ctrlr_ch->qpair;
2152 62 : assert(nvme_qpair != NULL);
2153 :
2154 62 : _bdev_nvme_clear_io_path_cache(nvme_qpair);
2155 :
2156 62 : if (nvme_qpair->qpair != NULL) {
2157 52 : if (nvme_qpair->ctrlr->dont_retry) {
2158 39 : spdk_nvme_qpair_set_abort_dnr(nvme_qpair->qpair, true);
2159 : }
2160 52 : spdk_nvme_ctrlr_disconnect_io_qpair(nvme_qpair->qpair);
2161 :
2162 : /* The current full reset sequence will move to the next
2163 : * ctrlr_channel after the qpair is actually disconnected.
2164 : */
2165 52 : assert(ctrlr_ch->reset_iter == NULL);
2166 52 : ctrlr_ch->reset_iter = i;
2167 : } else {
2168 10 : spdk_for_each_channel_continue(i, 0);
2169 : }
2170 62 : }
2171 :
2172 : static void
2173 27 : bdev_nvme_reset_create_qpairs_done(struct spdk_io_channel_iter *i, int status)
2174 : {
2175 27 : struct nvme_ctrlr *nvme_ctrlr = spdk_io_channel_iter_get_io_device(i);
2176 :
2177 27 : if (status == 0) {
2178 27 : bdev_nvme_reset_ctrlr_complete(nvme_ctrlr, true);
2179 : } else {
2180 : /* Delete the added qpairs and quiesce the ctrlr to make its state clean. */
2181 0 : spdk_for_each_channel(nvme_ctrlr,
2182 : bdev_nvme_reset_destroy_qpair,
2183 : NULL,
2184 : bdev_nvme_reset_create_qpairs_failed);
2185 : }
2186 27 : }
2187 :
2188 : static int
2189 43 : bdev_nvme_reset_check_qpair_connected(void *ctx)
2190 : {
2191 43 : struct nvme_ctrlr_channel *ctrlr_ch = ctx;
2192 :
2193 43 : if (ctrlr_ch->reset_iter == NULL) {
2194 : /* The qpair already failed to connect and the reset sequence is being aborted. */
2195 0 : assert(ctrlr_ch->connect_poller == NULL);
2196 0 : assert(ctrlr_ch->qpair->qpair == NULL);
2197 0 : return SPDK_POLLER_BUSY;
2198 : }
2199 :
2200 43 : assert(ctrlr_ch->qpair->qpair != NULL);
2201 :
2202 43 : if (!spdk_nvme_qpair_is_connected(ctrlr_ch->qpair->qpair)) {
2203 0 : return SPDK_POLLER_BUSY;
2204 : }
2205 :
2206 43 : spdk_poller_unregister(&ctrlr_ch->connect_poller);
2207 :
2208 : /* The qpair completed connecting. Move to the next ctrlr_channel. */
2209 43 : spdk_for_each_channel_continue(ctrlr_ch->reset_iter, 0);
2210 43 : ctrlr_ch->reset_iter = NULL;
2211 :
2212 43 : if (!g_opts.disable_auto_failback) {
2213 30 : _bdev_nvme_clear_io_path_cache(ctrlr_ch->qpair);
2214 : }
2215 :
2216 43 : return SPDK_POLLER_BUSY;
2217 : }
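/*
 * [Editor's illustration, not part of the original source] The connect
 * poller above follows a common shape: keep returning busy until an
 * asynchronous condition holds, then unregister and resume the waiting
 * sequence. A generic sketch with hypothetical names:
 */
#include <stdbool.h>

struct async_wait {
	bool (*is_ready)(void *ctx);	/* e.g. "is the qpair connected?" */
	void (*resume)(void *ctx);	/* e.g. continue the channel iterator */
	void *ctx;
};

/* Return value mirrors SPDK_POLLER_BUSY/SPDK_POLLER_IDLE: 1 = busy, 0 = idle. */
static int
async_wait_poll(struct async_wait *w)
{
	if (!w->is_ready(w->ctx)) {
		return 1;	/* not done yet; poll again */
	}
	/* In the real code the poller unregisters itself here. */
	w->resume(w->ctx);
	return 1;
}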
2218 :
2219 : static void
2220 43 : bdev_nvme_reset_create_qpair(struct spdk_io_channel_iter *i)
2221 : {
2222 43 : struct spdk_io_channel *_ch = spdk_io_channel_iter_get_channel(i);
2223 43 : struct nvme_ctrlr_channel *ctrlr_ch = spdk_io_channel_get_ctx(_ch);
2224 : int rc;
2225 :
2226 43 : rc = bdev_nvme_create_qpair(ctrlr_ch->qpair);
2227 43 : if (rc == 0) {
2228 43 : ctrlr_ch->connect_poller = SPDK_POLLER_REGISTER(bdev_nvme_reset_check_qpair_connected,
2229 : ctrlr_ch, 0);
2230 :
2231 : /* The current full reset sequence will move to the next
2232 : * ctrlr_channel after the qpair is actually connected.
2233 : */
2234 43 : assert(ctrlr_ch->reset_iter == NULL);
2235 43 : ctrlr_ch->reset_iter = i;
2236 : } else {
2237 0 : spdk_for_each_channel_continue(i, rc);
2238 : }
2239 43 : }
2240 :
2241 : static void
2242 27 : nvme_ctrlr_check_namespaces(struct nvme_ctrlr *nvme_ctrlr)
2243 : {
2244 27 : struct spdk_nvme_ctrlr *ctrlr = nvme_ctrlr->ctrlr;
2245 : struct nvme_ns *nvme_ns;
2246 :
2247 39 : for (nvme_ns = nvme_ctrlr_get_first_active_ns(nvme_ctrlr);
2248 : nvme_ns != NULL;
2249 12 : nvme_ns = nvme_ctrlr_get_next_active_ns(nvme_ctrlr, nvme_ns)) {
2250 12 : if (!spdk_nvme_ctrlr_is_active_ns(ctrlr, nvme_ns->id)) {
2251 1 : SPDK_DEBUGLOG(bdev_nvme, "NSID %u was removed during reset.\n", nvme_ns->id);
2252 : /* The NS may be added again later. Just nullify nvme_ns->ns. */
2253 1 : nvme_ns->ns = NULL;
2254 : }
2255 : }
2256 27 : }
2257 :
2258 :
2259 : static int
2260 49 : bdev_nvme_reconnect_ctrlr_poll(void *arg)
2261 : {
2262 49 : struct nvme_ctrlr *nvme_ctrlr = arg;
2263 49 : int rc = -ETIMEDOUT;
2264 :
2265 49 : if (!bdev_nvme_check_ctrlr_loss_timeout(nvme_ctrlr)) {
2266 47 : rc = spdk_nvme_ctrlr_reconnect_poll_async(nvme_ctrlr->ctrlr);
2267 47 : if (rc == -EAGAIN) {
2268 0 : return SPDK_POLLER_BUSY;
2269 : }
2270 : }
2271 :
2272 49 : spdk_poller_unregister(&nvme_ctrlr->reset_detach_poller);
2273 49 : if (rc == 0) {
2274 27 : nvme_ctrlr_check_namespaces(nvme_ctrlr);
2275 :
2276 : /* Recreate all of the I/O queue pairs */
2277 27 : spdk_for_each_channel(nvme_ctrlr,
2278 : bdev_nvme_reset_create_qpair,
2279 : NULL,
2280 : bdev_nvme_reset_create_qpairs_done);
2281 : } else {
2282 22 : bdev_nvme_reset_ctrlr_complete(nvme_ctrlr, false);
2283 : }
2284 49 : return SPDK_POLLER_BUSY;
2285 : }
2286 :
2287 : static void
2288 49 : bdev_nvme_reconnect_ctrlr(struct nvme_ctrlr *nvme_ctrlr)
2289 : {
2290 49 : spdk_nvme_ctrlr_reconnect_async(nvme_ctrlr->ctrlr);
2291 :
2292 : SPDK_DTRACE_PROBE1(bdev_nvme_ctrlr_reconnect, nvme_ctrlr->nbdev_ctrlr->name);
2293 49 : assert(nvme_ctrlr->reset_detach_poller == NULL);
2294 49 : nvme_ctrlr->reset_detach_poller = SPDK_POLLER_REGISTER(bdev_nvme_reconnect_ctrlr_poll,
2295 : nvme_ctrlr, 0);
2296 49 : }
2297 :
2298 : static void
2299 36 : bdev_nvme_reset_destroy_qpair_done(struct spdk_io_channel_iter *i, int status)
2300 : {
2301 36 : struct nvme_ctrlr *nvme_ctrlr = spdk_io_channel_iter_get_io_device(i);
2302 :
2303 : SPDK_DTRACE_PROBE1(bdev_nvme_ctrlr_reset, nvme_ctrlr->nbdev_ctrlr->name);
2304 36 : assert(status == 0);
2305 :
2306 36 : if (!spdk_nvme_ctrlr_is_fabrics(nvme_ctrlr->ctrlr)) {
2307 0 : bdev_nvme_reconnect_ctrlr(nvme_ctrlr);
2308 : } else {
2309 36 : nvme_ctrlr_disconnect(nvme_ctrlr, bdev_nvme_reconnect_ctrlr);
2310 : }
2311 36 : }
2312 :
2313 : static void
2314 36 : bdev_nvme_reset_destroy_qpairs(struct nvme_ctrlr *nvme_ctrlr)
2315 : {
2316 36 : spdk_for_each_channel(nvme_ctrlr,
2317 : bdev_nvme_reset_destroy_qpair,
2318 : NULL,
2319 : bdev_nvme_reset_destroy_qpair_done);
2320 36 : }
2321 :
2322 : static void
2323 3 : bdev_nvme_reconnect_ctrlr_now(void *ctx)
2324 : {
2325 3 : struct nvme_ctrlr *nvme_ctrlr = ctx;
2326 :
2327 3 : assert(nvme_ctrlr->resetting == true);
2328 3 : assert(nvme_ctrlr->thread == spdk_get_thread());
2329 :
2330 3 : spdk_poller_unregister(&nvme_ctrlr->reconnect_delay_timer);
2331 :
2332 3 : spdk_poller_resume(nvme_ctrlr->adminq_timer_poller);
2333 :
2334 3 : bdev_nvme_reconnect_ctrlr(nvme_ctrlr);
2335 3 : }
2336 :
2337 : static void
2338 36 : _bdev_nvme_reset_ctrlr(void *ctx)
2339 : {
2340 36 : struct nvme_ctrlr *nvme_ctrlr = ctx;
2341 :
2342 36 : assert(nvme_ctrlr->resetting == true);
2343 36 : assert(nvme_ctrlr->thread == spdk_get_thread());
2344 :
2345 36 : if (!spdk_nvme_ctrlr_is_fabrics(nvme_ctrlr->ctrlr)) {
2346 0 : nvme_ctrlr_disconnect(nvme_ctrlr, bdev_nvme_reset_destroy_qpairs);
2347 : } else {
2348 36 : bdev_nvme_reset_destroy_qpairs(nvme_ctrlr);
2349 : }
2350 36 : }
2351 :
2352 : static int
2353 34 : bdev_nvme_reset_ctrlr(struct nvme_ctrlr *nvme_ctrlr)
2354 : {
2355 : spdk_msg_fn msg_fn;
2356 :
2357 34 : pthread_mutex_lock(&nvme_ctrlr->mutex);
2358 34 : if (nvme_ctrlr->destruct) {
2359 3 : pthread_mutex_unlock(&nvme_ctrlr->mutex);
2360 3 : return -ENXIO;
2361 : }
2362 :
2363 31 : if (nvme_ctrlr->resetting) {
2364 6 : pthread_mutex_unlock(&nvme_ctrlr->mutex);
2365 6 : SPDK_NOTICELOG("Unable to perform reset, already in progress.\n");
2366 6 : return -EBUSY;
2367 : }
2368 :
2369 25 : if (nvme_ctrlr->disabled) {
2370 0 : pthread_mutex_unlock(&nvme_ctrlr->mutex);
2371 0 : SPDK_NOTICELOG("Unable to perform reset. Controller is disabled.\n");
2372 0 : return -EALREADY;
2373 : }
2374 :
2375 25 : nvme_ctrlr->resetting = true;
2376 25 : nvme_ctrlr->dont_retry = true;
2377 :
2378 25 : if (nvme_ctrlr->reconnect_is_delayed) {
2379 1 : SPDK_DEBUGLOG(bdev_nvme, "Reconnect is already scheduled.\n");
2380 1 : msg_fn = bdev_nvme_reconnect_ctrlr_now;
2381 1 : nvme_ctrlr->reconnect_is_delayed = false;
2382 : } else {
2383 24 : msg_fn = _bdev_nvme_reset_ctrlr;
2384 24 : assert(nvme_ctrlr->reset_start_tsc == 0);
2385 : }
2386 :
2387 25 : nvme_ctrlr->reset_start_tsc = spdk_get_ticks();
2388 :
2389 25 : pthread_mutex_unlock(&nvme_ctrlr->mutex);
2390 :
2391 25 : spdk_thread_send_msg(nvme_ctrlr->thread, msg_fn, nvme_ctrlr);
2392 25 : return 0;
2393 : }
2394 :
2395 : static int
2396 3 : bdev_nvme_enable_ctrlr(struct nvme_ctrlr *nvme_ctrlr)
2397 : {
2398 3 : pthread_mutex_lock(&nvme_ctrlr->mutex);
2399 3 : if (nvme_ctrlr->destruct) {
2400 0 : pthread_mutex_unlock(&nvme_ctrlr->mutex);
2401 0 : return -ENXIO;
2402 : }
2403 :
2404 3 : if (nvme_ctrlr->resetting) {
2405 0 : pthread_mutex_unlock(&nvme_ctrlr->mutex);
2406 0 : return -EBUSY;
2407 : }
2408 :
2409 3 : if (!nvme_ctrlr->disabled) {
2410 1 : pthread_mutex_unlock(&nvme_ctrlr->mutex);
2411 1 : return -EALREADY;
2412 : }
2413 :
2414 2 : nvme_ctrlr->disabled = false;
2415 2 : nvme_ctrlr->resetting = true;
2416 :
2417 2 : nvme_ctrlr->reset_start_tsc = spdk_get_ticks();
2418 :
2419 2 : pthread_mutex_unlock(&nvme_ctrlr->mutex);
2420 :
2421 2 : spdk_thread_send_msg(nvme_ctrlr->thread, bdev_nvme_reconnect_ctrlr_now, nvme_ctrlr);
2422 2 : return 0;
2423 : }
2424 :
2425 : static void
2426 2 : _bdev_nvme_disable_ctrlr_complete(struct spdk_io_channel_iter *i, int status)
2427 : {
2428 2 : struct nvme_ctrlr *nvme_ctrlr = spdk_io_channel_iter_get_io_device(i);
2429 2 : bdev_nvme_ctrlr_op_cb ctrlr_op_cb_fn = nvme_ctrlr->ctrlr_op_cb_fn;
2430 2 : void *ctrlr_op_cb_arg = nvme_ctrlr->ctrlr_op_cb_arg;
2431 : enum bdev_nvme_op_after_reset op_after_disable;
2432 :
2433 2 : assert(nvme_ctrlr->thread == spdk_get_thread());
2434 :
2435 2 : nvme_ctrlr->ctrlr_op_cb_fn = NULL;
2436 2 : nvme_ctrlr->ctrlr_op_cb_arg = NULL;
2437 :
2438 2 : pthread_mutex_lock(&nvme_ctrlr->mutex);
2439 :
2440 2 : nvme_ctrlr->resetting = false;
2441 2 : nvme_ctrlr->dont_retry = false;
2442 :
2443 2 : op_after_disable = bdev_nvme_check_op_after_reset(nvme_ctrlr, true);
2444 :
2445 2 : nvme_ctrlr->disabled = true;
2446 2 : spdk_poller_pause(nvme_ctrlr->adminq_timer_poller);
2447 :
2448 2 : pthread_mutex_unlock(&nvme_ctrlr->mutex);
2449 :
2450 2 : if (ctrlr_op_cb_fn) {
2451 0 : ctrlr_op_cb_fn(ctrlr_op_cb_arg, 0);
2452 : }
2453 :
2454 2 : switch (op_after_disable) {
2455 0 : case OP_COMPLETE_PENDING_DESTRUCT:
2456 0 : nvme_ctrlr_unregister(nvme_ctrlr);
2457 0 : break;
2458 2 : default:
2459 2 : break;
2460 : }
2461 :
2462 2 : }
2463 :
2464 : static void
2465 2 : bdev_nvme_disable_ctrlr_complete(struct nvme_ctrlr *nvme_ctrlr)
2466 : {
2467 : /* Make sure we clear any pending resets before returning. */
2468 2 : spdk_for_each_channel(nvme_ctrlr,
2469 : bdev_nvme_complete_pending_resets,
2470 : NULL,
2471 : _bdev_nvme_disable_ctrlr_complete);
2472 2 : }
2473 :
2474 : static void
2475 1 : bdev_nvme_disable_destroy_qpairs_done(struct spdk_io_channel_iter *i, int status)
2476 : {
2477 1 : struct nvme_ctrlr *nvme_ctrlr = spdk_io_channel_iter_get_io_device(i);
2478 :
2479 1 : assert(status == 0);
2480 :
2481 1 : if (!spdk_nvme_ctrlr_is_fabrics(nvme_ctrlr->ctrlr)) {
2482 0 : bdev_nvme_disable_ctrlr_complete(nvme_ctrlr);
2483 : } else {
2484 1 : nvme_ctrlr_disconnect(nvme_ctrlr, bdev_nvme_disable_ctrlr_complete);
2485 : }
2486 1 : }
2487 :
2488 : static void
2489 1 : bdev_nvme_disable_destroy_qpairs(struct nvme_ctrlr *nvme_ctrlr)
2490 : {
2491 1 : spdk_for_each_channel(nvme_ctrlr,
2492 : bdev_nvme_reset_destroy_qpair,
2493 : NULL,
2494 : bdev_nvme_disable_destroy_qpairs_done);
2495 1 : }
2496 :
2497 : static void
2498 1 : _bdev_nvme_cancel_reconnect_and_disable_ctrlr(void *ctx)
2499 : {
2500 1 : struct nvme_ctrlr *nvme_ctrlr = ctx;
2501 :
2502 1 : assert(nvme_ctrlr->resetting == true);
2503 1 : assert(nvme_ctrlr->thread == spdk_get_thread());
2504 :
2505 1 : spdk_poller_unregister(&nvme_ctrlr->reconnect_delay_timer);
2506 :
2507 1 : bdev_nvme_disable_ctrlr_complete(nvme_ctrlr);
2508 1 : }
2509 :
2510 : static void
2511 1 : _bdev_nvme_disconnect_and_disable_ctrlr(void *ctx)
2512 : {
2513 1 : struct nvme_ctrlr *nvme_ctrlr = ctx;
2514 :
2515 1 : assert(nvme_ctrlr->resetting == true);
2516 1 : assert(nvme_ctrlr->thread == spdk_get_thread());
2517 :
2518 1 : if (!spdk_nvme_ctrlr_is_fabrics(nvme_ctrlr->ctrlr)) {
2519 0 : nvme_ctrlr_disconnect(nvme_ctrlr, bdev_nvme_disable_destroy_qpairs);
2520 : } else {
2521 1 : bdev_nvme_disable_destroy_qpairs(nvme_ctrlr);
2522 : }
2523 1 : }
2524 :
2525 : static int
2526 5 : bdev_nvme_disable_ctrlr(struct nvme_ctrlr *nvme_ctrlr)
2527 : {
2528 : spdk_msg_fn msg_fn;
2529 :
2530 5 : pthread_mutex_lock(&nvme_ctrlr->mutex);
2531 5 : if (nvme_ctrlr->destruct) {
2532 1 : pthread_mutex_unlock(&nvme_ctrlr->mutex);
2533 1 : return -ENXIO;
2534 : }
2535 :
2536 4 : if (nvme_ctrlr->resetting) {
2537 1 : pthread_mutex_unlock(&nvme_ctrlr->mutex);
2538 1 : return -EBUSY;
2539 : }
2540 :
2541 3 : if (nvme_ctrlr->disabled) {
2542 1 : pthread_mutex_unlock(&nvme_ctrlr->mutex);
2543 1 : return -EALREADY;
2544 : }
2545 :
2546 2 : nvme_ctrlr->resetting = true;
2547 2 : nvme_ctrlr->dont_retry = true;
2548 :
2549 2 : if (nvme_ctrlr->reconnect_is_delayed) {
2550 1 : msg_fn = _bdev_nvme_cancel_reconnect_and_disable_ctrlr;
2551 1 : nvme_ctrlr->reconnect_is_delayed = false;
2552 : } else {
2553 1 : msg_fn = _bdev_nvme_disconnect_and_disable_ctrlr;
2554 : }
2555 :
2556 2 : nvme_ctrlr->reset_start_tsc = spdk_get_ticks();
2557 :
2558 2 : pthread_mutex_unlock(&nvme_ctrlr->mutex);
2559 :
2560 2 : spdk_thread_send_msg(nvme_ctrlr->thread, msg_fn, nvme_ctrlr);
2561 2 : return 0;
2562 : }
2563 :
2564 : static int
2565 16 : nvme_ctrlr_op(struct nvme_ctrlr *nvme_ctrlr, enum nvme_ctrlr_op op,
2566 : bdev_nvme_ctrlr_op_cb cb_fn, void *cb_arg)
2567 : {
2568 : int rc;
2569 :
2570 16 : switch (op) {
2571 15 : case NVME_CTRLR_OP_RESET:
2572 15 : rc = bdev_nvme_reset_ctrlr(nvme_ctrlr);
2573 15 : break;
2574 0 : case NVME_CTRLR_OP_ENABLE:
2575 0 : rc = bdev_nvme_enable_ctrlr(nvme_ctrlr);
2576 0 : break;
2577 0 : case NVME_CTRLR_OP_DISABLE:
2578 0 : rc = bdev_nvme_disable_ctrlr(nvme_ctrlr);
2579 0 : break;
2580 1 : default:
2581 1 : rc = -EINVAL;
2582 1 : break;
2583 : }
2584 :
2585 16 : if (rc == 0) {
2586 9 : assert(nvme_ctrlr->ctrlr_op_cb_fn == NULL);
2587 9 : assert(nvme_ctrlr->ctrlr_op_cb_arg == NULL);
2588 9 : nvme_ctrlr->ctrlr_op_cb_fn = cb_fn;
2589 9 : nvme_ctrlr->ctrlr_op_cb_arg = cb_arg;
2590 : }
2591 16 : return rc;
2592 : }
2593 :
2594 : struct nvme_ctrlr_op_rpc_ctx {
2595 : struct nvme_ctrlr *nvme_ctrlr;
2596 : struct spdk_thread *orig_thread;
2597 : enum nvme_ctrlr_op op;
2598 : int rc;
2599 : bdev_nvme_ctrlr_op_cb cb_fn;
2600 : void *cb_arg;
2601 : };
2602 :
2603 : static void
2604 4 : _nvme_ctrlr_op_rpc_complete(void *_ctx)
2605 : {
2606 4 : struct nvme_ctrlr_op_rpc_ctx *ctx = _ctx;
2607 :
2608 4 : assert(ctx != NULL);
2609 4 : assert(ctx->cb_fn != NULL);
2610 :
2611 4 : ctx->cb_fn(ctx->cb_arg, ctx->rc);
2612 :
2613 4 : free(ctx);
2614 4 : }
2615 :
2616 : static void
2617 4 : nvme_ctrlr_op_rpc_complete(void *cb_arg, int rc)
2618 : {
2619 4 : struct nvme_ctrlr_op_rpc_ctx *ctx = cb_arg;
2620 :
2621 4 : ctx->rc = rc;
2622 :
2623 4 : spdk_thread_send_msg(ctx->orig_thread, _nvme_ctrlr_op_rpc_complete, ctx);
2624 4 : }
2625 :
2626 : void
2627 4 : nvme_ctrlr_op_rpc(struct nvme_ctrlr *nvme_ctrlr, enum nvme_ctrlr_op op,
2628 : bdev_nvme_ctrlr_op_cb cb_fn, void *cb_arg)
2629 : {
2630 : struct nvme_ctrlr_op_rpc_ctx *ctx;
2631 : int rc;
2632 :
2633 4 : assert(cb_fn != NULL);
2634 :
2635 4 : ctx = calloc(1, sizeof(*ctx));
2636 4 : if (ctx == NULL) {
2637 0 : SPDK_ERRLOG("Failed to allocate nvme_ctrlr_op_rpc_ctx.\n");
2638 0 : cb_fn(cb_arg, -ENOMEM);
2639 0 : return;
2640 : }
2641 :
2642 4 : ctx->orig_thread = spdk_get_thread();
2643 4 : ctx->cb_fn = cb_fn;
2644 4 : ctx->cb_arg = cb_arg;
2645 :
2646 4 : rc = nvme_ctrlr_op(nvme_ctrlr, op, nvme_ctrlr_op_rpc_complete, ctx);
2647 4 : if (rc == 0) {
2648 1 : return;
2649 3 : } else if (rc == -EALREADY) {
2650 0 : rc = 0;
2651 : }
2652 :
2653 3 : nvme_ctrlr_op_rpc_complete(ctx, rc);
2654 : }
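/*
 * [Editor's illustration, not part of the original source] The RPC
 * completion above uses a standard cross-thread marshaling pattern: the
 * context records the issuing thread, and the completion (which may run on
 * the controller's thread) is bounced back there. send_to_thread() is a
 * hypothetical stand-in for spdk_thread_send_msg().
 */
struct rpc_ctx {
	void *orig_thread;		/* thread that issued the RPC */
	void (*cb_fn)(void *cb_arg, int rc);
	void *cb_arg;
	int rc;
};

extern void send_to_thread(void *thread, void (*fn)(void *arg), void *arg);

static void
rpc_complete_on_orig_thread(void *arg)
{
	struct rpc_ctx *ctx = arg;

	ctx->cb_fn(ctx->cb_arg, ctx->rc);	/* runs on the issuing thread */
	/* the real code frees ctx here */
}

static void
rpc_done(struct rpc_ctx *ctx, int rc)	/* may run on any thread */
{
	ctx->rc = rc;
	send_to_thread(ctx->orig_thread, rpc_complete_on_orig_thread, ctx);
}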
2655 :
2656 : static void nvme_bdev_ctrlr_op_rpc_continue(void *cb_arg, int rc);
2657 :
2658 : static void
2659 2 : _nvme_bdev_ctrlr_op_rpc_continue(void *_ctx)
2660 : {
2661 2 : struct nvme_ctrlr_op_rpc_ctx *ctx = _ctx;
2662 : struct nvme_ctrlr *prev_nvme_ctrlr, *next_nvme_ctrlr;
2663 : int rc;
2664 :
2665 2 : prev_nvme_ctrlr = ctx->nvme_ctrlr;
2666 2 : ctx->nvme_ctrlr = NULL;
2667 :
2668 2 : if (ctx->rc != 0) {
2669 0 : goto complete;
2670 : }
2671 :
2672 2 : next_nvme_ctrlr = TAILQ_NEXT(prev_nvme_ctrlr, tailq);
2673 2 : if (next_nvme_ctrlr == NULL) {
2674 1 : goto complete;
2675 : }
2676 :
2677 1 : rc = nvme_ctrlr_op(next_nvme_ctrlr, ctx->op, nvme_bdev_ctrlr_op_rpc_continue, ctx);
2678 1 : if (rc == 0) {
2679 1 : ctx->nvme_ctrlr = next_nvme_ctrlr;
2680 1 : return;
2681 0 : } else if (rc == -EALREADY) {
2682 0 : ctx->nvme_ctrlr = next_nvme_ctrlr;
2683 0 : rc = 0;
2684 : }
2685 :
2686 0 : ctx->rc = rc;
2687 :
2688 1 : complete:
2689 1 : ctx->cb_fn(ctx->cb_arg, ctx->rc);
2690 1 : free(ctx);
2691 : }
2692 :
2693 : static void
2694 2 : nvme_bdev_ctrlr_op_rpc_continue(void *cb_arg, int rc)
2695 : {
2696 2 : struct nvme_ctrlr_op_rpc_ctx *ctx = cb_arg;
2697 :
2698 2 : ctx->rc = rc;
2699 :
2700 2 : spdk_thread_send_msg(ctx->orig_thread, _nvme_bdev_ctrlr_op_rpc_continue, ctx);
2701 2 : }
2702 :
2703 : void
2704 1 : nvme_bdev_ctrlr_op_rpc(struct nvme_bdev_ctrlr *nbdev_ctrlr, enum nvme_ctrlr_op op,
2705 : bdev_nvme_ctrlr_op_cb cb_fn, void *cb_arg)
2706 : {
2707 : struct nvme_ctrlr_op_rpc_ctx *ctx;
2708 : struct nvme_ctrlr *nvme_ctrlr;
2709 : int rc;
2710 :
2711 1 : assert(cb_fn != NULL);
2712 :
2713 1 : ctx = calloc(1, sizeof(*ctx));
2714 1 : if (ctx == NULL) {
2715 0 : SPDK_ERRLOG("Failed to allocate nvme_ctrlr_op_rpc_ctx.\n");
2716 0 : cb_fn(cb_arg, -ENOMEM);
2717 0 : return;
2718 : }
2719 :
2720 1 : ctx->orig_thread = spdk_get_thread();
2721 1 : ctx->op = op;
2722 1 : ctx->cb_fn = cb_fn;
2723 1 : ctx->cb_arg = cb_arg;
2724 :
2725 1 : nvme_ctrlr = TAILQ_FIRST(&nbdev_ctrlr->ctrlrs);
2726 1 : assert(nvme_ctrlr != NULL);
2727 :
2728 1 : rc = nvme_ctrlr_op(nvme_ctrlr, op, nvme_bdev_ctrlr_op_rpc_continue, ctx);
2729 1 : if (rc == 0) {
2730 1 : ctx->nvme_ctrlr = nvme_ctrlr;
2731 1 : return;
2732 0 : } else if (rc == -EALREADY) {
2733 0 : ctx->nvme_ctrlr = nvme_ctrlr;
2734 0 : rc = 0;
2735 : }
2736 :
2737 0 : nvme_bdev_ctrlr_op_rpc_continue(ctx, rc);
2738 : }
2739 :
2740 : static int _bdev_nvme_reset_io(struct nvme_io_path *io_path, struct nvme_bdev_io *bio);
2741 :
2742 : static void
2743 7 : _bdev_nvme_reset_io_complete(struct spdk_io_channel_iter *i, int status)
2744 : {
2745 7 : struct nvme_bdev_io *bio = spdk_io_channel_iter_get_ctx(i);
2746 : enum spdk_bdev_io_status io_status;
2747 :
2748 7 : if (bio->cpl.cdw0 == 0) {
2749 5 : io_status = SPDK_BDEV_IO_STATUS_SUCCESS;
2750 : } else {
2751 2 : io_status = SPDK_BDEV_IO_STATUS_FAILED;
2752 : }
2753 :
2754 7 : __bdev_nvme_io_complete(spdk_bdev_io_from_ctx(bio), io_status, NULL);
2755 7 : }
2756 :
2757 : static void
2758 14 : bdev_nvme_abort_bdev_channel(struct spdk_io_channel_iter *i)
2759 : {
2760 14 : struct spdk_io_channel *_ch = spdk_io_channel_iter_get_channel(i);
2761 14 : struct nvme_bdev_channel *nbdev_ch = spdk_io_channel_get_ctx(_ch);
2762 :
2763 14 : bdev_nvme_abort_retry_ios(nbdev_ch);
2764 :
2765 14 : spdk_for_each_channel_continue(i, 0);
2766 14 : }
2767 :
2768 : static void
2769 7 : bdev_nvme_reset_io_complete(struct nvme_bdev_io *bio)
2770 : {
2771 7 : struct spdk_bdev_io *bdev_io = spdk_bdev_io_from_ctx(bio);
2772 7 : struct nvme_bdev *nbdev = (struct nvme_bdev *)bdev_io->bdev->ctxt;
2773 :
2774 : /* Abort all queued I/Os for retry. */
2775 7 : spdk_for_each_channel(nbdev,
2776 : bdev_nvme_abort_bdev_channel,
2777 : bio,
2778 : _bdev_nvme_reset_io_complete);
2779 7 : }
2780 :
2781 : static void
2782 10 : _bdev_nvme_reset_io_continue(void *ctx)
2783 : {
2784 10 : struct nvme_bdev_io *bio = ctx;
2785 : struct nvme_io_path *prev_io_path, *next_io_path;
2786 : int rc;
2787 :
2788 10 : prev_io_path = bio->io_path;
2789 10 : bio->io_path = NULL;
2790 :
2791 10 : if (bio->cpl.cdw0 != 0) {
2792 2 : goto complete;
2793 : }
2794 :
2795 8 : next_io_path = STAILQ_NEXT(prev_io_path, stailq);
2796 8 : if (next_io_path == NULL) {
2797 5 : goto complete;
2798 : }
2799 :
2800 3 : rc = _bdev_nvme_reset_io(next_io_path, bio);
2801 3 : if (rc == 0) {
2802 3 : return;
2803 : }
2804 :
2805 0 : bio->cpl.cdw0 = 1;
2806 :
2807 7 : complete:
2808 7 : bdev_nvme_reset_io_complete(bio);
2809 : }
2810 :
2811 : static void
2812 10 : bdev_nvme_reset_io_continue(void *cb_arg, int rc)
2813 : {
2814 10 : struct nvme_bdev_io *bio = cb_arg;
2815 10 : struct spdk_bdev_io *bdev_io = spdk_bdev_io_from_ctx(bio);
2816 :
2817 10 : bio->cpl.cdw0 = (rc == 0) ? 0 : 1;
2818 :
2819 10 : spdk_thread_send_msg(spdk_bdev_io_get_thread(bdev_io), _bdev_nvme_reset_io_continue, bio);
2820 10 : }
2821 :
2822 : static int
2823 10 : _bdev_nvme_reset_io(struct nvme_io_path *io_path, struct nvme_bdev_io *bio)
2824 : {
2825 : struct nvme_ctrlr_channel *ctrlr_ch;
2826 : struct spdk_bdev_io *bdev_io;
2827 : int rc;
2828 :
2829 10 : rc = nvme_ctrlr_op(io_path->qpair->ctrlr, NVME_CTRLR_OP_RESET,
2830 : bdev_nvme_reset_io_continue, bio);
2831 10 : if (rc != 0 && rc != -EBUSY) {
2832 0 : return rc;
2833 : }
2834 :
2835 10 : assert(bio->io_path == NULL);
2836 10 : bio->io_path = io_path;
2837 :
2838 10 : if (rc == -EBUSY) {
2839 4 : ctrlr_ch = io_path->qpair->ctrlr_ch;
2840 4 : assert(ctrlr_ch != NULL);
2841 : /*
2842 : * A reset call is queued only if it came from the app framework. This is deliberate:
2843 : * we defer to the upper level rather than interfere with the app framework's reset
2844 : * strategy. If it is in the middle of a reset, we won't schedule another one.
2845 : */
2846 4 : bdev_io = spdk_bdev_io_from_ctx(bio);
2847 4 : TAILQ_INSERT_TAIL(&ctrlr_ch->pending_resets, bdev_io, module_link);
2848 : }
2849 :
2850 10 : return 0;
2851 : }
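/*
 * [Editor's illustration, not part of the original source] The reset I/O
 * above walks the channel's io_path list one element at a time; each
 * per-path reset completes asynchronously and its callback either advances
 * to the next path or finishes the whole I/O. The control flow, reduced to
 * a generic sketch with hypothetical types:
 */
#include <stddef.h>

struct seq_item {
	struct seq_item *next;
};

struct seq_walk {
	struct seq_item *current;
	int (*start_one)(struct seq_item *it, struct seq_walk *w);	/* async */
	void (*done)(struct seq_walk *w, int rc);
};

/* Invoked from start_one()'s completion callback. */
static void
seq_walk_continue(struct seq_walk *w, int rc)
{
	struct seq_item *next = w->current->next;

	if (rc != 0 || next == NULL) {
		w->done(w, rc);		/* first failure or end of list finishes the walk */
		return;
	}
	w->current = next;
	if (w->start_one(next, w) != 0) {
		w->done(w, -1);		/* could not start the next step */
	}
}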
2852 :
2853 : static void
2854 7 : bdev_nvme_reset_io(struct nvme_bdev_channel *nbdev_ch, struct nvme_bdev_io *bio)
2855 : {
2856 : struct nvme_io_path *io_path;
2857 : int rc;
2858 :
2859 7 : bio->cpl.cdw0 = 0;
2860 :
2861 : /* Reset all nvme_ctrlrs of a bdev controller sequentially. */
2862 7 : io_path = STAILQ_FIRST(&nbdev_ch->io_path_list);
2863 7 : assert(io_path != NULL);
2864 :
2865 7 : rc = _bdev_nvme_reset_io(io_path, bio);
2866 7 : if (rc != 0) {
2867 : /* If the current nvme_ctrlr is disabled, skip it and move to the next nvme_ctrlr. */
2868 0 : rc = (rc == -EALREADY) ? 0 : rc;
2869 :
2870 0 : bdev_nvme_reset_io_continue(bio, rc);
2871 : }
2872 7 : }
2873 :
2874 : static int
2875 18 : bdev_nvme_failover_ctrlr_unsafe(struct nvme_ctrlr *nvme_ctrlr, bool remove)
2876 : {
2877 18 : if (nvme_ctrlr->destruct) {
2878 : /* Don't bother resetting if the controller is in the process of being destructed. */
2879 2 : return -ENXIO;
2880 : }
2881 :
2882 16 : if (nvme_ctrlr->resetting) {
2883 3 : if (!nvme_ctrlr->in_failover) {
2884 3 : SPDK_NOTICELOG("Reset is already in progress. Defer failover until reset completes.\n");
2885 :
2886 : /* Defer failover until reset completes. */
2887 3 : nvme_ctrlr->pending_failover = true;
2888 3 : return -EINPROGRESS;
2889 : } else {
2890 0 : SPDK_NOTICELOG("Unable to perform failover, already in progress.\n");
2891 0 : return -EBUSY;
2892 : }
2893 : }
2894 :
2895 13 : bdev_nvme_failover_trid(nvme_ctrlr, remove, true);
2896 :
2897 13 : if (nvme_ctrlr->reconnect_is_delayed) {
2898 1 : SPDK_NOTICELOG("Reconnect is already scheduled.\n");
2899 :
2900 : /* We rely on the next reconnect for the failover. */
2901 1 : return -EALREADY;
2902 : }
2903 :
2904 12 : if (nvme_ctrlr->disabled) {
2905 0 : SPDK_NOTICELOG("Controller is disabled.\n");
2906 :
2907 : /* We rely on the enablement for the failover. */
2908 0 : return -EALREADY;
2909 : }
2910 :
2911 12 : nvme_ctrlr->resetting = true;
2912 12 : nvme_ctrlr->in_failover = true;
2913 :
2914 12 : assert(nvme_ctrlr->reset_start_tsc == 0);
2915 12 : nvme_ctrlr->reset_start_tsc = spdk_get_ticks();
2916 :
2917 12 : return 0;
2918 : }
2919 :
2920 : static int
2921 16 : bdev_nvme_failover_ctrlr(struct nvme_ctrlr *nvme_ctrlr)
2922 : {
2923 : int rc;
2924 :
2925 16 : pthread_mutex_lock(&nvme_ctrlr->mutex);
2926 16 : rc = bdev_nvme_failover_ctrlr_unsafe(nvme_ctrlr, false);
2927 16 : pthread_mutex_unlock(&nvme_ctrlr->mutex);
2928 :
2929 16 : if (rc == 0) {
2930 11 : spdk_thread_send_msg(nvme_ctrlr->thread, _bdev_nvme_reset_ctrlr, nvme_ctrlr);
2931 5 : } else if (rc == -EALREADY) {
2932 0 : rc = 0;
2933 : }
2934 :
2935 16 : return rc;
2936 : }
2937 :
2938 : static int bdev_nvme_unmap(struct nvme_bdev_io *bio, uint64_t offset_blocks,
2939 : uint64_t num_blocks);
2940 :
2941 : static int bdev_nvme_write_zeroes(struct nvme_bdev_io *bio, uint64_t offset_blocks,
2942 : uint64_t num_blocks);
2943 :
2944 : static int bdev_nvme_copy(struct nvme_bdev_io *bio, uint64_t dst_offset_blocks,
2945 : uint64_t src_offset_blocks,
2946 : uint64_t num_blocks);
2947 :
2948 : static void
2949 1 : bdev_nvme_get_buf_cb(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io,
2950 : bool success)
2951 : {
2952 1 : struct nvme_bdev_io *bio = (struct nvme_bdev_io *)bdev_io->driver_ctx;
2953 : int ret;
2954 :
2955 1 : if (!success) {
2956 0 : ret = -EINVAL;
2957 0 : goto exit;
2958 : }
2959 :
2960 1 : if (spdk_unlikely(!nvme_io_path_is_available(bio->io_path))) {
2961 0 : ret = -ENXIO;
2962 0 : goto exit;
2963 : }
2964 :
2965 1 : ret = bdev_nvme_readv(bio,
2966 : bdev_io->u.bdev.iovs,
2967 : bdev_io->u.bdev.iovcnt,
2968 : bdev_io->u.bdev.md_buf,
2969 : bdev_io->u.bdev.num_blocks,
2970 : bdev_io->u.bdev.offset_blocks,
2971 : bdev_io->u.bdev.dif_check_flags,
2972 : bdev_io->u.bdev.memory_domain,
2973 : bdev_io->u.bdev.memory_domain_ctx,
2974 : bdev_io->u.bdev.accel_sequence);
2975 :
2976 1 : exit:
2977 1 : if (spdk_unlikely(ret != 0)) {
2978 0 : bdev_nvme_io_complete(bio, ret);
2979 : }
2980 1 : }
2981 :
2982 : static inline void
2983 51 : _bdev_nvme_submit_request(struct nvme_bdev_channel *nbdev_ch, struct spdk_bdev_io *bdev_io)
2984 : {
2985 51 : struct nvme_bdev_io *nbdev_io = (struct nvme_bdev_io *)bdev_io->driver_ctx;
2986 51 : struct spdk_bdev *bdev = bdev_io->bdev;
2987 : struct nvme_bdev_io *nbdev_io_to_abort;
2988 51 : int rc = 0;
2989 :
2990 51 : switch (bdev_io->type) {
2991 3 : case SPDK_BDEV_IO_TYPE_READ:
2992 3 : if (bdev_io->u.bdev.iovs && bdev_io->u.bdev.iovs[0].iov_base) {
2993 :
2994 2 : rc = bdev_nvme_readv(nbdev_io,
2995 : bdev_io->u.bdev.iovs,
2996 : bdev_io->u.bdev.iovcnt,
2997 : bdev_io->u.bdev.md_buf,
2998 : bdev_io->u.bdev.num_blocks,
2999 : bdev_io->u.bdev.offset_blocks,
3000 : bdev_io->u.bdev.dif_check_flags,
3001 : bdev_io->u.bdev.memory_domain,
3002 : bdev_io->u.bdev.memory_domain_ctx,
3003 : bdev_io->u.bdev.accel_sequence);
3004 : } else {
3005 1 : spdk_bdev_io_get_buf(bdev_io, bdev_nvme_get_buf_cb,
3006 1 : bdev_io->u.bdev.num_blocks * bdev->blocklen);
3007 1 : rc = 0;
3008 : }
3009 3 : break;
3010 25 : case SPDK_BDEV_IO_TYPE_WRITE:
3011 25 : rc = bdev_nvme_writev(nbdev_io,
3012 : bdev_io->u.bdev.iovs,
3013 : bdev_io->u.bdev.iovcnt,
3014 : bdev_io->u.bdev.md_buf,
3015 : bdev_io->u.bdev.num_blocks,
3016 : bdev_io->u.bdev.offset_blocks,
3017 : bdev_io->u.bdev.dif_check_flags,
3018 : bdev_io->u.bdev.memory_domain,
3019 : bdev_io->u.bdev.memory_domain_ctx,
3020 : bdev_io->u.bdev.accel_sequence,
3021 : bdev_io->u.bdev.nvme_cdw12,
3022 : bdev_io->u.bdev.nvme_cdw13);
3023 25 : break;
3024 1 : case SPDK_BDEV_IO_TYPE_COMPARE:
3025 1 : rc = bdev_nvme_comparev(nbdev_io,
3026 : bdev_io->u.bdev.iovs,
3027 : bdev_io->u.bdev.iovcnt,
3028 : bdev_io->u.bdev.md_buf,
3029 : bdev_io->u.bdev.num_blocks,
3030 : bdev_io->u.bdev.offset_blocks,
3031 : bdev_io->u.bdev.dif_check_flags);
3032 1 : break;
3033 2 : case SPDK_BDEV_IO_TYPE_COMPARE_AND_WRITE:
3034 2 : rc = bdev_nvme_comparev_and_writev(nbdev_io,
3035 : bdev_io->u.bdev.iovs,
3036 : bdev_io->u.bdev.iovcnt,
3037 : bdev_io->u.bdev.fused_iovs,
3038 : bdev_io->u.bdev.fused_iovcnt,
3039 : bdev_io->u.bdev.md_buf,
3040 : bdev_io->u.bdev.num_blocks,
3041 : bdev_io->u.bdev.offset_blocks,
3042 : bdev_io->u.bdev.dif_check_flags);
3043 2 : break;
3044 1 : case SPDK_BDEV_IO_TYPE_UNMAP:
3045 1 : rc = bdev_nvme_unmap(nbdev_io,
3046 : bdev_io->u.bdev.offset_blocks,
3047 : bdev_io->u.bdev.num_blocks);
3048 1 : break;
3049 0 : case SPDK_BDEV_IO_TYPE_WRITE_ZEROES:
3050 0 : rc = bdev_nvme_write_zeroes(nbdev_io,
3051 : bdev_io->u.bdev.offset_blocks,
3052 : bdev_io->u.bdev.num_blocks);
3053 0 : break;
3054 7 : case SPDK_BDEV_IO_TYPE_RESET:
3055 7 : nbdev_io->io_path = NULL;
3056 7 : bdev_nvme_reset_io(nbdev_ch, nbdev_io);
3057 7 : return;
3058 :
3059 1 : case SPDK_BDEV_IO_TYPE_FLUSH:
3060 1 : bdev_nvme_io_complete(nbdev_io, 0);
3061 1 : return;
3062 :
3063 0 : case SPDK_BDEV_IO_TYPE_ZONE_APPEND:
3064 0 : rc = bdev_nvme_zone_appendv(nbdev_io,
3065 : bdev_io->u.bdev.iovs,
3066 : bdev_io->u.bdev.iovcnt,
3067 : bdev_io->u.bdev.md_buf,
3068 : bdev_io->u.bdev.num_blocks,
3069 : bdev_io->u.bdev.offset_blocks,
3070 : bdev_io->u.bdev.dif_check_flags);
3071 0 : break;
3072 0 : case SPDK_BDEV_IO_TYPE_GET_ZONE_INFO:
3073 0 : rc = bdev_nvme_get_zone_info(nbdev_io,
3074 : bdev_io->u.zone_mgmt.zone_id,
3075 : bdev_io->u.zone_mgmt.num_zones,
3076 0 : bdev_io->u.zone_mgmt.buf);
3077 0 : break;
3078 0 : case SPDK_BDEV_IO_TYPE_ZONE_MANAGEMENT:
3079 0 : rc = bdev_nvme_zone_management(nbdev_io,
3080 : bdev_io->u.zone_mgmt.zone_id,
3081 : bdev_io->u.zone_mgmt.zone_action);
3082 0 : break;
3083 5 : case SPDK_BDEV_IO_TYPE_NVME_ADMIN:
3084 5 : nbdev_io->io_path = NULL;
3085 5 : bdev_nvme_admin_passthru(nbdev_ch,
3086 : nbdev_io,
3087 : &bdev_io->u.nvme_passthru.cmd,
3088 : bdev_io->u.nvme_passthru.buf,
3089 : bdev_io->u.nvme_passthru.nbytes);
3090 5 : return;
3091 :
3092 0 : case SPDK_BDEV_IO_TYPE_NVME_IO:
3093 0 : rc = bdev_nvme_io_passthru(nbdev_io,
3094 : &bdev_io->u.nvme_passthru.cmd,
3095 : bdev_io->u.nvme_passthru.buf,
3096 : bdev_io->u.nvme_passthru.nbytes);
3097 0 : break;
3098 0 : case SPDK_BDEV_IO_TYPE_NVME_IO_MD:
3099 0 : rc = bdev_nvme_io_passthru_md(nbdev_io,
3100 : &bdev_io->u.nvme_passthru.cmd,
3101 : bdev_io->u.nvme_passthru.buf,
3102 : bdev_io->u.nvme_passthru.nbytes,
3103 : bdev_io->u.nvme_passthru.md_buf,
3104 : bdev_io->u.nvme_passthru.md_len);
3105 0 : break;
3106 0 : case SPDK_BDEV_IO_TYPE_NVME_IOV_MD:
3107 0 : rc = bdev_nvme_iov_passthru_md(nbdev_io,
3108 : &bdev_io->u.nvme_passthru.cmd,
3109 : bdev_io->u.nvme_passthru.iovs,
3110 : bdev_io->u.nvme_passthru.iovcnt,
3111 : bdev_io->u.nvme_passthru.nbytes,
3112 : bdev_io->u.nvme_passthru.md_buf,
3113 : bdev_io->u.nvme_passthru.md_len);
3114 0 : break;
3115 6 : case SPDK_BDEV_IO_TYPE_ABORT:
3116 6 : nbdev_io->io_path = NULL;
3117 6 : nbdev_io_to_abort = (struct nvme_bdev_io *)bdev_io->u.abort.bio_to_abort->driver_ctx;
3118 6 : bdev_nvme_abort(nbdev_ch,
3119 : nbdev_io,
3120 : nbdev_io_to_abort);
3121 6 : return;
3122 :
3123 0 : case SPDK_BDEV_IO_TYPE_COPY:
3124 0 : rc = bdev_nvme_copy(nbdev_io,
3125 : bdev_io->u.bdev.offset_blocks,
3126 : bdev_io->u.bdev.copy.src_offset_blocks,
3127 : bdev_io->u.bdev.num_blocks);
3128 0 : break;
3129 0 : default:
3130 0 : rc = -EINVAL;
3131 0 : break;
3132 : }
3133 :
3134 32 : if (spdk_unlikely(rc != 0)) {
3135 0 : bdev_nvme_io_complete(nbdev_io, rc);
3136 : }
3137 : }
3138 :
3139 : static void
3140 58 : bdev_nvme_submit_request(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io)
3141 : {
3142 58 : struct nvme_bdev_channel *nbdev_ch = spdk_io_channel_get_ctx(ch);
3143 58 : struct nvme_bdev_io *nbdev_io = (struct nvme_bdev_io *)bdev_io->driver_ctx;
3144 :
3145 58 : if (spdk_likely(nbdev_io->submit_tsc == 0)) {
3146 58 : nbdev_io->submit_tsc = spdk_bdev_io_get_submit_tsc(bdev_io);
3147 : } else {
3148 : /* There are cases where submit_tsc != 0 (e.g. a retried I/O).
3149 : * Update submit_tsc here.
3150 : */
3151 0 : nbdev_io->submit_tsc = spdk_get_ticks();
3152 : }
3153 :
3154 58 : spdk_trace_record(TRACE_BDEV_NVME_IO_START, 0, 0, (uintptr_t)nbdev_io, (uintptr_t)bdev_io);
3155 58 : nbdev_io->io_path = bdev_nvme_find_io_path(nbdev_ch);
3156 58 : if (spdk_unlikely(!nbdev_io->io_path)) {
3157 11 : if (!bdev_nvme_io_type_is_admin(bdev_io->type)) {
3158 10 : bdev_nvme_io_complete(nbdev_io, -ENXIO);
3159 10 : return;
3160 : }
3161 :
3162 : /* Admin commands do not use the optimal I/O path.
3163 : * Simply fall through even if no path was found.
3164 : */
3165 : }
3166 :
3167 48 : _bdev_nvme_submit_request(nbdev_ch, bdev_io);
3168 : }
3169 :
3170 : static bool
3171 0 : bdev_nvme_io_type_supported(void *ctx, enum spdk_bdev_io_type io_type)
3172 : {
3173 0 : struct nvme_bdev *nbdev = ctx;
3174 : struct nvme_ns *nvme_ns;
3175 : struct spdk_nvme_ns *ns;
3176 : struct spdk_nvme_ctrlr *ctrlr;
3177 : const struct spdk_nvme_ctrlr_data *cdata;
3178 :
3179 0 : nvme_ns = TAILQ_FIRST(&nbdev->nvme_ns_list);
3180 0 : assert(nvme_ns != NULL);
3181 0 : ns = nvme_ns->ns;
3182 0 : if (ns == NULL) {
3183 0 : return false;
3184 : }
3185 :
3186 0 : ctrlr = spdk_nvme_ns_get_ctrlr(ns);
3187 :
3188 0 : switch (io_type) {
3189 0 : case SPDK_BDEV_IO_TYPE_READ:
3190 : case SPDK_BDEV_IO_TYPE_WRITE:
3191 : case SPDK_BDEV_IO_TYPE_RESET:
3192 : case SPDK_BDEV_IO_TYPE_FLUSH:
3193 : case SPDK_BDEV_IO_TYPE_NVME_ADMIN:
3194 : case SPDK_BDEV_IO_TYPE_NVME_IO:
3195 : case SPDK_BDEV_IO_TYPE_ABORT:
3196 0 : return true;
3197 :
3198 0 : case SPDK_BDEV_IO_TYPE_COMPARE:
3199 0 : return spdk_nvme_ns_supports_compare(ns);
3200 :
3201 0 : case SPDK_BDEV_IO_TYPE_NVME_IO_MD:
3202 0 : return spdk_nvme_ns_get_md_size(ns) ? true : false;
3203 :
3204 0 : case SPDK_BDEV_IO_TYPE_UNMAP:
3205 0 : cdata = spdk_nvme_ctrlr_get_data(ctrlr);
3206 0 : return cdata->oncs.dsm;
3207 :
3208 0 : case SPDK_BDEV_IO_TYPE_WRITE_ZEROES:
3209 0 : cdata = spdk_nvme_ctrlr_get_data(ctrlr);
3210 0 : return cdata->oncs.write_zeroes;
3211 :
3212 0 : case SPDK_BDEV_IO_TYPE_COMPARE_AND_WRITE:
3213 0 : if (spdk_nvme_ctrlr_get_flags(ctrlr) &
3214 : SPDK_NVME_CTRLR_COMPARE_AND_WRITE_SUPPORTED) {
3215 0 : return true;
3216 : }
3217 0 : return false;
3218 :
3219 0 : case SPDK_BDEV_IO_TYPE_GET_ZONE_INFO:
3220 : case SPDK_BDEV_IO_TYPE_ZONE_MANAGEMENT:
3221 0 : return spdk_nvme_ns_get_csi(ns) == SPDK_NVME_CSI_ZNS;
3222 :
3223 0 : case SPDK_BDEV_IO_TYPE_ZONE_APPEND:
3224 0 : return spdk_nvme_ns_get_csi(ns) == SPDK_NVME_CSI_ZNS &&
3225 0 : spdk_nvme_ctrlr_get_flags(ctrlr) & SPDK_NVME_CTRLR_ZONE_APPEND_SUPPORTED;
3226 :
3227 0 : case SPDK_BDEV_IO_TYPE_COPY:
3228 0 : cdata = spdk_nvme_ctrlr_get_data(ctrlr);
3229 0 : return cdata->oncs.copy;
3230 :
3231 0 : default:
3232 0 : return false;
3233 : }
3234 : }
3235 :
3236 : static int
3237 57 : nvme_qpair_create(struct nvme_ctrlr *nvme_ctrlr, struct nvme_ctrlr_channel *ctrlr_ch)
3238 : {
3239 : struct nvme_qpair *nvme_qpair;
3240 : struct spdk_io_channel *pg_ch;
3241 : int rc;
3242 :
3243 57 : nvme_qpair = calloc(1, sizeof(*nvme_qpair));
3244 57 : if (!nvme_qpair) {
3245 0 : SPDK_ERRLOG("Failed to alloc nvme_qpair.\n");
3246 0 : return -1;
3247 : }
3248 :
3249 57 : TAILQ_INIT(&nvme_qpair->io_path_list);
3250 :
3251 57 : nvme_qpair->ctrlr = nvme_ctrlr;
3252 57 : nvme_qpair->ctrlr_ch = ctrlr_ch;
3253 :
3254 57 : pg_ch = spdk_get_io_channel(&g_nvme_bdev_ctrlrs);
3255 57 : if (!pg_ch) {
3256 0 : free(nvme_qpair);
3257 0 : return -1;
3258 : }
3259 :
3260 57 : nvme_qpair->group = spdk_io_channel_get_ctx(pg_ch);
3261 :
3262 : #ifdef SPDK_CONFIG_VTUNE
3263 : nvme_qpair->group->collect_spin_stat = true;
3264 : #else
3265 57 : nvme_qpair->group->collect_spin_stat = false;
3266 : #endif
3267 :
3268 57 : if (!nvme_ctrlr->disabled) {
3269 : /* If an nvme_ctrlr is disabled, don't try to create a qpair for it. The
3270 : * qpair will be created when it is enabled.
3271 : */
3272 57 : rc = bdev_nvme_create_qpair(nvme_qpair);
3273 57 : if (rc != 0) {
3274 : /* The nvme_ctrlr can't create an I/O qpair if the connection is down.
3275 : * If reconnect_delay_sec is non-zero, creating the I/O qpair is retried
3276 : * after reconnect_delay_sec seconds. If bdev_retry_count is non-zero,
3277 : * submitted I/O is queued until the I/O qpair is successfully created.
3278 : *
3279 : * Hence, if both conditions hold, ignore the failure.
3280 : */
3281 0 : if (nvme_ctrlr->opts.reconnect_delay_sec == 0 || g_opts.bdev_retry_count == 0) {
3282 0 : spdk_put_io_channel(pg_ch);
3283 0 : free(nvme_qpair);
3284 0 : return rc;
3285 : }
3286 : }
3287 : }
3288 :
3289 57 : TAILQ_INSERT_TAIL(&nvme_qpair->group->qpair_list, nvme_qpair, tailq);
3290 :
3291 57 : ctrlr_ch->qpair = nvme_qpair;
3292 :
3293 57 : pthread_mutex_lock(&nvme_qpair->ctrlr->mutex);
3294 57 : nvme_qpair->ctrlr->ref++;
3295 57 : pthread_mutex_unlock(&nvme_qpair->ctrlr->mutex);
3296 :
3297 57 : return 0;
3298 : }
3299 :
3300 : static int
3301 57 : bdev_nvme_create_ctrlr_channel_cb(void *io_device, void *ctx_buf)
3302 : {
3303 57 : struct nvme_ctrlr *nvme_ctrlr = io_device;
3304 57 : struct nvme_ctrlr_channel *ctrlr_ch = ctx_buf;
3305 :
3306 57 : TAILQ_INIT(&ctrlr_ch->pending_resets);
3307 :
3308 57 : return nvme_qpair_create(nvme_ctrlr, ctrlr_ch);
3309 : }
3310 :
3311 : static void
3312 57 : nvme_qpair_delete(struct nvme_qpair *nvme_qpair)
3313 : {
3314 : struct nvme_io_path *io_path, *next;
3315 :
3316 57 : assert(nvme_qpair->group != NULL);
3317 :
3318 92 : TAILQ_FOREACH_SAFE(io_path, &nvme_qpair->io_path_list, tailq, next) {
3319 35 : TAILQ_REMOVE(&nvme_qpair->io_path_list, io_path, tailq);
3320 35 : nvme_io_path_free(io_path);
3321 : }
3322 :
3323 57 : TAILQ_REMOVE(&nvme_qpair->group->qpair_list, nvme_qpair, tailq);
3324 :
3325 57 : spdk_put_io_channel(spdk_io_channel_from_ctx(nvme_qpair->group));
3326 :
3327 57 : nvme_ctrlr_release(nvme_qpair->ctrlr);
3328 :
3329 57 : free(nvme_qpair);
3330 57 : }
3331 :
3332 : static void
3333 57 : bdev_nvme_destroy_ctrlr_channel_cb(void *io_device, void *ctx_buf)
3334 : {
3335 57 : struct nvme_ctrlr_channel *ctrlr_ch = ctx_buf;
3336 : struct nvme_qpair *nvme_qpair;
3337 :
3338 57 : nvme_qpair = ctrlr_ch->qpair;
3339 57 : assert(nvme_qpair != NULL);
3340 :
3341 57 : _bdev_nvme_clear_io_path_cache(nvme_qpair);
3342 :
3343 57 : if (nvme_qpair->qpair != NULL) {
3344 43 : if (ctrlr_ch->reset_iter == NULL) {
3345 43 : spdk_nvme_ctrlr_disconnect_io_qpair(nvme_qpair->qpair);
3346 : } else {
3347 : /* Skip the current ctrlr_channel in a full reset sequence because
3348 : * it is being deleted now. The qpair is already being disconnected.
3349 : * We do not have to restart disconnecting it.
3350 : */
3351 0 : spdk_for_each_channel_continue(ctrlr_ch->reset_iter, 0);
3352 : }
3353 :
3354 : /* We cannot release a reference to the poll group now.
3355 : * The qpair may be disconnected asynchronously later.
3356 : * We need to poll it until it is actually disconnected.
3357 : * Just detach the qpair from the deleting ctrlr_channel.
3358 : */
3359 43 : nvme_qpair->ctrlr_ch = NULL;
3360 : } else {
3361 14 : assert(ctrlr_ch->reset_iter == NULL);
3362 :
3363 14 : nvme_qpair_delete(nvme_qpair);
3364 : }
3365 57 : }
3366 :
3367 : static inline struct spdk_io_channel *
3368 0 : bdev_nvme_get_accel_channel(struct nvme_poll_group *group)
3369 : {
3370 0 : if (spdk_unlikely(!group->accel_channel)) {
3371 0 : group->accel_channel = spdk_accel_get_io_channel();
3372 0 : if (!group->accel_channel) {
3373 0 : SPDK_ERRLOG("Cannot get the accel_channel for bdev nvme polling group=%p\n",
3374 : group);
3375 0 : return NULL;
3376 : }
3377 : }
3378 :
3379 0 : return group->accel_channel;
3380 : }
3381 :
3382 : static void
3383 0 : bdev_nvme_submit_accel_crc32c(void *ctx, uint32_t *dst, struct iovec *iov,
3384 : uint32_t iov_cnt, uint32_t seed,
3385 : spdk_nvme_accel_completion_cb cb_fn, void *cb_arg)
3386 : {
3387 : struct spdk_io_channel *accel_ch;
3388 0 : struct nvme_poll_group *group = ctx;
3389 : int rc;
3390 :
3391 0 : assert(cb_fn != NULL);
3392 :
3393 0 : accel_ch = bdev_nvme_get_accel_channel(group);
3394 0 : if (spdk_unlikely(accel_ch == NULL)) {
3395 0 : cb_fn(cb_arg, -ENOMEM);
3396 0 : return;
3397 : }
3398 :
3399 0 : rc = spdk_accel_submit_crc32cv(accel_ch, dst, iov, iov_cnt, seed, cb_fn, cb_arg);
3400 0 : if (rc) {
3401 : /* For these two error cases, spdk_accel_submit_crc32cv does not call the user's cb_fn. */
3402 0 : if (rc == -ENOMEM || rc == -EINVAL) {
3403 0 : cb_fn(cb_arg, rc);
3404 : }
3405 0 : SPDK_ERRLOG("Cannot complete the accelerated crc32c operation with iov=%p\n", iov);
3406 : }
3407 : }
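/*
 * [Editor's note, added for clarity] The error handling above preserves an
 * "exactly one completion" contract: on success the accel framework will
 * invoke cb_fn later, while for the synchronous -ENOMEM/-EINVAL failures,
 * which never reach the framework, the wrapper invokes cb_fn itself before
 * logging the error.
 */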
3408 :
3409 : static void
3410 0 : bdev_nvme_finish_sequence(void *seq, spdk_nvme_accel_completion_cb cb_fn, void *cb_arg)
3411 : {
3412 0 : spdk_accel_sequence_finish(seq, cb_fn, cb_arg);
3413 0 : }
3414 :
3415 : static void
3416 0 : bdev_nvme_abort_sequence(void *seq)
3417 : {
3418 0 : spdk_accel_sequence_abort(seq);
3419 0 : }
3420 :
3421 : static void
3422 0 : bdev_nvme_reverse_sequence(void *seq)
3423 : {
3424 0 : spdk_accel_sequence_reverse(seq);
3425 0 : }
3426 :
3427 : static int
3428 0 : bdev_nvme_append_crc32c(void *ctx, void **seq, uint32_t *dst, struct iovec *iovs, uint32_t iovcnt,
3429 : struct spdk_memory_domain *domain, void *domain_ctx, uint32_t seed,
3430 : spdk_nvme_accel_step_cb cb_fn, void *cb_arg)
3431 : {
3432 : struct spdk_io_channel *ch;
3433 0 : struct nvme_poll_group *group = ctx;
3434 :
3435 0 : ch = bdev_nvme_get_accel_channel(group);
3436 0 : if (spdk_unlikely(ch == NULL)) {
3437 0 : return -ENOMEM;
3438 : }
3439 :
3440 0 : return spdk_accel_append_crc32c((struct spdk_accel_sequence **)seq, ch, dst, iovs, iovcnt,
3441 : domain, domain_ctx, seed, cb_fn, cb_arg);
3442 : }
3443 :
3444 : static struct spdk_nvme_accel_fn_table g_bdev_nvme_accel_fn_table = {
3445 : .table_size = sizeof(struct spdk_nvme_accel_fn_table),
3446 : .submit_accel_crc32c = bdev_nvme_submit_accel_crc32c,
3447 : .append_crc32c = bdev_nvme_append_crc32c,
3448 : .finish_sequence = bdev_nvme_finish_sequence,
3449 : .reverse_sequence = bdev_nvme_reverse_sequence,
3450 : .abort_sequence = bdev_nvme_abort_sequence,
3451 : };
3452 :
3453 : static int
3454 42 : bdev_nvme_create_poll_group_cb(void *io_device, void *ctx_buf)
3455 : {
3456 42 : struct nvme_poll_group *group = ctx_buf;
3457 :
3458 42 : TAILQ_INIT(&group->qpair_list);
3459 :
3460 42 : group->group = spdk_nvme_poll_group_create(group, &g_bdev_nvme_accel_fn_table);
3461 42 : if (group->group == NULL) {
3462 0 : return -1;
3463 : }
3464 :
3465 42 : group->poller = SPDK_POLLER_REGISTER(bdev_nvme_poll, group, g_opts.nvme_ioq_poll_period_us);
3466 :
3467 42 : if (group->poller == NULL) {
3468 0 : spdk_nvme_poll_group_destroy(group->group);
3469 0 : return -1;
3470 : }
3471 :
3472 42 : return 0;
3473 : }
3474 :
3475 : static void
3476 42 : bdev_nvme_destroy_poll_group_cb(void *io_device, void *ctx_buf)
3477 : {
3478 42 : struct nvme_poll_group *group = ctx_buf;
3479 :
3480 42 : assert(TAILQ_EMPTY(&group->qpair_list));
3481 :
3482 42 : if (group->accel_channel) {
3483 0 : spdk_put_io_channel(group->accel_channel);
3484 : }
3485 :
3486 42 : spdk_poller_unregister(&group->poller);
3487 42 : if (spdk_nvme_poll_group_destroy(group->group)) {
3488 0 : SPDK_ERRLOG("Unable to destroy a poll group for the NVMe bdev module.\n");
3489 0 : assert(false);
3490 : }
3491 42 : }
3492 :
3493 : static struct spdk_io_channel *
3494 0 : bdev_nvme_get_io_channel(void *ctx)
3495 : {
3496 0 : struct nvme_bdev *nvme_bdev = ctx;
3497 :
3498 0 : return spdk_get_io_channel(nvme_bdev);
3499 : }
3500 :
3501 : static void *
3502 0 : bdev_nvme_get_module_ctx(void *ctx)
3503 : {
3504 0 : struct nvme_bdev *nvme_bdev = ctx;
3505 : struct nvme_ns *nvme_ns;
3506 :
3507 0 : if (!nvme_bdev || nvme_bdev->disk.module != &nvme_if) {
3508 0 : return NULL;
3509 : }
3510 :
3511 0 : nvme_ns = TAILQ_FIRST(&nvme_bdev->nvme_ns_list);
3512 0 : if (!nvme_ns) {
3513 0 : return NULL;
3514 : }
3515 :
3516 0 : return nvme_ns->ns;
3517 : }
3518 :
3519 : static const char *
3520 0 : _nvme_ana_state_str(enum spdk_nvme_ana_state ana_state)
3521 : {
3522 0 : switch (ana_state) {
3523 0 : case SPDK_NVME_ANA_OPTIMIZED_STATE:
3524 0 : return "optimized";
3525 0 : case SPDK_NVME_ANA_NON_OPTIMIZED_STATE:
3526 0 : return "non_optimized";
3527 0 : case SPDK_NVME_ANA_INACCESSIBLE_STATE:
3528 0 : return "inaccessible";
3529 0 : case SPDK_NVME_ANA_PERSISTENT_LOSS_STATE:
3530 0 : return "persistent_loss";
3531 0 : case SPDK_NVME_ANA_CHANGE_STATE:
3532 0 : return "change";
3533 0 : default:
3534 0 : return NULL;
3535 : }
3536 : }
3537 :
3538 : static int
3539 8 : bdev_nvme_get_memory_domains(void *ctx, struct spdk_memory_domain **domains, int array_size)
3540 : {
3541 8 : struct spdk_memory_domain **_domains = NULL;
3542 8 : struct nvme_bdev *nbdev = ctx;
3543 : struct nvme_ns *nvme_ns;
3544 8 : int i = 0, _array_size = array_size;
3545 8 : int rc = 0;
3546 :
3547 22 : TAILQ_FOREACH(nvme_ns, &nbdev->nvme_ns_list, tailq) {
3548 14 : if (domains && array_size >= i) {
3549 11 : _domains = &domains[i];
3550 : } else {
3551 3 : _domains = NULL;
3552 : }
3553 14 : rc = spdk_nvme_ctrlr_get_memory_domains(nvme_ns->ctrlr->ctrlr, _domains, _array_size);
3554 14 : if (rc > 0) {
3555 13 : i += rc;
3556 13 : if (_array_size >= rc) {
3557 9 : _array_size -= rc;
3558 : } else {
3559 4 : _array_size = 0;
3560 : }
3561 1 : } else if (rc < 0) {
3562 0 : return rc;
3563 : }
3564 : }
3565 :
3566 8 : return i;
3567 : }
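Note the return value is the total number of domains found, even when it exceeds array_size; callers are expected to use a two-pass pattern. A minimal sketch through the bdev-layer wrapper, assuming a valid struct spdk_bdev *bdev (error handling trimmed):

int cnt;
struct spdk_memory_domain **domains;

cnt = spdk_bdev_get_memory_domains(bdev, NULL, 0);	/* pass 1: count only */
if (cnt > 0) {
	domains = calloc(cnt, sizeof(*domains));
	assert(domains != NULL);
	cnt = spdk_bdev_get_memory_domains(bdev, domains, cnt);	/* pass 2: fill */
	/* domains[0..cnt-1] are now valid. */
	free(domains);
}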
3568 :
3569 : static const char *
3570 0 : nvme_ctrlr_get_state_str(struct nvme_ctrlr *nvme_ctrlr)
3571 : {
3572 0 : if (nvme_ctrlr->destruct) {
3573 0 : return "deleting";
3574 0 : } else if (spdk_nvme_ctrlr_is_failed(nvme_ctrlr->ctrlr)) {
3575 0 : return "failed";
3576 0 : } else if (nvme_ctrlr->resetting) {
3577 0 : return "resetting";
3578 0 : } else if (nvme_ctrlr->reconnect_is_delayed > 0) {
3579 0 : return "reconnect_is_delayed";
3580 0 : } else if (nvme_ctrlr->disabled) {
3581 0 : return "disabled";
3582 : } else {
3583 0 : return "enabled";
3584 : }
3585 : }
3586 :
3587 : void
3588 0 : nvme_ctrlr_info_json(struct spdk_json_write_ctx *w, struct nvme_ctrlr *nvme_ctrlr)
3589 0 : {
3590 : struct spdk_nvme_transport_id *trid;
3591 : const struct spdk_nvme_ctrlr_opts *opts;
3592 : const struct spdk_nvme_ctrlr_data *cdata;
3593 : struct nvme_path_id *path_id;
3594 :
3595 0 : spdk_json_write_object_begin(w);
3596 :
3597 0 : spdk_json_write_named_string(w, "state", nvme_ctrlr_get_state_str(nvme_ctrlr));
3598 :
3599 : #ifdef SPDK_CONFIG_NVME_CUSE
3600 0 : size_t cuse_name_size = 128;
3601 0 : char cuse_name[cuse_name_size];
3602 :
3603 0 : int rc = spdk_nvme_cuse_get_ctrlr_name(nvme_ctrlr->ctrlr, cuse_name, &cuse_name_size);
3604 0 : if (rc == 0) {
3605 0 : spdk_json_write_named_string(w, "cuse_device", cuse_name);
3606 : }
3607 : #endif
3608 0 : trid = &nvme_ctrlr->active_path_id->trid;
3609 0 : spdk_json_write_named_object_begin(w, "trid");
3610 0 : nvme_bdev_dump_trid_json(trid, w);
3611 0 : spdk_json_write_object_end(w);
3612 :
3613 0 : path_id = TAILQ_NEXT(nvme_ctrlr->active_path_id, link);
3614 0 : if (path_id != NULL) {
3615 0 : spdk_json_write_named_array_begin(w, "alternate_trids");
3616 : do {
3617 0 : trid = &path_id->trid;
3618 0 : spdk_json_write_object_begin(w);
3619 0 : nvme_bdev_dump_trid_json(trid, w);
3620 0 : spdk_json_write_object_end(w);
3621 :
3622 0 : path_id = TAILQ_NEXT(path_id, link);
3623 0 : } while (path_id != NULL);
3624 0 : spdk_json_write_array_end(w);
3625 : }
3626 :
3627 0 : cdata = spdk_nvme_ctrlr_get_data(nvme_ctrlr->ctrlr);
3628 0 : spdk_json_write_named_uint16(w, "cntlid", cdata->cntlid);
3629 :
3630 0 : opts = spdk_nvme_ctrlr_get_opts(nvme_ctrlr->ctrlr);
3631 0 : spdk_json_write_named_object_begin(w, "host");
3632 0 : spdk_json_write_named_string(w, "nqn", opts->hostnqn);
3633 0 : spdk_json_write_named_string(w, "addr", opts->src_addr);
3634 0 : spdk_json_write_named_string(w, "svcid", opts->src_svcid);
3635 0 : spdk_json_write_object_end(w);
3636 :
3637 0 : spdk_json_write_object_end(w);
3638 0 : }
3639 :
3640 : static void
3641 0 : nvme_namespace_info_json(struct spdk_json_write_ctx *w,
3642 : struct nvme_ns *nvme_ns)
3643 0 : {
3644 : struct spdk_nvme_ns *ns;
3645 : struct spdk_nvme_ctrlr *ctrlr;
3646 : const struct spdk_nvme_ctrlr_data *cdata;
3647 : const struct spdk_nvme_transport_id *trid;
3648 : union spdk_nvme_vs_register vs;
3649 : const struct spdk_nvme_ns_data *nsdata;
3650 0 : char buf[128];
3651 :
3652 0 : ns = nvme_ns->ns;
3653 0 : if (ns == NULL) {
3654 0 : return;
3655 : }
3656 :
3657 0 : ctrlr = spdk_nvme_ns_get_ctrlr(ns);
3658 :
3659 0 : cdata = spdk_nvme_ctrlr_get_data(ctrlr);
3660 0 : trid = spdk_nvme_ctrlr_get_transport_id(ctrlr);
3661 0 : vs = spdk_nvme_ctrlr_get_regs_vs(ctrlr);
3662 :
3663 0 : spdk_json_write_object_begin(w);
3664 :
3665 0 : if (trid->trtype == SPDK_NVME_TRANSPORT_PCIE) {
3666 0 : spdk_json_write_named_string(w, "pci_address", trid->traddr);
3667 : }
3668 :
3669 0 : spdk_json_write_named_object_begin(w, "trid");
3670 :
3671 0 : nvme_bdev_dump_trid_json(trid, w);
3672 :
3673 0 : spdk_json_write_object_end(w);
3674 :
3675 : #ifdef SPDK_CONFIG_NVME_CUSE
3676 0 : size_t cuse_name_size = 128;
3677 0 : char cuse_name[cuse_name_size];
3678 :
3679 0 : int rc = spdk_nvme_cuse_get_ns_name(ctrlr, spdk_nvme_ns_get_id(ns),
3680 : cuse_name, &cuse_name_size);
3681 0 : if (rc == 0) {
3682 0 : spdk_json_write_named_string(w, "cuse_device", cuse_name);
3683 : }
3684 : #endif
3685 :
3686 0 : spdk_json_write_named_object_begin(w, "ctrlr_data");
3687 :
3688 0 : spdk_json_write_named_uint16(w, "cntlid", cdata->cntlid);
3689 :
3690 0 : spdk_json_write_named_string_fmt(w, "vendor_id", "0x%04x", cdata->vid);
3691 :
3692 0 : snprintf(buf, sizeof(cdata->mn) + 1, "%s", cdata->mn);
3693 0 : spdk_str_trim(buf);
3694 0 : spdk_json_write_named_string(w, "model_number", buf);
3695 :
3696 0 : snprintf(buf, sizeof(cdata->sn) + 1, "%s", cdata->sn);
3697 0 : spdk_str_trim(buf);
3698 0 : spdk_json_write_named_string(w, "serial_number", buf);
3699 :
3700 0 : snprintf(buf, sizeof(cdata->fr) + 1, "%s", cdata->fr);
3701 0 : spdk_str_trim(buf);
3702 0 : spdk_json_write_named_string(w, "firmware_revision", buf);
3703 :
3704 0 : if (cdata->subnqn[0] != '\0') {
3705 0 : spdk_json_write_named_string(w, "subnqn", cdata->subnqn);
3706 : }
3707 :
3708 0 : spdk_json_write_named_object_begin(w, "oacs");
3709 :
3710 0 : spdk_json_write_named_uint32(w, "security", cdata->oacs.security);
3711 0 : spdk_json_write_named_uint32(w, "format", cdata->oacs.format);
3712 0 : spdk_json_write_named_uint32(w, "firmware", cdata->oacs.firmware);
3713 0 : spdk_json_write_named_uint32(w, "ns_manage", cdata->oacs.ns_manage);
3714 :
3715 0 : spdk_json_write_object_end(w);
3716 :
3717 0 : spdk_json_write_named_bool(w, "multi_ctrlr", cdata->cmic.multi_ctrlr);
3718 0 : spdk_json_write_named_bool(w, "ana_reporting", cdata->cmic.ana_reporting);
3719 :
3720 0 : spdk_json_write_object_end(w);
3721 :
3722 0 : spdk_json_write_named_object_begin(w, "vs");
3723 :
3724 0 : spdk_json_write_name(w, "nvme_version");
3725 0 : if (vs.bits.ter) {
3726 0 : spdk_json_write_string_fmt(w, "%u.%u.%u", vs.bits.mjr, vs.bits.mnr, vs.bits.ter);
3727 : } else {
3728 0 : spdk_json_write_string_fmt(w, "%u.%u", vs.bits.mjr, vs.bits.mnr);
3729 : }
3730 :
3731 0 : spdk_json_write_object_end(w);
3732 :
3733 0 : nsdata = spdk_nvme_ns_get_data(ns);
3734 :
3735 0 : spdk_json_write_named_object_begin(w, "ns_data");
3736 :
3737 0 : spdk_json_write_named_uint32(w, "id", spdk_nvme_ns_get_id(ns));
3738 :
3739 0 : if (cdata->cmic.ana_reporting) {
3740 0 : spdk_json_write_named_string(w, "ana_state",
3741 : _nvme_ana_state_str(nvme_ns->ana_state));
3742 : }
3743 :
3744 0 : spdk_json_write_named_bool(w, "can_share", nsdata->nmic.can_share);
3745 :
3746 0 : spdk_json_write_object_end(w);
3747 :
3748 0 : if (cdata->oacs.security) {
3749 0 : spdk_json_write_named_object_begin(w, "security");
3750 :
3751 0 : spdk_json_write_named_bool(w, "opal", nvme_ns->bdev->opal);
3752 :
3753 0 : spdk_json_write_object_end(w);
3754 : }
3755 :
3756 0 : spdk_json_write_object_end(w);
3757 : }
3758 :
3759 : static const char *
3760 0 : nvme_bdev_get_mp_policy_str(struct nvme_bdev *nbdev)
3761 : {
3762 0 : switch (nbdev->mp_policy) {
3763 0 : case BDEV_NVME_MP_POLICY_ACTIVE_PASSIVE:
3764 0 : return "active_passive";
3765 0 : case BDEV_NVME_MP_POLICY_ACTIVE_ACTIVE:
3766 0 : return "active_active";
3767 0 : default:
3768 0 : assert(false);
3769 : return "invalid";
3770 : }
3771 : }
3772 :
3773 : static const char *
3774 0 : nvme_bdev_get_mp_selector_str(struct nvme_bdev *nbdev)
3775 : {
3776 0 : switch (nbdev->mp_selector) {
3777 0 : case BDEV_NVME_MP_SELECTOR_ROUND_ROBIN:
3778 0 : return "round_robin";
3779 0 : case BDEV_NVME_MP_SELECTOR_QUEUE_DEPTH:
3780 0 : return "queue_depth";
3781 0 : default:
3782 0 : assert(false);
3783 : return "invalid";
3784 : }
3785 : }
3786 :
3787 : static int
3788 0 : bdev_nvme_dump_info_json(void *ctx, struct spdk_json_write_ctx *w)
3789 : {
3790 0 : struct nvme_bdev *nvme_bdev = ctx;
3791 : struct nvme_ns *nvme_ns;
3792 :
3793 0 : pthread_mutex_lock(&nvme_bdev->mutex);
3794 0 : spdk_json_write_named_array_begin(w, "nvme");
3795 0 : TAILQ_FOREACH(nvme_ns, &nvme_bdev->nvme_ns_list, tailq) {
3796 0 : nvme_namespace_info_json(w, nvme_ns);
3797 : }
3798 0 : spdk_json_write_array_end(w);
3799 0 : spdk_json_write_named_string(w, "mp_policy", nvme_bdev_get_mp_policy_str(nvme_bdev));
3800 0 : if (nvme_bdev->mp_policy == BDEV_NVME_MP_POLICY_ACTIVE_ACTIVE) {
3801 0 : spdk_json_write_named_string(w, "selector", nvme_bdev_get_mp_selector_str(nvme_bdev));
3802 0 : if (nvme_bdev->mp_selector == BDEV_NVME_MP_SELECTOR_ROUND_ROBIN) {
3803 0 : spdk_json_write_named_uint32(w, "rr_min_io", nvme_bdev->rr_min_io);
3804 : }
3805 : }
3806 0 : pthread_mutex_unlock(&nvme_bdev->mutex);
3807 :
3808 0 : return 0;
3809 : }
3810 :
3811 : static void
3812 0 : bdev_nvme_write_config_json(struct spdk_bdev *bdev, struct spdk_json_write_ctx *w)
3813 : {
3814 : /* No config per bdev needed */
3815 0 : }
3816 :
3817 : static uint64_t
3818 0 : bdev_nvme_get_spin_time(struct spdk_io_channel *ch)
3819 : {
3820 0 : struct nvme_bdev_channel *nbdev_ch = spdk_io_channel_get_ctx(ch);
3821 : struct nvme_io_path *io_path;
3822 : struct nvme_poll_group *group;
3823 0 : uint64_t spin_time = 0;
3824 :
3825 0 : STAILQ_FOREACH(io_path, &nbdev_ch->io_path_list, stailq) {
3826 0 : group = io_path->qpair->group;
3827 :
3828 0 : if (!group || !group->collect_spin_stat) {
3829 0 : continue;
3830 : }
3831 :
3832 0 : if (group->end_ticks != 0) {
3833 0 : group->spin_ticks += (group->end_ticks - group->start_ticks);
3834 0 : group->end_ticks = 0;
3835 : }
3836 :
3837 0 : spin_time += group->spin_ticks;
3838 0 : group->start_ticks = 0;
3839 0 : group->spin_ticks = 0;
3840 : }
3841 :
3842 0 : return (spin_time * 1000000ULL) / spdk_get_ticks_hz();
3843 : }
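The final conversion turns accumulated TSC ticks into microseconds: spin_time_us = spin_ticks * 1000000 / tsc_hz. For example (sample numbers, not from the source), 2,300,000 accumulated ticks on a 2.3 GHz TSC give 2,300,000 * 1,000,000 / 2,300,000,000 = 1,000 microseconds of spin time.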
3844 :
3845 : static void
3846 0 : bdev_nvme_reset_device_stat(void *ctx)
3847 : {
3848 0 : struct nvme_bdev *nbdev = ctx;
3849 :
3850 0 : if (nbdev->err_stat != NULL) {
3851 0 : memset(nbdev->err_stat, 0, sizeof(struct nvme_error_stat));
3852 : }
3853 0 : }
3854 :
3855 : /* JSON string should be lowercases and underscore delimited string. */
3856 : static void
3857 0 : bdev_nvme_format_nvme_status(char *dst, const char *src)
3858 : {
3859 0 : char tmp[256];
3860 :
3861 0 : spdk_strcpy_replace(dst, 256, src, " - ", "_");
3862 0 : spdk_strcpy_replace(tmp, 256, dst, "-", "_");
3863 0 : spdk_strcpy_replace(dst, 256, tmp, " ", "_");
3864 0 : spdk_strlwr(dst);
3865 0 : }
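A worked example of the three replacements plus the final lowercasing (the input is a sample NVMe status string, assumed rather than taken from this file):

char json_key[256];

/* " - " -> "_", then "-" -> "_", then " " -> "_", then lowercase:
 * "ABORTED - BY REQUEST" becomes "aborted_by_request". */
bdev_nvme_format_nvme_status(json_key, "ABORTED - BY REQUEST");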
3866 :
3867 : static void
3868 0 : bdev_nvme_dump_device_stat_json(void *ctx, struct spdk_json_write_ctx *w)
3869 : {
3870 0 : struct nvme_bdev *nbdev = ctx;
3871 0 : struct spdk_nvme_status status = {};
3872 : uint16_t sct, sc;
3873 0 : char status_json[256];
3874 : const char *status_str;
3875 :
3876 0 : if (nbdev->err_stat == NULL) {
3877 0 : return;
3878 : }
3879 :
3880 0 : spdk_json_write_named_object_begin(w, "nvme_error");
3881 :
3882 0 : spdk_json_write_named_object_begin(w, "status_type");
3883 0 : for (sct = 0; sct < 8; sct++) {
3884 0 : if (nbdev->err_stat->status_type[sct] == 0) {
3885 0 : continue;
3886 : }
3887 0 : status.sct = sct;
3888 :
3889 0 : status_str = spdk_nvme_cpl_get_status_type_string(&status);
3890 0 : assert(status_str != NULL);
3891 0 : bdev_nvme_format_nvme_status(status_json, status_str);
3892 :
3893 0 : spdk_json_write_named_uint32(w, status_json, nbdev->err_stat->status_type[sct]);
3894 : }
3895 0 : spdk_json_write_object_end(w);
3896 :
3897 0 : spdk_json_write_named_object_begin(w, "status_code");
3898 0 : for (sct = 0; sct < 4; sct++) {
3899 0 : status.sct = sct;
3900 0 : for (sc = 0; sc < 256; sc++) {
3901 0 : if (nbdev->err_stat->status[sct][sc] == 0) {
3902 0 : continue;
3903 : }
3904 0 : status.sc = sc;
3905 :
3906 0 : status_str = spdk_nvme_cpl_get_status_string(&status);
3907 0 : assert(status_str != NULL);
3908 0 : bdev_nvme_format_nvme_status(status_json, status_str);
3909 :
3910 0 : spdk_json_write_named_uint32(w, status_json, nbdev->err_stat->status[sct][sc]);
3911 : }
3912 : }
3913 0 : spdk_json_write_object_end(w);
3914 :
3915 0 : spdk_json_write_object_end(w);
3916 : }
3917 :
3918 : static bool
3919 0 : bdev_nvme_accel_sequence_supported(void *ctx, enum spdk_bdev_io_type type)
3920 : {
3921 0 : struct nvme_bdev *nbdev = ctx;
3922 : struct spdk_nvme_ctrlr *ctrlr;
3923 :
3924 0 : if (!g_opts.allow_accel_sequence) {
3925 0 : return false;
3926 : }
3927 :
3928 0 : switch (type) {
3929 0 : case SPDK_BDEV_IO_TYPE_WRITE:
3930 : case SPDK_BDEV_IO_TYPE_READ:
3931 0 : break;
3932 0 : default:
3933 0 : return false;
3934 : }
3935 :
3936 0 : ctrlr = bdev_nvme_get_ctrlr(&nbdev->disk);
3937 0 : assert(ctrlr != NULL);
3938 :
3939 0 : return spdk_nvme_ctrlr_get_flags(ctrlr) & SPDK_NVME_CTRLR_ACCEL_SEQUENCE_SUPPORTED;
3940 : }
3941 :
3942 : static const struct spdk_bdev_fn_table nvmelib_fn_table = {
3943 : .destruct = bdev_nvme_destruct,
3944 : .submit_request = bdev_nvme_submit_request,
3945 : .io_type_supported = bdev_nvme_io_type_supported,
3946 : .get_io_channel = bdev_nvme_get_io_channel,
3947 : .dump_info_json = bdev_nvme_dump_info_json,
3948 : .write_config_json = bdev_nvme_write_config_json,
3949 : .get_spin_time = bdev_nvme_get_spin_time,
3950 : .get_module_ctx = bdev_nvme_get_module_ctx,
3951 : .get_memory_domains = bdev_nvme_get_memory_domains,
3952 : .accel_sequence_supported = bdev_nvme_accel_sequence_supported,
3953 : .reset_device_stat = bdev_nvme_reset_device_stat,
3954 : .dump_device_stat_json = bdev_nvme_dump_device_stat_json,
3955 : };
3956 :
3957 : typedef int (*bdev_nvme_parse_ana_log_page_cb)(
3958 : const struct spdk_nvme_ana_group_descriptor *desc, void *cb_arg);
3959 :
3960 : static int
3961 40 : bdev_nvme_parse_ana_log_page(struct nvme_ctrlr *nvme_ctrlr,
3962 : bdev_nvme_parse_ana_log_page_cb cb_fn, void *cb_arg)
3963 : {
3964 : struct spdk_nvme_ana_group_descriptor *copied_desc;
3965 : uint8_t *orig_desc;
3966 : uint32_t i, desc_size, copy_len;
3967 40 : int rc = 0;
3968 :
3969 40 : if (nvme_ctrlr->ana_log_page == NULL) {
3970 0 : return -EINVAL;
3971 : }
3972 :
3973 40 : copied_desc = nvme_ctrlr->copied_ana_desc;
3974 :
3975 40 : orig_desc = (uint8_t *)nvme_ctrlr->ana_log_page + sizeof(struct spdk_nvme_ana_page);
3976 40 : copy_len = nvme_ctrlr->max_ana_log_page_size - sizeof(struct spdk_nvme_ana_page);
3977 :
3978 69 : for (i = 0; i < nvme_ctrlr->ana_log_page->num_ana_group_desc; i++) {
3979 65 : memcpy(copied_desc, orig_desc, copy_len);
3980 :
3981 65 : rc = cb_fn(copied_desc, cb_arg);
3982 65 : if (rc != 0) {
3983 36 : break;
3984 : }
3985 :
3986 29 : desc_size = sizeof(struct spdk_nvme_ana_group_descriptor) +
3987 29 : copied_desc->num_of_nsid * sizeof(uint32_t);
3988 29 : orig_desc += desc_size;
3989 29 : copy_len -= desc_size;
3990 : }
3991 :
3992 40 : return rc;
3993 : }
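The walk visits each variable-length descriptor in order and stops as soon as the callback returns non-zero, propagating that value to the caller (nvme_ns_set_ana_state below uses this to stop after the matching group). A minimal callback sketch (hypothetical, not from this file):

static int
count_ana_groups_cb(const struct spdk_nvme_ana_group_descriptor *desc, void *cb_arg)
{
	uint32_t *num_groups = cb_arg;

	(*num_groups)++;

	return 0;	/* 0 continues the walk; non-zero stops it early. */
}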
3994 :
3995 : static int
3996 5 : nvme_ns_ana_transition_timedout(void *ctx)
3997 : {
3998 5 : struct nvme_ns *nvme_ns = ctx;
3999 :
4000 5 : spdk_poller_unregister(&nvme_ns->anatt_timer);
4001 5 : nvme_ns->ana_transition_timedout = true;
4002 :
4003 5 : return SPDK_POLLER_BUSY;
4004 : }
4005 :
4006 : static void
4007 45 : _nvme_ns_set_ana_state(struct nvme_ns *nvme_ns,
4008 : const struct spdk_nvme_ana_group_descriptor *desc)
4009 : {
4010 : const struct spdk_nvme_ctrlr_data *cdata;
4011 :
4012 45 : nvme_ns->ana_group_id = desc->ana_group_id;
4013 45 : nvme_ns->ana_state = desc->ana_state;
4014 45 : nvme_ns->ana_state_updating = false;
4015 :
4016 45 : switch (nvme_ns->ana_state) {
4017 38 : case SPDK_NVME_ANA_OPTIMIZED_STATE:
4018 : case SPDK_NVME_ANA_NON_OPTIMIZED_STATE:
4019 38 : nvme_ns->ana_transition_timedout = false;
4020 38 : spdk_poller_unregister(&nvme_ns->anatt_timer);
4021 38 : break;
4022 :
4023 6 : case SPDK_NVME_ANA_INACCESSIBLE_STATE:
4024 : case SPDK_NVME_ANA_CHANGE_STATE:
4025 6 : if (nvme_ns->anatt_timer != NULL) {
4026 1 : break;
4027 : }
4028 :
4029 5 : cdata = spdk_nvme_ctrlr_get_data(nvme_ns->ctrlr->ctrlr);
4030 5 : nvme_ns->anatt_timer = SPDK_POLLER_REGISTER(nvme_ns_ana_transition_timedout,
4031 : nvme_ns,
4032 : cdata->anatt * SPDK_SEC_TO_USEC);
4033 5 : break;
4034 1 : default:
4035 1 : break;
4036 : }
4037 45 : }
4038 :
4039 : static int
4040 59 : nvme_ns_set_ana_state(const struct spdk_nvme_ana_group_descriptor *desc, void *cb_arg)
4041 : {
4042 59 : struct nvme_ns *nvme_ns = cb_arg;
4043 : uint32_t i;
4044 :
4045 59 : assert(nvme_ns->ns != NULL);
4046 :
4047 81 : for (i = 0; i < desc->num_of_nsid; i++) {
4048 58 : if (desc->nsid[i] != spdk_nvme_ns_get_id(nvme_ns->ns)) {
4049 22 : continue;
4050 : }
4051 :
4052 36 : _nvme_ns_set_ana_state(nvme_ns, desc);
4053 36 : return 1;
4054 : }
4055 :
4056 23 : return 0;
4057 : }
4058 :
4059 : static int
4060 5 : nvme_generate_uuid(const char *sn, uint32_t nsid, struct spdk_uuid *uuid)
4061 : {
4062 5 : int rc = 0;
4063 5 : struct spdk_uuid new_uuid, namespace_uuid;
4064 5 : char merged_str[SPDK_NVME_CTRLR_SN_LEN + NSID_STR_LEN + 1] = {'\0'};
4065 : /* This namespace UUID was generated using the uuid_generate() method. */
4066 5 : const char *namespace_str = {"edaed2de-24bc-4b07-b559-f47ecbe730fd"};
4067 : int size;
4068 :
4069 5 : assert(strlen(sn) <= SPDK_NVME_CTRLR_SN_LEN);
4070 :
4071 5 : spdk_uuid_set_null(&new_uuid);
4072 5 : spdk_uuid_set_null(&namespace_uuid);
4073 :
4074 5 : size = snprintf(merged_str, sizeof(merged_str), "%s%"PRIu32, sn, nsid);
4075 5 : if (size <= 0 || (unsigned long)size >= sizeof(merged_str)) {
4076 0 : return -EINVAL;
4077 : }
4078 :
4079 5 : spdk_uuid_parse(&namespace_uuid, namespace_str);
4080 :
4081 5 : rc = spdk_uuid_generate_sha1(&new_uuid, &namespace_uuid, merged_str, size);
4082 5 : if (rc == 0) {
4083 5 : memcpy(uuid, &new_uuid, sizeof(struct spdk_uuid));
4084 : }
4085 :
4086 5 : return rc;
4087 : }
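Because the result is a SHA-1-based (version 5 style) UUID over a fixed namespace UUID and the serial-number-plus-NSID string, the mapping is deterministic: the same (sn, nsid) pair always yields the same UUID. A usage sketch with sample values (the serial number is illustrative only):

struct spdk_uuid uuid;
char uuid_str[SPDK_UUID_STRING_LEN];

if (nvme_generate_uuid("S4EVNE0M800081", 1, &uuid) == 0) {
	spdk_uuid_fmt_lower(uuid_str, sizeof(uuid_str), &uuid);
	/* uuid_str is stable across restarts for this (sn, nsid) pair. */
}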
4088 :
4089 : static int
4090 37 : nvme_disk_create(struct spdk_bdev *disk, const char *base_name,
4091 : struct spdk_nvme_ctrlr *ctrlr, struct spdk_nvme_ns *ns,
4092 : uint32_t prchk_flags, void *ctx)
4093 : {
4094 : const struct spdk_uuid *uuid;
4095 : const uint8_t *nguid;
4096 : const struct spdk_nvme_ctrlr_data *cdata;
4097 : const struct spdk_nvme_ns_data *nsdata;
4098 : const struct spdk_nvme_ctrlr_opts *opts;
4099 : enum spdk_nvme_csi csi;
4100 : uint32_t atomic_bs, phys_bs, bs;
4101 37 : char sn_tmp[SPDK_NVME_CTRLR_SN_LEN + 1] = {'\0'};
4102 : int rc;
4103 :
4104 37 : cdata = spdk_nvme_ctrlr_get_data(ctrlr);
4105 37 : csi = spdk_nvme_ns_get_csi(ns);
4106 37 : opts = spdk_nvme_ctrlr_get_opts(ctrlr);
4107 :
4108 37 : switch (csi) {
4109 37 : case SPDK_NVME_CSI_NVM:
4110 37 : disk->product_name = "NVMe disk";
4111 37 : break;
4112 0 : case SPDK_NVME_CSI_ZNS:
4113 0 : disk->product_name = "NVMe ZNS disk";
4114 0 : disk->zoned = true;
4115 0 : disk->zone_size = spdk_nvme_zns_ns_get_zone_size_sectors(ns);
4116 0 : disk->max_zone_append_size = spdk_nvme_zns_ctrlr_get_max_zone_append_size(ctrlr) /
4117 0 : spdk_nvme_ns_get_extended_sector_size(ns);
4118 0 : disk->max_open_zones = spdk_nvme_zns_ns_get_max_open_zones(ns);
4119 0 : disk->max_active_zones = spdk_nvme_zns_ns_get_max_active_zones(ns);
4120 0 : break;
4121 0 : default:
4122 0 : SPDK_ERRLOG("unsupported CSI: %u\n", csi);
4123 0 : return -ENOTSUP;
4124 : }
4125 :
4126 37 : nguid = spdk_nvme_ns_get_nguid(ns);
4127 37 : if (!nguid) {
4128 37 : uuid = spdk_nvme_ns_get_uuid(ns);
4129 37 : if (uuid) {
4130 12 : disk->uuid = *uuid;
4131 25 : } else if (g_opts.generate_uuids) {
4132 0 : spdk_strcpy_pad(sn_tmp, cdata->sn, SPDK_NVME_CTRLR_SN_LEN, '\0');
4133 0 : rc = nvme_generate_uuid(sn_tmp, spdk_nvme_ns_get_id(ns), &disk->uuid);
4134 0 : if (rc < 0) {
4135 0 : SPDK_ERRLOG("UUID generation failed (%s)\n", spdk_strerror(-rc));
4136 0 : return rc;
4137 : }
4138 : }
4139 : } else {
4140 0 : memcpy(&disk->uuid, nguid, sizeof(disk->uuid));
4141 : }
4142 :
4143 37 : disk->name = spdk_sprintf_alloc("%sn%d", base_name, spdk_nvme_ns_get_id(ns));
4144 37 : if (!disk->name) {
4145 0 : return -ENOMEM;
4146 : }
4147 :
4148 37 : disk->write_cache = 0;
4149 37 : if (cdata->vwc.present) {
4150 : /* Enable if the Volatile Write Cache exists */
4151 0 : disk->write_cache = 1;
4152 : }
4153 37 : if (cdata->oncs.write_zeroes) {
4154 0 : disk->max_write_zeroes = UINT16_MAX + 1;
4155 : }
4156 37 : disk->blocklen = spdk_nvme_ns_get_extended_sector_size(ns);
4157 37 : disk->blockcnt = spdk_nvme_ns_get_num_sectors(ns);
4158 37 : disk->max_segment_size = spdk_nvme_ctrlr_get_max_xfer_size(ctrlr);
4159 37 : disk->ctratt.raw = cdata->ctratt.raw;
4160 : /* The NVMe driver will split one request into multiple requests
4161 : * based on MDTS and the stripe boundary; the bdev layer will use
4162 : * max_segment_size and max_num_segments to split one big I/O
4163 : * into multiple requests, so the resulting small requests cannot
4164 : * exhaust the NVMe driver's internal request data structures.
4165 : */
4166 37 : if (opts && opts->io_queue_requests) {
4167 0 : disk->max_num_segments = opts->io_queue_requests / 2;
4168 : }
4169 37 : if (spdk_nvme_ctrlr_get_flags(ctrlr) & SPDK_NVME_CTRLR_SGL_SUPPORTED) {
4170 : /* The nvme driver will try to split I/Os that have too many
4171 : * SGEs, but that doesn't work if the last SGE doesn't end on
4172 : * an aggregate total that is block aligned. The bdev layer has
4173 : * a more robust splitting framework, so use that instead for
4174 : * this case. (See issue #3269.)
4175 : */
4176 0 : uint16_t max_sges = spdk_nvme_ctrlr_get_max_sges(ctrlr);
4177 :
4178 0 : if (disk->max_num_segments == 0) {
4179 0 : disk->max_num_segments = max_sges;
4180 : } else {
4181 0 : disk->max_num_segments = spdk_min(disk->max_num_segments, max_sges);
4182 : }
4183 : }
4184 37 : disk->optimal_io_boundary = spdk_nvme_ns_get_optimal_io_boundary(ns);
4185 :
4186 37 : nsdata = spdk_nvme_ns_get_data(ns);
4187 37 : bs = spdk_nvme_ns_get_sector_size(ns);
4188 37 : atomic_bs = bs;
4189 37 : phys_bs = bs;
4190 37 : if (nsdata->nabo == 0) {
4191 37 : if (nsdata->nsfeat.ns_atomic_write_unit && nsdata->nawupf) {
4192 0 : atomic_bs = bs * (1 + nsdata->nawupf);
4193 : } else {
4194 37 : atomic_bs = bs * (1 + cdata->awupf);
4195 : }
4196 : }
4197 37 : if (nsdata->nsfeat.optperf) {
4198 0 : phys_bs = bs * (1 + nsdata->npwg);
4199 : }
4200 37 : disk->phys_blocklen = spdk_min(phys_bs, atomic_bs);
4201 :
4202 37 : disk->md_len = spdk_nvme_ns_get_md_size(ns);
4203 37 : if (disk->md_len != 0) {
4204 0 : disk->md_interleave = nsdata->flbas.extended;
4205 0 : disk->dif_type = (enum spdk_dif_type)spdk_nvme_ns_get_pi_type(ns);
4206 0 : if (disk->dif_type != SPDK_DIF_DISABLE) {
4207 0 : disk->dif_is_head_of_md = nsdata->dps.md_start;
4208 0 : disk->dif_check_flags = prchk_flags;
4209 : }
4210 : }
4211 :
4212 37 : if (!(spdk_nvme_ctrlr_get_flags(ctrlr) &
4213 : SPDK_NVME_CTRLR_COMPARE_AND_WRITE_SUPPORTED)) {
4214 37 : disk->acwu = 0;
4215 0 : } else if (nsdata->nsfeat.ns_atomic_write_unit) {
4216 0 : disk->acwu = nsdata->nacwu + 1; /* 0-based */
4217 : } else {
4218 0 : disk->acwu = cdata->acwu + 1; /* 0-based */
4219 : }
4220 :
4221 37 : if (cdata->oncs.copy) {
4222 : /* For now bdev interface allows only single segment copy */
4223 0 : disk->max_copy = nsdata->mssrl;
4224 : }
4225 :
4226 37 : disk->ctxt = ctx;
4227 37 : disk->fn_table = &nvmelib_fn_table;
4228 37 : disk->module = &nvme_if;
4229 :
4230 37 : return 0;
4231 : }
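The atomic/physical block size computation above is easiest to follow with numbers. For example (sample values, not from the source): with a 512-byte sector, nabo = 0, nsfeat.ns_atomic_write_unit set with nawupf = 7, and nsfeat.optperf set with npwg = 7, atomic_bs = 512 * (1 + 7) = 4096 and phys_bs = 512 * (1 + 7) = 4096, so disk->phys_blocklen = spdk_min(4096, 4096) = 4096. The "+ 1" appears because nawupf, npwg, awupf, nacwu and acwu are all 0-based fields.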
4232 :
4233 : static struct nvme_bdev *
4234 37 : nvme_bdev_alloc(void)
4235 : {
4236 : struct nvme_bdev *bdev;
4237 : int rc;
4238 :
4239 37 : bdev = calloc(1, sizeof(*bdev));
4240 37 : if (!bdev) {
4241 0 : SPDK_ERRLOG("bdev calloc() failed\n");
4242 0 : return NULL;
4243 : }
4244 :
4245 37 : if (g_opts.nvme_error_stat) {
4246 0 : bdev->err_stat = calloc(1, sizeof(struct nvme_error_stat));
4247 0 : if (!bdev->err_stat) {
4248 0 : SPDK_ERRLOG("err_stat calloc() failed\n");
4249 0 : free(bdev);
4250 0 : return NULL;
4251 : }
4252 : }
4253 :
4254 37 : rc = pthread_mutex_init(&bdev->mutex, NULL);
4255 37 : if (rc != 0) {
4256 0 : free(bdev->err_stat);
4257 0 : free(bdev);
4258 0 : return NULL;
4259 : }
4260 :
4261 37 : bdev->ref = 1;
4262 37 : bdev->mp_policy = BDEV_NVME_MP_POLICY_ACTIVE_PASSIVE;
4263 37 : bdev->mp_selector = BDEV_NVME_MP_SELECTOR_ROUND_ROBIN;
4264 37 : bdev->rr_min_io = UINT32_MAX;
4265 37 : TAILQ_INIT(&bdev->nvme_ns_list);
4266 :
4267 37 : return bdev;
4268 : }
4269 :
4270 : static int
4271 37 : nvme_bdev_create(struct nvme_ctrlr *nvme_ctrlr, struct nvme_ns *nvme_ns)
4272 : {
4273 : struct nvme_bdev *bdev;
4274 37 : struct nvme_bdev_ctrlr *nbdev_ctrlr = nvme_ctrlr->nbdev_ctrlr;
4275 : int rc;
4276 :
4277 37 : bdev = nvme_bdev_alloc();
4278 37 : if (bdev == NULL) {
4279 0 : SPDK_ERRLOG("Failed to allocate NVMe bdev\n");
4280 0 : return -ENOMEM;
4281 : }
4282 :
4283 37 : bdev->opal = nvme_ctrlr->opal_dev != NULL;
4284 :
4285 37 : rc = nvme_disk_create(&bdev->disk, nbdev_ctrlr->name, nvme_ctrlr->ctrlr,
4286 : nvme_ns->ns, nvme_ctrlr->opts.prchk_flags, bdev);
4287 37 : if (rc != 0) {
4288 0 : SPDK_ERRLOG("Failed to create NVMe disk\n");
4289 0 : nvme_bdev_free(bdev);
4290 0 : return rc;
4291 : }
4292 :
4293 37 : spdk_io_device_register(bdev,
4294 : bdev_nvme_create_bdev_channel_cb,
4295 : bdev_nvme_destroy_bdev_channel_cb,
4296 : sizeof(struct nvme_bdev_channel),
4297 37 : bdev->disk.name);
4298 :
4299 37 : nvme_ns->bdev = bdev;
4300 37 : bdev->nsid = nvme_ns->id;
4301 37 : TAILQ_INSERT_TAIL(&bdev->nvme_ns_list, nvme_ns, tailq);
4302 :
4303 37 : bdev->nbdev_ctrlr = nbdev_ctrlr;
4304 37 : TAILQ_INSERT_TAIL(&nbdev_ctrlr->bdevs, bdev, tailq);
4305 :
4306 37 : rc = spdk_bdev_register(&bdev->disk);
4307 37 : if (rc != 0) {
4308 1 : SPDK_ERRLOG("spdk_bdev_register() failed\n");
4309 1 : spdk_io_device_unregister(bdev, NULL);
4310 1 : nvme_ns->bdev = NULL;
4311 1 : TAILQ_REMOVE(&nbdev_ctrlr->bdevs, bdev, tailq);
4312 1 : nvme_bdev_free(bdev);
4313 1 : return rc;
4314 : }
4315 :
4316 36 : return 0;
4317 : }
4318 :
4319 : static bool
4320 23 : bdev_nvme_compare_ns(struct spdk_nvme_ns *ns1, struct spdk_nvme_ns *ns2)
4321 : {
4322 : const struct spdk_nvme_ns_data *nsdata1, *nsdata2;
4323 : const struct spdk_uuid *uuid1, *uuid2;
4324 :
4325 23 : nsdata1 = spdk_nvme_ns_get_data(ns1);
4326 23 : nsdata2 = spdk_nvme_ns_get_data(ns2);
4327 23 : uuid1 = spdk_nvme_ns_get_uuid(ns1);
4328 23 : uuid2 = spdk_nvme_ns_get_uuid(ns2);
4329 :
4330 45 : return memcmp(nsdata1->nguid, nsdata2->nguid, sizeof(nsdata1->nguid)) == 0 &&
4331 22 : nsdata1->eui64 == nsdata2->eui64 &&
4332 21 : ((uuid1 == NULL && uuid2 == NULL) ||
4333 59 : (uuid1 != NULL && uuid2 != NULL && spdk_uuid_compare(uuid1, uuid2) == 0)) &&
4334 18 : spdk_nvme_ns_get_csi(ns1) == spdk_nvme_ns_get_csi(ns2);
4335 : }
4336 :
4337 : static bool
4338 0 : hotplug_probe_cb(void *cb_ctx, const struct spdk_nvme_transport_id *trid,
4339 : struct spdk_nvme_ctrlr_opts *opts)
4340 : {
4341 : struct nvme_probe_skip_entry *entry;
4342 :
4343 0 : TAILQ_FOREACH(entry, &g_skipped_nvme_ctrlrs, tailq) {
4344 0 : if (spdk_nvme_transport_id_compare(trid, &entry->trid) == 0) {
4345 0 : return false;
4346 : }
4347 : }
4348 :
4349 0 : opts->arbitration_burst = (uint8_t)g_opts.arbitration_burst;
4350 0 : opts->low_priority_weight = (uint8_t)g_opts.low_priority_weight;
4351 0 : opts->medium_priority_weight = (uint8_t)g_opts.medium_priority_weight;
4352 0 : opts->high_priority_weight = (uint8_t)g_opts.high_priority_weight;
4353 0 : opts->disable_read_ana_log_page = true;
4354 :
4355 0 : SPDK_DEBUGLOG(bdev_nvme, "Attaching to %s\n", trid->traddr);
4356 :
4357 0 : return true;
4358 : }
4359 :
4360 : static void
4361 0 : nvme_abort_cpl(void *ctx, const struct spdk_nvme_cpl *cpl)
4362 : {
4363 0 : struct nvme_ctrlr *nvme_ctrlr = ctx;
4364 :
4365 0 : if (spdk_nvme_cpl_is_error(cpl)) {
4366 0 : SPDK_WARNLOG("Abort failed. Resetting controller. sc is %u, sct is %u.\n", cpl->status.sc,
4367 : cpl->status.sct);
4368 0 : bdev_nvme_reset_ctrlr(nvme_ctrlr);
4369 0 : } else if (cpl->cdw0 & 0x1) {
4370 0 : SPDK_WARNLOG("Specified command could not be aborted.\n");
4371 0 : bdev_nvme_reset_ctrlr(nvme_ctrlr);
4372 : }
4373 0 : }
4374 :
4375 : static void
4376 0 : timeout_cb(void *cb_arg, struct spdk_nvme_ctrlr *ctrlr,
4377 : struct spdk_nvme_qpair *qpair, uint16_t cid)
4378 : {
4379 0 : struct nvme_ctrlr *nvme_ctrlr = cb_arg;
4380 : union spdk_nvme_csts_register csts;
4381 : int rc;
4382 :
4383 0 : assert(nvme_ctrlr->ctrlr == ctrlr);
4384 :
4385 0 : SPDK_WARNLOG("Warning: Detected a timeout. ctrlr=%p qpair=%p cid=%u\n", ctrlr, qpair, cid);
4386 :
4387 : /* Only try to read CSTS if it's a PCIe controller or we have a timeout on an I/O
4388 : * queue. (Note: qpair == NULL when there's an admin cmd timeout.) Otherwise we
4389 : * would submit another fabrics cmd on the admin queue to read CSTS and check for its
4390 : * completion recursively.
4391 : */
4392 0 : if (nvme_ctrlr->active_path_id->trid.trtype == SPDK_NVME_TRANSPORT_PCIE || qpair != NULL) {
4393 0 : csts = spdk_nvme_ctrlr_get_regs_csts(ctrlr);
4394 0 : if (csts.bits.cfs) {
4395 0 : SPDK_ERRLOG("Controller Fatal Status, reset required\n");
4396 0 : bdev_nvme_reset_ctrlr(nvme_ctrlr);
4397 0 : return;
4398 : }
4399 : }
4400 :
4401 0 : switch (g_opts.action_on_timeout) {
4402 0 : case SPDK_BDEV_NVME_TIMEOUT_ACTION_ABORT:
4403 0 : if (qpair) {
4404 : /* Don't send abort to ctrlr when ctrlr is not available. */
4405 0 : pthread_mutex_lock(&nvme_ctrlr->mutex);
4406 0 : if (!nvme_ctrlr_is_available(nvme_ctrlr)) {
4407 0 : pthread_mutex_unlock(&nvme_ctrlr->mutex);
4408 0 : SPDK_NOTICELOG("Quit abort. Ctrlr is not available.\n");
4409 0 : return;
4410 : }
4411 0 : pthread_mutex_unlock(&nvme_ctrlr->mutex);
4412 :
4413 0 : rc = spdk_nvme_ctrlr_cmd_abort(ctrlr, qpair, cid,
4414 : nvme_abort_cpl, nvme_ctrlr);
4415 0 : if (rc == 0) {
4416 0 : return;
4417 : }
4418 :
4419 0 : SPDK_ERRLOG("Unable to send abort. Resetting, rc is %d.\n", rc);
4420 : }
4421 :
4422 : /* FALLTHROUGH */
4423 : case SPDK_BDEV_NVME_TIMEOUT_ACTION_RESET:
4424 0 : bdev_nvme_reset_ctrlr(nvme_ctrlr);
4425 0 : break;
4426 0 : case SPDK_BDEV_NVME_TIMEOUT_ACTION_NONE:
4427 0 : SPDK_DEBUGLOG(bdev_nvme, "No action for nvme controller timeout.\n");
4428 0 : break;
4429 0 : default:
4430 0 : SPDK_ERRLOG("An invalid timeout action value is found.\n");
4431 0 : break;
4432 : }
4433 : }
4434 :
4435 : static struct nvme_ns *
4436 50 : nvme_ns_alloc(void)
4437 : {
4438 : struct nvme_ns *nvme_ns;
4439 :
4440 50 : nvme_ns = calloc(1, sizeof(struct nvme_ns));
4441 50 : if (nvme_ns == NULL) {
4442 0 : return NULL;
4443 : }
4444 :
4445 50 : if (g_opts.io_path_stat) {
4446 0 : nvme_ns->stat = calloc(1, sizeof(struct spdk_bdev_io_stat));
4447 0 : if (nvme_ns->stat == NULL) {
4448 0 : free(nvme_ns);
4449 0 : return NULL;
4450 : }
4451 0 : spdk_bdev_reset_io_stat(nvme_ns->stat, SPDK_BDEV_RESET_STAT_MAXMIN);
4452 : }
4453 :
4454 50 : return nvme_ns;
4455 : }
4456 :
4457 : static void
4458 50 : nvme_ns_free(struct nvme_ns *nvme_ns)
4459 : {
4460 50 : free(nvme_ns->stat);
4461 50 : free(nvme_ns);
4462 50 : }
4463 :
4464 : static void
4465 50 : nvme_ctrlr_populate_namespace_done(struct nvme_ns *nvme_ns, int rc)
4466 : {
4467 50 : struct nvme_ctrlr *nvme_ctrlr = nvme_ns->ctrlr;
4468 50 : struct nvme_async_probe_ctx *ctx = nvme_ns->probe_ctx;
4469 :
4470 50 : if (rc == 0) {
4471 48 : nvme_ns->probe_ctx = NULL;
4472 48 : pthread_mutex_lock(&nvme_ctrlr->mutex);
4473 48 : nvme_ctrlr->ref++;
4474 48 : pthread_mutex_unlock(&nvme_ctrlr->mutex);
4475 : } else {
4476 2 : RB_REMOVE(nvme_ns_tree, &nvme_ctrlr->namespaces, nvme_ns);
4477 2 : nvme_ns_free(nvme_ns);
4478 : }
4479 :
4480 50 : if (ctx) {
4481 49 : ctx->populates_in_progress--;
4482 49 : if (ctx->populates_in_progress == 0) {
4483 12 : nvme_ctrlr_populate_namespaces_done(nvme_ctrlr, ctx);
4484 : }
4485 : }
4486 50 : }
4487 :
4488 : static void
4489 2 : bdev_nvme_add_io_path(struct spdk_io_channel_iter *i)
4490 : {
4491 2 : struct spdk_io_channel *_ch = spdk_io_channel_iter_get_channel(i);
4492 2 : struct nvme_bdev_channel *nbdev_ch = spdk_io_channel_get_ctx(_ch);
4493 2 : struct nvme_ns *nvme_ns = spdk_io_channel_iter_get_ctx(i);
4494 : int rc;
4495 :
4496 2 : rc = _bdev_nvme_add_io_path(nbdev_ch, nvme_ns);
4497 2 : if (rc != 0) {
4498 0 : SPDK_ERRLOG("Failed to add I/O path to bdev_channel dynamically.\n");
4499 : }
4500 :
4501 2 : spdk_for_each_channel_continue(i, rc);
4502 2 : }
4503 :
4504 : static void
4505 2 : bdev_nvme_delete_io_path(struct spdk_io_channel_iter *i)
4506 : {
4507 2 : struct spdk_io_channel *_ch = spdk_io_channel_iter_get_channel(i);
4508 2 : struct nvme_bdev_channel *nbdev_ch = spdk_io_channel_get_ctx(_ch);
4509 2 : struct nvme_ns *nvme_ns = spdk_io_channel_iter_get_ctx(i);
4510 : struct nvme_io_path *io_path;
4511 :
4512 2 : io_path = _bdev_nvme_get_io_path(nbdev_ch, nvme_ns);
4513 2 : if (io_path != NULL) {
4514 2 : _bdev_nvme_delete_io_path(nbdev_ch, io_path);
4515 : }
4516 :
4517 2 : spdk_for_each_channel_continue(i, 0);
4518 2 : }
4519 :
4520 : static void
4521 0 : bdev_nvme_add_io_path_failed(struct spdk_io_channel_iter *i, int status)
4522 : {
4523 0 : struct nvme_ns *nvme_ns = spdk_io_channel_iter_get_ctx(i);
4524 :
4525 0 : nvme_ctrlr_populate_namespace_done(nvme_ns, -1);
4526 0 : }
4527 :
4528 : static void
4529 12 : bdev_nvme_add_io_path_done(struct spdk_io_channel_iter *i, int status)
4530 : {
4531 12 : struct nvme_ns *nvme_ns = spdk_io_channel_iter_get_ctx(i);
4532 12 : struct nvme_bdev *bdev = spdk_io_channel_iter_get_io_device(i);
4533 :
4534 12 : if (status == 0) {
4535 12 : nvme_ctrlr_populate_namespace_done(nvme_ns, 0);
4536 : } else {
4537 : /* Delete the added io_paths and fail populating the namespace. */
4538 0 : spdk_for_each_channel(bdev,
4539 : bdev_nvme_delete_io_path,
4540 : nvme_ns,
4541 : bdev_nvme_add_io_path_failed);
4542 : }
4543 12 : }
4544 :
4545 : static int
4546 13 : nvme_bdev_add_ns(struct nvme_bdev *bdev, struct nvme_ns *nvme_ns)
4547 : {
4548 : struct nvme_ns *tmp_ns;
4549 : const struct spdk_nvme_ns_data *nsdata;
4550 :
4551 13 : nsdata = spdk_nvme_ns_get_data(nvme_ns->ns);
4552 13 : if (!nsdata->nmic.can_share) {
4553 0 : SPDK_ERRLOG("Namespace cannot be shared.\n");
4554 0 : return -EINVAL;
4555 : }
4556 :
4557 13 : pthread_mutex_lock(&bdev->mutex);
4558 :
4559 13 : tmp_ns = TAILQ_FIRST(&bdev->nvme_ns_list);
4560 13 : assert(tmp_ns != NULL);
4561 :
4562 13 : if (tmp_ns->ns != NULL && !bdev_nvme_compare_ns(nvme_ns->ns, tmp_ns->ns)) {
4563 1 : pthread_mutex_unlock(&bdev->mutex);
4564 1 : SPDK_ERRLOG("Namespaces are not identical.\n");
4565 1 : return -EINVAL;
4566 : }
4567 :
4568 12 : bdev->ref++;
4569 12 : TAILQ_INSERT_TAIL(&bdev->nvme_ns_list, nvme_ns, tailq);
4570 12 : nvme_ns->bdev = bdev;
4571 :
4572 12 : pthread_mutex_unlock(&bdev->mutex);
4573 :
4574 : /* Add nvme_io_path to nvme_bdev_channels dynamically. */
4575 12 : spdk_for_each_channel(bdev,
4576 : bdev_nvme_add_io_path,
4577 : nvme_ns,
4578 : bdev_nvme_add_io_path_done);
4579 :
4580 12 : return 0;
4581 : }
4582 :
4583 : static void
4584 50 : nvme_ctrlr_populate_namespace(struct nvme_ctrlr *nvme_ctrlr, struct nvme_ns *nvme_ns)
4585 : {
4586 : struct spdk_nvme_ns *ns;
4587 : struct nvme_bdev *bdev;
4588 50 : int rc = 0;
4589 :
4590 50 : ns = spdk_nvme_ctrlr_get_ns(nvme_ctrlr->ctrlr, nvme_ns->id);
4591 50 : if (!ns) {
4592 0 : SPDK_DEBUGLOG(bdev_nvme, "Invalid NS %d\n", nvme_ns->id);
4593 0 : rc = -EINVAL;
4594 0 : goto done;
4595 : }
4596 :
4597 50 : nvme_ns->ns = ns;
4598 50 : nvme_ns->ana_state = SPDK_NVME_ANA_OPTIMIZED_STATE;
4599 :
4600 50 : if (nvme_ctrlr->ana_log_page != NULL) {
4601 37 : bdev_nvme_parse_ana_log_page(nvme_ctrlr, nvme_ns_set_ana_state, nvme_ns);
4602 : }
4603 :
4604 50 : bdev = nvme_bdev_ctrlr_get_bdev(nvme_ctrlr->nbdev_ctrlr, nvme_ns->id);
4605 50 : if (bdev == NULL) {
4606 37 : rc = nvme_bdev_create(nvme_ctrlr, nvme_ns);
4607 : } else {
4608 13 : rc = nvme_bdev_add_ns(bdev, nvme_ns);
4609 13 : if (rc == 0) {
4610 12 : return;
4611 : }
4612 : }
4613 1 : done:
4614 38 : nvme_ctrlr_populate_namespace_done(nvme_ns, rc);
4615 : }
4616 :
4617 : static void
4618 48 : nvme_ctrlr_depopulate_namespace_done(struct nvme_ns *nvme_ns)
4619 : {
4620 48 : struct nvme_ctrlr *nvme_ctrlr = nvme_ns->ctrlr;
4621 :
4622 48 : assert(nvme_ctrlr != NULL);
4623 :
4624 48 : pthread_mutex_lock(&nvme_ctrlr->mutex);
4625 :
4626 48 : RB_REMOVE(nvme_ns_tree, &nvme_ctrlr->namespaces, nvme_ns);
4627 :
4628 48 : if (nvme_ns->bdev != NULL) {
4629 0 : pthread_mutex_unlock(&nvme_ctrlr->mutex);
4630 0 : return;
4631 : }
4632 :
4633 48 : nvme_ns_free(nvme_ns);
4634 48 : pthread_mutex_unlock(&nvme_ctrlr->mutex);
4635 :
4636 48 : nvme_ctrlr_release(nvme_ctrlr);
4637 : }
4638 :
4639 : static void
4640 11 : bdev_nvme_delete_io_path_done(struct spdk_io_channel_iter *i, int status)
4641 : {
4642 11 : struct nvme_ns *nvme_ns = spdk_io_channel_iter_get_ctx(i);
4643 :
4644 11 : nvme_ctrlr_depopulate_namespace_done(nvme_ns);
4645 11 : }
4646 :
4647 : static void
4648 48 : nvme_ctrlr_depopulate_namespace(struct nvme_ctrlr *nvme_ctrlr, struct nvme_ns *nvme_ns)
4649 : {
4650 : struct nvme_bdev *bdev;
4651 :
4652 48 : spdk_poller_unregister(&nvme_ns->anatt_timer);
4653 :
4654 48 : bdev = nvme_ns->bdev;
4655 48 : if (bdev != NULL) {
4656 44 : pthread_mutex_lock(&bdev->mutex);
4657 :
4658 44 : assert(bdev->ref > 0);
4659 44 : bdev->ref--;
4660 44 : if (bdev->ref == 0) {
4661 33 : pthread_mutex_unlock(&bdev->mutex);
4662 :
4663 33 : spdk_bdev_unregister(&bdev->disk, NULL, NULL);
4664 : } else {
4665 : /* spdk_bdev_unregister() is not called until the last nvme_ns is
4666 : * depopulated. Hence we need to remove nvme_ns from bdev->nvme_ns_list
4667 : * and clear nvme_ns->bdev here.
4668 : */
4669 11 : TAILQ_REMOVE(&bdev->nvme_ns_list, nvme_ns, tailq);
4670 11 : nvme_ns->bdev = NULL;
4671 :
4672 11 : pthread_mutex_unlock(&bdev->mutex);
4673 :
4674 : /* Delete nvme_io_paths from nvme_bdev_channels dynamically. After that,
4675 : * we call depopulate_namespace_done() to avoid use-after-free.
4676 : */
4677 11 : spdk_for_each_channel(bdev,
4678 : bdev_nvme_delete_io_path,
4679 : nvme_ns,
4680 : bdev_nvme_delete_io_path_done);
4681 11 : return;
4682 : }
4683 : }
4684 :
4685 37 : nvme_ctrlr_depopulate_namespace_done(nvme_ns);
4686 : }
4687 :
4688 : static void
4689 61 : nvme_ctrlr_populate_namespaces(struct nvme_ctrlr *nvme_ctrlr,
4690 : struct nvme_async_probe_ctx *ctx)
4691 : {
4692 61 : struct spdk_nvme_ctrlr *ctrlr = nvme_ctrlr->ctrlr;
4693 : struct nvme_ns *nvme_ns, *next;
4694 : struct spdk_nvme_ns *ns;
4695 : struct nvme_bdev *bdev;
4696 : uint32_t nsid;
4697 : int rc;
4698 : uint64_t num_sectors;
4699 :
4700 61 : if (ctx) {
4701 : /* Initialize this count to 1 to handle the populate functions
4702 : * calling nvme_ctrlr_populate_namespace_done() immediately.
4703 : */
4704 45 : ctx->populates_in_progress = 1;
4705 : }
4706 :
4707 : /* First loop over our existing namespaces and see if they have been
4708 : * removed. */
4709 61 : nvme_ns = nvme_ctrlr_get_first_active_ns(nvme_ctrlr);
4710 65 : while (nvme_ns != NULL) {
4711 4 : next = nvme_ctrlr_get_next_active_ns(nvme_ctrlr, nvme_ns);
4712 :
4713 4 : if (spdk_nvme_ctrlr_is_active_ns(ctrlr, nvme_ns->id)) {
4714 : /* NS is still there or added again. Its attributes may have changed. */
4715 3 : ns = spdk_nvme_ctrlr_get_ns(ctrlr, nvme_ns->id);
4716 3 : if (nvme_ns->ns != ns) {
4717 1 : assert(nvme_ns->ns == NULL);
4718 1 : nvme_ns->ns = ns;
4719 1 : SPDK_DEBUGLOG(bdev_nvme, "NSID %u was added\n", nvme_ns->id);
4720 : }
4721 :
4722 3 : num_sectors = spdk_nvme_ns_get_num_sectors(ns);
4723 3 : bdev = nvme_ns->bdev;
4724 3 : assert(bdev != NULL);
4725 3 : if (bdev->disk.blockcnt != num_sectors) {
4726 1 : SPDK_NOTICELOG("NSID %u is resized: bdev name %s, old size %" PRIu64 ", new size %" PRIu64 "\n",
4727 : nvme_ns->id,
4728 : bdev->disk.name,
4729 : bdev->disk.blockcnt,
4730 : num_sectors);
4731 1 : rc = spdk_bdev_notify_blockcnt_change(&bdev->disk, num_sectors);
4732 1 : if (rc != 0) {
4733 0 : SPDK_ERRLOG("Could not change num blocks for nvme bdev: name %s, errno: %d.\n",
4734 : bdev->disk.name, rc);
4735 : }
4736 : }
4737 : } else {
4738 : /* Namespace was removed */
4739 1 : nvme_ctrlr_depopulate_namespace(nvme_ctrlr, nvme_ns);
4740 : }
4741 :
4742 4 : nvme_ns = next;
4743 : }
4744 :
4745 : /* Loop through all of the namespaces at the nvme level and see if any of them are new */
4746 61 : nsid = spdk_nvme_ctrlr_get_first_active_ns(ctrlr);
4747 114 : while (nsid != 0) {
4748 53 : nvme_ns = nvme_ctrlr_get_ns(nvme_ctrlr, nsid);
4749 :
4750 53 : if (nvme_ns == NULL) {
4751 : /* Found a new one */
4752 50 : nvme_ns = nvme_ns_alloc();
4753 50 : if (nvme_ns == NULL) {
4754 0 : SPDK_ERRLOG("Failed to allocate namespace\n");
4755 : /* This just fails to attach the namespace. It may work on a future attempt. */
4756 0 : continue;
4757 : }
4758 :
4759 50 : nvme_ns->id = nsid;
4760 50 : nvme_ns->ctrlr = nvme_ctrlr;
4761 :
4762 50 : nvme_ns->bdev = NULL;
4763 :
4764 50 : if (ctx) {
4765 49 : ctx->populates_in_progress++;
4766 : }
4767 50 : nvme_ns->probe_ctx = ctx;
4768 :
4769 50 : RB_INSERT(nvme_ns_tree, &nvme_ctrlr->namespaces, nvme_ns);
4770 :
4771 50 : nvme_ctrlr_populate_namespace(nvme_ctrlr, nvme_ns);
4772 : }
4773 :
4774 53 : nsid = spdk_nvme_ctrlr_get_next_active_ns(ctrlr, nsid);
4775 : }
4776 :
4777 61 : if (ctx) {
4778 : /* Decrement this count now that the loop is over to account
4779 : * for the one we started with. If the count is then 0, we
4780 : * know any populate_namespace functions completed immediately,
4781 : * so we'll kick the callback here.
4782 : */
4783 45 : ctx->populates_in_progress--;
4784 45 : if (ctx->populates_in_progress == 0) {
4785 33 : nvme_ctrlr_populate_namespaces_done(nvme_ctrlr, ctx);
4786 : }
4787 : }
4788 :
4789 61 : }
4790 :
4791 : static void
4792 59 : nvme_ctrlr_depopulate_namespaces(struct nvme_ctrlr *nvme_ctrlr)
4793 : {
4794 : struct nvme_ns *nvme_ns, *tmp;
4795 :
4796 106 : RB_FOREACH_SAFE(nvme_ns, nvme_ns_tree, &nvme_ctrlr->namespaces, tmp) {
4797 47 : nvme_ctrlr_depopulate_namespace(nvme_ctrlr, nvme_ns);
4798 : }
4799 59 : }
4800 :
4801 : static uint32_t
4802 36 : nvme_ctrlr_get_ana_log_page_size(struct nvme_ctrlr *nvme_ctrlr)
4803 : {
4804 36 : struct spdk_nvme_ctrlr *ctrlr = nvme_ctrlr->ctrlr;
4805 : const struct spdk_nvme_ctrlr_data *cdata;
4806 36 : uint32_t nsid, ns_count = 0;
4807 :
4808 36 : cdata = spdk_nvme_ctrlr_get_data(ctrlr);
4809 :
4810 80 : for (nsid = spdk_nvme_ctrlr_get_first_active_ns(ctrlr);
4811 44 : nsid != 0; nsid = spdk_nvme_ctrlr_get_next_active_ns(ctrlr, nsid)) {
4812 44 : ns_count++;
4813 : }
4814 :
4815 36 : return sizeof(struct spdk_nvme_ana_page) + cdata->nanagrpid *
4816 36 : sizeof(struct spdk_nvme_ana_group_descriptor) + ns_count *
4817 : sizeof(uint32_t);
4818 : }
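Per the NVM Express specification, the ANA log page header (struct spdk_nvme_ana_page) is 16 bytes and each group descriptor is 32 bytes plus 4 bytes per NSID, so for a controller reporting nanagrpid = 2 with 3 active namespaces (sample values) this returns 16 + 2 * 32 + 3 * 4 = 92 bytes. This is an upper bound: every active namespace is counted once, even though each descriptor lists only its own NSIDs.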
4819 :
4820 : static int
4821 6 : nvme_ctrlr_set_ana_states(const struct spdk_nvme_ana_group_descriptor *desc,
4822 : void *cb_arg)
4823 : {
4824 6 : struct nvme_ctrlr *nvme_ctrlr = cb_arg;
4825 : struct nvme_ns *nvme_ns;
4826 : uint32_t i, nsid;
4827 :
4828 11 : for (i = 0; i < desc->num_of_nsid; i++) {
4829 5 : nsid = desc->nsid[i];
4830 5 : if (nsid == 0) {
4831 0 : continue;
4832 : }
4833 :
4834 5 : nvme_ns = nvme_ctrlr_get_ns(nvme_ctrlr, nsid);
4835 :
4836 5 : assert(nvme_ns != NULL);
4837 5 : if (nvme_ns == NULL) {
4838 : /* Target told us that an inactive namespace had an ANA change */
4839 0 : continue;
4840 : }
4841 :
4842 5 : _nvme_ns_set_ana_state(nvme_ns, desc);
4843 : }
4844 :
4845 6 : return 0;
4846 : }
4847 :
4848 : static void
4849 0 : bdev_nvme_disable_read_ana_log_page(struct nvme_ctrlr *nvme_ctrlr)
4850 : {
4851 : struct nvme_ns *nvme_ns;
4852 :
4853 0 : spdk_free(nvme_ctrlr->ana_log_page);
4854 0 : nvme_ctrlr->ana_log_page = NULL;
4855 :
4856 0 : for (nvme_ns = nvme_ctrlr_get_first_active_ns(nvme_ctrlr);
4857 : nvme_ns != NULL;
4858 0 : nvme_ns = nvme_ctrlr_get_next_active_ns(nvme_ctrlr, nvme_ns)) {
4859 0 : nvme_ns->ana_state_updating = false;
4860 0 : nvme_ns->ana_state = SPDK_NVME_ANA_OPTIMIZED_STATE;
4861 : }
4862 0 : }
4863 :
4864 : static void
4865 3 : nvme_ctrlr_read_ana_log_page_done(void *ctx, const struct spdk_nvme_cpl *cpl)
4866 : {
4867 3 : struct nvme_ctrlr *nvme_ctrlr = ctx;
4868 :
4869 3 : if (cpl != NULL && spdk_nvme_cpl_is_success(cpl)) {
4870 3 : bdev_nvme_parse_ana_log_page(nvme_ctrlr, nvme_ctrlr_set_ana_states,
4871 : nvme_ctrlr);
4872 : } else {
4873 0 : bdev_nvme_disable_read_ana_log_page(nvme_ctrlr);
4874 : }
4875 :
4876 3 : pthread_mutex_lock(&nvme_ctrlr->mutex);
4877 :
4878 3 : assert(nvme_ctrlr->ana_log_page_updating == true);
4879 3 : nvme_ctrlr->ana_log_page_updating = false;
4880 :
4881 3 : if (nvme_ctrlr_can_be_unregistered(nvme_ctrlr)) {
4882 0 : pthread_mutex_unlock(&nvme_ctrlr->mutex);
4883 :
4884 0 : nvme_ctrlr_unregister(nvme_ctrlr);
4885 : } else {
4886 3 : pthread_mutex_unlock(&nvme_ctrlr->mutex);
4887 :
4888 3 : bdev_nvme_clear_io_path_caches(nvme_ctrlr);
4889 : }
4890 3 : }
4891 :
4892 : static int
4893 6 : nvme_ctrlr_read_ana_log_page(struct nvme_ctrlr *nvme_ctrlr)
4894 : {
4895 : uint32_t ana_log_page_size;
4896 : int rc;
4897 :
4898 6 : if (nvme_ctrlr->ana_log_page == NULL) {
4899 0 : return -EINVAL;
4900 : }
4901 :
4902 6 : ana_log_page_size = nvme_ctrlr_get_ana_log_page_size(nvme_ctrlr);
4903 :
4904 6 : if (ana_log_page_size > nvme_ctrlr->max_ana_log_page_size) {
4905 0 : SPDK_ERRLOG("ANA log page size %" PRIu32 " is larger than allowed %" PRIu32 "\n",
4906 : ana_log_page_size, nvme_ctrlr->max_ana_log_page_size);
4907 0 : return -EINVAL;
4908 : }
4909 :
4910 6 : pthread_mutex_lock(&nvme_ctrlr->mutex);
4911 6 : if (!nvme_ctrlr_is_available(nvme_ctrlr) ||
4912 : nvme_ctrlr->ana_log_page_updating) {
4913 3 : pthread_mutex_unlock(&nvme_ctrlr->mutex);
4914 3 : return -EBUSY;
4915 : }
4916 :
4917 3 : nvme_ctrlr->ana_log_page_updating = true;
4918 3 : pthread_mutex_unlock(&nvme_ctrlr->mutex);
4919 :
4920 3 : rc = spdk_nvme_ctrlr_cmd_get_log_page(nvme_ctrlr->ctrlr,
4921 : SPDK_NVME_LOG_ASYMMETRIC_NAMESPACE_ACCESS,
4922 : SPDK_NVME_GLOBAL_NS_TAG,
4923 3 : nvme_ctrlr->ana_log_page,
4924 : ana_log_page_size, 0,
4925 : nvme_ctrlr_read_ana_log_page_done,
4926 : nvme_ctrlr);
4927 3 : if (rc != 0) {
4928 0 : nvme_ctrlr_read_ana_log_page_done(nvme_ctrlr, NULL);
4929 : }
4930 :
4931 3 : return rc;
4932 : }
4933 :
4934 : static void
4935 0 : dummy_bdev_event_cb(enum spdk_bdev_event_type type, struct spdk_bdev *bdev, void *ctx)
4936 : {
4937 0 : }
4938 :
4939 : struct bdev_nvme_set_preferred_path_ctx {
4940 : struct spdk_bdev_desc *desc;
4941 : struct nvme_ns *nvme_ns;
4942 : bdev_nvme_set_preferred_path_cb cb_fn;
4943 : void *cb_arg;
4944 : };
4945 :
4946 : static void
4947 3 : bdev_nvme_set_preferred_path_done(struct spdk_io_channel_iter *i, int status)
4948 : {
4949 3 : struct bdev_nvme_set_preferred_path_ctx *ctx = spdk_io_channel_iter_get_ctx(i);
4950 :
4951 3 : assert(ctx != NULL);
4952 3 : assert(ctx->desc != NULL);
4953 3 : assert(ctx->cb_fn != NULL);
4954 :
4955 3 : spdk_bdev_close(ctx->desc);
4956 :
4957 3 : ctx->cb_fn(ctx->cb_arg, status);
4958 :
4959 3 : free(ctx);
4960 3 : }
4961 :
4962 : static void
4963 2 : _bdev_nvme_set_preferred_path(struct spdk_io_channel_iter *i)
4964 : {
4965 2 : struct bdev_nvme_set_preferred_path_ctx *ctx = spdk_io_channel_iter_get_ctx(i);
4966 2 : struct spdk_io_channel *_ch = spdk_io_channel_iter_get_channel(i);
4967 2 : struct nvme_bdev_channel *nbdev_ch = spdk_io_channel_get_ctx(_ch);
4968 : struct nvme_io_path *io_path, *prev;
4969 :
4970 2 : prev = NULL;
4971 3 : STAILQ_FOREACH(io_path, &nbdev_ch->io_path_list, stailq) {
4972 3 : if (io_path->nvme_ns == ctx->nvme_ns) {
4973 2 : break;
4974 : }
4975 1 : prev = io_path;
4976 : }
4977 :
4978 2 : if (io_path != NULL) {
4979 2 : if (prev != NULL) {
4980 1 : STAILQ_REMOVE_AFTER(&nbdev_ch->io_path_list, prev, stailq);
4981 1 : STAILQ_INSERT_HEAD(&nbdev_ch->io_path_list, io_path, stailq);
4982 : }
4983 :
4984 : /* We could set io_path to nbdev_ch->current_io_path directly here,
4985 : * but that would require extra conditions. To simplify the code,
4986 : * just clear nbdev_ch->current_io_path and let find_io_path()
4987 : * fill it.
4988 : *
4989 : * Automatic failback may be disabled. Hence, even if the io_path is
4990 : * already at the head, clear nbdev_ch->current_io_path.
4991 : */
4992 2 : bdev_nvme_clear_current_io_path(nbdev_ch);
4993 : }
4994 :
4995 2 : spdk_for_each_channel_continue(i, 0);
4996 2 : }
4997 :
4998 : static struct nvme_ns *
4999 3 : bdev_nvme_set_preferred_ns(struct nvme_bdev *nbdev, uint16_t cntlid)
5000 : {
5001 : struct nvme_ns *nvme_ns, *prev;
5002 : const struct spdk_nvme_ctrlr_data *cdata;
5003 :
5004 3 : prev = NULL;
5005 6 : TAILQ_FOREACH(nvme_ns, &nbdev->nvme_ns_list, tailq) {
5006 6 : cdata = spdk_nvme_ctrlr_get_data(nvme_ns->ctrlr->ctrlr);
5007 :
5008 6 : if (cdata->cntlid == cntlid) {
5009 3 : break;
5010 : }
5011 3 : prev = nvme_ns;
5012 : }
5013 :
5014 3 : if (nvme_ns != NULL && prev != NULL) {
5015 2 : TAILQ_REMOVE(&nbdev->nvme_ns_list, nvme_ns, tailq);
5016 2 : TAILQ_INSERT_HEAD(&nbdev->nvme_ns_list, nvme_ns, tailq);
5017 : }
5018 :
5019 3 : return nvme_ns;
5020 : }
5021 :
5022 : /* This function supports only multipath mode. There is only a single I/O path
5023 : * for each NVMe-oF controller. Hence, just move the matched I/O path to the
5024 : * head of the I/O path list for each NVMe bdev channel.
5025 : *
5026 : * An NVMe bdev channel may be acquired after this function completes. Hence,
5027 : * move the matched namespace to the head of the namespace list for the NVMe
5028 : * bdev too.
5029 : */
5029 : void
5030 3 : bdev_nvme_set_preferred_path(const char *name, uint16_t cntlid,
5031 : bdev_nvme_set_preferred_path_cb cb_fn, void *cb_arg)
5032 : {
5033 : struct bdev_nvme_set_preferred_path_ctx *ctx;
5034 : struct spdk_bdev *bdev;
5035 : struct nvme_bdev *nbdev;
5036 3 : int rc = 0;
5037 :
5038 3 : assert(cb_fn != NULL);
5039 :
5040 3 : ctx = calloc(1, sizeof(*ctx));
5041 3 : if (ctx == NULL) {
5042 0 : SPDK_ERRLOG("Failed to alloc context.\n");
5043 0 : rc = -ENOMEM;
5044 0 : goto err_alloc;
5045 : }
5046 :
5047 3 : ctx->cb_fn = cb_fn;
5048 3 : ctx->cb_arg = cb_arg;
5049 :
5050 3 : rc = spdk_bdev_open_ext(name, false, dummy_bdev_event_cb, NULL, &ctx->desc);
5051 3 : if (rc != 0) {
5052 0 : SPDK_ERRLOG("Failed to open bdev %s.\n", name);
5053 0 : goto err_open;
5054 : }
5055 :
5056 3 : bdev = spdk_bdev_desc_get_bdev(ctx->desc);
5057 :
5058 3 : if (bdev->module != &nvme_if) {
5059 0 : SPDK_ERRLOG("bdev %s is not registered in this module.\n", name);
5060 0 : rc = -ENODEV;
5061 0 : goto err_bdev;
5062 : }
5063 :
5064 3 : nbdev = SPDK_CONTAINEROF(bdev, struct nvme_bdev, disk);
5065 :
5066 3 : pthread_mutex_lock(&nbdev->mutex);
5067 :
5068 3 : ctx->nvme_ns = bdev_nvme_set_preferred_ns(nbdev, cntlid);
5069 3 : if (ctx->nvme_ns == NULL) {
5070 0 : pthread_mutex_unlock(&nbdev->mutex);
5071 :
5072 0 : SPDK_ERRLOG("bdev %s does not have namespace to controller %u.\n", name, cntlid);
5073 0 : rc = -ENODEV;
5074 0 : goto err_bdev;
5075 : }
5076 :
5077 3 : pthread_mutex_unlock(&nbdev->mutex);
5078 :
5079 3 : spdk_for_each_channel(nbdev,
5080 : _bdev_nvme_set_preferred_path,
5081 : ctx,
5082 : bdev_nvme_set_preferred_path_done);
5083 3 : return;
5084 :
5085 0 : err_bdev:
5086 0 : spdk_bdev_close(ctx->desc);
5087 0 : err_open:
5088 0 : free(ctx);
5089 0 : err_alloc:
5090 0 : cb_fn(cb_arg, rc);
5091 : }
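A minimal caller sketch (the bdev name and cntlid are sample values; in practice this is driven by the bdev_nvme_set_preferred_path RPC):

static void
preferred_path_done(void *cb_arg, int rc)
{
	if (rc != 0) {
		SPDK_ERRLOG("Failed to set preferred path: %d\n", rc);
	}
}

/* Prefer the I/O path through the controller whose CNTLID is 1. */
bdev_nvme_set_preferred_path("Nvme0n1", 1, preferred_path_done, NULL);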
5092 :
5093 : struct bdev_nvme_set_multipath_policy_ctx {
5094 : struct spdk_bdev_desc *desc;
5095 : bdev_nvme_set_multipath_policy_cb cb_fn;
5096 : void *cb_arg;
5097 : };
5098 :
5099 : static void
5100 3 : bdev_nvme_set_multipath_policy_done(struct spdk_io_channel_iter *i, int status)
5101 : {
5102 3 : struct bdev_nvme_set_multipath_policy_ctx *ctx = spdk_io_channel_iter_get_ctx(i);
5103 :
5104 3 : assert(ctx != NULL);
5105 3 : assert(ctx->desc != NULL);
5106 3 : assert(ctx->cb_fn != NULL);
5107 :
5108 3 : spdk_bdev_close(ctx->desc);
5109 :
5110 3 : ctx->cb_fn(ctx->cb_arg, status);
5111 :
5112 3 : free(ctx);
5113 3 : }
5114 :
5115 : static void
5116 1 : _bdev_nvme_set_multipath_policy(struct spdk_io_channel_iter *i)
5117 : {
5118 1 : struct spdk_io_channel *_ch = spdk_io_channel_iter_get_channel(i);
5119 1 : struct nvme_bdev_channel *nbdev_ch = spdk_io_channel_get_ctx(_ch);
5120 1 : struct nvme_bdev *nbdev = spdk_io_channel_get_io_device(_ch);
5121 :
5122 1 : nbdev_ch->mp_policy = nbdev->mp_policy;
5123 1 : nbdev_ch->mp_selector = nbdev->mp_selector;
5124 1 : nbdev_ch->rr_min_io = nbdev->rr_min_io;
5125 1 : bdev_nvme_clear_current_io_path(nbdev_ch);
5126 :
5127 1 : spdk_for_each_channel_continue(i, 0);
5128 1 : }
5129 :
5130 : void
5131 3 : bdev_nvme_set_multipath_policy(const char *name, enum bdev_nvme_multipath_policy policy,
5132 : enum bdev_nvme_multipath_selector selector, uint32_t rr_min_io,
5133 : bdev_nvme_set_multipath_policy_cb cb_fn, void *cb_arg)
5134 : {
5135 : struct bdev_nvme_set_multipath_policy_ctx *ctx;
5136 : struct spdk_bdev *bdev;
5137 : struct nvme_bdev *nbdev;
5138 : int rc;
5139 :
5140 3 : assert(cb_fn != NULL);
5141 :
5142 3 : switch (policy) {
5143 1 : case BDEV_NVME_MP_POLICY_ACTIVE_PASSIVE:
5144 1 : break;
5145 2 : case BDEV_NVME_MP_POLICY_ACTIVE_ACTIVE:
5146 : switch (selector) {
5147 1 : case BDEV_NVME_MP_SELECTOR_ROUND_ROBIN:
5148 1 : if (rr_min_io == UINT32_MAX) {
5149 0 : rr_min_io = 1;
5150 1 : } else if (rr_min_io == 0) {
5151 0 : rc = -EINVAL;
5152 0 : goto exit;
5153 : }
5154 1 : break;
5155 1 : case BDEV_NVME_MP_SELECTOR_QUEUE_DEPTH:
5156 1 : break;
5157 0 : default:
5158 0 : rc = -EINVAL;
5159 0 : goto exit;
5160 : }
5161 2 : break;
5162 0 : default:
5163 0 : rc = -EINVAL;
5164 0 : goto exit;
5165 : }
5166 :
5167 3 : ctx = calloc(1, sizeof(*ctx));
5168 3 : if (ctx == NULL) {
5169 0 : SPDK_ERRLOG("Failed to alloc context.\n");
5170 0 : rc = -ENOMEM;
5171 0 : goto exit;
5172 : }
5173 :
5174 3 : ctx->cb_fn = cb_fn;
5175 3 : ctx->cb_arg = cb_arg;
5176 :
5177 3 : rc = spdk_bdev_open_ext(name, false, dummy_bdev_event_cb, NULL, &ctx->desc);
5178 3 : if (rc != 0) {
5179 0 : SPDK_ERRLOG("Failed to open bdev %s.\n", name);
5180 0 : rc = -ENODEV;
5181 0 : goto err_open;
5182 : }
5183 :
5184 3 : bdev = spdk_bdev_desc_get_bdev(ctx->desc);
5185 3 : if (bdev->module != &nvme_if) {
5186 0 : SPDK_ERRLOG("bdev %s is not registered in this module.\n", name);
5187 0 : rc = -ENODEV;
5188 0 : goto err_module;
5189 : }
5190 3 : nbdev = SPDK_CONTAINEROF(bdev, struct nvme_bdev, disk);
5191 :
5192 3 : pthread_mutex_lock(&nbdev->mutex);
5193 3 : nbdev->mp_policy = policy;
5194 3 : nbdev->mp_selector = selector;
5195 3 : nbdev->rr_min_io = rr_min_io;
5196 3 : pthread_mutex_unlock(&nbdev->mutex);
5197 :
5198 3 : spdk_for_each_channel(nbdev,
5199 : _bdev_nvme_set_multipath_policy,
5200 : ctx,
5201 : bdev_nvme_set_multipath_policy_done);
5202 3 : return;
5203 :
5204 0 : err_module:
5205 0 : spdk_bdev_close(ctx->desc);
5206 0 : err_open:
5207 0 : free(ctx);
5208 0 : exit:
5209 0 : cb_fn(cb_arg, rc);
5210 : }
5211 :
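/* A minimal usage sketch (hypothetical caller; the function and bdev names are
 * illustrative, not part of this module): switch bdev "Nvme0n1" to
 * active/active with the round-robin selector, moving to the next I/O path
 * after every 8 I/Os. Note that passing rr_min_io == UINT32_MAX selects the
 * default of 1.
 */
static void
example_set_policy_done(void *cb_arg, int rc)
{
	if (rc != 0) {
		SPDK_ERRLOG("Setting multipath policy failed: %d\n", rc);
	}
}

static void
example_set_policy(void)
{
	bdev_nvme_set_multipath_policy("Nvme0n1", BDEV_NVME_MP_POLICY_ACTIVE_ACTIVE,
				       BDEV_NVME_MP_SELECTOR_ROUND_ROBIN, 8,
				       example_set_policy_done, NULL);
}
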
5212 : static void
5213 3 : aer_cb(void *arg, const struct spdk_nvme_cpl *cpl)
5214 : {
5215 3 : struct nvme_ctrlr *nvme_ctrlr = arg;
5216 : union spdk_nvme_async_event_completion event;
5217 :
5218 3 : if (spdk_nvme_cpl_is_error(cpl)) {
5219 0 : SPDK_WARNLOG("AER request execute failed\n");
5220 0 : return;
5221 : }
5222 :
5223 3 : event.raw = cpl->cdw0;
5224 3 : if ((event.bits.async_event_type == SPDK_NVME_ASYNC_EVENT_TYPE_NOTICE) &&
5225 3 : (event.bits.async_event_info == SPDK_NVME_ASYNC_EVENT_NS_ATTR_CHANGED)) {
5226 2 : nvme_ctrlr_populate_namespaces(nvme_ctrlr, NULL);
5227 1 : } else if ((event.bits.async_event_type == SPDK_NVME_ASYNC_EVENT_TYPE_NOTICE) &&
5228 1 : (event.bits.async_event_info == SPDK_NVME_ASYNC_EVENT_ANA_CHANGE)) {
5229 1 : nvme_ctrlr_read_ana_log_page(nvme_ctrlr);
5230 : }
5231 : }
5232 :
5233 : static void
5234 51 : free_nvme_async_probe_ctx(struct nvme_async_probe_ctx *ctx)
5235 : {
5236 51 : spdk_keyring_put_key(ctx->drv_opts.tls_psk);
5237 51 : spdk_keyring_put_key(ctx->drv_opts.dhchap_key);
5238 51 : spdk_keyring_put_key(ctx->drv_opts.dhchap_ctrlr_key);
5239 51 : free(ctx);
5240 51 : }
5241 :
5242 : static void
5243 51 : populate_namespaces_cb(struct nvme_async_probe_ctx *ctx, int rc)
5244 : {
5245 51 : if (ctx->cb_fn) {
5246 51 : ctx->cb_fn(ctx->cb_ctx, ctx->reported_bdevs, rc);
5247 : }
5248 :
5249 51 : ctx->namespaces_populated = true;
5250 51 : if (ctx->probe_done) {
5251 : /* The probe was already completed, so we need to free the context
5252 : * here. This can happen for cases like OCSSD, where we need to
5253 : * send additional commands to the SSD after attach.
5254 : */
5255 31 : free_nvme_async_probe_ctx(ctx);
5256 : }
5257 51 : }
5258 :
5259 : static void
5260 59 : nvme_ctrlr_create_done(struct nvme_ctrlr *nvme_ctrlr,
5261 : struct nvme_async_probe_ctx *ctx)
5262 : {
5263 59 : spdk_io_device_register(nvme_ctrlr,
5264 : bdev_nvme_create_ctrlr_channel_cb,
5265 : bdev_nvme_destroy_ctrlr_channel_cb,
5266 : sizeof(struct nvme_ctrlr_channel),
5267 59 : nvme_ctrlr->nbdev_ctrlr->name);
5268 :
5269 59 : nvme_ctrlr_populate_namespaces(nvme_ctrlr, ctx);
5270 59 : }
5271 :
5272 : static void
5273 30 : nvme_ctrlr_init_ana_log_page_done(void *_ctx, const struct spdk_nvme_cpl *cpl)
5274 : {
5275 30 : struct nvme_ctrlr *nvme_ctrlr = _ctx;
5276 30 : struct nvme_async_probe_ctx *ctx = nvme_ctrlr->probe_ctx;
5277 :
5278 30 : nvme_ctrlr->probe_ctx = NULL;
5279 :
5280 30 : if (spdk_nvme_cpl_is_error(cpl)) {
5281 0 : nvme_ctrlr_delete(nvme_ctrlr);
5282 :
5283 0 : if (ctx != NULL) {
5284 0 : ctx->reported_bdevs = 0;
5285 0 : populate_namespaces_cb(ctx, -1);
5286 : }
5287 0 : return;
5288 : }
5289 :
5290 30 : nvme_ctrlr_create_done(nvme_ctrlr, ctx);
5291 : }
5292 :
5293 : static int
5294 30 : nvme_ctrlr_init_ana_log_page(struct nvme_ctrlr *nvme_ctrlr,
5295 : struct nvme_async_probe_ctx *ctx)
5296 : {
5297 30 : struct spdk_nvme_ctrlr *ctrlr = nvme_ctrlr->ctrlr;
5298 : const struct spdk_nvme_ctrlr_data *cdata;
5299 : uint32_t ana_log_page_size;
5300 :
5301 30 : cdata = spdk_nvme_ctrlr_get_data(ctrlr);
5302 :
5303 : /* Set the buffer size large enough to include the maximum number of allowed namespaces. */
5304 30 : ana_log_page_size = sizeof(struct spdk_nvme_ana_page) + cdata->nanagrpid *
5305 30 : sizeof(struct spdk_nvme_ana_group_descriptor) + cdata->mnan *
5306 : sizeof(uint32_t);
5307 :
5308 30 : nvme_ctrlr->ana_log_page = spdk_zmalloc(ana_log_page_size, 64, NULL,
5309 : SPDK_ENV_SOCKET_ID_ANY, SPDK_MALLOC_DMA);
5310 30 : if (nvme_ctrlr->ana_log_page == NULL) {
5311 0 : SPDK_ERRLOG("could not allocate ANA log page buffer\n");
5312 0 : return -ENXIO;
5313 : }
5314 :
5315 : /* Each descriptor in an ANA log page is not ensured to be 8-byte aligned.
5316 : * Hence copy each descriptor to a temporary area when parsing it.
5317 : *
5318 : * Allocate a buffer whose size is as large as ANA log page buffer because
5319 : * we do not know the size of a descriptor until actually reading it.
5320 : */
5321 30 : nvme_ctrlr->copied_ana_desc = calloc(1, ana_log_page_size);
5322 30 : if (nvme_ctrlr->copied_ana_desc == NULL) {
5323 0 : SPDK_ERRLOG("could not allocate a buffer to parse ANA descriptor\n");
5324 0 : return -ENOMEM;
5325 : }
5326 :
5327 30 : nvme_ctrlr->max_ana_log_page_size = ana_log_page_size;
5328 :
5329 30 : nvme_ctrlr->probe_ctx = ctx;
5330 :
5331 : /* Then, set the read size to include only the currently active namespaces. */
5332 30 : ana_log_page_size = nvme_ctrlr_get_ana_log_page_size(nvme_ctrlr);
5333 :
5334 30 : if (ana_log_page_size > nvme_ctrlr->max_ana_log_page_size) {
5335 0 : SPDK_ERRLOG("ANA log page size %" PRIu32 " is larger than allowed %" PRIu32 "\n",
5336 : ana_log_page_size, nvme_ctrlr->max_ana_log_page_size);
5337 0 : return -EINVAL;
5338 : }
5339 :
5340 30 : return spdk_nvme_ctrlr_cmd_get_log_page(ctrlr,
5341 : SPDK_NVME_LOG_ASYMMETRIC_NAMESPACE_ACCESS,
5342 : SPDK_NVME_GLOBAL_NS_TAG,
5343 30 : nvme_ctrlr->ana_log_page,
5344 : ana_log_page_size, 0,
5345 : nvme_ctrlr_init_ana_log_page_done,
5346 : nvme_ctrlr);
5347 : }
5348 :
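/* A worked example of the buffer sizing above (hypothetical controller data;
 * struct sizes follow the NVMe spec layout: 16-byte log page header, 32-byte
 * ANA group descriptor, 4-byte NSID entries). With nanagrpid == 32 and
 * mnan == 1024:
 *
 *   ana_log_page_size = 16 + 32 * 32 + 1024 * 4 = 5136 bytes
 *
 * i.e. the buffer covers the worst case where every allowed namespace appears
 * under some ANA group. The actual read is then shrunk to cover only the
 * currently active namespaces via nvme_ctrlr_get_ana_log_page_size().
 */
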
5349 : /* hostnqn and subnqn were already verified before attaching a controller.
5350 : * Hence check only the multipath capability and cntlid here.
5351 : */
5352 : static bool
5353 16 : bdev_nvme_check_multipath(struct nvme_bdev_ctrlr *nbdev_ctrlr, struct spdk_nvme_ctrlr *ctrlr)
5354 : {
5355 : struct nvme_ctrlr *tmp;
5356 : const struct spdk_nvme_ctrlr_data *cdata, *tmp_cdata;
5357 :
5358 16 : cdata = spdk_nvme_ctrlr_get_data(ctrlr);
5359 :
5360 16 : if (!cdata->cmic.multi_ctrlr) {
5361 0 : SPDK_ERRLOG("Ctrlr%u does not support multipath.\n", cdata->cntlid);
5362 0 : return false;
5363 : }
5364 :
5365 33 : TAILQ_FOREACH(tmp, &nbdev_ctrlr->ctrlrs, tailq) {
5366 18 : tmp_cdata = spdk_nvme_ctrlr_get_data(tmp->ctrlr);
5367 :
5368 18 : if (!tmp_cdata->cmic.multi_ctrlr) {
5369 0 : SPDK_ERRLOG("Ctrlr%u does not support multipath.\n", cdata->cntlid);
5370 0 : return false;
5371 : }
5372 18 : if (cdata->cntlid == tmp_cdata->cntlid) {
5373 1 : SPDK_ERRLOG("cntlid %u are duplicated.\n", tmp_cdata->cntlid);
5374 1 : return false;
5375 : }
5376 : }
5377 :
5378 15 : return true;
5379 : }
5380 :
5381 : static int
5382 60 : nvme_bdev_ctrlr_create(const char *name, struct nvme_ctrlr *nvme_ctrlr)
5383 : {
5384 : struct nvme_bdev_ctrlr *nbdev_ctrlr;
5385 60 : struct spdk_nvme_ctrlr *ctrlr = nvme_ctrlr->ctrlr;
5386 60 : int rc = 0;
5387 :
5388 60 : pthread_mutex_lock(&g_bdev_nvme_mutex);
5389 :
5390 60 : nbdev_ctrlr = nvme_bdev_ctrlr_get_by_name(name);
5391 60 : if (nbdev_ctrlr != NULL) {
5392 16 : if (!bdev_nvme_check_multipath(nbdev_ctrlr, ctrlr)) {
5393 1 : rc = -EINVAL;
5394 1 : goto exit;
5395 : }
5396 : } else {
5397 44 : nbdev_ctrlr = calloc(1, sizeof(*nbdev_ctrlr));
5398 44 : if (nbdev_ctrlr == NULL) {
5399 0 : SPDK_ERRLOG("Failed to allocate nvme_bdev_ctrlr.\n");
5400 0 : rc = -ENOMEM;
5401 0 : goto exit;
5402 : }
5403 44 : nbdev_ctrlr->name = strdup(name);
5404 44 : if (nbdev_ctrlr->name == NULL) {
5405 0 : SPDK_ERRLOG("Failed to allocate name of nvme_bdev_ctrlr.\n");
5406 0 : free(nbdev_ctrlr);
5407 0 : goto exit;
5408 : }
5409 44 : TAILQ_INIT(&nbdev_ctrlr->ctrlrs);
5410 44 : TAILQ_INIT(&nbdev_ctrlr->bdevs);
5411 44 : TAILQ_INSERT_TAIL(&g_nvme_bdev_ctrlrs, nbdev_ctrlr, tailq);
5412 : }
5413 59 : nvme_ctrlr->nbdev_ctrlr = nbdev_ctrlr;
5414 59 : TAILQ_INSERT_TAIL(&nbdev_ctrlr->ctrlrs, nvme_ctrlr, tailq);
5415 60 : exit:
5416 60 : pthread_mutex_unlock(&g_bdev_nvme_mutex);
5417 60 : return rc;
5418 : }
5419 :
5420 : static int
5421 60 : nvme_ctrlr_create(struct spdk_nvme_ctrlr *ctrlr,
5422 : const char *name,
5423 : const struct spdk_nvme_transport_id *trid,
5424 : struct nvme_async_probe_ctx *ctx)
5425 : {
5426 : struct nvme_ctrlr *nvme_ctrlr;
5427 : struct nvme_path_id *path_id;
5428 : const struct spdk_nvme_ctrlr_data *cdata;
5429 : int rc;
5430 :
5431 60 : nvme_ctrlr = calloc(1, sizeof(*nvme_ctrlr));
5432 60 : if (nvme_ctrlr == NULL) {
5433 0 : SPDK_ERRLOG("Failed to allocate device struct\n");
5434 0 : return -ENOMEM;
5435 : }
5436 :
5437 60 : rc = pthread_mutex_init(&nvme_ctrlr->mutex, NULL);
5438 60 : if (rc != 0) {
5439 0 : free(nvme_ctrlr);
5440 0 : return rc;
5441 : }
5442 :
5443 60 : TAILQ_INIT(&nvme_ctrlr->trids);
5444 60 : RB_INIT(&nvme_ctrlr->namespaces);
5445 :
5446 : /* Get another reference to the key, so the first one can be released from probe_ctx */
5447 60 : if (ctx != NULL) {
5448 46 : if (ctx->drv_opts.tls_psk != NULL) {
5449 0 : nvme_ctrlr->psk = spdk_keyring_get_key(
5450 : spdk_key_get_name(ctx->drv_opts.tls_psk));
5451 0 : if (nvme_ctrlr->psk == NULL) {
5452 : /* Could only happen if the key was removed in the meantime */
5453 0 : SPDK_ERRLOG("Couldn't get a reference to the key '%s'\n",
5454 : spdk_key_get_name(ctx->drv_opts.tls_psk));
5455 0 : rc = -ENOKEY;
5456 0 : goto err;
5457 : }
5458 : }
5459 :
5460 46 : if (ctx->drv_opts.dhchap_key != NULL) {
5461 0 : nvme_ctrlr->dhchap_key = spdk_keyring_get_key(
5462 : spdk_key_get_name(ctx->drv_opts.dhchap_key));
5463 0 : if (nvme_ctrlr->dhchap_key == NULL) {
5464 0 : SPDK_ERRLOG("Couldn't get a reference to the key '%s'\n",
5465 : spdk_key_get_name(ctx->drv_opts.dhchap_key));
5466 0 : rc = -ENOKEY;
5467 0 : goto err;
5468 : }
5469 : }
5470 :
5471 46 : if (ctx->drv_opts.dhchap_ctrlr_key != NULL) {
5472 0 : nvme_ctrlr->dhchap_ctrlr_key =
5473 0 : spdk_keyring_get_key(
5474 : spdk_key_get_name(ctx->drv_opts.dhchap_ctrlr_key));
5475 0 : if (nvme_ctrlr->dhchap_ctrlr_key == NULL) {
5476 0 : SPDK_ERRLOG("Couldn't get a reference to the key '%s'\n",
5477 : spdk_key_get_name(ctx->drv_opts.dhchap_ctrlr_key));
5478 0 : rc = -ENOKEY;
5479 0 : goto err;
5480 : }
5481 : }
5482 : }
5483 :
5484 60 : path_id = calloc(1, sizeof(*path_id));
5485 60 : if (path_id == NULL) {
5486 0 : SPDK_ERRLOG("Failed to allocate trid entry pointer\n");
5487 0 : rc = -ENOMEM;
5488 0 : goto err;
5489 : }
5490 :
5491 60 : path_id->trid = *trid;
5492 60 : if (ctx != NULL) {
5493 46 : memcpy(path_id->hostid.hostaddr, ctx->drv_opts.src_addr, sizeof(path_id->hostid.hostaddr));
5494 46 : memcpy(path_id->hostid.hostsvcid, ctx->drv_opts.src_svcid, sizeof(path_id->hostid.hostsvcid));
5495 : }
5496 60 : nvme_ctrlr->active_path_id = path_id;
5497 60 : TAILQ_INSERT_HEAD(&nvme_ctrlr->trids, path_id, link);
5498 :
5499 60 : nvme_ctrlr->thread = spdk_get_thread();
5500 60 : nvme_ctrlr->ctrlr = ctrlr;
5501 60 : nvme_ctrlr->ref = 1;
5502 :
5503 60 : if (spdk_nvme_ctrlr_is_ocssd_supported(ctrlr)) {
5504 0 : SPDK_ERRLOG("OCSSDs are not supported");
5505 0 : rc = -ENOTSUP;
5506 0 : goto err;
5507 : }
5508 :
5509 60 : if (ctx != NULL) {
5510 46 : memcpy(&nvme_ctrlr->opts, &ctx->bdev_opts, sizeof(ctx->bdev_opts));
5511 : } else {
5512 14 : bdev_nvme_get_default_ctrlr_opts(&nvme_ctrlr->opts);
5513 : }
5514 :
5515 60 : nvme_ctrlr->adminq_timer_poller = SPDK_POLLER_REGISTER(bdev_nvme_poll_adminq, nvme_ctrlr,
5516 : g_opts.nvme_adminq_poll_period_us);
5517 :
5518 60 : if (g_opts.timeout_us > 0) {
5519 : /* Register timeout callback. Timeout values for IO vs. admin reqs can be different. */
5520 : /* If timeout_admin_us is 0 (not specified), admin uses same timeout as IO. */
5521 0 : uint64_t adm_timeout_us = (g_opts.timeout_admin_us == 0) ?
5522 0 : g_opts.timeout_us : g_opts.timeout_admin_us;
5523 0 : spdk_nvme_ctrlr_register_timeout_callback(ctrlr, g_opts.timeout_us,
5524 : adm_timeout_us, timeout_cb, nvme_ctrlr);
5525 : }
5526 :
5527 60 : spdk_nvme_ctrlr_register_aer_callback(ctrlr, aer_cb, nvme_ctrlr);
5528 60 : spdk_nvme_ctrlr_set_remove_cb(ctrlr, remove_cb, nvme_ctrlr);
5529 :
5530 60 : if (spdk_nvme_ctrlr_get_flags(ctrlr) &
5531 : SPDK_NVME_CTRLR_SECURITY_SEND_RECV_SUPPORTED) {
5532 0 : nvme_ctrlr->opal_dev = spdk_opal_dev_construct(ctrlr);
5533 : }
5534 :
5535 60 : rc = nvme_bdev_ctrlr_create(name, nvme_ctrlr);
5536 60 : if (rc != 0) {
5537 1 : goto err;
5538 : }
5539 :
5540 59 : cdata = spdk_nvme_ctrlr_get_data(ctrlr);
5541 :
5542 59 : if (cdata->cmic.ana_reporting) {
5543 30 : rc = nvme_ctrlr_init_ana_log_page(nvme_ctrlr, ctx);
5544 30 : if (rc == 0) {
5545 30 : return 0;
5546 : }
5547 : } else {
5548 29 : nvme_ctrlr_create_done(nvme_ctrlr, ctx);
5549 29 : return 0;
5550 : }
5551 :
5552 1 : err:
5553 1 : nvme_ctrlr_delete(nvme_ctrlr);
5554 1 : return rc;
5555 : }
5556 :
5557 : void
5558 56 : bdev_nvme_get_default_ctrlr_opts(struct nvme_ctrlr_opts *opts)
5559 : {
5560 56 : opts->prchk_flags = 0;
5561 56 : opts->ctrlr_loss_timeout_sec = g_opts.ctrlr_loss_timeout_sec;
5562 56 : opts->reconnect_delay_sec = g_opts.reconnect_delay_sec;
5563 56 : opts->fast_io_fail_timeout_sec = g_opts.fast_io_fail_timeout_sec;
5564 56 : }
5565 :
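/* A minimal sketch (hypothetical helper) of the intended pattern: start from
 * the defaults above and override individual fields before passing the struct
 * to bdev_nvme_create().
 */
static void
example_fill_ctrlr_opts(struct nvme_ctrlr_opts *opts)
{
	bdev_nvme_get_default_ctrlr_opts(opts);

	/* Retry for up to 60 seconds, reconnecting every 5 seconds and failing
	 * pending I/O fast after 30 seconds. These values satisfy
	 * bdev_nvme_check_io_error_resiliency_params().
	 */
	opts->ctrlr_loss_timeout_sec = 60;
	opts->reconnect_delay_sec = 5;
	opts->fast_io_fail_timeout_sec = 30;
}
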
5566 : static void
5567 0 : attach_cb(void *cb_ctx, const struct spdk_nvme_transport_id *trid,
5568 : struct spdk_nvme_ctrlr *ctrlr, const struct spdk_nvme_ctrlr_opts *drv_opts)
5569 : {
5570 : char *name;
5571 :
5572 0 : name = spdk_sprintf_alloc("HotInNvme%d", g_hot_insert_nvme_controller_index++);
5573 0 : if (!name) {
5574 0 : SPDK_ERRLOG("Failed to assign name to NVMe device\n");
5575 0 : return;
5576 : }
5577 :
5578 0 : if (nvme_ctrlr_create(ctrlr, name, trid, NULL) == 0) {
5579 0 : SPDK_DEBUGLOG(bdev_nvme, "Attached to %s (%s)\n", trid->traddr, name);
5580 : } else {
5581 0 : SPDK_ERRLOG("Failed to attach to %s (%s)\n", trid->traddr, name);
5582 : }
5583 :
5584 0 : free(name);
5585 : }
5586 :
5587 : static void
5588 59 : _nvme_ctrlr_destruct(void *ctx)
5589 : {
5590 59 : struct nvme_ctrlr *nvme_ctrlr = ctx;
5591 :
5592 59 : nvme_ctrlr_depopulate_namespaces(nvme_ctrlr);
5593 59 : nvme_ctrlr_release(nvme_ctrlr);
5594 59 : }
5595 :
5596 : static int
5597 56 : bdev_nvme_delete_ctrlr_unsafe(struct nvme_ctrlr *nvme_ctrlr, bool hotplug)
5598 : {
5599 : struct nvme_probe_skip_entry *entry;
5600 :
5601 : /* The controller's destruction was already started */
5602 56 : if (nvme_ctrlr->destruct) {
5603 0 : return -EALREADY;
5604 : }
5605 :
5606 56 : if (!hotplug &&
5607 56 : nvme_ctrlr->active_path_id->trid.trtype == SPDK_NVME_TRANSPORT_PCIE) {
5608 0 : entry = calloc(1, sizeof(*entry));
5609 0 : if (!entry) {
5610 0 : return -ENOMEM;
5611 : }
5612 0 : entry->trid = nvme_ctrlr->active_path_id->trid;
5613 0 : TAILQ_INSERT_TAIL(&g_skipped_nvme_ctrlrs, entry, tailq);
5614 : }
5615 :
5616 56 : nvme_ctrlr->destruct = true;
5617 56 : return 0;
5618 : }
5619 :
5620 : static int
5621 2 : bdev_nvme_delete_ctrlr(struct nvme_ctrlr *nvme_ctrlr, bool hotplug)
5622 : {
5623 : int rc;
5624 :
5625 2 : pthread_mutex_lock(&nvme_ctrlr->mutex);
5626 2 : rc = bdev_nvme_delete_ctrlr_unsafe(nvme_ctrlr, hotplug);
5627 2 : pthread_mutex_unlock(&nvme_ctrlr->mutex);
5628 :
5629 2 : if (rc == 0) {
5630 2 : _nvme_ctrlr_destruct(nvme_ctrlr);
5631 0 : } else if (rc == -EALREADY) {
5632 0 : rc = 0;
5633 : }
5634 :
5635 2 : return rc;
5636 : }
5637 :
5638 : static void
5639 0 : remove_cb(void *cb_ctx, struct spdk_nvme_ctrlr *ctrlr)
5640 : {
5641 0 : struct nvme_ctrlr *nvme_ctrlr = cb_ctx;
5642 :
5643 0 : bdev_nvme_delete_ctrlr(nvme_ctrlr, true);
5644 0 : }
5645 :
5646 : static int
5647 0 : bdev_nvme_hotplug_probe(void *arg)
5648 : {
5649 0 : if (g_hotplug_probe_ctx == NULL) {
5650 0 : spdk_poller_unregister(&g_hotplug_probe_poller);
5651 0 : return SPDK_POLLER_IDLE;
5652 : }
5653 :
5654 0 : if (spdk_nvme_probe_poll_async(g_hotplug_probe_ctx) != -EAGAIN) {
5655 0 : g_hotplug_probe_ctx = NULL;
5656 0 : spdk_poller_unregister(&g_hotplug_probe_poller);
5657 : }
5658 :
5659 0 : return SPDK_POLLER_BUSY;
5660 : }
5661 :
5662 : static int
5663 0 : bdev_nvme_hotplug(void *arg)
5664 : {
5665 0 : struct spdk_nvme_transport_id trid_pcie;
5666 :
5667 0 : if (g_hotplug_probe_ctx) {
5668 0 : return SPDK_POLLER_BUSY;
5669 : }
5670 :
5671 0 : memset(&trid_pcie, 0, sizeof(trid_pcie));
5672 0 : spdk_nvme_trid_populate_transport(&trid_pcie, SPDK_NVME_TRANSPORT_PCIE);
5673 :
5674 0 : g_hotplug_probe_ctx = spdk_nvme_probe_async(&trid_pcie, NULL,
5675 : hotplug_probe_cb, attach_cb, NULL);
5676 :
5677 0 : if (g_hotplug_probe_ctx) {
5678 0 : assert(g_hotplug_probe_poller == NULL);
5679 0 : g_hotplug_probe_poller = SPDK_POLLER_REGISTER(bdev_nvme_hotplug_probe, NULL, 1000);
5680 : }
5681 :
5682 0 : return SPDK_POLLER_BUSY;
5683 : }
5684 :
5685 : void
5686 0 : bdev_nvme_get_opts(struct spdk_bdev_nvme_opts *opts)
5687 : {
5688 0 : *opts = g_opts;
5689 0 : }
5690 :
5691 : static bool bdev_nvme_check_io_error_resiliency_params(int32_t ctrlr_loss_timeout_sec,
5692 : uint32_t reconnect_delay_sec,
5693 : uint32_t fast_io_fail_timeout_sec);
5694 :
5695 : static int
5696 0 : bdev_nvme_validate_opts(const struct spdk_bdev_nvme_opts *opts)
5697 : {
5698 0 : if ((opts->timeout_us == 0) && (opts->timeout_admin_us != 0)) {
5699 : /* Can't set timeout_admin_us without also setting timeout_us */
5700 0 : SPDK_WARNLOG("Invalid options: Can't have (timeout_us == 0) with (timeout_admin_us > 0)\n");
5701 0 : return -EINVAL;
5702 : }
5703 :
5704 0 : if (opts->bdev_retry_count < -1) {
5705 0 : SPDK_WARNLOG("Invalid option: bdev_retry_count can't be less than -1.\n");
5706 0 : return -EINVAL;
5707 : }
5708 :
5709 0 : if (!bdev_nvme_check_io_error_resiliency_params(opts->ctrlr_loss_timeout_sec,
5710 : opts->reconnect_delay_sec,
5711 : opts->fast_io_fail_timeout_sec)) {
5712 0 : return -EINVAL;
5713 : }
5714 :
5715 0 : return 0;
5716 : }
5717 :
5718 : int
5719 0 : bdev_nvme_set_opts(const struct spdk_bdev_nvme_opts *opts)
5720 : {
5721 : int ret;
5722 :
5723 0 : ret = bdev_nvme_validate_opts(opts);
5724 0 : if (ret) {
5725 0 : SPDK_WARNLOG("Failed to set nvme opts.\n");
5726 0 : return ret;
5727 : }
5728 :
5729 0 : if (g_bdev_nvme_init_thread != NULL) {
5730 0 : if (!TAILQ_EMPTY(&g_nvme_bdev_ctrlrs)) {
5731 0 : return -EPERM;
5732 : }
5733 : }
5734 :
5735 0 : if (opts->rdma_srq_size != 0 ||
5736 0 : opts->rdma_max_cq_size != 0 ||
5737 0 : opts->rdma_cm_event_timeout_ms != 0) {
5738 0 : struct spdk_nvme_transport_opts drv_opts;
5739 :
5740 0 : spdk_nvme_transport_get_opts(&drv_opts, sizeof(drv_opts));
5741 0 : if (opts->rdma_srq_size != 0) {
5742 0 : drv_opts.rdma_srq_size = opts->rdma_srq_size;
5743 : }
5744 0 : if (opts->rdma_max_cq_size != 0) {
5745 0 : drv_opts.rdma_max_cq_size = opts->rdma_max_cq_size;
5746 : }
5747 0 : if (opts->rdma_cm_event_timeout_ms != 0) {
5748 0 : drv_opts.rdma_cm_event_timeout_ms = opts->rdma_cm_event_timeout_ms;
5749 : }
5750 :
5751 0 : ret = spdk_nvme_transport_set_opts(&drv_opts, sizeof(drv_opts));
5752 0 : if (ret) {
5753 0 : SPDK_ERRLOG("Failed to set NVMe transport opts.\n");
5754 0 : return ret;
5755 : }
5756 : }
5757 :
5758 0 : g_opts = *opts;
5759 :
5760 0 : return 0;
5761 : }
5762 :
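/* A minimal sketch of tuning the module-wide options (illustrative values):
 * read the current options, adjust them, and write them back. This has to be
 * done before any controller is attached, otherwise -EPERM is returned above.
 */
static int
example_tune_opts(void)
{
	struct spdk_bdev_nvme_opts opts;

	bdev_nvme_get_opts(&opts);
	opts.timeout_us = 10 * 1000 * 1000;		/* 10s I/O timeout */
	opts.timeout_admin_us = 30 * 1000 * 1000;	/* 30s admin timeout */
	opts.transport_retry_count = 4;

	return bdev_nvme_set_opts(&opts);
}
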
5763 : struct set_nvme_hotplug_ctx {
5764 : uint64_t period_us;
5765 : bool enabled;
5766 : spdk_msg_fn fn;
5767 : void *fn_ctx;
5768 : };
5769 :
5770 : static void
5771 0 : set_nvme_hotplug_period_cb(void *_ctx)
5772 : {
5773 0 : struct set_nvme_hotplug_ctx *ctx = _ctx;
5774 :
5775 0 : spdk_poller_unregister(&g_hotplug_poller);
5776 0 : if (ctx->enabled) {
5777 0 : g_hotplug_poller = SPDK_POLLER_REGISTER(bdev_nvme_hotplug, NULL, ctx->period_us);
5778 : }
5779 :
5780 0 : g_nvme_hotplug_poll_period_us = ctx->period_us;
5781 0 : g_nvme_hotplug_enabled = ctx->enabled;
5782 0 : if (ctx->fn) {
5783 0 : ctx->fn(ctx->fn_ctx);
5784 : }
5785 :
5786 0 : free(ctx);
5787 0 : }
5788 :
5789 : int
5790 0 : bdev_nvme_set_hotplug(bool enabled, uint64_t period_us, spdk_msg_fn cb, void *cb_ctx)
5791 : {
5792 : struct set_nvme_hotplug_ctx *ctx;
5793 :
5794 0 : if (enabled == true && !spdk_process_is_primary()) {
5795 0 : return -EPERM;
5796 : }
5797 :
5798 0 : ctx = calloc(1, sizeof(*ctx));
5799 0 : if (ctx == NULL) {
5800 0 : return -ENOMEM;
5801 : }
5802 :
5803 0 : period_us = period_us == 0 ? NVME_HOTPLUG_POLL_PERIOD_DEFAULT : period_us;
5804 0 : ctx->period_us = spdk_min(period_us, NVME_HOTPLUG_POLL_PERIOD_MAX);
5805 0 : ctx->enabled = enabled;
5806 0 : ctx->fn = cb;
5807 0 : ctx->fn_ctx = cb_ctx;
5808 :
5809 0 : spdk_thread_send_msg(g_bdev_nvme_init_thread, set_nvme_hotplug_period_cb, ctx);
5810 0 : return 0;
5811 : }
5812 :
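/* A usage sketch (hypothetical callback): enable PCIe hotplug monitoring with
 * a 100ms poll period. The request is forwarded to the init thread, so the
 * callback fires asynchronously once the poller has been (re)registered.
 */
static void
example_hotplug_enabled(void *ctx)
{
	SPDK_NOTICELOG("NVMe hotplug monitoring is now active\n");
}

static int
example_enable_hotplug(void)
{
	return bdev_nvme_set_hotplug(true, 100 * 1000, example_hotplug_enabled, NULL);
}
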
5813 : static void
5814 45 : nvme_ctrlr_populate_namespaces_done(struct nvme_ctrlr *nvme_ctrlr,
5815 : struct nvme_async_probe_ctx *ctx)
5816 : {
5817 : struct nvme_ns *nvme_ns;
5818 : struct nvme_bdev *nvme_bdev;
5819 : size_t j;
5820 :
5821 45 : assert(nvme_ctrlr != NULL);
5822 :
5823 45 : if (ctx->names == NULL) {
5824 0 : ctx->reported_bdevs = 0;
5825 0 : populate_namespaces_cb(ctx, 0);
5826 0 : return;
5827 : }
5828 :
5829 : /*
5830 : * Report the new bdevs that were created in this call.
5831 : * There can be more than one bdev per NVMe controller.
5832 : */
5833 45 : j = 0;
5834 45 : nvme_ns = nvme_ctrlr_get_first_active_ns(nvme_ctrlr);
5835 92 : while (nvme_ns != NULL) {
5836 47 : nvme_bdev = nvme_ns->bdev;
5837 47 : if (j < ctx->max_bdevs) {
5838 47 : ctx->names[j] = nvme_bdev->disk.name;
5839 47 : j++;
5840 : } else {
5841 0 : SPDK_ERRLOG("Maximum number of namespaces supported per NVMe controller is %du. Unable to return all names of created bdevs\n",
5842 : ctx->max_bdevs);
5843 0 : ctx->reported_bdevs = 0;
5844 0 : populate_namespaces_cb(ctx, -ERANGE);
5845 0 : return;
5846 : }
5847 :
5848 47 : nvme_ns = nvme_ctrlr_get_next_active_ns(nvme_ctrlr, nvme_ns);
5849 : }
5850 :
5851 45 : ctx->reported_bdevs = j;
5852 45 : populate_namespaces_cb(ctx, 0);
5853 : }
5854 :
5855 : static int
5856 9 : bdev_nvme_check_secondary_trid(struct nvme_ctrlr *nvme_ctrlr,
5857 : struct spdk_nvme_ctrlr *new_ctrlr,
5858 : struct spdk_nvme_transport_id *trid)
5859 : {
5860 : struct nvme_path_id *tmp_trid;
5861 :
5862 9 : if (trid->trtype == SPDK_NVME_TRANSPORT_PCIE) {
5863 0 : SPDK_ERRLOG("PCIe failover is not supported.\n");
5864 0 : return -ENOTSUP;
5865 : }
5866 :
5867 : /* Currently we only support failover to the same transport type. */
5868 9 : if (nvme_ctrlr->active_path_id->trid.trtype != trid->trtype) {
5869 0 : SPDK_WARNLOG("Failover from trtype: %s to a different trtype: %s is not supported currently\n",
5870 : spdk_nvme_transport_id_trtype_str(nvme_ctrlr->active_path_id->trid.trtype),
5871 : spdk_nvme_transport_id_trtype_str(trid->trtype));
5872 0 : return -EINVAL;
5873 : }
5874 :
5875 :
5876 : /* Currently we only support failover to the same NQN. */
5877 9 : if (strncmp(trid->subnqn, nvme_ctrlr->active_path_id->trid.subnqn, SPDK_NVMF_NQN_MAX_LEN)) {
5878 0 : SPDK_WARNLOG("Failover from subnqn: %s to a different subnqn: %s is not supported currently\n",
5879 : nvme_ctrlr->active_path_id->trid.subnqn, trid->subnqn);
5880 0 : return -EINVAL;
5881 : }
5882 :
5883 : /* Skip all the other checks if we've already registered this path. */
5884 21 : TAILQ_FOREACH(tmp_trid, &nvme_ctrlr->trids, link) {
5885 12 : if (!spdk_nvme_transport_id_compare(&tmp_trid->trid, trid)) {
5886 0 : SPDK_WARNLOG("This path (traddr: %s subnqn: %s) is already registered\n", trid->traddr,
5887 : trid->subnqn);
5888 0 : return -EALREADY;
5889 : }
5890 : }
5891 :
5892 9 : return 0;
5893 : }
5894 :
5895 : static int
5896 9 : bdev_nvme_check_secondary_namespace(struct nvme_ctrlr *nvme_ctrlr,
5897 : struct spdk_nvme_ctrlr *new_ctrlr)
5898 : {
5899 : struct nvme_ns *nvme_ns;
5900 : struct spdk_nvme_ns *new_ns;
5901 :
5902 9 : nvme_ns = nvme_ctrlr_get_first_active_ns(nvme_ctrlr);
5903 9 : while (nvme_ns != NULL) {
5904 0 : new_ns = spdk_nvme_ctrlr_get_ns(new_ctrlr, nvme_ns->id);
5905 0 : assert(new_ns != NULL);
5906 :
5907 0 : if (!bdev_nvme_compare_ns(nvme_ns->ns, new_ns)) {
5908 0 : return -EINVAL;
5909 : }
5910 :
5911 0 : nvme_ns = nvme_ctrlr_get_next_active_ns(nvme_ctrlr, nvme_ns);
5912 : }
5913 :
5914 9 : return 0;
5915 : }
5916 :
5917 : static int
5918 9 : _bdev_nvme_add_secondary_trid(struct nvme_ctrlr *nvme_ctrlr,
5919 : struct spdk_nvme_transport_id *trid)
5920 : {
5921 : struct nvme_path_id *active_id, *new_trid, *tmp_trid;
5922 :
5923 9 : new_trid = calloc(1, sizeof(*new_trid));
5924 9 : if (new_trid == NULL) {
5925 0 : return -ENOMEM;
5926 : }
5927 9 : new_trid->trid = *trid;
5928 :
5929 9 : active_id = nvme_ctrlr->active_path_id;
5930 9 : assert(active_id != NULL);
5931 9 : assert(active_id == TAILQ_FIRST(&nvme_ctrlr->trids));
5932 :
5933 : /* Skip the active trid so that it is not replaced until it fails. */
5934 9 : tmp_trid = TAILQ_NEXT(active_id, link);
5935 9 : if (tmp_trid == NULL) {
5936 6 : goto add_tail;
5937 : }
5938 :
5939 : /* A trid is considered failed if its last failed time (tsc) is non-zero.
5940 : * Insert the new alternate trid before any failed trid.
5941 : */
5942 5 : TAILQ_FOREACH_FROM(tmp_trid, &nvme_ctrlr->trids, link) {
5943 3 : if (tmp_trid->last_failed_tsc != 0) {
5944 1 : TAILQ_INSERT_BEFORE(tmp_trid, new_trid, link);
5945 1 : return 0;
5946 : }
5947 : }
5948 :
5949 2 : add_tail:
5950 8 : TAILQ_INSERT_TAIL(&nvme_ctrlr->trids, new_trid, link);
5951 8 : return 0;
5952 : }
5953 :
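/* An illustrative ordering example for the list manipulated above: if the
 * trids are currently
 *
 *   active (head) -> alt1 (never failed) -> alt2 (last_failed_tsc != 0)
 *
 * then a newly added trid is inserted before alt2, yielding
 *
 *   active -> alt1 -> new -> alt2
 *
 * so that failover tries paths that have not failed before paths that have.
 */
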
5954 : /* This is the case that a secondary path is added to an existing
5955 : * nvme_ctrlr for failover. After checking if it can access the same
5956 : * namespaces as the primary path, it is disconnected until failover occurs.
5957 : */
5958 : static int
5959 9 : bdev_nvme_add_secondary_trid(struct nvme_ctrlr *nvme_ctrlr,
5960 : struct spdk_nvme_ctrlr *new_ctrlr,
5961 : struct spdk_nvme_transport_id *trid)
5962 : {
5963 : int rc;
5964 :
5965 9 : assert(nvme_ctrlr != NULL);
5966 :
5967 9 : pthread_mutex_lock(&nvme_ctrlr->mutex);
5968 :
5969 9 : rc = bdev_nvme_check_secondary_trid(nvme_ctrlr, new_ctrlr, trid);
5970 9 : if (rc != 0) {
5971 0 : goto exit;
5972 : }
5973 :
5974 9 : rc = bdev_nvme_check_secondary_namespace(nvme_ctrlr, new_ctrlr);
5975 9 : if (rc != 0) {
5976 0 : goto exit;
5977 : }
5978 :
5979 9 : rc = _bdev_nvme_add_secondary_trid(nvme_ctrlr, trid);
5980 :
5981 9 : exit:
5982 9 : pthread_mutex_unlock(&nvme_ctrlr->mutex);
5983 :
5984 9 : spdk_nvme_detach(new_ctrlr);
5985 :
5986 9 : return rc;
5987 : }
5988 :
5989 : static void
5990 46 : connect_attach_cb(void *cb_ctx, const struct spdk_nvme_transport_id *trid,
5991 : struct spdk_nvme_ctrlr *ctrlr, const struct spdk_nvme_ctrlr_opts *opts)
5992 : {
5993 46 : struct spdk_nvme_ctrlr_opts *user_opts = cb_ctx;
5994 : struct nvme_async_probe_ctx *ctx;
5995 : int rc;
5996 :
5997 46 : ctx = SPDK_CONTAINEROF(user_opts, struct nvme_async_probe_ctx, drv_opts);
5998 46 : ctx->ctrlr_attached = true;
5999 :
6000 46 : rc = nvme_ctrlr_create(ctrlr, ctx->base_name, &ctx->trid, ctx);
6001 46 : if (rc != 0) {
6002 1 : ctx->reported_bdevs = 0;
6003 1 : populate_namespaces_cb(ctx, rc);
6004 : }
6005 46 : }
6006 :
6007 : static void
6008 4 : connect_set_failover_cb(void *cb_ctx, const struct spdk_nvme_transport_id *trid,
6009 : struct spdk_nvme_ctrlr *ctrlr,
6010 : const struct spdk_nvme_ctrlr_opts *opts)
6011 : {
6012 4 : struct spdk_nvme_ctrlr_opts *user_opts = cb_ctx;
6013 : struct nvme_ctrlr *nvme_ctrlr;
6014 : struct nvme_async_probe_ctx *ctx;
6015 : int rc;
6016 :
6017 4 : ctx = SPDK_CONTAINEROF(user_opts, struct nvme_async_probe_ctx, drv_opts);
6018 4 : ctx->ctrlr_attached = true;
6019 :
6020 4 : nvme_ctrlr = nvme_ctrlr_get_by_name(ctx->base_name);
6021 4 : if (nvme_ctrlr) {
6022 4 : rc = bdev_nvme_add_secondary_trid(nvme_ctrlr, ctrlr, &ctx->trid);
6023 : } else {
6024 0 : rc = -ENODEV;
6025 : }
6026 :
6027 4 : ctx->reported_bdevs = 0;
6028 4 : populate_namespaces_cb(ctx, rc);
6029 4 : }
6030 :
6031 : static int
6032 51 : bdev_nvme_async_poll(void *arg)
6033 : {
6034 51 : struct nvme_async_probe_ctx *ctx = arg;
6035 : int rc;
6036 :
6037 51 : rc = spdk_nvme_probe_poll_async(ctx->probe_ctx);
6038 51 : if (spdk_unlikely(rc != -EAGAIN)) {
6039 51 : ctx->probe_done = true;
6040 51 : spdk_poller_unregister(&ctx->poller);
6041 51 : if (!ctx->ctrlr_attached) {
6042 : /* The probe is done, but no controller was attached.
6043 : * That means we had a failure, so report -EIO back to
6044 : * the caller (usually the RPC). populate_namespaces_cb()
6045 : * will take care of freeing the nvme_async_probe_ctx.
6046 : */
6047 1 : ctx->reported_bdevs = 0;
6048 1 : populate_namespaces_cb(ctx, -EIO);
6049 50 : } else if (ctx->namespaces_populated) {
6050 : /* The namespaces for the attached controller were all
6051 : * populated and the response was already sent to the
6052 : * caller (usually the RPC). So free the context here.
6053 : */
6054 20 : free_nvme_async_probe_ctx(ctx);
6055 : }
6056 : }
6057 :
6058 51 : return SPDK_POLLER_BUSY;
6059 : }
6060 :
6061 : static bool
6062 28 : bdev_nvme_check_io_error_resiliency_params(int32_t ctrlr_loss_timeout_sec,
6063 : uint32_t reconnect_delay_sec,
6064 : uint32_t fast_io_fail_timeout_sec)
6065 : {
6066 28 : if (ctrlr_loss_timeout_sec < -1) {
6067 1 : SPDK_ERRLOG("ctrlr_loss_timeout_sec can't be less than -1.\n");
6068 1 : return false;
6069 27 : } else if (ctrlr_loss_timeout_sec == -1) {
6070 13 : if (reconnect_delay_sec == 0) {
6071 1 : SPDK_ERRLOG("reconnect_delay_sec can't be 0 if ctrlr_loss_timeout_sec is not 0.\n");
6072 1 : return false;
6073 12 : } else if (fast_io_fail_timeout_sec != 0 &&
6074 : fast_io_fail_timeout_sec < reconnect_delay_sec) {
6075 1 : SPDK_ERRLOG("reconnect_delay_sec can't be more than fast_io-fail_timeout_sec.\n");
6076 1 : return false;
6077 : }
6078 14 : } else if (ctrlr_loss_timeout_sec != 0) {
6079 11 : if (reconnect_delay_sec == 0) {
6080 1 : SPDK_ERRLOG("reconnect_delay_sec can't be 0 if ctrlr_loss_timeout_sec is not 0.\n");
6081 1 : return false;
6082 10 : } else if (reconnect_delay_sec > (uint32_t)ctrlr_loss_timeout_sec) {
6083 1 : SPDK_ERRLOG("reconnect_delay_sec can't be more than ctrlr_loss_timeout_sec.\n");
6084 1 : return false;
6085 9 : } else if (fast_io_fail_timeout_sec != 0) {
6086 6 : if (fast_io_fail_timeout_sec < reconnect_delay_sec) {
6087 1 : SPDK_ERRLOG("reconnect_delay_sec can't be more than fast_io_fail_timeout_sec.\n");
6088 1 : return false;
6089 5 : } else if (fast_io_fail_timeout_sec > (uint32_t)ctrlr_loss_timeout_sec) {
6090 1 : SPDK_ERRLOG("fast_io_fail_timeout_sec can't be more than ctrlr_loss_timeout_sec.\n");
6091 1 : return false;
6092 : }
6093 : }
6094 3 : } else if (reconnect_delay_sec != 0 || fast_io_fail_timeout_sec != 0) {
6095 2 : SPDK_ERRLOG("Both reconnect_delay_sec and fast_io_fail_timeout_sec must be 0 if ctrlr_loss_timeout_sec is 0.\n");
6096 2 : return false;
6097 : }
6098 :
6099 19 : return true;
6100 : }
6101 :
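/* Some concrete examples of the constraints enforced above
 * (ctrlr_loss_timeout_sec, reconnect_delay_sec, fast_io_fail_timeout_sec):
 *
 *   ( 0,  0,  0)  valid   - defaults, no reconnect attempts
 *   (-1,  5,  0)  valid   - retry forever, reconnecting every 5 seconds
 *   (60,  5, 30)  valid   - 5 <= 30 <= 60
 *   (60,  0,  0)  invalid - a reconnect delay is required
 *   (60, 90,  0)  invalid - reconnect delay exceeds ctrlr loss timeout
 *   (60,  5, 90)  invalid - fast I/O fail exceeds ctrlr loss timeout
 */
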
6102 : static int
6103 0 : bdev_nvme_load_psk(const char *fname, char *buf, size_t bufsz)
6104 : {
6105 : FILE *psk_file;
6106 0 : struct stat statbuf;
6107 : int rc;
6108 : #define TCP_PSK_INVALID_PERMISSIONS 0177
6109 :
6110 0 : if (stat(fname, &statbuf) != 0) {
6111 0 : SPDK_ERRLOG("Could not read permissions for PSK file\n");
6112 0 : return -EACCES;
6113 : }
6114 :
6115 0 : if ((statbuf.st_mode & TCP_PSK_INVALID_PERMISSIONS) != 0) {
6116 0 : SPDK_ERRLOG("Incorrect permissions for PSK file\n");
6117 0 : return -EPERM;
6118 : }
6119 0 : if ((size_t)statbuf.st_size >= bufsz) {
6120 0 : SPDK_ERRLOG("Invalid PSK: too long\n");
6121 0 : return -EINVAL;
6122 : }
6123 0 : psk_file = fopen(fname, "r");
6124 0 : if (psk_file == NULL) {
6125 0 : SPDK_ERRLOG("Could not open PSK file\n");
6126 0 : return -EINVAL;
6127 : }
6128 :
6129 0 : memset(buf, 0, bufsz);
6130 0 : rc = fread(buf, 1, statbuf.st_size, psk_file);
6131 0 : if (rc != statbuf.st_size) {
6132 0 : SPDK_ERRLOG("Failed to read PSK\n");
6133 0 : fclose(psk_file);
6134 0 : return -EINVAL;
6135 : }
6136 :
6137 0 : fclose(psk_file);
6138 0 : return 0;
6139 : }
6140 :
6141 : int
6142 51 : bdev_nvme_create(struct spdk_nvme_transport_id *trid,
6143 : const char *base_name,
6144 : const char **names,
6145 : uint32_t count,
6146 : spdk_bdev_create_nvme_fn cb_fn,
6147 : void *cb_ctx,
6148 : struct spdk_nvme_ctrlr_opts *drv_opts,
6149 : struct nvme_ctrlr_opts *bdev_opts,
6150 : bool multipath)
6151 : {
6152 : struct nvme_probe_skip_entry *entry, *tmp;
6153 : struct nvme_async_probe_ctx *ctx;
6154 : spdk_nvme_attach_cb attach_cb;
6155 : int rc, len;
6156 :
6157 : /* TODO expand this check to include both the host and target TRIDs.
6158 : * Only if both are the same should we fail.
6159 : */
6160 51 : if (nvme_ctrlr_get(trid) != NULL) {
6161 0 : SPDK_ERRLOG("A controller with the provided trid (traddr: %s) already exists.\n", trid->traddr);
6162 0 : return -EEXIST;
6163 : }
6164 :
6165 51 : len = strnlen(base_name, SPDK_CONTROLLER_NAME_MAX);
6166 :
6167 51 : if (len == 0 || len == SPDK_CONTROLLER_NAME_MAX) {
6168 0 : SPDK_ERRLOG("controller name must be between 1 and %d characters\n", SPDK_CONTROLLER_NAME_MAX - 1);
6169 0 : return -EINVAL;
6170 : }
6171 :
6172 51 : if (bdev_opts != NULL &&
6173 9 : !bdev_nvme_check_io_error_resiliency_params(bdev_opts->ctrlr_loss_timeout_sec,
6174 : bdev_opts->reconnect_delay_sec,
6175 : bdev_opts->fast_io_fail_timeout_sec)) {
6176 0 : return -EINVAL;
6177 : }
6178 :
6179 51 : ctx = calloc(1, sizeof(*ctx));
6180 51 : if (!ctx) {
6181 0 : return -ENOMEM;
6182 : }
6183 51 : ctx->base_name = base_name;
6184 51 : ctx->names = names;
6185 51 : ctx->max_bdevs = count;
6186 51 : ctx->cb_fn = cb_fn;
6187 51 : ctx->cb_ctx = cb_ctx;
6188 51 : ctx->trid = *trid;
6189 :
6190 51 : if (bdev_opts) {
6191 9 : memcpy(&ctx->bdev_opts, bdev_opts, sizeof(*bdev_opts));
6192 : } else {
6193 42 : bdev_nvme_get_default_ctrlr_opts(&ctx->bdev_opts);
6194 : }
6195 :
6196 51 : if (trid->trtype == SPDK_NVME_TRANSPORT_PCIE) {
6197 0 : TAILQ_FOREACH_SAFE(entry, &g_skipped_nvme_ctrlrs, tailq, tmp) {
6198 0 : if (spdk_nvme_transport_id_compare(trid, &entry->trid) == 0) {
6199 0 : TAILQ_REMOVE(&g_skipped_nvme_ctrlrs, entry, tailq);
6200 0 : free(entry);
6201 0 : break;
6202 : }
6203 : }
6204 : }
6205 :
6206 51 : if (drv_opts) {
6207 0 : memcpy(&ctx->drv_opts, drv_opts, sizeof(*drv_opts));
6208 : } else {
6209 51 : spdk_nvme_ctrlr_get_default_ctrlr_opts(&ctx->drv_opts, sizeof(ctx->drv_opts));
6210 : }
6211 :
6212 51 : ctx->drv_opts.transport_retry_count = g_opts.transport_retry_count;
6213 51 : ctx->drv_opts.transport_ack_timeout = g_opts.transport_ack_timeout;
6214 51 : ctx->drv_opts.keep_alive_timeout_ms = g_opts.keep_alive_timeout_ms;
6215 51 : ctx->drv_opts.disable_read_ana_log_page = true;
6216 51 : ctx->drv_opts.transport_tos = g_opts.transport_tos;
6217 :
6218 51 : if (ctx->bdev_opts.psk[0] != '\0') {
6219 : /* Try to use the keyring first */
6220 0 : ctx->drv_opts.tls_psk = spdk_keyring_get_key(ctx->bdev_opts.psk);
6221 0 : if (ctx->drv_opts.tls_psk == NULL) {
6222 0 : rc = bdev_nvme_load_psk(ctx->bdev_opts.psk,
6223 0 : ctx->drv_opts.psk, sizeof(ctx->drv_opts.psk));
6224 0 : if (rc != 0) {
6225 0 : SPDK_ERRLOG("Could not load PSK from %s\n", ctx->bdev_opts.psk);
6226 0 : free_nvme_async_probe_ctx(ctx);
6227 0 : return rc;
6228 : }
6229 : }
6230 : }
6231 :
6232 51 : if (ctx->bdev_opts.dhchap_key != NULL) {
6233 0 : ctx->drv_opts.dhchap_key = spdk_keyring_get_key(ctx->bdev_opts.dhchap_key);
6234 0 : if (ctx->drv_opts.dhchap_key == NULL) {
6235 0 : SPDK_ERRLOG("Could not load DH-HMAC-CHAP key: %s\n",
6236 : ctx->bdev_opts.dhchap_key);
6237 0 : free_nvme_async_probe_ctx(ctx);
6238 0 : return -ENOKEY;
6239 : }
6240 :
6241 0 : ctx->drv_opts.dhchap_digests = g_opts.dhchap_digests;
6242 0 : ctx->drv_opts.dhchap_dhgroups = g_opts.dhchap_dhgroups;
6243 : }
6244 51 : if (ctx->bdev_opts.dhchap_ctrlr_key != NULL) {
6245 0 : ctx->drv_opts.dhchap_ctrlr_key =
6246 0 : spdk_keyring_get_key(ctx->bdev_opts.dhchap_ctrlr_key);
6247 0 : if (ctx->drv_opts.dhchap_ctrlr_key == NULL) {
6248 0 : SPDK_ERRLOG("Could not load DH-HMAC-CHAP controller key: %s\n",
6249 : ctx->bdev_opts.dhchap_ctrlr_key);
6250 0 : free_nvme_async_probe_ctx(ctx);
6251 0 : return -ENOKEY;
6252 : }
6253 : }
6254 :
6255 51 : if (nvme_bdev_ctrlr_get_by_name(base_name) == NULL || multipath) {
6256 47 : attach_cb = connect_attach_cb;
6257 : } else {
6258 4 : attach_cb = connect_set_failover_cb;
6259 : }
6260 :
6261 51 : ctx->probe_ctx = spdk_nvme_connect_async(trid, &ctx->drv_opts, attach_cb);
6262 51 : if (ctx->probe_ctx == NULL) {
6263 0 : SPDK_ERRLOG("No controller was found with provided trid (traddr: %s)\n", trid->traddr);
6264 0 : free_nvme_async_probe_ctx(ctx);
6265 0 : return -ENODEV;
6266 : }
6267 51 : ctx->poller = SPDK_POLLER_REGISTER(bdev_nvme_async_poll, ctx, 1000);
6268 :
6269 51 : return 0;
6270 : }
6271 :
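/* A minimal attach sketch (hypothetical values): connect to an NVMe-oF TCP
 * subsystem and collect the names of the created bdevs. example_create_done()
 * matches spdk_bdev_create_nvme_fn and reports how many bdevs were created.
 * The names array is static because it must stay valid until the callback.
 */
static void
example_create_done(void *cb_ctx, size_t bdev_count, int rc)
{
	SPDK_NOTICELOG("attach finished: %zu bdevs, rc=%d\n", bdev_count, rc);
}

static int
example_attach_tcp(void)
{
	struct spdk_nvme_transport_id trid = {};
	static const char *names[32];

	spdk_nvme_trid_populate_transport(&trid, SPDK_NVME_TRANSPORT_TCP);
	snprintf(trid.traddr, sizeof(trid.traddr), "192.168.0.10");
	snprintf(trid.trsvcid, sizeof(trid.trsvcid), "4420");
	snprintf(trid.subnqn, sizeof(trid.subnqn), "nqn.2016-06.io.spdk:cnode1");
	trid.adrfam = SPDK_NVMF_ADRFAM_IPV4;

	return bdev_nvme_create(&trid, "Nvme0", names, SPDK_COUNTOF(names),
				example_create_done, NULL, NULL, NULL, true);
}
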
6272 : struct bdev_nvme_delete_ctx {
6273 : char *name;
6274 : struct nvme_path_id path_id;
6275 : bdev_nvme_delete_done_fn delete_done;
6276 : void *delete_done_ctx;
6277 : uint64_t timeout_ticks;
6278 : struct spdk_poller *poller;
6279 : };
6280 :
6281 : static void
6282 2 : free_bdev_nvme_delete_ctx(struct bdev_nvme_delete_ctx *ctx)
6283 : {
6284 2 : if (ctx != NULL) {
6285 1 : free(ctx->name);
6286 1 : free(ctx);
6287 : }
6288 2 : }
6289 :
6290 : static bool
6291 74 : nvme_path_id_compare(struct nvme_path_id *p, const struct nvme_path_id *path_id)
6292 : {
6293 74 : if (path_id->trid.trtype != 0) {
6294 21 : if (path_id->trid.trtype == SPDK_NVME_TRANSPORT_CUSTOM) {
6295 0 : if (strcasecmp(path_id->trid.trstring, p->trid.trstring) != 0) {
6296 0 : return false;
6297 : }
6298 : } else {
6299 21 : if (path_id->trid.trtype != p->trid.trtype) {
6300 0 : return false;
6301 : }
6302 : }
6303 : }
6304 :
6305 74 : if (!spdk_mem_all_zero(path_id->trid.traddr, sizeof(path_id->trid.traddr))) {
6306 21 : if (strcasecmp(path_id->trid.traddr, p->trid.traddr) != 0) {
6307 11 : return false;
6308 : }
6309 : }
6310 :
6311 63 : if (path_id->trid.adrfam != 0) {
6312 0 : if (path_id->trid.adrfam != p->trid.adrfam) {
6313 0 : return false;
6314 : }
6315 : }
6316 :
6317 63 : if (!spdk_mem_all_zero(path_id->trid.trsvcid, sizeof(path_id->trid.trsvcid))) {
6318 10 : if (strcasecmp(path_id->trid.trsvcid, p->trid.trsvcid) != 0) {
6319 0 : return false;
6320 : }
6321 : }
6322 :
6323 63 : if (!spdk_mem_all_zero(path_id->trid.subnqn, sizeof(path_id->trid.subnqn))) {
6324 10 : if (strcmp(path_id->trid.subnqn, p->trid.subnqn) != 0) {
6325 0 : return false;
6326 : }
6327 : }
6328 :
6329 63 : if (!spdk_mem_all_zero(path_id->hostid.hostaddr, sizeof(path_id->hostid.hostaddr))) {
6330 0 : if (strcmp(path_id->hostid.hostaddr, p->hostid.hostaddr) != 0) {
6331 0 : return false;
6332 : }
6333 : }
6334 :
6335 63 : if (!spdk_mem_all_zero(path_id->hostid.hostsvcid, sizeof(path_id->hostid.hostsvcid))) {
6336 0 : if (strcmp(path_id->hostid.hostsvcid, p->hostid.hostsvcid) != 0) {
6337 0 : return false;
6338 : }
6339 : }
6340 :
6341 63 : return true;
6342 : }
6343 :
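/* Zeroed fields in path_id act as wildcards above, so callers can match on any
 * subset of the identifying fields. A sketch (hypothetical values) matching
 * every path to a given target address, regardless of port or subnqn:
 */
static bool
example_match_by_traddr(struct nvme_path_id *p)
{
	struct nvme_path_id needle = {};

	snprintf(needle.trid.traddr, sizeof(needle.trid.traddr), "192.168.0.10");

	return nvme_path_id_compare(p, &needle);
}
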
6344 : static bool
6345 2 : nvme_path_id_exists(const char *name, const struct nvme_path_id *path_id)
6346 : {
6347 : struct nvme_bdev_ctrlr *nbdev_ctrlr;
6348 : struct nvme_ctrlr *ctrlr;
6349 : struct nvme_path_id *p;
6350 :
6351 2 : pthread_mutex_lock(&g_bdev_nvme_mutex);
6352 2 : nbdev_ctrlr = nvme_bdev_ctrlr_get_by_name(name);
6353 2 : if (!nbdev_ctrlr) {
6354 1 : pthread_mutex_unlock(&g_bdev_nvme_mutex);
6355 1 : return false;
6356 : }
6357 :
6358 1 : TAILQ_FOREACH(ctrlr, &nbdev_ctrlr->ctrlrs, tailq) {
6359 1 : pthread_mutex_lock(&ctrlr->mutex);
6360 1 : TAILQ_FOREACH(p, &ctrlr->trids, link) {
6361 1 : if (nvme_path_id_compare(p, path_id)) {
6362 1 : pthread_mutex_unlock(&ctrlr->mutex);
6363 1 : pthread_mutex_unlock(&g_bdev_nvme_mutex);
6364 1 : return true;
6365 : }
6366 : }
6367 0 : pthread_mutex_unlock(&ctrlr->mutex);
6368 : }
6369 0 : pthread_mutex_unlock(&g_bdev_nvme_mutex);
6370 :
6371 0 : return false;
6372 : }
6373 :
6374 : static int
6375 2 : bdev_nvme_delete_complete_poll(void *arg)
6376 : {
6377 2 : struct bdev_nvme_delete_ctx *ctx = arg;
6378 2 : int rc = 0;
6379 :
6380 2 : if (nvme_path_id_exists(ctx->name, &ctx->path_id)) {
6381 1 : if (ctx->timeout_ticks > spdk_get_ticks()) {
6382 1 : return SPDK_POLLER_BUSY;
6383 : }
6384 :
6385 0 : SPDK_ERRLOG("NVMe path '%s' still exists after delete\n", ctx->name);
6386 0 : rc = -ETIMEDOUT;
6387 : }
6388 :
6389 1 : spdk_poller_unregister(&ctx->poller);
6390 :
6391 1 : ctx->delete_done(ctx->delete_done_ctx, rc);
6392 1 : free_bdev_nvme_delete_ctx(ctx);
6393 :
6394 1 : return SPDK_POLLER_BUSY;
6395 : }
6396 :
6397 : static int
6398 63 : _bdev_nvme_delete(struct nvme_ctrlr *nvme_ctrlr, const struct nvme_path_id *path_id)
6399 : {
6400 : struct nvme_path_id *p, *t;
6401 : spdk_msg_fn msg_fn;
6402 63 : int rc = -ENXIO;
6403 :
6404 63 : pthread_mutex_lock(&nvme_ctrlr->mutex);
6405 :
6406 73 : TAILQ_FOREACH_REVERSE_SAFE(p, &nvme_ctrlr->trids, nvme_paths, link, t) {
6407 73 : if (p == TAILQ_FIRST(&nvme_ctrlr->trids)) {
6408 63 : break;
6409 : }
6410 :
6411 10 : if (!nvme_path_id_compare(p, path_id)) {
6412 3 : continue;
6413 : }
6414 :
6415 : /* We are not using the specified path. */
6416 7 : TAILQ_REMOVE(&nvme_ctrlr->trids, p, link);
6417 7 : free(p);
6418 7 : rc = 0;
6419 : }
6420 :
6421 63 : if (p == NULL || !nvme_path_id_compare(p, path_id)) {
6422 8 : pthread_mutex_unlock(&nvme_ctrlr->mutex);
6423 8 : return rc;
6424 : }
6425 :
6426 : /* If we made it here, then this path is a match! Now we need to remove it. */
6427 :
6428 : /* This is the active path in use right now. The active path is always the first in the list. */
6429 55 : assert(p == nvme_ctrlr->active_path_id);
6430 :
6431 55 : if (!TAILQ_NEXT(p, link)) {
6432 : /* The current path is the only path. */
6433 54 : msg_fn = _nvme_ctrlr_destruct;
6434 54 : rc = bdev_nvme_delete_ctrlr_unsafe(nvme_ctrlr, false);
6435 : } else {
6436 : /* There is an alternative path. */
6437 1 : msg_fn = _bdev_nvme_reset_ctrlr;
6438 1 : rc = bdev_nvme_failover_ctrlr_unsafe(nvme_ctrlr, true);
6439 : }
6440 :
6441 55 : pthread_mutex_unlock(&nvme_ctrlr->mutex);
6442 :
6443 55 : if (rc == 0) {
6444 55 : spdk_thread_send_msg(nvme_ctrlr->thread, msg_fn, nvme_ctrlr);
6445 0 : } else if (rc == -EALREADY) {
6446 0 : rc = 0;
6447 : }
6448 :
6449 55 : return rc;
6450 : }
6451 :
6452 : int
6453 48 : bdev_nvme_delete(const char *name, const struct nvme_path_id *path_id,
6454 : bdev_nvme_delete_done_fn delete_done, void *delete_done_ctx)
6455 : {
6456 : struct nvme_bdev_ctrlr *nbdev_ctrlr;
6457 : struct nvme_ctrlr *nvme_ctrlr, *tmp_nvme_ctrlr;
6458 48 : struct bdev_nvme_delete_ctx *ctx = NULL;
6459 48 : int rc = -ENXIO, _rc;
6460 :
6461 48 : if (name == NULL || path_id == NULL) {
6462 0 : rc = -EINVAL;
6463 0 : goto exit;
6464 : }
6465 :
6466 48 : pthread_mutex_lock(&g_bdev_nvme_mutex);
6467 :
6468 48 : nbdev_ctrlr = nvme_bdev_ctrlr_get_by_name(name);
6469 48 : if (nbdev_ctrlr == NULL) {
6470 0 : pthread_mutex_unlock(&g_bdev_nvme_mutex);
6471 :
6472 0 : SPDK_ERRLOG("Failed to find NVMe bdev controller\n");
6473 0 : rc = -ENODEV;
6474 0 : goto exit;
6475 : }
6476 :
6477 111 : TAILQ_FOREACH_SAFE(nvme_ctrlr, &nbdev_ctrlr->ctrlrs, tailq, tmp_nvme_ctrlr) {
6478 63 : _rc = _bdev_nvme_delete(nvme_ctrlr, path_id);
6479 63 : if (_rc < 0 && _rc != -ENXIO) {
6480 0 : pthread_mutex_unlock(&g_bdev_nvme_mutex);
6481 0 : rc = _rc;
6482 0 : goto exit;
6483 63 : } else if (_rc == 0) {
6484 : /* We traverse all remaining nvme_ctrlrs even if one nvme_ctrlr
6485 : * was deleted successfully. To remember the successful deletion,
6486 : * overwrite rc only if _rc is zero.
6487 : */
6488 57 : rc = 0;
6489 : }
6490 : }
6491 :
6492 48 : pthread_mutex_unlock(&g_bdev_nvme_mutex);
6493 :
6494 48 : if (rc != 0 || delete_done == NULL) {
6495 47 : goto exit;
6496 : }
6497 :
6498 1 : ctx = calloc(1, sizeof(*ctx));
6499 1 : if (ctx == NULL) {
6500 0 : SPDK_ERRLOG("Failed to allocate context for bdev_nvme_delete\n");
6501 0 : rc = -ENOMEM;
6502 0 : goto exit;
6503 : }
6504 :
6505 1 : ctx->name = strdup(name);
6506 1 : if (ctx->name == NULL) {
6507 0 : SPDK_ERRLOG("Failed to copy controller name for deletion\n");
6508 0 : rc = -ENOMEM;
6509 0 : goto exit;
6510 : }
6511 :
6512 1 : ctx->delete_done = delete_done;
6513 1 : ctx->delete_done_ctx = delete_done_ctx;
6514 1 : ctx->path_id = *path_id;
6515 1 : ctx->timeout_ticks = spdk_get_ticks() + 10 * spdk_get_ticks_hz();
6516 1 : ctx->poller = SPDK_POLLER_REGISTER(bdev_nvme_delete_complete_poll, ctx, 1000);
6517 1 : if (ctx->poller == NULL) {
6518 0 : SPDK_ERRLOG("Failed to register bdev_nvme_delete poller\n");
6519 0 : rc = -ENOMEM;
6520 0 : goto exit;
6521 : }
6522 :
6523 1 : exit:
6524 48 : if (rc != 0) {
6525 1 : free_bdev_nvme_delete_ctx(ctx);
6526 : }
6527 :
6528 48 : return rc;
6529 : }
6530 :
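/* A deletion sketch (hypothetical callback): remove every path of controller
 * "Nvme0" and wait for the asynchronous completion. An all-zero path_id is a
 * wildcard that matches every registered path.
 */
static void
example_delete_done(void *ctx, int rc)
{
	SPDK_NOTICELOG("bdev_nvme_delete finished: %d\n", rc);
}

static int
example_delete_ctrlr(void)
{
	struct nvme_path_id path_id = {};

	return bdev_nvme_delete("Nvme0", &path_id, example_delete_done, NULL);
}
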
6531 : #define DISCOVERY_INFOLOG(ctx, format, ...) \
6532 : SPDK_INFOLOG(bdev_nvme, "Discovery[%s:%s] " format, ctx->trid.traddr, ctx->trid.trsvcid, ##__VA_ARGS__);
6533 :
6534 : #define DISCOVERY_ERRLOG(ctx, format, ...) \
6535 : SPDK_ERRLOG("Discovery[%s:%s] " format, ctx->trid.traddr, ctx->trid.trsvcid, ##__VA_ARGS__);
6536 :
6537 : struct discovery_entry_ctx {
6538 : char name[128];
6539 : struct spdk_nvme_transport_id trid;
6540 : struct spdk_nvme_ctrlr_opts drv_opts;
6541 : struct spdk_nvmf_discovery_log_page_entry entry;
6542 : TAILQ_ENTRY(discovery_entry_ctx) tailq;
6543 : struct discovery_ctx *ctx;
6544 : };
6545 :
6546 : struct discovery_ctx {
6547 : char *name;
6548 : spdk_bdev_nvme_start_discovery_fn start_cb_fn;
6549 : spdk_bdev_nvme_stop_discovery_fn stop_cb_fn;
6550 : void *cb_ctx;
6551 : struct spdk_nvme_probe_ctx *probe_ctx;
6552 : struct spdk_nvme_detach_ctx *detach_ctx;
6553 : struct spdk_nvme_ctrlr *ctrlr;
6554 : struct spdk_nvme_transport_id trid;
6555 : struct discovery_entry_ctx *entry_ctx_in_use;
6556 : struct spdk_poller *poller;
6557 : struct spdk_nvme_ctrlr_opts drv_opts;
6558 : struct nvme_ctrlr_opts bdev_opts;
6559 : struct spdk_nvmf_discovery_log_page *log_page;
6560 : TAILQ_ENTRY(discovery_ctx) tailq;
6561 : TAILQ_HEAD(, discovery_entry_ctx) nvm_entry_ctxs;
6562 : TAILQ_HEAD(, discovery_entry_ctx) discovery_entry_ctxs;
6563 : int rc;
6564 : bool wait_for_attach;
6565 : uint64_t timeout_ticks;
6566 : /* Denotes that the discovery service is being started. We're waiting
6567 : * for the initial connection to the discovery controller to be
6568 : * established and for the discovered NVM ctrlrs to be attached.
6569 : */
6570 : bool initializing;
6571 : /* Denotes if a discovery is currently in progress for this context.
6572 : * That includes connecting to newly discovered subsystems. Used to
6573 : * ensure we do not start a new discovery until an existing one is
6574 : * complete.
6575 : */
6576 : bool in_progress;
6577 :
6578 : /* Denotes if another discovery is needed after the one in progress
6579 : * completes. Set when we receive an AER completion while a discovery
6580 : * is already in progress.
6581 : */
6582 : bool pending;
6583 :
6584 : /* Signal to the discovery context poller that it should stop the
6585 : * discovery service, including detaching from the current discovery
6586 : * controller.
6587 : */
6588 : bool stop;
6589 :
6590 : struct spdk_thread *calling_thread;
6591 : uint32_t index;
6592 : uint32_t attach_in_progress;
6593 : char *hostnqn;
6594 :
6595 : /* Denotes if the discovery service was started by the mdns discovery.
6596 : */
6597 : bool from_mdns_discovery_service;
6598 : };
6599 :
6600 : TAILQ_HEAD(discovery_ctxs, discovery_ctx);
6601 : static struct discovery_ctxs g_discovery_ctxs = TAILQ_HEAD_INITIALIZER(g_discovery_ctxs);
6602 :
6603 : static void get_discovery_log_page(struct discovery_ctx *ctx);
6604 :
6605 : static void
6606 0 : free_discovery_ctx(struct discovery_ctx *ctx)
6607 : {
6608 0 : free(ctx->log_page);
6609 0 : free(ctx->hostnqn);
6610 0 : free(ctx->name);
6611 0 : free(ctx);
6612 0 : }
6613 :
6614 : static void
6615 0 : discovery_complete(struct discovery_ctx *ctx)
6616 : {
6617 0 : ctx->initializing = false;
6618 0 : ctx->in_progress = false;
6619 0 : if (ctx->pending) {
6620 0 : ctx->pending = false;
6621 0 : get_discovery_log_page(ctx);
6622 : }
6623 0 : }
6624 :
6625 : static void
6626 0 : build_trid_from_log_page_entry(struct spdk_nvme_transport_id *trid,
6627 : struct spdk_nvmf_discovery_log_page_entry *entry)
6628 : {
6629 : char *space;
6630 :
6631 0 : trid->trtype = entry->trtype;
6632 0 : trid->adrfam = entry->adrfam;
6633 0 : memcpy(trid->traddr, entry->traddr, sizeof(entry->traddr));
6634 0 : memcpy(trid->trsvcid, entry->trsvcid, sizeof(entry->trsvcid));
6635 : /* Because the source buffer (entry->subnqn) is longer than trid->subnqn, and
6636 : * trid->subnqn is zeroed out before this function is called, we need to copy
6637 : * at most sizeof(trid->subnqn) - 1 bytes so that the last character remains 0.
6638 : * The string can then be shortened (replacing ' ' with 0) if required.
6639 : */
6640 0 : memcpy(trid->subnqn, entry->subnqn, sizeof(trid->subnqn) - 1);
6641 :
6642 : /* We want the traddr, trsvcid and subnqn fields to be NULL-terminated.
6643 : * But the log page entries typically pad them with spaces, not zeroes.
6644 : * So add a NULL terminator to each of these fields at the appropriate
6645 : * location.
6646 : */
6647 0 : space = strchr(trid->traddr, ' ');
6648 0 : if (space) {
6649 0 : *space = 0;
6650 : }
6651 0 : space = strchr(trid->trsvcid, ' ');
6652 0 : if (space) {
6653 0 : *space = 0;
6654 : }
6655 0 : space = strchr(trid->subnqn, ' ');
6656 0 : if (space) {
6657 0 : *space = 0;
6658 : }
6659 0 : }
6660 :
6661 : static void
6662 0 : _stop_discovery(void *_ctx)
6663 : {
6664 0 : struct discovery_ctx *ctx = _ctx;
6665 :
6666 0 : if (ctx->attach_in_progress > 0) {
6667 0 : spdk_thread_send_msg(spdk_get_thread(), _stop_discovery, ctx);
6668 0 : return;
6669 : }
6670 :
6671 0 : ctx->stop = true;
6672 :
6673 0 : while (!TAILQ_EMPTY(&ctx->nvm_entry_ctxs)) {
6674 : struct discovery_entry_ctx *entry_ctx;
6675 0 : struct nvme_path_id path = {};
6676 :
6677 0 : entry_ctx = TAILQ_FIRST(&ctx->nvm_entry_ctxs);
6678 0 : path.trid = entry_ctx->trid;
6679 0 : bdev_nvme_delete(entry_ctx->name, &path, NULL, NULL);
6680 0 : TAILQ_REMOVE(&ctx->nvm_entry_ctxs, entry_ctx, tailq);
6681 0 : free(entry_ctx);
6682 : }
6683 :
6684 0 : while (!TAILQ_EMPTY(&ctx->discovery_entry_ctxs)) {
6685 : struct discovery_entry_ctx *entry_ctx;
6686 :
6687 0 : entry_ctx = TAILQ_FIRST(&ctx->discovery_entry_ctxs);
6688 0 : TAILQ_REMOVE(&ctx->discovery_entry_ctxs, entry_ctx, tailq);
6689 0 : free(entry_ctx);
6690 : }
6691 :
6692 0 : free(ctx->entry_ctx_in_use);
6693 0 : ctx->entry_ctx_in_use = NULL;
6694 : }
6695 :
6696 : static void
6697 0 : stop_discovery(struct discovery_ctx *ctx, spdk_bdev_nvme_stop_discovery_fn cb_fn, void *cb_ctx)
6698 : {
6699 0 : ctx->stop_cb_fn = cb_fn;
6700 0 : ctx->cb_ctx = cb_ctx;
6701 :
6702 0 : if (ctx->attach_in_progress > 0) {
6703 0 : DISCOVERY_INFOLOG(ctx, "stopping discovery with attach_in_progress: %"PRIu32"\n",
6704 : ctx->attach_in_progress);
6705 : }
6706 :
6707 0 : _stop_discovery(ctx);
6708 0 : }
6709 :
6710 : static void
6711 2 : remove_discovery_entry(struct nvme_ctrlr *nvme_ctrlr)
6712 : {
6713 : struct discovery_ctx *d_ctx;
6714 : struct nvme_path_id *path_id;
6715 2 : struct spdk_nvme_transport_id trid = {};
6716 : struct discovery_entry_ctx *entry_ctx, *tmp;
6717 :
6718 2 : path_id = TAILQ_FIRST(&nvme_ctrlr->trids);
6719 :
6720 2 : TAILQ_FOREACH(d_ctx, &g_discovery_ctxs, tailq) {
6721 0 : TAILQ_FOREACH_SAFE(entry_ctx, &d_ctx->nvm_entry_ctxs, tailq, tmp) {
6722 0 : build_trid_from_log_page_entry(&trid, &entry_ctx->entry);
6723 0 : if (spdk_nvme_transport_id_compare(&trid, &path_id->trid) != 0) {
6724 0 : continue;
6725 : }
6726 :
6727 0 : TAILQ_REMOVE(&d_ctx->nvm_entry_ctxs, entry_ctx, tailq);
6728 0 : free(entry_ctx);
6729 0 : DISCOVERY_INFOLOG(d_ctx, "Remove discovery entry: %s:%s:%s\n",
6730 : trid.subnqn, trid.traddr, trid.trsvcid);
6731 :
6732 : /* Fail discovery ctrlr to force reattach attempt */
6733 0 : spdk_nvme_ctrlr_fail(d_ctx->ctrlr);
6734 : }
6735 : }
6736 2 : }
6737 :
6738 : static void
6739 0 : discovery_remove_controllers(struct discovery_ctx *ctx)
6740 : {
6741 0 : struct spdk_nvmf_discovery_log_page *log_page = ctx->log_page;
6742 : struct discovery_entry_ctx *entry_ctx, *tmp;
6743 : struct spdk_nvmf_discovery_log_page_entry *new_entry, *old_entry;
6744 0 : struct spdk_nvme_transport_id old_trid = {};
6745 : uint64_t numrec, i;
6746 : bool found;
6747 :
6748 0 : numrec = from_le64(&log_page->numrec);
6749 0 : TAILQ_FOREACH_SAFE(entry_ctx, &ctx->nvm_entry_ctxs, tailq, tmp) {
6750 0 : found = false;
6751 0 : old_entry = &entry_ctx->entry;
6752 0 : build_trid_from_log_page_entry(&old_trid, old_entry);
6753 0 : for (i = 0; i < numrec; i++) {
6754 0 : new_entry = &log_page->entries[i];
6755 0 : if (!memcmp(old_entry, new_entry, sizeof(*old_entry))) {
6756 0 : DISCOVERY_INFOLOG(ctx, "NVM %s:%s:%s found again\n",
6757 : old_trid.subnqn, old_trid.traddr, old_trid.trsvcid);
6758 0 : found = true;
6759 0 : break;
6760 : }
6761 : }
6762 0 : if (!found) {
6763 0 : struct nvme_path_id path = {};
6764 :
6765 0 : DISCOVERY_INFOLOG(ctx, "NVM %s:%s:%s not found\n",
6766 : old_trid.subnqn, old_trid.traddr, old_trid.trsvcid);
6767 :
6768 0 : path.trid = entry_ctx->trid;
6769 0 : bdev_nvme_delete(entry_ctx->name, &path, NULL, NULL);
6770 0 : TAILQ_REMOVE(&ctx->nvm_entry_ctxs, entry_ctx, tailq);
6771 0 : free(entry_ctx);
6772 : }
6773 : }
6774 0 : free(log_page);
6775 0 : ctx->log_page = NULL;
6776 0 : discovery_complete(ctx);
6777 0 : }
6778 :
6779 : static void
6780 0 : complete_discovery_start(struct discovery_ctx *ctx, int status)
6781 : {
6782 0 : ctx->timeout_ticks = 0;
6783 0 : ctx->rc = status;
6784 0 : if (ctx->start_cb_fn) {
6785 0 : ctx->start_cb_fn(ctx->cb_ctx, status);
6786 0 : ctx->start_cb_fn = NULL;
6787 0 : ctx->cb_ctx = NULL;
6788 : }
6789 0 : }
6790 :
6791 : static void
6792 0 : discovery_attach_controller_done(void *cb_ctx, size_t bdev_count, int rc)
6793 : {
6794 0 : struct discovery_entry_ctx *entry_ctx = cb_ctx;
6795 0 : struct discovery_ctx *ctx = entry_ctx->ctx;
6796 :
6797 0 : DISCOVERY_INFOLOG(ctx, "attach %s done\n", entry_ctx->name);
6798 0 : ctx->attach_in_progress--;
6799 0 : if (ctx->attach_in_progress == 0) {
6800 0 : complete_discovery_start(ctx, ctx->rc);
6801 0 : if (ctx->initializing && ctx->rc != 0) {
6802 0 : DISCOVERY_ERRLOG(ctx, "stopping discovery due to errors: %d\n", ctx->rc);
6803 0 : stop_discovery(ctx, NULL, ctx->cb_ctx);
6804 : } else {
6805 0 : discovery_remove_controllers(ctx);
6806 : }
6807 : }
6808 0 : }
6809 :
6810 : static struct discovery_entry_ctx *
6811 0 : create_discovery_entry_ctx(struct discovery_ctx *ctx, struct spdk_nvme_transport_id *trid)
6812 : {
6813 : struct discovery_entry_ctx *new_ctx;
6814 :
6815 0 : new_ctx = calloc(1, sizeof(*new_ctx));
6816 0 : if (new_ctx == NULL) {
6817 0 : DISCOVERY_ERRLOG(ctx, "could not allocate new entry_ctx\n");
6818 0 : return NULL;
6819 : }
6820 :
6821 0 : new_ctx->ctx = ctx;
6822 0 : memcpy(&new_ctx->trid, trid, sizeof(*trid));
6823 0 : spdk_nvme_ctrlr_get_default_ctrlr_opts(&new_ctx->drv_opts, sizeof(new_ctx->drv_opts));
6824 0 : snprintf(new_ctx->drv_opts.hostnqn, sizeof(new_ctx->drv_opts.hostnqn), "%s", ctx->hostnqn);
6825 0 : return new_ctx;
6826 : }
6827 :
6828 : static void
6829 0 : discovery_log_page_cb(void *cb_arg, int rc, const struct spdk_nvme_cpl *cpl,
6830 : struct spdk_nvmf_discovery_log_page *log_page)
6831 : {
6832 0 : struct discovery_ctx *ctx = cb_arg;
6833 : struct discovery_entry_ctx *entry_ctx, *tmp;
6834 : struct spdk_nvmf_discovery_log_page_entry *new_entry, *old_entry;
6835 : uint64_t numrec, i;
6836 : bool found;
6837 :
6838 0 : if (rc || spdk_nvme_cpl_is_error(cpl)) {
6839 0 : DISCOVERY_ERRLOG(ctx, "could not get discovery log page\n");
6840 0 : return;
6841 : }
6842 :
6843 0 : ctx->log_page = log_page;
6844 0 : assert(ctx->attach_in_progress == 0);
6845 0 : numrec = from_le64(&log_page->numrec);
6846 0 : TAILQ_FOREACH_SAFE(entry_ctx, &ctx->discovery_entry_ctxs, tailq, tmp) {
6847 0 : TAILQ_REMOVE(&ctx->discovery_entry_ctxs, entry_ctx, tailq);
6848 0 : free(entry_ctx);
6849 : }
6850 0 : for (i = 0; i < numrec; i++) {
6851 0 : found = false;
6852 0 : new_entry = &log_page->entries[i];
6853 0 : if (new_entry->subtype == SPDK_NVMF_SUBTYPE_DISCOVERY_CURRENT ||
6854 0 : new_entry->subtype == SPDK_NVMF_SUBTYPE_DISCOVERY) {
6855 : struct discovery_entry_ctx *new_ctx;
6856 0 : struct spdk_nvme_transport_id trid = {};
6857 :
6858 0 : build_trid_from_log_page_entry(&trid, new_entry);
6859 0 : new_ctx = create_discovery_entry_ctx(ctx, &trid);
6860 0 : if (new_ctx == NULL) {
6861 0 : DISCOVERY_ERRLOG(ctx, "could not allocate new entry_ctx\n");
6862 0 : break;
6863 : }
6864 :
6865 0 : TAILQ_INSERT_TAIL(&ctx->discovery_entry_ctxs, new_ctx, tailq);
6866 0 : continue;
6867 : }
6868 0 : TAILQ_FOREACH(entry_ctx, &ctx->nvm_entry_ctxs, tailq) {
6869 0 : old_entry = &entry_ctx->entry;
6870 0 : if (!memcmp(new_entry, old_entry, sizeof(*new_entry))) {
6871 0 : found = true;
6872 0 : break;
6873 : }
6874 : }
6875 0 : if (!found) {
6876 0 : struct discovery_entry_ctx *subnqn_ctx = NULL, *new_ctx;
6877 : struct discovery_ctx *d_ctx;
6878 :
6879 0 : TAILQ_FOREACH(d_ctx, &g_discovery_ctxs, tailq) {
6880 0 : TAILQ_FOREACH(subnqn_ctx, &d_ctx->nvm_entry_ctxs, tailq) {
6881 0 : if (!memcmp(subnqn_ctx->entry.subnqn, new_entry->subnqn,
6882 : sizeof(new_entry->subnqn))) {
6883 0 : break;
6884 : }
6885 : }
6886 0 : if (subnqn_ctx) {
6887 0 : break;
6888 : }
6889 : }
6890 :
6891 0 : new_ctx = calloc(1, sizeof(*new_ctx));
6892 0 : if (new_ctx == NULL) {
6893 0 : DISCOVERY_ERRLOG(ctx, "could not allocate new entry_ctx\n");
6894 0 : break;
6895 : }
6896 :
6897 0 : new_ctx->ctx = ctx;
6898 0 : memcpy(&new_ctx->entry, new_entry, sizeof(*new_entry));
6899 0 : build_trid_from_log_page_entry(&new_ctx->trid, new_entry);
6900 0 : if (subnqn_ctx) {
6901 0 : snprintf(new_ctx->name, sizeof(new_ctx->name), "%s", subnqn_ctx->name);
6902 0 : DISCOVERY_INFOLOG(ctx, "NVM %s:%s:%s new path for %s\n",
6903 : new_ctx->trid.subnqn, new_ctx->trid.traddr, new_ctx->trid.trsvcid,
6904 : new_ctx->name);
6905 : } else {
6906 0 : snprintf(new_ctx->name, sizeof(new_ctx->name), "%s%d", ctx->name, ctx->index++);
6907 0 : DISCOVERY_INFOLOG(ctx, "NVM %s:%s:%s new subsystem %s\n",
6908 : new_ctx->trid.subnqn, new_ctx->trid.traddr, new_ctx->trid.trsvcid,
6909 : new_ctx->name);
6910 : }
6911 0 : spdk_nvme_ctrlr_get_default_ctrlr_opts(&new_ctx->drv_opts, sizeof(new_ctx->drv_opts));
6912 0 : snprintf(new_ctx->drv_opts.hostnqn, sizeof(new_ctx->drv_opts.hostnqn), "%s", ctx->hostnqn);
6913 0 : rc = bdev_nvme_create(&new_ctx->trid, new_ctx->name, NULL, 0,
6914 : discovery_attach_controller_done, new_ctx,
6915 : &new_ctx->drv_opts, &ctx->bdev_opts, true);
6916 0 : if (rc == 0) {
6917 0 : TAILQ_INSERT_TAIL(&ctx->nvm_entry_ctxs, new_ctx, tailq);
6918 0 : ctx->attach_in_progress++;
6919 : } else {
6920 0 : DISCOVERY_ERRLOG(ctx, "bdev_nvme_create failed (%s)\n", spdk_strerror(-rc));
6921 : }
6922 : }
6923 : }
6924 :
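 : 	/* If no new controllers are being attached, prune removed ones now;
 : 	 * otherwise pruning is deferred to discovery_attach_controller_done().
 : 	 */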
6925 0 : if (ctx->attach_in_progress == 0) {
6926 0 : discovery_remove_controllers(ctx);
6927 : }
6928 : }
6929 :
6930 : static void
6931 0 : get_discovery_log_page(struct discovery_ctx *ctx)
6932 : {
6933 : int rc;
6934 :
6935 0 : assert(ctx->in_progress == false);
6936 0 : ctx->in_progress = true;
6937 0 : rc = spdk_nvme_ctrlr_get_discovery_log_page(ctx->ctrlr, discovery_log_page_cb, ctx);
6938 0 : if (rc != 0) {
6939 0 : 	if (rc != 0) {
6940 0 : 		DISCOVERY_ERRLOG(ctx, "could not get discovery log page\n");
 : 	} else {
6942 0 : 		DISCOVERY_INFOLOG(ctx, "sent discovery log page command\n");
 : 	}
6943 :
6944 : static void
6945 0 : discovery_aer_cb(void *arg, const struct spdk_nvme_cpl *cpl)
6946 : {
6947 0 : struct discovery_ctx *ctx = arg;
6948 0 : uint32_t log_page_id = (cpl->cdw0 & 0xFF0000) >> 16;
6949 :
6950 0 : if (spdk_nvme_cpl_is_error(cpl)) {
6951 0 : DISCOVERY_ERRLOG(ctx, "aer failed\n");
6952 0 : return;
6953 : }
6954 :
6955 0 : if (log_page_id != SPDK_NVME_LOG_DISCOVERY) {
6956 0 : DISCOVERY_ERRLOG(ctx, "unexpected log page 0x%x\n", log_page_id);
6957 0 : return;
6958 : }
6959 :
6960 0 : DISCOVERY_INFOLOG(ctx, "got aer\n");
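 : 	/* If a log page fetch is already outstanding, just mark it pending so the
 : 	 * log page is fetched again once the current read completes.
 : 	 */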
6961 0 : if (ctx->in_progress) {
6962 0 : ctx->pending = true;
6963 0 : return;
6964 : }
6965 :
6966 0 : get_discovery_log_page(ctx);
6967 : }
6968 :
6969 : static void
6970 0 : discovery_attach_cb(void *cb_ctx, const struct spdk_nvme_transport_id *trid,
6971 : struct spdk_nvme_ctrlr *ctrlr, const struct spdk_nvme_ctrlr_opts *opts)
6972 : {
6973 0 : struct spdk_nvme_ctrlr_opts *user_opts = cb_ctx;
6974 : struct discovery_ctx *ctx;
6975 :
6976 0 : ctx = SPDK_CONTAINEROF(user_opts, struct discovery_ctx, drv_opts);
6977 :
6978 0 : DISCOVERY_INFOLOG(ctx, "discovery ctrlr attached\n");
6979 0 : ctx->probe_ctx = NULL;
6980 0 : ctx->ctrlr = ctrlr;
6981 :
6982 0 : if (ctx->rc != 0) {
6983 0 : DISCOVERY_ERRLOG(ctx, "encountered error while attaching discovery ctrlr: %d\n",
6984 : ctx->rc);
6985 0 : return;
6986 : }
6987 :
6988 0 : spdk_nvme_ctrlr_register_aer_callback(ctx->ctrlr, discovery_aer_cb, ctx);
6989 : }
6990 :
6991 : static int
6992 0 : discovery_poller(void *arg)
6993 : {
6994 0 : struct discovery_ctx *ctx = arg;
6995 : struct spdk_nvme_transport_id *trid;
6996 : int rc;
6997 :
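 : 	/* The poller drives a small state machine: finish an in-flight detach,
 : 	 * handle a stop request, start a connect to the next discovery endpoint,
 : 	 * poll an in-flight connect, or service the connected ctrlr's admin queue.
 : 	 */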
6998 0 : if (ctx->detach_ctx) {
6999 0 : rc = spdk_nvme_detach_poll_async(ctx->detach_ctx);
7000 0 : if (rc != -EAGAIN) {
7001 0 : ctx->detach_ctx = NULL;
7002 0 : ctx->ctrlr = NULL;
7003 : }
7004 0 : } else if (ctx->stop) {
7005 0 : if (ctx->ctrlr != NULL) {
7006 0 : rc = spdk_nvme_detach_async(ctx->ctrlr, &ctx->detach_ctx);
7007 0 : if (rc == 0) {
7008 0 : return SPDK_POLLER_BUSY;
7009 : }
7010 0 : DISCOVERY_ERRLOG(ctx, "could not detach discovery ctrlr\n");
7011 : }
7012 0 : spdk_poller_unregister(&ctx->poller);
7013 0 : TAILQ_REMOVE(&g_discovery_ctxs, ctx, tailq);
7014 0 : assert(ctx->start_cb_fn == NULL);
7015 0 : if (ctx->stop_cb_fn != NULL) {
7016 0 : ctx->stop_cb_fn(ctx->cb_ctx);
7017 : }
7018 0 : free_discovery_ctx(ctx);
7019 0 : } else if (ctx->probe_ctx == NULL && ctx->ctrlr == NULL) {
7020 0 : if (ctx->timeout_ticks != 0 && ctx->timeout_ticks < spdk_get_ticks()) {
7021 0 : DISCOVERY_ERRLOG(ctx, "timed out while attaching discovery ctrlr\n");
7022 0 : assert(ctx->initializing);
7023 0 : spdk_poller_unregister(&ctx->poller);
7024 0 : TAILQ_REMOVE(&g_discovery_ctxs, ctx, tailq);
7025 0 : complete_discovery_start(ctx, -ETIMEDOUT);
7026 0 : stop_discovery(ctx, NULL, NULL);
7027 0 : free_discovery_ctx(ctx);
7028 0 : return SPDK_POLLER_BUSY;
7029 : }
7030 :
7031 0 : assert(ctx->entry_ctx_in_use == NULL);
7032 0 : ctx->entry_ctx_in_use = TAILQ_FIRST(&ctx->discovery_entry_ctxs);
7033 0 : TAILQ_REMOVE(&ctx->discovery_entry_ctxs, ctx->entry_ctx_in_use, tailq);
7034 0 : trid = &ctx->entry_ctx_in_use->trid;
7035 0 : ctx->probe_ctx = spdk_nvme_connect_async(trid, &ctx->drv_opts, discovery_attach_cb);
7036 0 : if (ctx->probe_ctx) {
7037 0 : spdk_poller_unregister(&ctx->poller);
7038 0 : ctx->poller = SPDK_POLLER_REGISTER(discovery_poller, ctx, 1000);
7039 : } else {
7040 0 : DISCOVERY_ERRLOG(ctx, "could not start discovery connect\n");
7041 0 : TAILQ_INSERT_TAIL(&ctx->discovery_entry_ctxs, ctx->entry_ctx_in_use, tailq);
7042 0 : ctx->entry_ctx_in_use = NULL;
7043 : }
7044 0 : } else if (ctx->probe_ctx) {
7045 0 : if (ctx->timeout_ticks != 0 && ctx->timeout_ticks < spdk_get_ticks()) {
7046 0 : DISCOVERY_ERRLOG(ctx, "timed out while attaching discovery ctrlr\n");
7047 0 : complete_discovery_start(ctx, -ETIMEDOUT);
7048 0 : return SPDK_POLLER_BUSY;
7049 : }
7050 :
7051 0 : rc = spdk_nvme_probe_poll_async(ctx->probe_ctx);
7052 0 : if (rc != -EAGAIN) {
7053 0 : if (ctx->rc != 0) {
7054 0 : assert(ctx->initializing);
7055 0 : stop_discovery(ctx, NULL, ctx->cb_ctx);
7056 : } else {
7057 0 : assert(rc == 0);
7058 0 : DISCOVERY_INFOLOG(ctx, "discovery ctrlr connected\n");
7059 0 : ctx->rc = rc;
7060 0 : get_discovery_log_page(ctx);
7061 : }
7062 : }
7063 : } else {
7064 0 : if (ctx->timeout_ticks != 0 && ctx->timeout_ticks < spdk_get_ticks()) {
7065 0 : DISCOVERY_ERRLOG(ctx, "timed out while attaching NVM ctrlrs\n");
7066 0 : complete_discovery_start(ctx, -ETIMEDOUT);
7067 : /* We need to wait until all NVM ctrlrs are attached before we stop the
7068 : * discovery service to make sure we don't detach a ctrlr that is still
7069 : * being attached.
7070 : */
7071 0 : if (ctx->attach_in_progress == 0) {
7072 0 : stop_discovery(ctx, NULL, ctx->cb_ctx);
7073 0 : return SPDK_POLLER_BUSY;
7074 : }
7075 : }
7076 :
7077 0 : rc = spdk_nvme_ctrlr_process_admin_completions(ctx->ctrlr);
7078 0 : if (rc < 0) {
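 : 			/* The admin queue failed; slow the poller down to once per second,
 : 			 * return the endpoint to the list, and detach so that the next
 : 			 * pass reconnects to a discovery endpoint.
 : 			 */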
7079 0 : spdk_poller_unregister(&ctx->poller);
7080 0 : ctx->poller = SPDK_POLLER_REGISTER(discovery_poller, ctx, 1000 * 1000);
7081 0 : TAILQ_INSERT_TAIL(&ctx->discovery_entry_ctxs, ctx->entry_ctx_in_use, tailq);
7082 0 : ctx->entry_ctx_in_use = NULL;
7083 :
7084 0 : rc = spdk_nvme_detach_async(ctx->ctrlr, &ctx->detach_ctx);
7085 0 : if (rc != 0) {
7086 0 : DISCOVERY_ERRLOG(ctx, "could not detach discovery ctrlr\n");
7087 0 : ctx->ctrlr = NULL;
7088 : }
7089 : }
7090 : }
7091 :
7092 0 : return SPDK_POLLER_BUSY;
7093 : }
7094 :
7095 : static void
7096 0 : start_discovery_poller(void *arg)
7097 : {
7098 0 : struct discovery_ctx *ctx = arg;
7099 :
7100 0 : TAILQ_INSERT_TAIL(&g_discovery_ctxs, ctx, tailq);
7101 0 : ctx->poller = SPDK_POLLER_REGISTER(discovery_poller, ctx, 1000 * 1000);
7102 0 : }
7103 :
7104 : int
7105 0 : bdev_nvme_start_discovery(struct spdk_nvme_transport_id *trid,
7106 : const char *base_name,
7107 : struct spdk_nvme_ctrlr_opts *drv_opts,
7108 : struct nvme_ctrlr_opts *bdev_opts,
7109 : uint64_t attach_timeout,
7110 : bool from_mdns,
7111 : spdk_bdev_nvme_start_discovery_fn cb_fn, void *cb_ctx)
7112 : {
7113 : struct discovery_ctx *ctx;
7114 : struct discovery_entry_ctx *discovery_entry_ctx;
7115 :
7116 0 : snprintf(trid->subnqn, sizeof(trid->subnqn), "%s", SPDK_NVMF_DISCOVERY_NQN);
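 : 	/* Reject duplicates: an existing discovery service with the same name, or
 : 	 * one that already uses this discovery endpoint.
 : 	 */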
7117 0 : TAILQ_FOREACH(ctx, &g_discovery_ctxs, tailq) {
7118 0 : if (strcmp(ctx->name, base_name) == 0) {
7119 0 : return -EEXIST;
7120 : }
7121 :
7122 0 : if (ctx->entry_ctx_in_use != NULL) {
7123 0 : if (!spdk_nvme_transport_id_compare(trid, &ctx->entry_ctx_in_use->trid)) {
7124 0 : return -EEXIST;
7125 : }
7126 : }
7127 :
7128 0 : TAILQ_FOREACH(discovery_entry_ctx, &ctx->discovery_entry_ctxs, tailq) {
7129 0 : if (!spdk_nvme_transport_id_compare(trid, &discovery_entry_ctx->trid)) {
7130 0 : return -EEXIST;
7131 : }
7132 : }
7133 : }
7134 :
7135 0 : ctx = calloc(1, sizeof(*ctx));
7136 0 : if (ctx == NULL) {
7137 0 : return -ENOMEM;
7138 : }
7139 :
7140 0 : ctx->name = strdup(base_name);
7141 0 : if (ctx->name == NULL) {
7142 0 : free_discovery_ctx(ctx);
7143 0 : return -ENOMEM;
7144 : }
7145 0 : memcpy(&ctx->drv_opts, drv_opts, sizeof(*drv_opts));
7146 0 : memcpy(&ctx->bdev_opts, bdev_opts, sizeof(*bdev_opts));
7147 0 : ctx->from_mdns_discovery_service = from_mdns;
7148 0 : ctx->bdev_opts.from_discovery_service = true;
7149 0 : ctx->calling_thread = spdk_get_thread();
7150 0 : ctx->start_cb_fn = cb_fn;
7151 0 : ctx->cb_ctx = cb_ctx;
7152 0 : ctx->initializing = true;
7153 0 : if (ctx->start_cb_fn) {
7154 : 		/* Remember that the caller waited for attach, so the JSON config dump
7155 : 		 * can reproduce whether this RPC parameter was specified.
7156 : */
7157 0 : ctx->wait_for_attach = true;
7158 : }
7159 0 : if (attach_timeout != 0) {
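 : 		/* attach_timeout is given in milliseconds; convert it to an absolute
 : 		 * deadline in ticks.
 : 		 */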
7160 0 : ctx->timeout_ticks = spdk_get_ticks() + attach_timeout *
7161 0 : spdk_get_ticks_hz() / 1000ull;
7162 : }
7163 0 : TAILQ_INIT(&ctx->nvm_entry_ctxs);
7164 0 : TAILQ_INIT(&ctx->discovery_entry_ctxs);
7165 0 : memcpy(&ctx->trid, trid, sizeof(*trid));
7166 : 	/* Even if the user did not specify hostnqn, drv_opts.hostnqn is always a valid (possibly empty) string, so strdup() is safe. */
7167 0 : ctx->hostnqn = strdup(ctx->drv_opts.hostnqn);
7168 0 : if (ctx->hostnqn == NULL) {
7169 0 : free_discovery_ctx(ctx);
7170 0 : return -ENOMEM;
7171 : }
7172 0 : discovery_entry_ctx = create_discovery_entry_ctx(ctx, trid);
7173 0 : if (discovery_entry_ctx == NULL) {
7174 0 : DISCOVERY_ERRLOG(ctx, "could not allocate new entry_ctx\n");
7175 0 : free_discovery_ctx(ctx);
7176 0 : return -ENOMEM;
7177 : }
7178 :
7179 0 : TAILQ_INSERT_TAIL(&ctx->discovery_entry_ctxs, discovery_entry_ctx, tailq);
7180 0 : spdk_thread_send_msg(g_bdev_nvme_init_thread, start_discovery_poller, ctx);
7181 0 : return 0;
7182 : }
7183 :
7184 : int
7185 0 : bdev_nvme_stop_discovery(const char *name, spdk_bdev_nvme_stop_discovery_fn cb_fn, void *cb_ctx)
7186 : {
7187 : struct discovery_ctx *ctx;
7188 :
7189 0 : TAILQ_FOREACH(ctx, &g_discovery_ctxs, tailq) {
7190 0 : if (strcmp(name, ctx->name) == 0) {
7191 0 : if (ctx->stop) {
7192 0 : return -EALREADY;
7193 : }
7194 : /* If we're still starting the discovery service and ->rc is non-zero, we're
7195 : * going to stop it as soon as we can
7196 : */
7197 0 : if (ctx->initializing && ctx->rc != 0) {
7198 0 : return -EALREADY;
7199 : }
7200 0 : stop_discovery(ctx, cb_fn, cb_ctx);
7201 0 : return 0;
7202 : }
7203 : }
7204 :
7205 0 : return -ENOENT;
7206 : }
7207 :
7208 : static int
7209 1 : bdev_nvme_library_init(void)
7210 : {
7211 1 : g_bdev_nvme_init_thread = spdk_get_thread();
7212 :
7213 1 : spdk_io_device_register(&g_nvme_bdev_ctrlrs, bdev_nvme_create_poll_group_cb,
7214 : bdev_nvme_destroy_poll_group_cb,
7215 : sizeof(struct nvme_poll_group), "nvme_poll_groups");
7216 :
7217 1 : return 0;
7218 : }
7219 :
7220 : static void
7221 1 : bdev_nvme_fini_destruct_ctrlrs(void)
7222 : {
7223 : struct nvme_bdev_ctrlr *nbdev_ctrlr;
7224 : struct nvme_ctrlr *nvme_ctrlr;
7225 :
7226 1 : pthread_mutex_lock(&g_bdev_nvme_mutex);
7227 1 : TAILQ_FOREACH(nbdev_ctrlr, &g_nvme_bdev_ctrlrs, tailq) {
7228 0 : TAILQ_FOREACH(nvme_ctrlr, &nbdev_ctrlr->ctrlrs, tailq) {
7229 0 : pthread_mutex_lock(&nvme_ctrlr->mutex);
7230 0 : if (nvme_ctrlr->destruct) {
7231 : /* This controller's destruction was already started
7232 : * before the application started shutting down
7233 : */
7234 0 : pthread_mutex_unlock(&nvme_ctrlr->mutex);
7235 0 : continue;
7236 : }
7237 0 : nvme_ctrlr->destruct = true;
7238 0 : pthread_mutex_unlock(&nvme_ctrlr->mutex);
7239 :
7240 0 : spdk_thread_send_msg(nvme_ctrlr->thread, _nvme_ctrlr_destruct,
7241 : nvme_ctrlr);
7242 : }
7243 : }
7244 :
7245 1 : g_bdev_nvme_module_finish = true;
7246 1 : if (TAILQ_EMPTY(&g_nvme_bdev_ctrlrs)) {
7247 1 : pthread_mutex_unlock(&g_bdev_nvme_mutex);
7248 1 : spdk_io_device_unregister(&g_nvme_bdev_ctrlrs, NULL);
7249 1 : spdk_bdev_module_fini_done();
7250 1 : return;
7251 : }
7252 :
7253 0 : pthread_mutex_unlock(&g_bdev_nvme_mutex);
7254 : }
7255 :
7256 : static void
7257 0 : check_discovery_fini(void *arg)
7258 : {
7259 0 : if (TAILQ_EMPTY(&g_discovery_ctxs)) {
7260 0 : bdev_nvme_fini_destruct_ctrlrs();
7261 : }
7262 0 : }
7263 :
7264 : static void
7265 1 : bdev_nvme_library_fini(void)
7266 : {
7267 : struct nvme_probe_skip_entry *entry, *entry_tmp;
7268 : struct discovery_ctx *ctx;
7269 :
7270 1 : spdk_poller_unregister(&g_hotplug_poller);
7271 1 : free(g_hotplug_probe_ctx);
7272 1 : g_hotplug_probe_ctx = NULL;
7273 :
7274 1 : TAILQ_FOREACH_SAFE(entry, &g_skipped_nvme_ctrlrs, tailq, entry_tmp) {
7275 0 : TAILQ_REMOVE(&g_skipped_nvme_ctrlrs, entry, tailq);
7276 0 : free(entry);
7277 : }
7278 :
7279 1 : assert(spdk_get_thread() == g_bdev_nvme_init_thread);
7280 1 : if (TAILQ_EMPTY(&g_discovery_ctxs)) {
7281 1 : bdev_nvme_fini_destruct_ctrlrs();
7282 : } else {
7283 0 : TAILQ_FOREACH(ctx, &g_discovery_ctxs, tailq) {
7284 0 : stop_discovery(ctx, check_discovery_fini, NULL);
7285 : }
7286 : }
7287 1 : }
7288 :
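 : /* Re-run the Protection Information check in software after the controller has
 :  * reported a PI error, to locate and log the exact failing block.
 :  */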
7289 : static void
7290 0 : bdev_nvme_verify_pi_error(struct nvme_bdev_io *bio)
7291 : {
7292 0 : struct spdk_bdev_io *bdev_io = spdk_bdev_io_from_ctx(bio);
7293 0 : struct spdk_bdev *bdev = bdev_io->bdev;
7294 0 : struct spdk_dif_ctx dif_ctx;
7295 0 : struct spdk_dif_error err_blk = {};
7296 : int rc;
7297 0 : struct spdk_dif_ctx_init_ext_opts dif_opts;
7298 :
7299 0 : dif_opts.size = SPDK_SIZEOF(&dif_opts, dif_pi_format);
7300 0 : dif_opts.dif_pi_format = SPDK_DIF_PI_FORMAT_16;
7301 0 : rc = spdk_dif_ctx_init(&dif_ctx,
7302 0 : bdev->blocklen, bdev->md_len, bdev->md_interleave,
7303 0 : bdev->dif_is_head_of_md, bdev->dif_type,
7304 : bdev_io->u.bdev.dif_check_flags,
7305 0 : bdev_io->u.bdev.offset_blocks, 0, 0, 0, 0, &dif_opts);
7306 0 : if (rc != 0) {
7307 0 : SPDK_ERRLOG("Initialization of DIF context failed\n");
7308 0 : return;
7309 : }
7310 :
7311 0 : if (bdev->md_interleave) {
7312 0 : rc = spdk_dif_verify(bdev_io->u.bdev.iovs, bdev_io->u.bdev.iovcnt,
7313 0 : bdev_io->u.bdev.num_blocks, &dif_ctx, &err_blk);
7314 : } else {
7315 0 : struct iovec md_iov = {
7316 0 : .iov_base = bdev_io->u.bdev.md_buf,
7317 0 : .iov_len = bdev_io->u.bdev.num_blocks * bdev->md_len,
7318 : };
7319 :
7320 0 : rc = spdk_dix_verify(bdev_io->u.bdev.iovs, bdev_io->u.bdev.iovcnt,
7321 0 : &md_iov, bdev_io->u.bdev.num_blocks, &dif_ctx, &err_blk);
7322 : }
7323 :
7324 0 : if (rc != 0) {
7325 0 : SPDK_ERRLOG("DIF error detected. type=%d, offset=%" PRIu32 "\n",
7326 : err_blk.err_type, err_blk.err_offset);
7327 : } else {
7328 0 : SPDK_ERRLOG("Hardware reported PI error but SPDK could not find any.\n");
7329 : }
7330 : }
7331 :
7332 : static void
7333 0 : bdev_nvme_no_pi_readv_done(void *ref, const struct spdk_nvme_cpl *cpl)
7334 : {
7335 0 : struct nvme_bdev_io *bio = ref;
7336 :
7337 0 : if (spdk_nvme_cpl_is_success(cpl)) {
7338 : /* Run PI verification for read data buffer. */
7339 0 : bdev_nvme_verify_pi_error(bio);
7340 : }
7341 :
7342 : /* Return original completion status */
7343 0 : bdev_nvme_io_complete_nvme_status(bio, &bio->cpl);
7344 0 : }
7345 :
7346 : static void
7347 3 : bdev_nvme_readv_done(void *ref, const struct spdk_nvme_cpl *cpl)
7348 : {
7349 3 : struct nvme_bdev_io *bio = ref;
7350 3 : struct spdk_bdev_io *bdev_io = spdk_bdev_io_from_ctx(bio);
7351 : int ret;
7352 :
7353 3 : if (spdk_unlikely(spdk_nvme_cpl_is_pi_error(cpl))) {
7354 0 : SPDK_ERRLOG("readv completed with PI error (sct=%d, sc=%d)\n",
7355 : cpl->status.sct, cpl->status.sc);
7356 :
7357 : /* Save completion status to use after verifying PI error. */
7358 0 : bio->cpl = *cpl;
7359 :
7360 0 : if (spdk_likely(nvme_io_path_is_available(bio->io_path))) {
7361 : /* Read without PI checking to verify PI error. */
7362 0 : ret = bdev_nvme_no_pi_readv(bio,
7363 : bdev_io->u.bdev.iovs,
7364 : bdev_io->u.bdev.iovcnt,
7365 : bdev_io->u.bdev.md_buf,
7366 : bdev_io->u.bdev.num_blocks,
7367 : bdev_io->u.bdev.offset_blocks);
7368 0 : if (ret == 0) {
7369 0 : return;
7370 : }
7371 : }
7372 : }
7373 :
7374 3 : bdev_nvme_io_complete_nvme_status(bio, cpl);
7375 : }
7376 :
7377 : static void
7378 25 : bdev_nvme_writev_done(void *ref, const struct spdk_nvme_cpl *cpl)
7379 : {
7380 25 : struct nvme_bdev_io *bio = ref;
7381 :
7382 25 : if (spdk_unlikely(spdk_nvme_cpl_is_pi_error(cpl))) {
7383 0 : SPDK_ERRLOG("writev completed with PI error (sct=%d, sc=%d)\n",
7384 : cpl->status.sct, cpl->status.sc);
7385 : /* Run PI verification for write data buffer if PI error is detected. */
7386 0 : bdev_nvme_verify_pi_error(bio);
7387 : }
7388 :
7389 25 : bdev_nvme_io_complete_nvme_status(bio, cpl);
7390 25 : }
7391 :
7392 : static void
7393 0 : bdev_nvme_zone_appendv_done(void *ref, const struct spdk_nvme_cpl *cpl)
7394 : {
7395 0 : struct nvme_bdev_io *bio = ref;
7396 0 : struct spdk_bdev_io *bdev_io = spdk_bdev_io_from_ctx(bio);
7397 :
7398 : /* spdk_bdev_io_get_append_location() requires that the ALBA is stored in offset_blocks.
7399 : * Additionally, offset_blocks has to be set before calling bdev_nvme_verify_pi_error().
7400 : */
7401 0 : bdev_io->u.bdev.offset_blocks = *(uint64_t *)&cpl->cdw0;
7402 :
7403 0 : if (spdk_nvme_cpl_is_pi_error(cpl)) {
7404 0 : SPDK_ERRLOG("zone append completed with PI error (sct=%d, sc=%d)\n",
7405 : cpl->status.sct, cpl->status.sc);
7406 : /* Run PI verification for zone append data buffer if PI error is detected. */
7407 0 : bdev_nvme_verify_pi_error(bio);
7408 : }
7409 :
7410 0 : bdev_nvme_io_complete_nvme_status(bio, cpl);
7411 0 : }
7412 :
7413 : static void
7414 1 : bdev_nvme_comparev_done(void *ref, const struct spdk_nvme_cpl *cpl)
7415 : {
7416 1 : struct nvme_bdev_io *bio = ref;
7417 :
7418 1 : if (spdk_nvme_cpl_is_pi_error(cpl)) {
7419 0 : SPDK_ERRLOG("comparev completed with PI error (sct=%d, sc=%d)\n",
7420 : cpl->status.sct, cpl->status.sc);
7421 : /* Run PI verification for compare data buffer if PI error is detected. */
7422 0 : bdev_nvme_verify_pi_error(bio);
7423 : }
7424 :
7425 1 : bdev_nvme_io_complete_nvme_status(bio, cpl);
7426 1 : }
7427 :
7428 : static void
7429 4 : bdev_nvme_comparev_and_writev_done(void *ref, const struct spdk_nvme_cpl *cpl)
7430 : {
7431 4 : struct nvme_bdev_io *bio = ref;
7432 :
7433 : /* Compare operation completion */
7434 4 : if (!bio->first_fused_completed) {
7435 : /* Save compare result for write callback */
7436 2 : bio->cpl = *cpl;
7437 2 : bio->first_fused_completed = true;
7438 2 : return;
7439 : }
7440 :
7441 : /* Write operation completion */
7442 2 : if (spdk_nvme_cpl_is_error(&bio->cpl)) {
7443 : /* If bio->cpl is already an error, it means the compare operation failed. In that case,
7444 : * complete the IO with the compare operation's status.
7445 : */
7446 1 : if (!spdk_nvme_cpl_is_error(cpl)) {
7447 1 : SPDK_ERRLOG("Unexpected write success after compare failure.\n");
7448 : }
7449 :
7450 1 : bdev_nvme_io_complete_nvme_status(bio, &bio->cpl);
7451 : } else {
7452 1 : bdev_nvme_io_complete_nvme_status(bio, cpl);
7453 : }
7454 : }
7455 :
7456 : static void
7457 1 : bdev_nvme_queued_done(void *ref, const struct spdk_nvme_cpl *cpl)
7458 : {
7459 1 : struct nvme_bdev_io *bio = ref;
7460 :
7461 1 : bdev_nvme_io_complete_nvme_status(bio, cpl);
7462 1 : }
7463 :
7464 : static int
7465 0 : fill_zone_from_report(struct spdk_bdev_zone_info *info, struct spdk_nvme_zns_zone_desc *desc)
7466 : {
7467 0 : switch (desc->zt) {
7468 0 : case SPDK_NVME_ZONE_TYPE_SEQWR:
7469 0 : info->type = SPDK_BDEV_ZONE_TYPE_SEQWR;
7470 0 : break;
7471 0 : default:
7472 0 : SPDK_ERRLOG("Invalid zone type: %#x in zone report\n", desc->zt);
7473 0 : return -EIO;
7474 : }
7475 :
7476 0 : switch (desc->zs) {
7477 0 : case SPDK_NVME_ZONE_STATE_EMPTY:
7478 0 : info->state = SPDK_BDEV_ZONE_STATE_EMPTY;
7479 0 : break;
7480 0 : case SPDK_NVME_ZONE_STATE_IOPEN:
7481 0 : info->state = SPDK_BDEV_ZONE_STATE_IMP_OPEN;
7482 0 : break;
7483 0 : case SPDK_NVME_ZONE_STATE_EOPEN:
7484 0 : info->state = SPDK_BDEV_ZONE_STATE_EXP_OPEN;
7485 0 : break;
7486 0 : case SPDK_NVME_ZONE_STATE_CLOSED:
7487 0 : info->state = SPDK_BDEV_ZONE_STATE_CLOSED;
7488 0 : break;
7489 0 : case SPDK_NVME_ZONE_STATE_RONLY:
7490 0 : info->state = SPDK_BDEV_ZONE_STATE_READ_ONLY;
7491 0 : break;
7492 0 : case SPDK_NVME_ZONE_STATE_FULL:
7493 0 : info->state = SPDK_BDEV_ZONE_STATE_FULL;
7494 0 : break;
7495 0 : case SPDK_NVME_ZONE_STATE_OFFLINE:
7496 0 : info->state = SPDK_BDEV_ZONE_STATE_OFFLINE;
7497 0 : break;
7498 0 : default:
7499 0 : SPDK_ERRLOG("Invalid zone state: %#x in zone report\n", desc->zs);
7500 0 : return -EIO;
7501 : }
7502 :
7503 0 : info->zone_id = desc->zslba;
7504 0 : info->write_pointer = desc->wp;
7505 0 : info->capacity = desc->zcap;
7506 :
7507 0 : return 0;
7508 : }
7509 :
7510 : static void
7511 0 : bdev_nvme_get_zone_info_done(void *ref, const struct spdk_nvme_cpl *cpl)
7512 : {
7513 0 : struct nvme_bdev_io *bio = ref;
7514 0 : struct spdk_bdev_io *bdev_io = spdk_bdev_io_from_ctx(bio);
7515 0 : uint64_t zone_id = bdev_io->u.zone_mgmt.zone_id;
7516 0 : uint32_t zones_to_copy = bdev_io->u.zone_mgmt.num_zones;
7517 0 : struct spdk_bdev_zone_info *info = bdev_io->u.zone_mgmt.buf;
7518 : uint64_t max_zones_per_buf, i;
7519 : uint32_t zone_report_bufsize;
7520 : struct spdk_nvme_ns *ns;
7521 : struct spdk_nvme_qpair *qpair;
7522 : int ret;
7523 :
7524 0 : if (spdk_nvme_cpl_is_error(cpl)) {
7525 0 : goto out_complete_io_nvme_cpl;
7526 : }
7527 :
7528 0 : if (spdk_unlikely(!nvme_io_path_is_available(bio->io_path))) {
7529 0 : ret = -ENXIO;
7530 0 : goto out_complete_io_ret;
7531 : }
7532 :
7533 0 : ns = bio->io_path->nvme_ns->ns;
7534 0 : qpair = bio->io_path->qpair->qpair;
7535 :
7536 0 : zone_report_bufsize = spdk_nvme_ns_get_max_io_xfer_size(ns);
7537 0 : max_zones_per_buf = (zone_report_bufsize - sizeof(*bio->zone_report_buf)) /
7538 : sizeof(bio->zone_report_buf->descs[0]);
7539 :
7540 0 : if (bio->zone_report_buf->nr_zones > max_zones_per_buf) {
7541 0 : ret = -EINVAL;
7542 0 : goto out_complete_io_ret;
7543 : }
7544 :
7545 0 : if (!bio->zone_report_buf->nr_zones) {
7546 0 : ret = -EINVAL;
7547 0 : goto out_complete_io_ret;
7548 : }
7549 :
7550 0 : for (i = 0; i < bio->zone_report_buf->nr_zones && bio->handled_zones < zones_to_copy; i++) {
7551 0 : ret = fill_zone_from_report(&info[bio->handled_zones],
7552 0 : &bio->zone_report_buf->descs[i]);
7553 0 : if (ret) {
7554 0 : goto out_complete_io_ret;
7555 : }
7556 0 : bio->handled_zones++;
7557 : }
7558 :
7559 0 : if (bio->handled_zones < zones_to_copy) {
7560 0 : uint64_t zone_size_lba = spdk_nvme_zns_ns_get_zone_size_sectors(ns);
7561 0 : uint64_t slba = zone_id + (zone_size_lba * bio->handled_zones);
7562 :
7563 0 : memset(bio->zone_report_buf, 0, zone_report_bufsize);
7564 0 : ret = spdk_nvme_zns_report_zones(ns, qpair,
7565 0 : bio->zone_report_buf, zone_report_bufsize,
7566 : slba, SPDK_NVME_ZRA_LIST_ALL, true,
7567 : bdev_nvme_get_zone_info_done, bio);
7568 0 : if (!ret) {
7569 0 : return;
7570 : } else {
7571 0 : goto out_complete_io_ret;
7572 : }
7573 : }
7574 :
7575 0 : out_complete_io_nvme_cpl:
7576 0 : free(bio->zone_report_buf);
7577 0 : bio->zone_report_buf = NULL;
7578 0 : bdev_nvme_io_complete_nvme_status(bio, cpl);
7579 0 : return;
7580 :
7581 0 : out_complete_io_ret:
7582 0 : free(bio->zone_report_buf);
7583 0 : bio->zone_report_buf = NULL;
7584 0 : bdev_nvme_io_complete(bio, ret);
7585 : }
7586 :
7587 : static void
7588 0 : bdev_nvme_zone_management_done(void *ref, const struct spdk_nvme_cpl *cpl)
7589 : {
7590 0 : struct nvme_bdev_io *bio = ref;
7591 :
7592 0 : bdev_nvme_io_complete_nvme_status(bio, cpl);
7593 0 : }
7594 :
7595 : static void
7596 4 : bdev_nvme_admin_passthru_complete_nvme_status(void *ctx)
7597 : {
7598 4 : struct nvme_bdev_io *bio = ctx;
7599 4 : struct spdk_bdev_io *bdev_io = spdk_bdev_io_from_ctx(bio);
7600 4 : const struct spdk_nvme_cpl *cpl = &bio->cpl;
7601 :
7602 4 : assert(bdev_nvme_io_type_is_admin(bdev_io->type));
7603 :
7604 4 : __bdev_nvme_io_complete(bdev_io, 0, cpl);
7605 4 : }
7606 :
7607 : static void
7608 3 : bdev_nvme_abort_complete(void *ctx)
7609 : {
7610 3 : struct nvme_bdev_io *bio = ctx;
7611 3 : struct spdk_bdev_io *bdev_io = spdk_bdev_io_from_ctx(bio);
7612 :
7613 3 : if (spdk_nvme_cpl_is_abort_success(&bio->cpl)) {
7614 3 : __bdev_nvme_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_SUCCESS, NULL);
7615 : } else {
7616 0 : __bdev_nvme_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED, NULL);
7617 : }
7618 3 : }
7619 :
7620 : static void
7621 3 : bdev_nvme_abort_done(void *ref, const struct spdk_nvme_cpl *cpl)
7622 : {
7623 3 : struct nvme_bdev_io *bio = ref;
7624 3 : struct spdk_bdev_io *bdev_io = spdk_bdev_io_from_ctx(bio);
7625 :
7626 3 : bio->cpl = *cpl;
7627 3 : spdk_thread_send_msg(spdk_bdev_io_get_thread(bdev_io), bdev_nvme_abort_complete, bio);
7628 3 : }
7629 :
7630 : static void
7631 4 : bdev_nvme_admin_passthru_done(void *ref, const struct spdk_nvme_cpl *cpl)
7632 : {
7633 4 : struct nvme_bdev_io *bio = ref;
7634 4 : struct spdk_bdev_io *bdev_io = spdk_bdev_io_from_ctx(bio);
7635 :
7636 4 : bio->cpl = *cpl;
7637 4 : spdk_thread_send_msg(spdk_bdev_io_get_thread(bdev_io),
7638 : bdev_nvme_admin_passthru_complete_nvme_status, bio);
7639 4 : }
7640 :
7641 : static void
7642 0 : bdev_nvme_queued_reset_sgl(void *ref, uint32_t sgl_offset)
7643 : {
7644 0 : struct nvme_bdev_io *bio = ref;
7645 : struct iovec *iov;
7646 :
7647 0 : bio->iov_offset = sgl_offset;
7648 0 : for (bio->iovpos = 0; bio->iovpos < bio->iovcnt; bio->iovpos++) {
7649 0 : iov = &bio->iovs[bio->iovpos];
7650 0 : if (bio->iov_offset < iov->iov_len) {
7651 0 : break;
7652 : }
7653 :
7654 0 : bio->iov_offset -= iov->iov_len;
7655 : }
7656 0 : }
7657 :
7658 : static int
7659 0 : bdev_nvme_queued_next_sge(void *ref, void **address, uint32_t *length)
7660 : {
7661 0 : struct nvme_bdev_io *bio = ref;
7662 : struct iovec *iov;
7663 :
7664 0 : assert(bio->iovpos < bio->iovcnt);
7665 :
7666 0 : iov = &bio->iovs[bio->iovpos];
7667 :
7668 0 : *address = iov->iov_base;
7669 0 : *length = iov->iov_len;
7670 :
7671 0 : if (bio->iov_offset) {
7672 0 : assert(bio->iov_offset <= iov->iov_len);
7673 0 : *address += bio->iov_offset;
7674 0 : *length -= bio->iov_offset;
7675 : }
7676 :
7677 0 : bio->iov_offset += *length;
7678 0 : if (bio->iov_offset == iov->iov_len) {
7679 0 : bio->iovpos++;
7680 0 : bio->iov_offset = 0;
7681 : }
7682 :
7683 0 : return 0;
7684 : }
7685 :
7686 : static void
7687 0 : bdev_nvme_queued_reset_fused_sgl(void *ref, uint32_t sgl_offset)
7688 : {
7689 0 : struct nvme_bdev_io *bio = ref;
7690 : struct iovec *iov;
7691 :
7692 0 : bio->fused_iov_offset = sgl_offset;
7693 0 : for (bio->fused_iovpos = 0; bio->fused_iovpos < bio->fused_iovcnt; bio->fused_iovpos++) {
7694 0 : iov = &bio->fused_iovs[bio->fused_iovpos];
7695 0 : if (bio->fused_iov_offset < iov->iov_len) {
7696 0 : break;
7697 : }
7698 :
7699 0 : bio->fused_iov_offset -= iov->iov_len;
7700 : }
7701 0 : }
7702 :
7703 : static int
7704 0 : bdev_nvme_queued_next_fused_sge(void *ref, void **address, uint32_t *length)
7705 : {
7706 0 : struct nvme_bdev_io *bio = ref;
7707 : struct iovec *iov;
7708 :
7709 0 : assert(bio->fused_iovpos < bio->fused_iovcnt);
7710 :
7711 0 : iov = &bio->fused_iovs[bio->fused_iovpos];
7712 :
7713 0 : *address = iov->iov_base;
7714 0 : *length = iov->iov_len;
7715 :
7716 0 : if (bio->fused_iov_offset) {
7717 0 : assert(bio->fused_iov_offset <= iov->iov_len);
7718 0 : *address += bio->fused_iov_offset;
7719 0 : *length -= bio->fused_iov_offset;
7720 : }
7721 :
7722 0 : bio->fused_iov_offset += *length;
7723 0 : if (bio->fused_iov_offset == iov->iov_len) {
7724 0 : bio->fused_iovpos++;
7725 0 : bio->fused_iov_offset = 0;
7726 : }
7727 :
7728 0 : return 0;
7729 : }
7730 :
7731 : static int
7732 0 : bdev_nvme_no_pi_readv(struct nvme_bdev_io *bio, struct iovec *iov, int iovcnt,
7733 : void *md, uint64_t lba_count, uint64_t lba)
7734 : {
7735 : int rc;
7736 :
7737 0 : SPDK_DEBUGLOG(bdev_nvme, "read %" PRIu64 " blocks with offset %#" PRIx64 " without PI check\n",
7738 : lba_count, lba);
7739 :
7740 0 : bio->iovs = iov;
7741 0 : bio->iovcnt = iovcnt;
7742 0 : bio->iovpos = 0;
7743 0 : bio->iov_offset = 0;
7744 :
7745 0 : rc = spdk_nvme_ns_cmd_readv_with_md(bio->io_path->nvme_ns->ns,
7746 0 : bio->io_path->qpair->qpair,
7747 : lba, lba_count,
7748 : bdev_nvme_no_pi_readv_done, bio, 0,
7749 : bdev_nvme_queued_reset_sgl, bdev_nvme_queued_next_sge,
7750 : md, 0, 0);
7751 :
7752 0 : if (rc != 0 && rc != -ENOMEM) {
7753 0 : SPDK_ERRLOG("no_pi_readv failed: rc = %d\n", rc);
7754 : }
7755 0 : return rc;
7756 : }
7757 :
7758 : static int
7759 3 : bdev_nvme_readv(struct nvme_bdev_io *bio, struct iovec *iov, int iovcnt,
7760 : void *md, uint64_t lba_count, uint64_t lba, uint32_t flags,
7761 : struct spdk_memory_domain *domain, void *domain_ctx,
7762 : struct spdk_accel_sequence *seq)
7763 : {
7764 3 : struct spdk_nvme_ns *ns = bio->io_path->nvme_ns->ns;
7765 3 : struct spdk_nvme_qpair *qpair = bio->io_path->qpair->qpair;
7766 : int rc;
7767 :
7768 3 : SPDK_DEBUGLOG(bdev_nvme, "read %" PRIu64 " blocks with offset %#" PRIx64 "\n",
7769 : lba_count, lba);
7770 :
7771 3 : bio->iovs = iov;
7772 3 : bio->iovcnt = iovcnt;
7773 3 : bio->iovpos = 0;
7774 3 : bio->iov_offset = 0;
7775 :
7776 3 : if (domain != NULL || seq != NULL) {
7777 1 : bio->ext_opts.size = SPDK_SIZEOF(&bio->ext_opts, accel_sequence);
7778 1 : bio->ext_opts.memory_domain = domain;
7779 1 : bio->ext_opts.memory_domain_ctx = domain_ctx;
7780 1 : bio->ext_opts.io_flags = flags;
7781 1 : bio->ext_opts.metadata = md;
7782 1 : bio->ext_opts.accel_sequence = seq;
7783 :
7784 1 : if (iovcnt == 1) {
7785 1 : rc = spdk_nvme_ns_cmd_read_ext(ns, qpair, iov[0].iov_base, lba, lba_count, bdev_nvme_readv_done,
7786 : bio, &bio->ext_opts);
7787 : } else {
7788 0 : rc = spdk_nvme_ns_cmd_readv_ext(ns, qpair, lba, lba_count,
7789 : bdev_nvme_readv_done, bio,
7790 : bdev_nvme_queued_reset_sgl,
7791 : bdev_nvme_queued_next_sge,
7792 : &bio->ext_opts);
7793 : }
7794 2 : } else if (iovcnt == 1) {
7795 2 : rc = spdk_nvme_ns_cmd_read_with_md(ns, qpair, iov[0].iov_base,
7796 : md, lba, lba_count, bdev_nvme_readv_done,
7797 : bio, flags, 0, 0);
7798 : } else {
7799 0 : rc = spdk_nvme_ns_cmd_readv_with_md(ns, qpair, lba, lba_count,
7800 : bdev_nvme_readv_done, bio, flags,
7801 : bdev_nvme_queued_reset_sgl,
7802 : bdev_nvme_queued_next_sge, md, 0, 0);
7803 : }
7804 :
7805 3 : if (spdk_unlikely(rc != 0 && rc != -ENOMEM)) {
7806 0 : SPDK_ERRLOG("readv failed: rc = %d\n", rc);
7807 : }
7808 3 : return rc;
7809 : }
7810 :
7811 : static int
7812 25 : bdev_nvme_writev(struct nvme_bdev_io *bio, struct iovec *iov, int iovcnt,
7813 : void *md, uint64_t lba_count, uint64_t lba, uint32_t flags,
7814 : struct spdk_memory_domain *domain, void *domain_ctx,
7815 : struct spdk_accel_sequence *seq,
7816 : union spdk_bdev_nvme_cdw12 cdw12, union spdk_bdev_nvme_cdw13 cdw13)
7817 : {
7818 25 : struct spdk_nvme_ns *ns = bio->io_path->nvme_ns->ns;
7819 25 : struct spdk_nvme_qpair *qpair = bio->io_path->qpair->qpair;
7820 : int rc;
7821 :
7822 25 : SPDK_DEBUGLOG(bdev_nvme, "write %" PRIu64 " blocks with offset %#" PRIx64 "\n",
7823 : lba_count, lba);
7824 :
7825 25 : bio->iovs = iov;
7826 25 : bio->iovcnt = iovcnt;
7827 25 : bio->iovpos = 0;
7828 25 : bio->iov_offset = 0;
7829 :
7830 25 : if (domain != NULL || seq != NULL) {
7831 0 : bio->ext_opts.size = SPDK_SIZEOF(&bio->ext_opts, accel_sequence);
7832 0 : bio->ext_opts.memory_domain = domain;
7833 0 : bio->ext_opts.memory_domain_ctx = domain_ctx;
7834 0 : bio->ext_opts.io_flags = flags | SPDK_NVME_IO_FLAGS_DIRECTIVE(cdw12.write.dtype);
7835 0 : bio->ext_opts.cdw13 = cdw13.raw;
7836 0 : bio->ext_opts.metadata = md;
7837 0 : bio->ext_opts.accel_sequence = seq;
7838 :
7839 0 : if (iovcnt == 1) {
7840 0 : rc = spdk_nvme_ns_cmd_write_ext(ns, qpair, iov[0].iov_base, lba, lba_count, bdev_nvme_writev_done,
7841 : bio, &bio->ext_opts);
7842 : } else {
7843 0 : rc = spdk_nvme_ns_cmd_writev_ext(ns, qpair, lba, lba_count,
7844 : bdev_nvme_writev_done, bio,
7845 : bdev_nvme_queued_reset_sgl,
7846 : bdev_nvme_queued_next_sge,
7847 : &bio->ext_opts);
7848 : }
7849 25 : } else if (iovcnt == 1) {
7850 25 : rc = spdk_nvme_ns_cmd_write_with_md(ns, qpair, iov[0].iov_base,
7851 : md, lba, lba_count, bdev_nvme_writev_done,
7852 : bio, flags, 0, 0);
7853 : } else {
7854 0 : rc = spdk_nvme_ns_cmd_writev_with_md(ns, qpair, lba, lba_count,
7855 : bdev_nvme_writev_done, bio, flags,
7856 : bdev_nvme_queued_reset_sgl,
7857 : bdev_nvme_queued_next_sge, md, 0, 0);
7858 : }
7859 :
7860 25 : if (spdk_unlikely(rc != 0 && rc != -ENOMEM)) {
7861 0 : SPDK_ERRLOG("writev failed: rc = %d\n", rc);
7862 : }
7863 25 : return rc;
7864 : }
7865 :
7866 : static int
7867 0 : bdev_nvme_zone_appendv(struct nvme_bdev_io *bio, struct iovec *iov, int iovcnt,
7868 : void *md, uint64_t lba_count, uint64_t zslba,
7869 : uint32_t flags)
7870 : {
7871 0 : struct spdk_nvme_ns *ns = bio->io_path->nvme_ns->ns;
7872 0 : struct spdk_nvme_qpair *qpair = bio->io_path->qpair->qpair;
7873 : int rc;
7874 :
7875 0 : SPDK_DEBUGLOG(bdev_nvme, "zone append %" PRIu64 " blocks to zone start lba %#" PRIx64 "\n",
7876 : lba_count, zslba);
7877 :
7878 0 : bio->iovs = iov;
7879 0 : bio->iovcnt = iovcnt;
7880 0 : bio->iovpos = 0;
7881 0 : bio->iov_offset = 0;
7882 :
7883 0 : if (iovcnt == 1) {
7884 0 : rc = spdk_nvme_zns_zone_append_with_md(ns, qpair, iov[0].iov_base, md, zslba,
7885 : lba_count,
7886 : bdev_nvme_zone_appendv_done, bio,
7887 : flags,
7888 : 0, 0);
7889 : } else {
7890 0 : rc = spdk_nvme_zns_zone_appendv_with_md(ns, qpair, zslba, lba_count,
7891 : bdev_nvme_zone_appendv_done, bio, flags,
7892 : bdev_nvme_queued_reset_sgl, bdev_nvme_queued_next_sge,
7893 : md, 0, 0);
7894 : }
7895 :
7896 0 : if (rc != 0 && rc != -ENOMEM) {
7897 0 : SPDK_ERRLOG("zone append failed: rc = %d\n", rc);
7898 : }
7899 0 : return rc;
7900 : }
7901 :
7902 : static int
7903 1 : bdev_nvme_comparev(struct nvme_bdev_io *bio, struct iovec *iov, int iovcnt,
7904 : void *md, uint64_t lba_count, uint64_t lba,
7905 : uint32_t flags)
7906 : {
7907 : int rc;
7908 :
7909 1 : SPDK_DEBUGLOG(bdev_nvme, "compare %" PRIu64 " blocks with offset %#" PRIx64 "\n",
7910 : lba_count, lba);
7911 :
7912 1 : bio->iovs = iov;
7913 1 : bio->iovcnt = iovcnt;
7914 1 : bio->iovpos = 0;
7915 1 : bio->iov_offset = 0;
7916 :
7917 1 : rc = spdk_nvme_ns_cmd_comparev_with_md(bio->io_path->nvme_ns->ns,
7918 1 : bio->io_path->qpair->qpair,
7919 : lba, lba_count,
7920 : bdev_nvme_comparev_done, bio, flags,
7921 : bdev_nvme_queued_reset_sgl, bdev_nvme_queued_next_sge,
7922 : md, 0, 0);
7923 :
7924 1 : if (rc != 0 && rc != -ENOMEM) {
7925 0 : SPDK_ERRLOG("comparev failed: rc = %d\n", rc);
7926 : }
7927 1 : return rc;
7928 : }
7929 :
7930 : static int
7931 2 : bdev_nvme_comparev_and_writev(struct nvme_bdev_io *bio, struct iovec *cmp_iov, int cmp_iovcnt,
7932 : struct iovec *write_iov, int write_iovcnt,
7933 : void *md, uint64_t lba_count, uint64_t lba, uint32_t flags)
7934 : {
7935 2 : struct spdk_nvme_ns *ns = bio->io_path->nvme_ns->ns;
7936 2 : struct spdk_nvme_qpair *qpair = bio->io_path->qpair->qpair;
7937 2 : struct spdk_bdev_io *bdev_io = spdk_bdev_io_from_ctx(bio);
7938 : int rc;
7939 :
7940 2 : SPDK_DEBUGLOG(bdev_nvme, "compare and write %" PRIu64 " blocks with offset %#" PRIx64 "\n",
7941 : lba_count, lba);
7942 :
7943 2 : bio->iovs = cmp_iov;
7944 2 : bio->iovcnt = cmp_iovcnt;
7945 2 : bio->iovpos = 0;
7946 2 : bio->iov_offset = 0;
7947 2 : bio->fused_iovs = write_iov;
7948 2 : bio->fused_iovcnt = write_iovcnt;
7949 2 : bio->fused_iovpos = 0;
7950 2 : bio->fused_iov_offset = 0;
7951 :
7952 2 : if (bdev_io->num_retries == 0) {
7953 2 : bio->first_fused_submitted = false;
7954 2 : bio->first_fused_completed = false;
7955 : }
7956 :
7957 2 : if (!bio->first_fused_submitted) {
7958 2 : flags |= SPDK_NVME_IO_FLAGS_FUSE_FIRST;
7959 2 : memset(&bio->cpl, 0, sizeof(bio->cpl));
7960 :
7961 2 : rc = spdk_nvme_ns_cmd_comparev_with_md(ns, qpair, lba, lba_count,
7962 : bdev_nvme_comparev_and_writev_done, bio, flags,
7963 : bdev_nvme_queued_reset_sgl, bdev_nvme_queued_next_sge, md, 0, 0);
7964 2 : if (rc == 0) {
7965 2 : bio->first_fused_submitted = true;
7966 2 : flags &= ~SPDK_NVME_IO_FLAGS_FUSE_FIRST;
7967 : } else {
7968 0 : if (rc != -ENOMEM) {
7969 0 : SPDK_ERRLOG("compare failed: rc = %d\n", rc);
7970 : }
7971 0 : return rc;
7972 : }
7973 : }
7974 :
7975 2 : flags |= SPDK_NVME_IO_FLAGS_FUSE_SECOND;
7976 :
7977 2 : rc = spdk_nvme_ns_cmd_writev_with_md(ns, qpair, lba, lba_count,
7978 : bdev_nvme_comparev_and_writev_done, bio, flags,
7979 : bdev_nvme_queued_reset_fused_sgl, bdev_nvme_queued_next_fused_sge, md, 0, 0);
7980 2 : if (rc != 0 && rc != -ENOMEM) {
7981 0 : SPDK_ERRLOG("write failed: rc = %d\n", rc);
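 : 		/* The fused compare has already been submitted, so this I/O can no
 : 		 * longer fail synchronously; clear rc and let the completion callback
 : 		 * report the final status.
 : 		 */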
7982 0 : rc = 0;
7983 : }
7984 :
7985 2 : return rc;
7986 : }
7987 :
7988 : static int
7989 1 : bdev_nvme_unmap(struct nvme_bdev_io *bio, uint64_t offset_blocks, uint64_t num_blocks)
7990 : {
7991 1 : struct spdk_nvme_dsm_range dsm_ranges[SPDK_NVME_DATASET_MANAGEMENT_MAX_RANGES];
7992 : struct spdk_nvme_dsm_range *range;
7993 : uint64_t offset, remaining;
7994 : uint64_t num_ranges_u64;
7995 : uint16_t num_ranges;
7996 : int rc;
7997 :
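 : 	/* Round up: each DSM range describes at most
 : 	 * SPDK_NVME_DATASET_MANAGEMENT_RANGE_MAX_BLOCKS blocks.
 : 	 */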
7998 1 : num_ranges_u64 = (num_blocks + SPDK_NVME_DATASET_MANAGEMENT_RANGE_MAX_BLOCKS - 1) /
7999 : SPDK_NVME_DATASET_MANAGEMENT_RANGE_MAX_BLOCKS;
8000 1 : if (num_ranges_u64 > SPDK_COUNTOF(dsm_ranges)) {
8001 0 : SPDK_ERRLOG("Unmap request for %" PRIu64 " blocks is too large\n", num_blocks);
8002 0 : return -EINVAL;
8003 : }
8004 1 : num_ranges = (uint16_t)num_ranges_u64;
8005 :
8006 1 : offset = offset_blocks;
8007 1 : remaining = num_blocks;
8008 1 : range = &dsm_ranges[0];
8009 :
8010 : /* Fill max-size ranges until the remaining blocks fit into one range */
8011 1 : while (remaining > SPDK_NVME_DATASET_MANAGEMENT_RANGE_MAX_BLOCKS) {
8012 0 : range->attributes.raw = 0;
8013 0 : range->length = SPDK_NVME_DATASET_MANAGEMENT_RANGE_MAX_BLOCKS;
8014 0 : range->starting_lba = offset;
8015 :
8016 0 : offset += SPDK_NVME_DATASET_MANAGEMENT_RANGE_MAX_BLOCKS;
8017 0 : remaining -= SPDK_NVME_DATASET_MANAGEMENT_RANGE_MAX_BLOCKS;
8018 0 : range++;
8019 : }
8020 :
8021 : /* Final range describes the remaining blocks */
8022 1 : range->attributes.raw = 0;
8023 1 : range->length = remaining;
8024 1 : range->starting_lba = offset;
8025 :
8026 1 : rc = spdk_nvme_ns_cmd_dataset_management(bio->io_path->nvme_ns->ns,
8027 1 : bio->io_path->qpair->qpair,
8028 : SPDK_NVME_DSM_ATTR_DEALLOCATE,
8029 : dsm_ranges, num_ranges,
8030 : bdev_nvme_queued_done, bio);
8031 :
8032 1 : return rc;
8033 : }
8034 :
8035 : static int
8036 0 : bdev_nvme_write_zeroes(struct nvme_bdev_io *bio, uint64_t offset_blocks, uint64_t num_blocks)
8037 : {
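 : 	/* The Write Zeroes NLB field is a zero-based 16-bit value, so a single
 : 	 * command can clear at most 65536 blocks.
 : 	 */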
8038 0 : if (num_blocks > UINT16_MAX + 1) {
8039 0 : SPDK_ERRLOG("NVMe write zeroes is limited to 16-bit block count\n");
8040 0 : return -EINVAL;
8041 : }
8042 :
8043 0 : return spdk_nvme_ns_cmd_write_zeroes(bio->io_path->nvme_ns->ns,
8044 0 : bio->io_path->qpair->qpair,
8045 : offset_blocks, num_blocks,
8046 : bdev_nvme_queued_done, bio,
8047 : 0);
8048 : }
8049 :
8050 : static int
8051 0 : bdev_nvme_get_zone_info(struct nvme_bdev_io *bio, uint64_t zone_id, uint32_t num_zones,
8052 : struct spdk_bdev_zone_info *info)
8053 : {
8054 0 : struct spdk_nvme_ns *ns = bio->io_path->nvme_ns->ns;
8055 0 : struct spdk_nvme_qpair *qpair = bio->io_path->qpair->qpair;
8056 0 : uint32_t zone_report_bufsize = spdk_nvme_ns_get_max_io_xfer_size(ns);
8057 0 : uint64_t zone_size = spdk_nvme_zns_ns_get_zone_size_sectors(ns);
8058 0 : uint64_t total_zones = spdk_nvme_zns_ns_get_num_zones(ns);
8059 :
8060 0 : if (zone_id % zone_size != 0) {
8061 0 : return -EINVAL;
8062 : }
8063 :
8064 0 : if (num_zones > total_zones || !num_zones) {
8065 0 : return -EINVAL;
8066 : }
8067 :
8068 0 : assert(!bio->zone_report_buf);
8069 0 : bio->zone_report_buf = calloc(1, zone_report_bufsize);
8070 0 : if (!bio->zone_report_buf) {
8071 0 : return -ENOMEM;
8072 : }
8073 :
8074 0 : bio->handled_zones = 0;
8075 :
8076 0 : return spdk_nvme_zns_report_zones(ns, qpair, bio->zone_report_buf, zone_report_bufsize,
8077 : zone_id, SPDK_NVME_ZRA_LIST_ALL, true,
8078 : bdev_nvme_get_zone_info_done, bio);
8079 : }
8080 :
8081 : static int
8082 0 : bdev_nvme_zone_management(struct nvme_bdev_io *bio, uint64_t zone_id,
8083 : enum spdk_bdev_zone_action action)
8084 : {
8085 0 : struct spdk_nvme_ns *ns = bio->io_path->nvme_ns->ns;
8086 0 : struct spdk_nvme_qpair *qpair = bio->io_path->qpair->qpair;
8087 :
8088 0 : switch (action) {
8089 0 : case SPDK_BDEV_ZONE_CLOSE:
8090 0 : return spdk_nvme_zns_close_zone(ns, qpair, zone_id, false,
8091 : bdev_nvme_zone_management_done, bio);
8092 0 : case SPDK_BDEV_ZONE_FINISH:
8093 0 : return spdk_nvme_zns_finish_zone(ns, qpair, zone_id, false,
8094 : bdev_nvme_zone_management_done, bio);
8095 0 : case SPDK_BDEV_ZONE_OPEN:
8096 0 : return spdk_nvme_zns_open_zone(ns, qpair, zone_id, false,
8097 : bdev_nvme_zone_management_done, bio);
8098 0 : case SPDK_BDEV_ZONE_RESET:
8099 0 : return spdk_nvme_zns_reset_zone(ns, qpair, zone_id, false,
8100 : bdev_nvme_zone_management_done, bio);
8101 0 : case SPDK_BDEV_ZONE_OFFLINE:
8102 0 : return spdk_nvme_zns_offline_zone(ns, qpair, zone_id, false,
8103 : bdev_nvme_zone_management_done, bio);
8104 0 : default:
8105 0 : return -EINVAL;
8106 : }
8107 : }
8108 :
8109 : static void
8110 5 : bdev_nvme_admin_passthru(struct nvme_bdev_channel *nbdev_ch, struct nvme_bdev_io *bio,
8111 : struct spdk_nvme_cmd *cmd, void *buf, size_t nbytes)
8112 : {
8113 : struct nvme_io_path *io_path;
8114 : struct nvme_ctrlr *nvme_ctrlr;
8115 : uint32_t max_xfer_size;
8116 5 : int rc = -ENXIO;
8117 :
8118 : 	/* Choose the first ctrlr that is not failed. */
8119 8 : STAILQ_FOREACH(io_path, &nbdev_ch->io_path_list, stailq) {
8120 7 : nvme_ctrlr = io_path->qpair->ctrlr;
8121 :
8122 : 		/* Skip any unavailable nvme_ctrlr up front rather than relying on
8123 : 		 * spdk_nvme_ctrlr_cmd_admin_raw() returning -ENXIO.
8124 : */
8125 7 : if (!nvme_ctrlr_is_available(nvme_ctrlr)) {
8126 3 : continue;
8127 : }
8128 :
8129 4 : max_xfer_size = spdk_nvme_ctrlr_get_max_xfer_size(nvme_ctrlr->ctrlr);
8130 :
8131 4 : if (nbytes > max_xfer_size) {
8132 0 : SPDK_ERRLOG("nbytes is greater than MDTS %" PRIu32 ".\n", max_xfer_size);
8133 0 : rc = -EINVAL;
8134 0 : goto err;
8135 : }
8136 :
8137 4 : rc = spdk_nvme_ctrlr_cmd_admin_raw(nvme_ctrlr->ctrlr, cmd, buf, (uint32_t)nbytes,
8138 : bdev_nvme_admin_passthru_done, bio);
8139 4 : if (rc == 0) {
8140 4 : return;
8141 : }
8142 : }
8143 :
8144 1 : err:
8145 1 : bdev_nvme_admin_complete(bio, rc);
8146 : }
8147 :
8148 : static int
8149 0 : bdev_nvme_io_passthru(struct nvme_bdev_io *bio, struct spdk_nvme_cmd *cmd,
8150 : void *buf, size_t nbytes)
8151 : {
8152 0 : struct spdk_nvme_ns *ns = bio->io_path->nvme_ns->ns;
8153 0 : struct spdk_nvme_qpair *qpair = bio->io_path->qpair->qpair;
8154 0 : uint32_t max_xfer_size = spdk_nvme_ns_get_max_io_xfer_size(ns);
8155 0 : struct spdk_nvme_ctrlr *ctrlr = spdk_nvme_ns_get_ctrlr(ns);
8156 :
8157 0 : if (nbytes > max_xfer_size) {
8158 0 : SPDK_ERRLOG("nbytes is greater than MDTS %" PRIu32 ".\n", max_xfer_size);
8159 0 : return -EINVAL;
8160 : }
8161 :
8162 : /*
8163 : * Each NVMe bdev is a specific namespace, and all NVMe I/O commands require a nsid,
8164 : * so fill it out automatically.
8165 : */
8166 0 : cmd->nsid = spdk_nvme_ns_get_id(ns);
8167 :
8168 0 : return spdk_nvme_ctrlr_cmd_io_raw(ctrlr, qpair, cmd, buf,
8169 : (uint32_t)nbytes, bdev_nvme_queued_done, bio);
8170 : }
8171 :
8172 : static int
8173 0 : bdev_nvme_io_passthru_md(struct nvme_bdev_io *bio, struct spdk_nvme_cmd *cmd,
8174 : void *buf, size_t nbytes, void *md_buf, size_t md_len)
8175 : {
8176 0 : struct spdk_nvme_ns *ns = bio->io_path->nvme_ns->ns;
8177 0 : struct spdk_nvme_qpair *qpair = bio->io_path->qpair->qpair;
8178 0 : size_t nr_sectors = nbytes / spdk_nvme_ns_get_extended_sector_size(ns);
8179 0 : uint32_t max_xfer_size = spdk_nvme_ns_get_max_io_xfer_size(ns);
8180 0 : struct spdk_nvme_ctrlr *ctrlr = spdk_nvme_ns_get_ctrlr(ns);
8181 :
8182 0 : if (nbytes > max_xfer_size) {
8183 0 : SPDK_ERRLOG("nbytes is greater than MDTS %" PRIu32 ".\n", max_xfer_size);
8184 0 : return -EINVAL;
8185 : }
8186 :
8187 0 : if (md_len != nr_sectors * spdk_nvme_ns_get_md_size(ns)) {
8188 0 : 		SPDK_ERRLOG("invalid metadata buffer size\n");
8189 0 : return -EINVAL;
8190 : }
8191 :
8192 : /*
8193 : * Each NVMe bdev is a specific namespace, and all NVMe I/O commands require a nsid,
8194 : * so fill it out automatically.
8195 : */
8196 0 : cmd->nsid = spdk_nvme_ns_get_id(ns);
8197 :
8198 0 : return spdk_nvme_ctrlr_cmd_io_raw_with_md(ctrlr, qpair, cmd, buf,
8199 : (uint32_t)nbytes, md_buf, bdev_nvme_queued_done, bio);
8200 : }
8201 :
8202 : static int
8203 0 : bdev_nvme_iov_passthru_md(struct nvme_bdev_io *bio,
8204 : struct spdk_nvme_cmd *cmd, struct iovec *iov, int iovcnt,
8205 : size_t nbytes, void *md_buf, size_t md_len)
8206 : {
8207 0 : struct spdk_nvme_ns *ns = bio->io_path->nvme_ns->ns;
8208 0 : struct spdk_nvme_qpair *qpair = bio->io_path->qpair->qpair;
8209 0 : size_t nr_sectors = nbytes / spdk_nvme_ns_get_extended_sector_size(ns);
8210 0 : uint32_t max_xfer_size = spdk_nvme_ns_get_max_io_xfer_size(ns);
8211 0 : struct spdk_nvme_ctrlr *ctrlr = spdk_nvme_ns_get_ctrlr(ns);
8212 :
8213 0 : bio->iovs = iov;
8214 0 : bio->iovcnt = iovcnt;
8215 0 : bio->iovpos = 0;
8216 0 : bio->iov_offset = 0;
8217 :
8218 0 : if (nbytes > max_xfer_size) {
8219 0 : SPDK_ERRLOG("nbytes is greater than MDTS %" PRIu32 ".\n", max_xfer_size);
8220 0 : return -EINVAL;
8221 : }
8222 :
8223 0 : if (md_len != nr_sectors * spdk_nvme_ns_get_md_size(ns)) {
8224 0 : 		SPDK_ERRLOG("invalid metadata buffer size\n");
8225 0 : return -EINVAL;
8226 : }
8227 :
8228 : /*
8229 : * Each NVMe bdev is a specific namespace, and all NVMe I/O commands
8230 : * require a nsid, so fill it out automatically.
8231 : */
8232 0 : cmd->nsid = spdk_nvme_ns_get_id(ns);
8233 :
8234 0 : return spdk_nvme_ctrlr_cmd_iov_raw_with_md(
8235 : ctrlr, qpair, cmd, (uint32_t)nbytes, md_buf, bdev_nvme_queued_done, bio,
8236 : bdev_nvme_queued_reset_sgl, bdev_nvme_queued_next_sge);
8237 : }
8238 :
8239 : static void
8240 6 : bdev_nvme_abort(struct nvme_bdev_channel *nbdev_ch, struct nvme_bdev_io *bio,
8241 : struct nvme_bdev_io *bio_to_abort)
8242 : {
8243 : struct nvme_io_path *io_path;
8244 6 : int rc = 0;
8245 :
8246 6 : rc = bdev_nvme_abort_retry_io(nbdev_ch, bio_to_abort);
8247 6 : if (rc == 0) {
8248 1 : bdev_nvme_admin_complete(bio, 0);
8249 1 : return;
8250 : }
8251 :
8252 5 : io_path = bio_to_abort->io_path;
8253 5 : if (io_path != NULL) {
8254 3 : rc = spdk_nvme_ctrlr_cmd_abort_ext(io_path->qpair->ctrlr->ctrlr,
8255 3 : io_path->qpair->qpair,
8256 : bio_to_abort,
8257 : bdev_nvme_abort_done, bio);
8258 : } else {
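 : 		/* The I/O to abort is not bound to any I/O path, which means it is
 : 		 * likely an admin command; try each ctrlr until one finds it
 : 		 * (i.e. does not return -ENOENT).
 : 		 */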
8259 3 : STAILQ_FOREACH(io_path, &nbdev_ch->io_path_list, stailq) {
8260 2 : rc = spdk_nvme_ctrlr_cmd_abort_ext(io_path->qpair->ctrlr->ctrlr,
8261 : NULL,
8262 : bio_to_abort,
8263 : bdev_nvme_abort_done, bio);
8264 :
8265 2 : if (rc != -ENOENT) {
8266 1 : break;
8267 : }
8268 : }
8269 : }
8270 :
8271 5 : if (rc != 0) {
8272 : /* If no command was found or there was any error, complete the abort
8273 : * request with failure.
8274 : */
8275 2 : bdev_nvme_admin_complete(bio, rc);
8276 : }
8277 : }
8278 :
8279 : static int
8280 0 : bdev_nvme_copy(struct nvme_bdev_io *bio, uint64_t dst_offset_blocks, uint64_t src_offset_blocks,
8281 : uint64_t num_blocks)
8282 : {
8283 0 : struct spdk_nvme_scc_source_range range = {
8284 : .slba = src_offset_blocks,
8285 0 : .nlb = num_blocks - 1
8286 : };
8287 :
8288 0 : return spdk_nvme_ns_cmd_copy(bio->io_path->nvme_ns->ns,
8289 0 : bio->io_path->qpair->qpair,
8290 : &range, 1, dst_offset_blocks,
8291 : bdev_nvme_queued_done, bio);
8292 : }
8293 :
8294 : static void
8295 0 : bdev_nvme_opts_config_json(struct spdk_json_write_ctx *w)
8296 : {
8297 : const char *action;
8298 : uint32_t i;
8299 :
8300 0 : if (g_opts.action_on_timeout == SPDK_BDEV_NVME_TIMEOUT_ACTION_RESET) {
8301 0 : action = "reset";
8302 0 : } else if (g_opts.action_on_timeout == SPDK_BDEV_NVME_TIMEOUT_ACTION_ABORT) {
8303 0 : action = "abort";
8304 : } else {
8305 0 : action = "none";
8306 : }
8307 :
8308 0 : spdk_json_write_object_begin(w);
8309 :
8310 0 : spdk_json_write_named_string(w, "method", "bdev_nvme_set_options");
8311 :
8312 0 : spdk_json_write_named_object_begin(w, "params");
8313 0 : spdk_json_write_named_string(w, "action_on_timeout", action);
8314 0 : spdk_json_write_named_uint64(w, "timeout_us", g_opts.timeout_us);
8315 0 : spdk_json_write_named_uint64(w, "timeout_admin_us", g_opts.timeout_admin_us);
8316 0 : spdk_json_write_named_uint32(w, "keep_alive_timeout_ms", g_opts.keep_alive_timeout_ms);
8317 0 : spdk_json_write_named_uint32(w, "arbitration_burst", g_opts.arbitration_burst);
8318 0 : spdk_json_write_named_uint32(w, "low_priority_weight", g_opts.low_priority_weight);
8319 0 : spdk_json_write_named_uint32(w, "medium_priority_weight", g_opts.medium_priority_weight);
8320 0 : spdk_json_write_named_uint32(w, "high_priority_weight", g_opts.high_priority_weight);
8321 0 : spdk_json_write_named_uint64(w, "nvme_adminq_poll_period_us", g_opts.nvme_adminq_poll_period_us);
8322 0 : spdk_json_write_named_uint64(w, "nvme_ioq_poll_period_us", g_opts.nvme_ioq_poll_period_us);
8323 0 : spdk_json_write_named_uint32(w, "io_queue_requests", g_opts.io_queue_requests);
8324 0 : spdk_json_write_named_bool(w, "delay_cmd_submit", g_opts.delay_cmd_submit);
8325 0 : spdk_json_write_named_uint32(w, "transport_retry_count", g_opts.transport_retry_count);
8326 0 : spdk_json_write_named_int32(w, "bdev_retry_count", g_opts.bdev_retry_count);
8327 0 : spdk_json_write_named_uint8(w, "transport_ack_timeout", g_opts.transport_ack_timeout);
8328 0 : spdk_json_write_named_int32(w, "ctrlr_loss_timeout_sec", g_opts.ctrlr_loss_timeout_sec);
8329 0 : spdk_json_write_named_uint32(w, "reconnect_delay_sec", g_opts.reconnect_delay_sec);
8330 0 : spdk_json_write_named_uint32(w, "fast_io_fail_timeout_sec", g_opts.fast_io_fail_timeout_sec);
8331 0 : spdk_json_write_named_bool(w, "disable_auto_failback", g_opts.disable_auto_failback);
8332 0 : spdk_json_write_named_bool(w, "generate_uuids", g_opts.generate_uuids);
8333 0 : spdk_json_write_named_uint8(w, "transport_tos", g_opts.transport_tos);
8334 0 : spdk_json_write_named_bool(w, "nvme_error_stat", g_opts.nvme_error_stat);
8335 0 : spdk_json_write_named_uint32(w, "rdma_srq_size", g_opts.rdma_srq_size);
8336 0 : spdk_json_write_named_bool(w, "io_path_stat", g_opts.io_path_stat);
8337 0 : spdk_json_write_named_bool(w, "allow_accel_sequence", g_opts.allow_accel_sequence);
8338 0 : spdk_json_write_named_uint32(w, "rdma_max_cq_size", g_opts.rdma_max_cq_size);
8339 0 : spdk_json_write_named_uint16(w, "rdma_cm_event_timeout_ms", g_opts.rdma_cm_event_timeout_ms);
8340 0 : spdk_json_write_named_array_begin(w, "dhchap_digests");
8341 0 : for (i = 0; i < 32; ++i) {
8342 0 : if (g_opts.dhchap_digests & SPDK_BIT(i)) {
8343 0 : spdk_json_write_string(w, spdk_nvme_dhchap_get_digest_name(i));
8344 : }
8345 : }
8346 0 : spdk_json_write_array_end(w);
8347 0 : spdk_json_write_named_array_begin(w, "dhchap_dhgroups");
8348 0 : for (i = 0; i < 32; ++i) {
8349 0 : if (g_opts.dhchap_dhgroups & SPDK_BIT(i)) {
8350 0 : spdk_json_write_string(w, spdk_nvme_dhchap_get_dhgroup_name(i));
8351 : }
8352 : 	}
8354 0 : 	spdk_json_write_array_end(w);
8355 0 : spdk_json_write_object_end(w);
8356 :
8357 0 : spdk_json_write_object_end(w);
8358 0 : }
8359 :
8360 : static void
8361 0 : bdev_nvme_discovery_config_json(struct spdk_json_write_ctx *w, struct discovery_ctx *ctx)
8362 : {
8363 0 : struct spdk_nvme_transport_id trid;
8364 :
8365 0 : spdk_json_write_object_begin(w);
8366 :
8367 0 : spdk_json_write_named_string(w, "method", "bdev_nvme_start_discovery");
8368 :
8369 0 : spdk_json_write_named_object_begin(w, "params");
8370 0 : spdk_json_write_named_string(w, "name", ctx->name);
8371 0 : spdk_json_write_named_string(w, "hostnqn", ctx->hostnqn);
8372 :
8373 0 : trid = ctx->trid;
8374 0 : memset(trid.subnqn, 0, sizeof(trid.subnqn));
8375 0 : nvme_bdev_dump_trid_json(&trid, w);
8376 :
8377 0 : spdk_json_write_named_bool(w, "wait_for_attach", ctx->wait_for_attach);
8378 0 : spdk_json_write_named_int32(w, "ctrlr_loss_timeout_sec", ctx->bdev_opts.ctrlr_loss_timeout_sec);
8379 0 : spdk_json_write_named_uint32(w, "reconnect_delay_sec", ctx->bdev_opts.reconnect_delay_sec);
8380 0 : spdk_json_write_named_uint32(w, "fast_io_fail_timeout_sec",
8381 : ctx->bdev_opts.fast_io_fail_timeout_sec);
8382 0 : spdk_json_write_object_end(w);
8383 :
8384 0 : spdk_json_write_object_end(w);
8385 0 : }
8386 :
8387 : #ifdef SPDK_CONFIG_NVME_CUSE
8388 : static void
8389 0 : nvme_ctrlr_cuse_config_json(struct spdk_json_write_ctx *w,
8390 : struct nvme_ctrlr *nvme_ctrlr)
8391 0 : {
8392 0 : size_t cuse_name_size = 128;
8393 0 : char cuse_name[cuse_name_size];
8394 :
8395 0 : if (spdk_nvme_cuse_get_ctrlr_name(nvme_ctrlr->ctrlr,
8396 : cuse_name, &cuse_name_size) != 0) {
8397 0 : return;
8398 : }
8399 :
8400 0 : spdk_json_write_object_begin(w);
8401 :
8402 0 : spdk_json_write_named_string(w, "method", "bdev_nvme_cuse_register");
8403 :
8404 0 : spdk_json_write_named_object_begin(w, "params");
8405 0 : spdk_json_write_named_string(w, "name", nvme_ctrlr->nbdev_ctrlr->name);
8406 0 : spdk_json_write_object_end(w);
8407 :
8408 0 : spdk_json_write_object_end(w);
8409 : }
8410 : #endif
8411 :
8412 : static void
8413 0 : nvme_ctrlr_config_json(struct spdk_json_write_ctx *w,
8414 : struct nvme_ctrlr *nvme_ctrlr)
8415 : {
8416 : struct spdk_nvme_transport_id *trid;
8417 : const struct spdk_nvme_ctrlr_opts *opts;
8418 :
8419 0 : if (nvme_ctrlr->opts.from_discovery_service) {
8420 : /* Do not emit an RPC for this - it will be implicitly
8421 : * covered by a separate bdev_nvme_start_discovery or
8422 : * bdev_nvme_start_mdns_discovery RPC.
8423 : */
8424 0 : return;
8425 : }
8426 :
8427 0 : trid = &nvme_ctrlr->active_path_id->trid;
8428 :
8429 0 : spdk_json_write_object_begin(w);
8430 :
8431 0 : spdk_json_write_named_string(w, "method", "bdev_nvme_attach_controller");
8432 :
8433 0 : spdk_json_write_named_object_begin(w, "params");
8434 0 : spdk_json_write_named_string(w, "name", nvme_ctrlr->nbdev_ctrlr->name);
8435 0 : nvme_bdev_dump_trid_json(trid, w);
8436 0 : spdk_json_write_named_bool(w, "prchk_reftag",
8437 0 : (nvme_ctrlr->opts.prchk_flags & SPDK_NVME_IO_FLAGS_PRCHK_REFTAG) != 0);
8438 0 : spdk_json_write_named_bool(w, "prchk_guard",
8439 0 : (nvme_ctrlr->opts.prchk_flags & SPDK_NVME_IO_FLAGS_PRCHK_GUARD) != 0);
8440 0 : spdk_json_write_named_int32(w, "ctrlr_loss_timeout_sec", nvme_ctrlr->opts.ctrlr_loss_timeout_sec);
8441 0 : spdk_json_write_named_uint32(w, "reconnect_delay_sec", nvme_ctrlr->opts.reconnect_delay_sec);
8442 0 : spdk_json_write_named_uint32(w, "fast_io_fail_timeout_sec",
8443 : nvme_ctrlr->opts.fast_io_fail_timeout_sec);
8444 0 : if (nvme_ctrlr->psk != NULL) {
8445 0 : spdk_json_write_named_string(w, "psk", spdk_key_get_name(nvme_ctrlr->psk));
8446 0 : } else if (nvme_ctrlr->opts.psk[0] != '\0') {
8447 0 : spdk_json_write_named_string(w, "psk", nvme_ctrlr->opts.psk);
8448 : }
8449 :
8450 0 : opts = spdk_nvme_ctrlr_get_opts(nvme_ctrlr->ctrlr);
8451 0 : spdk_json_write_named_string(w, "hostnqn", opts->hostnqn);
8452 0 : spdk_json_write_named_bool(w, "hdgst", opts->header_digest);
8453 0 : spdk_json_write_named_bool(w, "ddgst", opts->data_digest);
8454 0 : if (opts->src_addr[0] != '\0') {
8455 0 : spdk_json_write_named_string(w, "hostaddr", opts->src_addr);
8456 : }
8457 0 : if (opts->src_svcid[0] != '\0') {
8458 0 : spdk_json_write_named_string(w, "hostsvcid", opts->src_svcid);
8459 : }
8460 :
8461 0 : spdk_json_write_object_end(w);
8462 :
8463 0 : spdk_json_write_object_end(w);
8464 : }
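      :	/*
      :	 * For illustration: a sketch of the attach RPC emitted above, with
      :	 * hypothetical values. psk, hostaddr, and hostsvcid appear only when
      :	 * set, and the trid keys are assumed to come from
      :	 * nvme_bdev_dump_trid_json().
      :	 *
      :	 * {
      :	 *   "method": "bdev_nvme_attach_controller",
      :	 *   "params": {
      :	 *     "name": "Nvme0",
      :	 *     "trtype": "TCP",
      :	 *     "adrfam": "IPv4",
      :	 *     "traddr": "192.168.1.10",
      :	 *     "trsvcid": "4420",
      :	 *     "subnqn": "nqn.2016-06.io.spdk:cnode1",
      :	 *     "prchk_reftag": false,
      :	 *     "prchk_guard": false,
      :	 *     "ctrlr_loss_timeout_sec": 0,
      :	 *     "reconnect_delay_sec": 0,
      :	 *     "fast_io_fail_timeout_sec": 0,
      :	 *     "hostnqn": "nqn.2016-06.io.spdk:host1",
      :	 *     "hdgst": false,
      :	 *     "ddgst": false
      :	 *   }
      :	 * }
      :	 */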
8465 :
8466 : static void
8467 0 : bdev_nvme_hotplug_config_json(struct spdk_json_write_ctx *w)
8468 : {
8469 0 : spdk_json_write_object_begin(w);
8470 0 : spdk_json_write_named_string(w, "method", "bdev_nvme_set_hotplug");
8471 :
8472 0 : spdk_json_write_named_object_begin(w, "params");
8473 0 : spdk_json_write_named_uint64(w, "period_us", g_nvme_hotplug_poll_period_us);
8474 0 : spdk_json_write_named_bool(w, "enable", g_nvme_hotplug_enabled);
8475 0 : spdk_json_write_object_end(w);
8476 :
8477 0 : spdk_json_write_object_end(w);
8478 0 : }
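      :	/*
      :	 * Sketch of the hotplug RPC emitted above (hypothetical values):
      :	 *
      :	 * { "method": "bdev_nvme_set_hotplug",
      :	 *   "params": { "period_us": 100000, "enable": true } }
      :	 */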
8479 :
8480 : static int
8481 0 : bdev_nvme_config_json(struct spdk_json_write_ctx *w)
8482 : {
8483 : struct nvme_bdev_ctrlr *nbdev_ctrlr;
8484 : struct nvme_ctrlr *nvme_ctrlr;
8485 : struct discovery_ctx *ctx;
8486 :
8487 0 : bdev_nvme_opts_config_json(w);
8488 :
8489 0 : pthread_mutex_lock(&g_bdev_nvme_mutex);
8490 :
8491 0 : TAILQ_FOREACH(nbdev_ctrlr, &g_nvme_bdev_ctrlrs, tailq) {
8492 0 : TAILQ_FOREACH(nvme_ctrlr, &nbdev_ctrlr->ctrlrs, tailq) {
8493 0 : nvme_ctrlr_config_json(w, nvme_ctrlr);
8494 :
8495 : #ifdef SPDK_CONFIG_NVME_CUSE
8496 0 : nvme_ctrlr_cuse_config_json(w, nvme_ctrlr);
8497 : #endif
8498 : }
8499 : }
8500 :
8501 0 : TAILQ_FOREACH(ctx, &g_discovery_ctxs, tailq) {
8502 0 : if (!ctx->from_mdns_discovery_service) {
8503 0 : bdev_nvme_discovery_config_json(w, ctx);
8504 : }
8505 : }
8506 :
8507 0 : bdev_nvme_mdns_discovery_config_json(w);
8508 :
8510 : 	/* Dump the hotplug config last to give all NVMe bdevs a chance to be
8511 : 	 * constructed before the hotplug poller is enabled.
8511 : */
8512 0 : bdev_nvme_hotplug_config_json(w);
8513 :
8514 0 : pthread_mutex_unlock(&g_bdev_nvme_mutex);
8515 0 : return 0;
8516 : }
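      :	/*
      :	 * Replay order produced above: global opts first, then one attach RPC
      :	 * (plus an optional CUSE registration) per controller, then non-mDNS
      :	 * and mDNS discovery services, and the hotplug poller last so that
      :	 * every bdev exists before hotplug is enabled.
      :	 */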
8517 :
8518 : struct spdk_nvme_ctrlr *
8519 1 : bdev_nvme_get_ctrlr(struct spdk_bdev *bdev)
8520 : {
8521 : struct nvme_bdev *nbdev;
8522 : struct nvme_ns *nvme_ns;
8523 :
8524 1 : if (!bdev || bdev->module != &nvme_if) {
8525 0 : return NULL;
8526 : }
8527 :
8528 1 : nbdev = SPDK_CONTAINEROF(bdev, struct nvme_bdev, disk);
8529 1 : nvme_ns = TAILQ_FIRST(&nbdev->nvme_ns_list);
8530 1 : assert(nvme_ns != NULL);
8531 :
8532 1 : return nvme_ns->ctrlr->ctrlr;
8533 : }
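      :	/*
      :	 * A minimal usage sketch (hypothetical caller). bdev_nvme_get_ctrlr()
      :	 * returns the controller backing the bdev's first namespace path, or
      :	 * NULL if the bdev does not belong to this module:
      :	 *
      :	 * struct spdk_bdev *bdev = spdk_bdev_get_by_name("Nvme0n1");
      :	 * struct spdk_nvme_ctrlr *ctrlr = bdev_nvme_get_ctrlr(bdev);
      :	 * if (ctrlr == NULL) {
      :	 *         // not an NVMe bdev, or the lookup failed
      :	 * }
      :	 */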
8534 :
8535 : void
8536 0 : nvme_io_path_info_json(struct spdk_json_write_ctx *w, struct nvme_io_path *io_path)
8537 : {
8538 0 : struct nvme_ns *nvme_ns = io_path->nvme_ns;
8539 0 : struct nvme_ctrlr *nvme_ctrlr = io_path->qpair->ctrlr;
8540 : const struct spdk_nvme_ctrlr_data *cdata;
8541 : const struct spdk_nvme_transport_id *trid;
8542 : const struct nvme_bdev_channel *nbdev_ch;
8543 : const char *adrfam_str;
8544 : bool current;
8545 :
8546 0 : spdk_json_write_object_begin(w);
8547 :
8548 0 : spdk_json_write_named_string(w, "bdev_name", nvme_ns->bdev->disk.name);
8549 :
8550 0 : cdata = spdk_nvme_ctrlr_get_data(nvme_ctrlr->ctrlr);
8551 0 : trid = spdk_nvme_ctrlr_get_transport_id(nvme_ctrlr->ctrlr);
8552 :
8553 0 : spdk_json_write_named_uint32(w, "cntlid", cdata->cntlid);
8554 0 : nbdev_ch = io_path->nbdev_ch;
8555 0 : if (nbdev_ch == NULL) {
8556 0 : current = false;
8557 0 : } else if (nbdev_ch->mp_policy == BDEV_NVME_MP_POLICY_ACTIVE_ACTIVE) {
8558 0 : struct nvme_io_path *optimized_io_path = NULL;
8559 :
8560 0 : STAILQ_FOREACH(optimized_io_path, &nbdev_ch->io_path_list, stailq) {
8561 0 : if (optimized_io_path->nvme_ns->ana_state == SPDK_NVME_ANA_OPTIMIZED_STATE) {
8562 0 : break;
8563 : }
8564 : }
8565 :
8566 0 : current = nvme_io_path_is_available(io_path);
8567 0 : if (io_path->nvme_ns->ana_state == SPDK_NVME_ANA_NON_OPTIMIZED_STATE) {
8568 : /* A non-optimized path is only current if there are no optimized paths. */
8569 0 : current = current && (optimized_io_path == NULL);
8570 : }
8571 : } else {
8572 0 : if (nbdev_ch->current_io_path) {
8573 0 : current = (io_path == nbdev_ch->current_io_path);
8574 : } else {
8575 : struct nvme_io_path *first_path;
8576 :
8578 : 			/* We arrived here because no current path is cached for active-passive
8579 : 			 * mode. Check if this io_path is the first available one in the list.
8579 : */
8580 0 : current = false;
8581 0 : STAILQ_FOREACH(first_path, &nbdev_ch->io_path_list, stailq) {
8582 0 : if (nvme_io_path_is_available(first_path)) {
8583 0 : current = (io_path == first_path);
8584 0 : break;
8585 : }
8586 : }
8587 : }
8588 : }
8589 0 : spdk_json_write_named_bool(w, "current", current);
8590 0 : spdk_json_write_named_bool(w, "connected", nvme_qpair_is_connected(io_path->qpair));
8591 0 : spdk_json_write_named_bool(w, "accessible", nvme_ns_is_accessible(nvme_ns));
8592 :
8593 0 : spdk_json_write_named_object_begin(w, "transport");
8594 0 : spdk_json_write_named_string(w, "trtype", trid->trstring);
8595 0 : spdk_json_write_named_string(w, "traddr", trid->traddr);
8596 0 : if (trid->trsvcid[0] != '\0') {
8597 0 : spdk_json_write_named_string(w, "trsvcid", trid->trsvcid);
8598 : }
8599 0 : adrfam_str = spdk_nvme_transport_id_adrfam_str(trid->adrfam);
8600 0 : if (adrfam_str) {
8601 0 : spdk_json_write_named_string(w, "adrfam", adrfam_str);
8602 : }
8603 0 : spdk_json_write_object_end(w);
8604 :
8605 0 : spdk_json_write_object_end(w);
8606 0 : }
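      :	/*
      :	 * Sketch of one io_path object emitted above (hypothetical values;
      :	 * trsvcid and adrfam are written only when known):
      :	 *
      :	 * {
      :	 *   "bdev_name": "Nvme0n1",
      :	 *   "cntlid": 1,
      :	 *   "current": true,
      :	 *   "connected": true,
      :	 *   "accessible": true,
      :	 *   "transport": {
      :	 *     "trtype": "TCP",
      :	 *     "traddr": "192.168.1.10",
      :	 *     "trsvcid": "4420",
      :	 *     "adrfam": "IPv4"
      :	 *   }
      :	 * }
      :	 */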
8607 :
8608 : void
8609 0 : bdev_nvme_get_discovery_info(struct spdk_json_write_ctx *w)
8610 : {
8611 : struct discovery_ctx *ctx;
8612 : struct discovery_entry_ctx *entry_ctx;
8613 :
8614 0 : spdk_json_write_array_begin(w);
8615 0 : TAILQ_FOREACH(ctx, &g_discovery_ctxs, tailq) {
8616 0 : spdk_json_write_object_begin(w);
8617 0 : spdk_json_write_named_string(w, "name", ctx->name);
8618 :
8619 0 : spdk_json_write_named_object_begin(w, "trid");
8620 0 : nvme_bdev_dump_trid_json(&ctx->trid, w);
8621 0 : spdk_json_write_object_end(w);
8622 :
8623 0 : spdk_json_write_named_array_begin(w, "referrals");
8624 0 : TAILQ_FOREACH(entry_ctx, &ctx->discovery_entry_ctxs, tailq) {
8625 0 : spdk_json_write_object_begin(w);
8626 0 : spdk_json_write_named_object_begin(w, "trid");
8627 0 : nvme_bdev_dump_trid_json(&entry_ctx->trid, w);
8628 0 : spdk_json_write_object_end(w);
8629 0 : spdk_json_write_object_end(w);
8630 : }
8631 0 : spdk_json_write_array_end(w);
8632 :
8633 0 : spdk_json_write_object_end(w);
8634 : }
8635 0 : spdk_json_write_array_end(w);
8636 0 : }
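      :	/*
      :	 * Sketch of the array emitted above: one object per discovery service,
      :	 * each carrying its own trid plus any referral trids (hypothetical
      :	 * shape, trid contents elided):
      :	 *
      :	 * [ { "name": "nvme_auto",
      :	 *     "trid": { ... },
      :	 *     "referrals": [ { "trid": { ... } } ] } ]
      :	 */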
8637 :
8638 1 : SPDK_LOG_REGISTER_COMPONENT(bdev_nvme)
8639 :
8640 1 : SPDK_TRACE_REGISTER_FN(bdev_nvme_trace, "bdev_nvme", TRACE_GROUP_BDEV_NVME)
8641 : {
8642 0 : struct spdk_trace_tpoint_opts opts[] = {
8643 : {
8644 : "BDEV_NVME_IO_START", TRACE_BDEV_NVME_IO_START,
8645 : OWNER_TYPE_NONE, OBJECT_BDEV_NVME_IO, 1,
8646 : {{ "ctx", SPDK_TRACE_ARG_TYPE_PTR, 8 }}
8647 : },
8648 : {
8649 : "BDEV_NVME_IO_DONE", TRACE_BDEV_NVME_IO_DONE,
8650 : OWNER_TYPE_NONE, OBJECT_BDEV_NVME_IO, 0,
8651 : {{ "ctx", SPDK_TRACE_ARG_TYPE_PTR, 8 }}
8652 : }
8653 : };
8654 :
8655 :
8656 0 : spdk_trace_register_object(OBJECT_BDEV_NVME_IO, 'N');
8657 0 : spdk_trace_register_description_ext(opts, SPDK_COUNTOF(opts));
8658 0 : spdk_trace_tpoint_register_relation(TRACE_NVME_PCIE_SUBMIT, OBJECT_BDEV_NVME_IO, 0);
8659 0 : spdk_trace_tpoint_register_relation(TRACE_NVME_TCP_SUBMIT, OBJECT_BDEV_NVME_IO, 0);
8660 0 : spdk_trace_tpoint_register_relation(TRACE_NVME_PCIE_COMPLETE, OBJECT_BDEV_NVME_IO, 0);
8661 0 : spdk_trace_tpoint_register_relation(TRACE_NVME_TCP_COMPLETE, OBJECT_BDEV_NVME_IO, 0);
8662 0 : }
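      :	/*
      :	 * These tracepoints fire only when the "bdev_nvme" tpoint group is
      :	 * enabled at runtime (e.g. via the common SPDK app -e/--tpoint-group
      :	 * option; how the host app exposes this is an assumption). The
      :	 * relations registered above tie the NVMe driver submit/complete
      :	 * events to the same I/O object so a trace viewer can follow a single
      :	 * I/O across layers.
      :	 */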