Line data Source code
1 : /* SPDX-License-Identifier: BSD-3-Clause
2 : * Copyright (C) 2019 Intel Corporation.
3 : * All rights reserved.
4 : */
5 : #include "spdk/stdinc.h"
6 : #include "spdk/string.h"
7 : #include "spdk/config.h"
8 : #include "spdk/fd_group.h"
9 : #include "spdk/log.h"
10 : #include "spdk/nvme.h"
11 :
12 : #define FUSE_USE_VERSION 31
13 :
14 : #include <fuse3/cuse_lowlevel.h>
15 :
16 : #include <linux/nvme_ioctl.h>
17 : #include <linux/fs.h>
18 :
19 : #include "nvme_internal.h"
20 : #include "nvme_io_msg.h"
21 : #include "nvme_cuse.h"
22 :
/*
 * State kept for one CUSE character device. The same structure is used for
 * controller devices (nsid == 0) and for namespace devices (nsid != 0).
 */
struct cuse_device {
	/* When set, the CUSE thread tears the FUSE session down once it has exited. */
	bool				force_exit;
	/* Device name handed to CUSE as "DEVNAME=<dev_name>". */
	char				dev_name[128];
	uint32_t			index;
	int				claim_fd;
	char				lock_name[64];

	struct spdk_nvme_ctrlr		*ctrlr;		/**< NVMe controller */
	uint32_t			nsid;		/**< NVMe name space id, or 0 */

	/* FUSE session created by cuse_lowlevel_setup() and its file descriptor. */
	struct fuse_session		*session;
	int				fuse_efd;

	/* Owning controller device and the list of namespace devices. */
	struct cuse_device		*ctrlr_device;
	TAILQ_HEAD(, cuse_device)	ns_devices;

	TAILQ_ENTRY(cuse_device)	tailq;
	/* Linkage on the pending/active lists handled by the CUSE thread. */
	TAILQ_ENTRY(cuse_device)	cuse_thread_tailq;
};
42 :
/* Controller-level bookkeeping. */
static pthread_mutex_t g_cuse_mtx = PTHREAD_MUTEX_INITIALIZER;
static TAILQ_HEAD(, cuse_device) g_ctrlr_ctx_head = TAILQ_HEAD_INITIALIZER(g_ctrlr_ctx_head);
static struct spdk_bit_array *g_ctrlr_started;

/*
 * CUSE thread state. New devices are queued on g_pending_device_head under
 * g_pending_device_mtx and announced through the g_cuse_thread_msg_fd eventfd;
 * they are then moved to g_active_device_head.
 */
static pthread_mutex_t g_pending_device_mtx = PTHREAD_MUTEX_INITIALIZER;
static struct spdk_fd_group *g_device_fdgrp;
static int g_cuse_thread_msg_fd;
static TAILQ_HEAD(, cuse_device) g_pending_device_head =
	TAILQ_HEAD_INITIALIZER(g_pending_device_head);
static TAILQ_HEAD(, cuse_device) g_active_device_head =
	TAILQ_HEAD_INITIALIZER(g_active_device_head);
54 :
55 : struct cuse_io_ctx {
56 : struct spdk_nvme_cmd nvme_cmd;
57 : enum spdk_nvme_data_transfer data_transfer;
58 :
59 : uint64_t lba;
60 : uint32_t lba_count;
61 : uint16_t apptag;
62 : uint16_t appmask;
63 :
64 : void *data;
65 : void *metadata;
66 :
67 : int data_len;
68 : int metadata_len;
69 :
70 : fuse_req_t req;
71 : };
72 :
73 : static void
74 8 : cuse_io_ctx_free(struct cuse_io_ctx *ctx)
75 : {
76 8 : spdk_free(ctx->data);
77 8 : spdk_free(ctx->metadata);
78 8 : free(ctx);
79 8 : }
80 :
/*
 * If the kernel has not supplied an output buffer yet (out_bufsz == 0), ask it
 * to retry the ioctl with sizeof(val) writable bytes at user address `arg`,
 * then return from the calling (void) handler.
 *
 * Wrapped in do { } while (0) so it acts as a single statement, including
 * inside an unbraced if/else. Arguments are parenthesized so that expression
 * arguments expand safely.
 */
#define FUSE_REPLY_CHECK_BUFFER(req, arg, out_bufsz, val)			\
	do {									\
		if ((out_bufsz) == 0) {						\
			struct iovec out_iov;					\
			out_iov.iov_base = (void *)(arg);			\
			out_iov.iov_len = sizeof(val);				\
			fuse_reply_ioctl_retry((req), NULL, 0, &out_iov, 1);	\
			return;							\
		}								\
	} while (0)
89 :
90 : #define FUSE_MAX_SIZE 128*1024
91 :
92 : static bool
93 2 : fuse_check_req_size(fuse_req_t req, struct iovec iov[], int iovcnt)
94 : {
95 2 : int total_iov_len = 0;
96 5 : for (int i = 0; i < iovcnt; i++) {
97 3 : total_iov_len += iov[i].iov_len;
98 3 : if (total_iov_len > FUSE_MAX_SIZE) {
99 0 : fuse_reply_err(req, ENOMEM);
100 0 : SPDK_ERRLOG("FUSE request cannot be larger that %d\n", FUSE_MAX_SIZE);
101 0 : return false;
102 : }
103 : }
104 2 : return true;
105 : }
106 :
107 : static void
108 0 : cuse_nvme_passthru_cmd_cb(void *arg, const struct spdk_nvme_cpl *cpl)
109 : {
110 0 : struct cuse_io_ctx *ctx = arg;
111 0 : struct iovec out_iov[3];
112 0 : struct spdk_nvme_cpl _cpl;
113 0 : int out_iovcnt = 0;
114 0 : uint16_t status_field = cpl->status_raw >> 1; /* Drop out phase bit */
115 :
116 0 : memcpy(&_cpl, cpl, sizeof(struct spdk_nvme_cpl));
117 0 : out_iov[out_iovcnt].iov_base = &_cpl.cdw0;
118 0 : out_iov[out_iovcnt].iov_len = sizeof(_cpl.cdw0);
119 0 : out_iovcnt += 1;
120 :
121 0 : if (ctx->data_transfer == SPDK_NVME_DATA_CONTROLLER_TO_HOST) {
122 0 : if (ctx->data_len > 0) {
123 0 : out_iov[out_iovcnt].iov_base = ctx->data;
124 0 : out_iov[out_iovcnt].iov_len = ctx->data_len;
125 0 : out_iovcnt += 1;
126 : }
127 0 : if (ctx->metadata_len > 0) {
128 0 : out_iov[out_iovcnt].iov_base = ctx->metadata;
129 0 : out_iov[out_iovcnt].iov_len = ctx->metadata_len;
130 0 : out_iovcnt += 1;
131 : }
132 : }
133 :
134 0 : fuse_reply_ioctl_iov(ctx->req, status_field, out_iov, out_iovcnt);
135 0 : cuse_io_ctx_free(ctx);
136 0 : }
137 :
138 : static void
139 0 : cuse_nvme_passthru_cmd_execute(struct spdk_nvme_ctrlr *ctrlr, uint32_t nsid, void *arg)
140 : {
141 : int rc;
142 0 : struct cuse_io_ctx *ctx = arg;
143 :
144 0 : if (nsid != 0) {
145 0 : rc = spdk_nvme_ctrlr_cmd_io_raw_with_md(ctrlr, ctrlr->external_io_msgs_qpair, &ctx->nvme_cmd,
146 : ctx->data,
147 0 : ctx->data_len, ctx->metadata, cuse_nvme_passthru_cmd_cb, (void *)ctx);
148 : } else {
149 0 : rc = spdk_nvme_ctrlr_cmd_admin_raw(ctrlr, &ctx->nvme_cmd, ctx->data, ctx->data_len,
150 : cuse_nvme_passthru_cmd_cb, (void *)ctx);
151 : }
152 0 : if (rc < 0) {
153 0 : fuse_reply_err(ctx->req, EINVAL);
154 0 : cuse_io_ctx_free(ctx);
155 : }
156 0 : }
157 :
158 : static void
159 2 : cuse_nvme_passthru_cmd_send(fuse_req_t req, struct nvme_passthru_cmd *passthru_cmd,
160 : const void *data, const void *metadata, int cmd)
161 : {
162 : struct cuse_io_ctx *ctx;
163 2 : struct cuse_device *cuse_device = fuse_req_userdata(req);
164 : int rv;
165 :
166 2 : ctx = (struct cuse_io_ctx *)calloc(1, sizeof(struct cuse_io_ctx));
167 2 : if (!ctx) {
168 0 : SPDK_ERRLOG("Cannot allocate memory for cuse_io_ctx\n");
169 0 : fuse_reply_err(req, ENOMEM);
170 0 : return;
171 : }
172 :
173 2 : ctx->req = req;
174 2 : ctx->data_transfer = spdk_nvme_opc_get_data_transfer(passthru_cmd->opcode);
175 :
176 2 : memset(&ctx->nvme_cmd, 0, sizeof(ctx->nvme_cmd));
177 2 : ctx->nvme_cmd.opc = passthru_cmd->opcode;
178 2 : ctx->nvme_cmd.nsid = passthru_cmd->nsid;
179 2 : ctx->nvme_cmd.cdw10 = passthru_cmd->cdw10;
180 2 : ctx->nvme_cmd.cdw11 = passthru_cmd->cdw11;
181 2 : ctx->nvme_cmd.cdw12 = passthru_cmd->cdw12;
182 2 : ctx->nvme_cmd.cdw13 = passthru_cmd->cdw13;
183 2 : ctx->nvme_cmd.cdw14 = passthru_cmd->cdw14;
184 2 : ctx->nvme_cmd.cdw15 = passthru_cmd->cdw15;
185 :
186 2 : ctx->data_len = passthru_cmd->data_len;
187 2 : ctx->metadata_len = passthru_cmd->metadata_len;
188 :
189 2 : if (ctx->data_len > 0) {
190 2 : ctx->data = spdk_malloc(ctx->data_len, 4096, NULL, SPDK_ENV_LCORE_ID_ANY, SPDK_MALLOC_DMA);
191 2 : if (!ctx->data) {
192 0 : SPDK_ERRLOG("Cannot allocate memory for data\n");
193 0 : fuse_reply_err(req, ENOMEM);
194 0 : free(ctx);
195 0 : return;
196 : }
197 2 : if (data != NULL) {
198 0 : memcpy(ctx->data, data, ctx->data_len);
199 : }
200 : }
201 :
202 2 : if (ctx->metadata_len > 0) {
203 1 : ctx->metadata = spdk_malloc(ctx->metadata_len, 4096, NULL, SPDK_ENV_LCORE_ID_ANY, SPDK_MALLOC_DMA);
204 1 : if (!ctx->metadata) {
205 0 : SPDK_ERRLOG("Cannot allocate memory for metadata\n");
206 0 : fuse_reply_err(req, ENOMEM);
207 0 : cuse_io_ctx_free(ctx);
208 0 : return;
209 : }
210 1 : if (metadata != NULL) {
211 0 : memcpy(ctx->metadata, metadata, ctx->metadata_len);
212 : }
213 : }
214 :
215 2 : if ((unsigned int)cmd != NVME_IOCTL_ADMIN_CMD) {
216 : /* Send NS for IO IOCTLs */
217 2 : rv = nvme_io_msg_send(cuse_device->ctrlr, passthru_cmd->nsid, cuse_nvme_passthru_cmd_execute, ctx);
218 : } else {
219 : /* NS == 0 for Admin IOCTLs */
220 0 : rv = nvme_io_msg_send(cuse_device->ctrlr, 0, cuse_nvme_passthru_cmd_execute, ctx);
221 : }
222 2 : if (rv) {
223 0 : SPDK_ERRLOG("Cannot send io msg to the controller\n");
224 0 : fuse_reply_err(req, -rv);
225 0 : cuse_io_ctx_free(ctx);
226 0 : return;
227 : }
228 : }
229 :
230 : static void
231 0 : cuse_nvme_passthru_cmd(fuse_req_t req, int cmd, void *arg,
232 : struct fuse_file_info *fi, unsigned flags,
233 : const void *in_buf, size_t in_bufsz, size_t out_bufsz)
234 : {
235 : struct nvme_passthru_cmd *passthru_cmd;
236 0 : struct iovec in_iov[3], out_iov[3];
237 0 : int in_iovcnt = 0, out_iovcnt = 0;
238 0 : const void *dptr = NULL, *mdptr = NULL;
239 : enum spdk_nvme_data_transfer data_transfer;
240 :
241 0 : in_iov[in_iovcnt].iov_base = (void *)arg;
242 0 : in_iov[in_iovcnt].iov_len = sizeof(*passthru_cmd);
243 0 : in_iovcnt += 1;
244 0 : if (in_bufsz == 0) {
245 0 : fuse_reply_ioctl_retry(req, in_iov, in_iovcnt, NULL, out_iovcnt);
246 0 : return;
247 : }
248 :
249 0 : passthru_cmd = (struct nvme_passthru_cmd *)in_buf;
250 0 : data_transfer = spdk_nvme_opc_get_data_transfer(passthru_cmd->opcode);
251 :
252 0 : if (data_transfer == SPDK_NVME_DATA_HOST_TO_CONTROLLER) {
253 : /* Make data pointer accessible (RO) */
254 0 : if (passthru_cmd->addr != 0) {
255 0 : in_iov[in_iovcnt].iov_base = (void *)passthru_cmd->addr;
256 0 : in_iov[in_iovcnt].iov_len = passthru_cmd->data_len;
257 0 : in_iovcnt += 1;
258 : }
259 : /* Make metadata pointer accessible (RO) */
260 0 : if (passthru_cmd->metadata != 0) {
261 0 : in_iov[in_iovcnt].iov_base = (void *)passthru_cmd->metadata;
262 0 : in_iov[in_iovcnt].iov_len = passthru_cmd->metadata_len;
263 0 : in_iovcnt += 1;
264 : }
265 : }
266 :
267 0 : if (!fuse_check_req_size(req, in_iov, in_iovcnt)) {
268 0 : return;
269 : }
270 : /* Always make result field writeable regardless of data transfer bits */
271 0 : out_iov[out_iovcnt].iov_base = &((struct nvme_passthru_cmd *)arg)->result;
272 0 : out_iov[out_iovcnt].iov_len = sizeof(uint32_t);
273 0 : out_iovcnt += 1;
274 :
275 0 : if (data_transfer == SPDK_NVME_DATA_CONTROLLER_TO_HOST) {
276 : /* Make data pointer accessible (WO) */
277 0 : if (passthru_cmd->data_len > 0) {
278 0 : out_iov[out_iovcnt].iov_base = (void *)passthru_cmd->addr;
279 0 : out_iov[out_iovcnt].iov_len = passthru_cmd->data_len;
280 0 : out_iovcnt += 1;
281 : }
282 : /* Make metadata pointer accessible (WO) */
283 0 : if (passthru_cmd->metadata_len > 0) {
284 0 : out_iov[out_iovcnt].iov_base = (void *)passthru_cmd->metadata;
285 0 : out_iov[out_iovcnt].iov_len = passthru_cmd->metadata_len;
286 0 : out_iovcnt += 1;
287 : }
288 : }
289 :
290 0 : if (!fuse_check_req_size(req, out_iov, out_iovcnt)) {
291 0 : return;
292 : }
293 :
294 0 : if (out_bufsz == 0) {
295 0 : fuse_reply_ioctl_retry(req, in_iov, in_iovcnt, out_iov, out_iovcnt);
296 0 : return;
297 : }
298 :
299 0 : if (data_transfer == SPDK_NVME_DATA_BIDIRECTIONAL) {
300 0 : fuse_reply_err(req, EINVAL);
301 0 : return;
302 : }
303 :
304 0 : if (data_transfer == SPDK_NVME_DATA_HOST_TO_CONTROLLER) {
305 0 : dptr = (passthru_cmd->addr == 0) ? NULL : (uint8_t *)in_buf + sizeof(*passthru_cmd);
306 0 : mdptr = (passthru_cmd->metadata == 0) ? NULL : (uint8_t *)in_buf + sizeof(*passthru_cmd) +
307 0 : passthru_cmd->data_len;
308 : }
309 :
310 0 : cuse_nvme_passthru_cmd_send(req, passthru_cmd, dptr, mdptr, cmd);
311 : }
312 :
313 : static void
314 0 : cuse_nvme_reset_execute(struct spdk_nvme_ctrlr *ctrlr, uint32_t nsid, void *arg)
315 : {
316 : int rc;
317 0 : fuse_req_t req = arg;
318 :
319 0 : rc = spdk_nvme_ctrlr_reset(ctrlr);
320 0 : if (rc) {
321 0 : fuse_reply_err(req, rc);
322 0 : return;
323 : }
324 :
325 0 : fuse_reply_ioctl_iov(req, 0, NULL, 0);
326 : }
327 :
328 : static void
329 0 : cuse_nvme_subsys_reset_execute(struct spdk_nvme_ctrlr *ctrlr, uint32_t nsid, void *arg)
330 : {
331 : int rc;
332 0 : fuse_req_t req = arg;
333 :
334 0 : rc = spdk_nvme_ctrlr_reset_subsystem(ctrlr);
335 0 : if (rc) {
336 0 : fuse_reply_err(req, rc);
337 0 : return;
338 : }
339 :
340 0 : fuse_reply_ioctl_iov(req, 0, NULL, 0);
341 : }
342 :
343 : static void
344 2 : cuse_nvme_reset(fuse_req_t req, int cmd, void *arg,
345 : struct fuse_file_info *fi, unsigned flags,
346 : const void *in_buf, size_t in_bufsz, size_t out_bufsz)
347 : {
348 : int rv;
349 2 : struct cuse_device *cuse_device = fuse_req_userdata(req);
350 :
351 2 : if (cuse_device->nsid) {
352 1 : SPDK_ERRLOG("Namespace reset not supported\n");
353 1 : fuse_reply_err(req, EINVAL);
354 1 : return;
355 : }
356 :
357 1 : if (cmd == NVME_IOCTL_SUBSYS_RESET) {
358 0 : SPDK_DEBUGLOG(nvme_cuse, "NVME_IOCTL_SUBSYS_RESET\n");
359 0 : rv = nvme_io_msg_send(cuse_device->ctrlr, cuse_device->nsid, cuse_nvme_subsys_reset_execute,
360 : (void *)req);
361 : } else {
362 1 : SPDK_DEBUGLOG(nvme_cuse, "NVME_IOCTL_RESET\n");
363 1 : rv = nvme_io_msg_send(cuse_device->ctrlr, cuse_device->nsid, cuse_nvme_reset_execute, (void *)req);
364 : }
365 1 : if (rv) {
366 0 : SPDK_ERRLOG("Cannot send reset\n");
367 0 : fuse_reply_err(req, EINVAL);
368 : }
369 : }
370 :
371 : static void
372 0 : cuse_nvme_rescan_execute(struct spdk_nvme_ctrlr *ctrlr, uint32_t nsid, void *arg)
373 : {
374 0 : fuse_req_t req = arg;
375 :
376 0 : nvme_ctrlr_update_namespaces(ctrlr);
377 0 : fuse_reply_ioctl_iov(req, 0, NULL, 0);
378 0 : }
379 :
380 : static void
381 0 : cuse_nvme_rescan(fuse_req_t req, int cmd, void *arg,
382 : struct fuse_file_info *fi, unsigned flags,
383 : const void *in_buf, size_t in_bufsz, size_t out_bufsz)
384 : {
385 : int rv;
386 0 : struct cuse_device *cuse_device = fuse_req_userdata(req);
387 :
388 0 : if (cuse_device->nsid) {
389 0 : SPDK_ERRLOG("Namespace rescan not supported\n");
390 0 : fuse_reply_err(req, EINVAL);
391 0 : return;
392 : }
393 :
394 0 : rv = nvme_io_msg_send(cuse_device->ctrlr, cuse_device->nsid, cuse_nvme_rescan_execute, (void *)req);
395 0 : if (rv) {
396 0 : SPDK_ERRLOG("Cannot send rescan\n");
397 0 : fuse_reply_err(req, EINVAL);
398 : }
399 : }
400 :
401 : /*****************************************************************************
402 : * Namespace IO requests
403 : */
404 :
405 : static void
406 0 : cuse_nvme_submit_io_write_done(void *ref, const struct spdk_nvme_cpl *cpl)
407 : {
408 0 : struct cuse_io_ctx *ctx = (struct cuse_io_ctx *)ref;
409 0 : uint16_t status_field = cpl->status_raw >> 1; /* Drop out phase bit */
410 :
411 0 : fuse_reply_ioctl_iov(ctx->req, status_field, NULL, 0);
412 :
413 0 : cuse_io_ctx_free(ctx);
414 0 : }
415 :
416 : static void
417 0 : cuse_nvme_submit_io_write_cb(struct spdk_nvme_ctrlr *ctrlr, uint32_t nsid, void *arg)
418 : {
419 : int rc;
420 0 : struct cuse_io_ctx *ctx = arg;
421 0 : struct spdk_nvme_ns *ns = spdk_nvme_ctrlr_get_ns(ctrlr, nsid);
422 :
423 0 : rc = spdk_nvme_ns_cmd_write_with_md(ns, ctrlr->external_io_msgs_qpair, ctx->data, ctx->metadata,
424 : ctx->lba, /* LBA start */
425 : ctx->lba_count, /* number of LBAs */
426 : cuse_nvme_submit_io_write_done, ctx, 0,
427 0 : ctx->appmask, ctx->apptag);
428 :
429 0 : if (rc != 0) {
430 0 : SPDK_ERRLOG("write failed: rc = %d\n", rc);
431 0 : fuse_reply_err(ctx->req, rc);
432 0 : cuse_io_ctx_free(ctx);
433 0 : return;
434 : }
435 : }
436 :
437 : static void
438 3 : cuse_nvme_submit_io_write(struct cuse_device *cuse_device, fuse_req_t req, int cmd, void *arg,
439 : struct fuse_file_info *fi, unsigned flags, uint32_t block_size, uint32_t md_size,
440 : const void *in_buf, size_t in_bufsz, size_t out_bufsz)
441 : {
442 3 : const struct nvme_user_io *user_io = in_buf;
443 : struct cuse_io_ctx *ctx;
444 : int rc;
445 :
446 3 : ctx = (struct cuse_io_ctx *)calloc(1, sizeof(struct cuse_io_ctx));
447 3 : if (!ctx) {
448 0 : SPDK_ERRLOG("Cannot allocate memory for context\n");
449 0 : fuse_reply_err(req, ENOMEM);
450 0 : return;
451 : }
452 :
453 3 : ctx->req = req;
454 3 : ctx->lba = user_io->slba;
455 3 : ctx->lba_count = user_io->nblocks + 1;
456 3 : ctx->data_len = ctx->lba_count * block_size;
457 :
458 3 : ctx->data = spdk_zmalloc(ctx->data_len, 0x1000, NULL, SPDK_ENV_NUMA_ID_ANY,
459 : SPDK_MALLOC_DMA);
460 3 : if (ctx->data == NULL) {
461 0 : SPDK_ERRLOG("Write buffer allocation failed\n");
462 0 : fuse_reply_err(ctx->req, ENOMEM);
463 0 : free(ctx);
464 0 : return;
465 : }
466 :
467 3 : memcpy(ctx->data, (uint8_t *)in_buf + sizeof(*user_io), ctx->data_len);
468 :
469 3 : if (user_io->metadata) {
470 1 : ctx->apptag = user_io->apptag;
471 1 : ctx->appmask = user_io->appmask;
472 1 : ctx->metadata_len = md_size * ctx->lba_count;
473 1 : ctx->metadata = spdk_zmalloc(ctx->metadata_len, 4096, NULL, SPDK_ENV_LCORE_ID_ANY, SPDK_MALLOC_DMA);
474 :
475 1 : if (ctx->metadata == NULL) {
476 0 : SPDK_ERRLOG("Cannot allocate memory for metadata\n");
477 0 : if (ctx->metadata_len == 0) {
478 0 : SPDK_ERRLOG("Device format does not support metadata\n");
479 : }
480 0 : fuse_reply_err(req, ENOMEM);
481 0 : cuse_io_ctx_free(ctx);
482 0 : return;
483 : }
484 :
485 1 : memcpy(ctx->metadata, (uint8_t *)in_buf + sizeof(*user_io) + ctx->data_len,
486 1 : ctx->metadata_len);
487 : }
488 :
489 3 : rc = nvme_io_msg_send(cuse_device->ctrlr, cuse_device->nsid, cuse_nvme_submit_io_write_cb,
490 : ctx);
491 3 : if (rc < 0) {
492 0 : SPDK_ERRLOG("Cannot send write io\n");
493 0 : fuse_reply_err(ctx->req, rc);
494 0 : cuse_io_ctx_free(ctx);
495 : }
496 : }
497 :
498 : static void
499 0 : cuse_nvme_submit_io_read_done(void *ref, const struct spdk_nvme_cpl *cpl)
500 : {
501 0 : struct cuse_io_ctx *ctx = (struct cuse_io_ctx *)ref;
502 0 : struct iovec iov[2];
503 0 : int iovcnt = 0;
504 0 : uint16_t status_field = cpl->status_raw >> 1; /* Drop out phase bit */
505 :
506 0 : iov[iovcnt].iov_base = ctx->data;
507 0 : iov[iovcnt].iov_len = ctx->data_len;
508 0 : iovcnt += 1;
509 :
510 0 : if (ctx->metadata) {
511 0 : iov[iovcnt].iov_base = ctx->metadata;
512 0 : iov[iovcnt].iov_len = ctx->metadata_len;
513 0 : iovcnt += 1;
514 : }
515 :
516 0 : fuse_reply_ioctl_iov(ctx->req, status_field, iov, iovcnt);
517 :
518 0 : cuse_io_ctx_free(ctx);
519 0 : }
520 :
521 : static void
522 0 : cuse_nvme_submit_io_read_cb(struct spdk_nvme_ctrlr *ctrlr, uint32_t nsid, void *arg)
523 : {
524 : int rc;
525 0 : struct cuse_io_ctx *ctx = arg;
526 0 : struct spdk_nvme_ns *ns = spdk_nvme_ctrlr_get_ns(ctrlr, nsid);
527 :
528 0 : rc = spdk_nvme_ns_cmd_read_with_md(ns, ctrlr->external_io_msgs_qpair, ctx->data, ctx->metadata,
529 : ctx->lba, /* LBA start */
530 : ctx->lba_count, /* number of LBAs */
531 : cuse_nvme_submit_io_read_done, ctx, 0,
532 0 : ctx->appmask, ctx->apptag);
533 :
534 0 : if (rc != 0) {
535 0 : SPDK_ERRLOG("read failed: rc = %d\n", rc);
536 0 : fuse_reply_err(ctx->req, rc);
537 0 : cuse_io_ctx_free(ctx);
538 0 : return;
539 : }
540 : }
541 :
542 : static void
543 3 : cuse_nvme_submit_io_read(struct cuse_device *cuse_device, fuse_req_t req, int cmd, void *arg,
544 : struct fuse_file_info *fi, unsigned flags, uint32_t block_size, uint32_t md_size,
545 : const void *in_buf, size_t in_bufsz, size_t out_bufsz)
546 : {
547 : int rc;
548 : struct cuse_io_ctx *ctx;
549 3 : const struct nvme_user_io *user_io = in_buf;
550 :
551 3 : ctx = (struct cuse_io_ctx *)calloc(1, sizeof(struct cuse_io_ctx));
552 3 : if (!ctx) {
553 0 : SPDK_ERRLOG("Cannot allocate memory for context\n");
554 0 : fuse_reply_err(req, ENOMEM);
555 0 : return;
556 : }
557 :
558 3 : ctx->req = req;
559 3 : ctx->lba = user_io->slba;
560 3 : ctx->lba_count = user_io->nblocks + 1;
561 :
562 3 : ctx->data_len = ctx->lba_count * block_size;
563 3 : ctx->data = spdk_zmalloc(ctx->data_len, 0x1000, NULL, SPDK_ENV_NUMA_ID_ANY,
564 : SPDK_MALLOC_DMA);
565 3 : if (ctx->data == NULL) {
566 0 : SPDK_ERRLOG("Read buffer allocation failed\n");
567 0 : fuse_reply_err(ctx->req, ENOMEM);
568 0 : free(ctx);
569 0 : return;
570 : }
571 :
572 3 : if (user_io->metadata) {
573 1 : ctx->apptag = user_io->apptag;
574 1 : ctx->appmask = user_io->appmask;
575 1 : ctx->metadata_len = md_size * ctx->lba_count;
576 1 : ctx->metadata = spdk_zmalloc(ctx->metadata_len, 4096, NULL, SPDK_ENV_LCORE_ID_ANY, SPDK_MALLOC_DMA);
577 :
578 1 : if (ctx->metadata == NULL) {
579 0 : SPDK_ERRLOG("Cannot allocate memory for metadata\n");
580 0 : if (ctx->metadata_len == 0) {
581 0 : SPDK_ERRLOG("Device format does not support metadata\n");
582 : }
583 0 : fuse_reply_err(req, ENOMEM);
584 0 : cuse_io_ctx_free(ctx);
585 0 : return;
586 : }
587 : }
588 :
589 3 : rc = nvme_io_msg_send(cuse_device->ctrlr, cuse_device->nsid, cuse_nvme_submit_io_read_cb, ctx);
590 3 : if (rc < 0) {
591 0 : SPDK_ERRLOG("Cannot send read io\n");
592 0 : fuse_reply_err(ctx->req, rc);
593 0 : cuse_io_ctx_free(ctx);
594 : }
595 : }
596 :
597 :
598 : static void
599 3 : cuse_nvme_submit_io(fuse_req_t req, int cmd, void *arg,
600 : struct fuse_file_info *fi, unsigned flags,
601 : const void *in_buf, size_t in_bufsz, size_t out_bufsz)
602 : {
603 : const struct nvme_user_io *user_io;
604 3 : struct iovec in_iov[3], out_iov[2];
605 3 : int in_iovcnt = 0, out_iovcnt = 0;
606 3 : struct cuse_device *cuse_device = fuse_req_userdata(req);
607 : struct spdk_nvme_ns *ns;
608 : uint32_t block_size;
609 : uint32_t md_size;
610 :
611 3 : in_iov[in_iovcnt].iov_base = (void *)arg;
612 3 : in_iov[in_iovcnt].iov_len = sizeof(*user_io);
613 3 : in_iovcnt += 1;
614 3 : if (in_bufsz == 0) {
615 0 : fuse_reply_ioctl_retry(req, in_iov, in_iovcnt, NULL, 0);
616 0 : return;
617 : }
618 :
619 3 : user_io = in_buf;
620 :
621 3 : ns = spdk_nvme_ctrlr_get_ns(cuse_device->ctrlr, cuse_device->nsid);
622 3 : block_size = spdk_nvme_ns_get_sector_size(ns);
623 3 : md_size = spdk_nvme_ns_get_md_size(ns);
624 :
625 3 : switch (user_io->opcode) {
626 1 : case SPDK_NVME_OPC_READ:
627 1 : out_iov[out_iovcnt].iov_base = (void *)user_io->addr;
628 1 : out_iov[out_iovcnt].iov_len = (user_io->nblocks + 1) * block_size;
629 1 : out_iovcnt += 1;
630 1 : if (user_io->metadata != 0) {
631 0 : out_iov[out_iovcnt].iov_base = (void *)user_io->metadata;
632 0 : out_iov[out_iovcnt].iov_len = (user_io->nblocks + 1) * md_size;
633 0 : out_iovcnt += 1;
634 : }
635 1 : if (!fuse_check_req_size(req, out_iov, out_iovcnt)) {
636 0 : return;
637 : }
638 1 : if (out_bufsz == 0) {
639 0 : fuse_reply_ioctl_retry(req, in_iov, in_iovcnt, out_iov, out_iovcnt);
640 0 : return;
641 : }
642 :
643 1 : cuse_nvme_submit_io_read(cuse_device, req, cmd, arg, fi, flags,
644 : block_size, md_size, in_buf, in_bufsz, out_bufsz);
645 1 : break;
646 1 : case SPDK_NVME_OPC_WRITE:
647 1 : in_iov[in_iovcnt].iov_base = (void *)user_io->addr;
648 1 : in_iov[in_iovcnt].iov_len = (user_io->nblocks + 1) * block_size;
649 1 : in_iovcnt += 1;
650 1 : if (user_io->metadata != 0) {
651 0 : in_iov[in_iovcnt].iov_base = (void *)user_io->metadata;
652 0 : in_iov[in_iovcnt].iov_len = (user_io->nblocks + 1) * md_size;
653 0 : in_iovcnt += 1;
654 : }
655 1 : if (!fuse_check_req_size(req, in_iov, in_iovcnt)) {
656 0 : return;
657 : }
658 1 : if (in_bufsz == sizeof(*user_io)) {
659 0 : fuse_reply_ioctl_retry(req, in_iov, in_iovcnt, NULL, out_iovcnt);
660 0 : return;
661 : }
662 :
663 1 : cuse_nvme_submit_io_write(cuse_device, req, cmd, arg, fi, flags,
664 : block_size, md_size, in_buf, in_bufsz, out_bufsz);
665 1 : break;
666 1 : default:
667 1 : SPDK_ERRLOG("SUBMIT_IO: opc:%d not valid\n", user_io->opcode);
668 1 : fuse_reply_err(req, EINVAL);
669 1 : return;
670 : }
671 :
672 : }
673 :
674 : /*****************************************************************************
675 : * Other namespace IOCTLs
676 : */
677 : static void
678 0 : cuse_blkgetsize64(fuse_req_t req, int cmd, void *arg,
679 : struct fuse_file_info *fi, unsigned flags,
680 : const void *in_buf, size_t in_bufsz, size_t out_bufsz)
681 : {
682 0 : uint64_t size;
683 : struct spdk_nvme_ns *ns;
684 0 : struct cuse_device *cuse_device = fuse_req_userdata(req);
685 :
686 0 : FUSE_REPLY_CHECK_BUFFER(req, arg, out_bufsz, size);
687 :
688 0 : ns = spdk_nvme_ctrlr_get_ns(cuse_device->ctrlr, cuse_device->nsid);
689 0 : size = spdk_nvme_ns_get_num_sectors(ns);
690 0 : fuse_reply_ioctl(req, 0, &size, sizeof(size));
691 : }
692 :
693 : static void
694 0 : cuse_blkpbszget(fuse_req_t req, int cmd, void *arg,
695 : struct fuse_file_info *fi, unsigned flags,
696 : const void *in_buf, size_t in_bufsz, size_t out_bufsz)
697 : {
698 0 : int pbsz;
699 : struct spdk_nvme_ns *ns;
700 0 : struct cuse_device *cuse_device = fuse_req_userdata(req);
701 :
702 0 : FUSE_REPLY_CHECK_BUFFER(req, arg, out_bufsz, pbsz);
703 :
704 0 : ns = spdk_nvme_ctrlr_get_ns(cuse_device->ctrlr, cuse_device->nsid);
705 0 : pbsz = spdk_nvme_ns_get_sector_size(ns);
706 0 : fuse_reply_ioctl(req, 0, &pbsz, sizeof(pbsz));
707 : }
708 :
709 : static void
710 0 : cuse_blkgetsize(fuse_req_t req, int cmd, void *arg,
711 : struct fuse_file_info *fi, unsigned flags,
712 : const void *in_buf, size_t in_bufsz, size_t out_bufsz)
713 : {
714 0 : long size;
715 : struct spdk_nvme_ns *ns;
716 0 : struct cuse_device *cuse_device = fuse_req_userdata(req);
717 :
718 0 : FUSE_REPLY_CHECK_BUFFER(req, arg, out_bufsz, size);
719 :
720 0 : ns = spdk_nvme_ctrlr_get_ns(cuse_device->ctrlr, cuse_device->nsid);
721 :
722 : /* return size in 512 bytes blocks */
723 0 : size = spdk_nvme_ns_get_num_sectors(ns) * 512 / spdk_nvme_ns_get_sector_size(ns);
724 0 : fuse_reply_ioctl(req, 0, &size, sizeof(size));
725 : }
726 :
727 : static void
728 0 : cuse_blkgetsectorsize(fuse_req_t req, int cmd, void *arg,
729 : struct fuse_file_info *fi, unsigned flags,
730 : const void *in_buf, size_t in_bufsz, size_t out_bufsz)
731 : {
732 0 : int ssize;
733 : struct spdk_nvme_ns *ns;
734 0 : struct cuse_device *cuse_device = fuse_req_userdata(req);
735 :
736 0 : FUSE_REPLY_CHECK_BUFFER(req, arg, out_bufsz, ssize);
737 :
738 0 : ns = spdk_nvme_ctrlr_get_ns(cuse_device->ctrlr, cuse_device->nsid);
739 0 : ssize = spdk_nvme_ns_get_sector_size(ns);
740 0 : fuse_reply_ioctl(req, 0, &ssize, sizeof(ssize));
741 : }
742 :
743 : static void
744 0 : cuse_getid(fuse_req_t req, int cmd, void *arg,
745 : struct fuse_file_info *fi, unsigned flags,
746 : const void *in_buf, size_t in_bufsz, size_t out_bufsz)
747 : {
748 0 : struct cuse_device *cuse_device = fuse_req_userdata(req);
749 :
750 0 : fuse_reply_ioctl(req, cuse_device->nsid, NULL, 0);
751 0 : }
752 :
753 : struct cuse_transport {
754 : char trstring[SPDK_NVMF_TRSTRING_MAX_LEN + 1];
755 : char traddr[SPDK_NVMF_TRADDR_MAX_LEN + 1];
756 : };
757 :
758 : #define SPDK_CUSE_GET_TRANSPORT _IOWR('n', 0x1, struct cuse_transport)
759 :
760 : static void
761 0 : cuse_get_transport(fuse_req_t req, int cmd, void *arg,
762 : struct fuse_file_info *fi, unsigned flags,
763 : const void *in_buf, size_t in_bufsz, size_t out_bufsz)
764 : {
765 0 : struct cuse_device *cuse_device = fuse_req_userdata(req);
766 0 : struct cuse_transport tr = {};
767 :
768 0 : FUSE_REPLY_CHECK_BUFFER(req, arg, out_bufsz, tr);
769 :
770 0 : memcpy(tr.trstring, cuse_device->ctrlr->trid.trstring, SPDK_NVMF_TRSTRING_MAX_LEN + 1);
771 0 : memcpy(tr.traddr, cuse_device->ctrlr->trid.traddr, SPDK_NVMF_TRADDR_MAX_LEN + 1);
772 :
773 0 : fuse_reply_ioctl(req, 0, &tr, sizeof(tr));
774 : }
775 :
776 : static void
777 0 : cuse_ctrlr_ioctl(fuse_req_t req, int cmd, void *arg,
778 : struct fuse_file_info *fi, unsigned flags,
779 : const void *in_buf, size_t in_bufsz, size_t out_bufsz)
780 : {
781 0 : if (flags & FUSE_IOCTL_COMPAT) {
782 0 : fuse_reply_err(req, ENOSYS);
783 0 : return;
784 : }
785 :
786 0 : switch ((unsigned int)cmd) {
787 0 : case NVME_IOCTL_ADMIN_CMD:
788 0 : SPDK_DEBUGLOG(nvme_cuse, "NVME_IOCTL_ADMIN_CMD\n");
789 0 : cuse_nvme_passthru_cmd(req, cmd, arg, fi, flags, in_buf, in_bufsz, out_bufsz);
790 0 : break;
791 :
792 0 : case NVME_IOCTL_RESET:
793 : case NVME_IOCTL_SUBSYS_RESET:
794 0 : cuse_nvme_reset(req, cmd, arg, fi, flags, in_buf, in_bufsz, out_bufsz);
795 0 : break;
796 :
797 0 : case NVME_IOCTL_RESCAN:
798 0 : SPDK_DEBUGLOG(nvme_cuse, "NVME_IOCTL_RESCAN\n");
799 0 : cuse_nvme_rescan(req, cmd, arg, fi, flags, in_buf, in_bufsz, out_bufsz);
800 0 : break;
801 :
802 0 : case NVME_IOCTL_ID:
803 : /* Return error but don't ERRLOG - nvme-cli will frequently send this
804 : * IOCTL to controller devices.
805 : */
806 0 : fuse_reply_err(req, ENOTTY);
807 0 : break;
808 :
809 0 : case SPDK_CUSE_GET_TRANSPORT:
810 0 : SPDK_DEBUGLOG(nvme_cuse, "SPDK_CUSE_GET_TRANSPORT\n");
811 0 : cuse_get_transport(req, cmd, arg, fi, flags, in_buf, in_bufsz, out_bufsz);
812 0 : break;
813 :
814 0 : default:
815 0 : SPDK_ERRLOG("Unsupported IOCTL 0x%X.\n", cmd);
816 0 : fuse_reply_err(req, ENOTTY);
817 : }
818 : }
819 :
820 : static void
821 0 : cuse_ns_ioctl(fuse_req_t req, int cmd, void *arg,
822 : struct fuse_file_info *fi, unsigned flags,
823 : const void *in_buf, size_t in_bufsz, size_t out_bufsz)
824 : {
825 0 : if (flags & FUSE_IOCTL_COMPAT) {
826 0 : fuse_reply_err(req, ENOSYS);
827 0 : return;
828 : }
829 :
830 0 : switch ((unsigned int)cmd) {
831 0 : case NVME_IOCTL_ADMIN_CMD:
832 0 : SPDK_DEBUGLOG(nvme_cuse, "NVME_IOCTL_ADMIN_CMD\n");
833 0 : cuse_nvme_passthru_cmd(req, cmd, arg, fi, flags, in_buf, in_bufsz, out_bufsz);
834 0 : break;
835 :
836 0 : case NVME_IOCTL_SUBMIT_IO:
837 0 : SPDK_DEBUGLOG(nvme_cuse, "NVME_IOCTL_SUBMIT_IO\n");
838 0 : cuse_nvme_submit_io(req, cmd, arg, fi, flags, in_buf, in_bufsz, out_bufsz);
839 0 : break;
840 :
841 0 : case NVME_IOCTL_IO_CMD:
842 0 : SPDK_DEBUGLOG(nvme_cuse, "NVME_IOCTL_IO_CMD\n");
843 0 : cuse_nvme_passthru_cmd(req, cmd, arg, fi, flags, in_buf, in_bufsz, out_bufsz);
844 0 : break;
845 :
846 0 : case NVME_IOCTL_ID:
847 0 : SPDK_DEBUGLOG(nvme_cuse, "NVME_IOCTL_ID\n");
848 0 : cuse_getid(req, cmd, arg, fi, flags, in_buf, in_bufsz, out_bufsz);
849 0 : break;
850 :
851 0 : case BLKPBSZGET:
852 0 : SPDK_DEBUGLOG(nvme_cuse, "BLKPBSZGET\n");
853 0 : cuse_blkpbszget(req, cmd, arg, fi, flags, in_buf, in_bufsz, out_bufsz);
854 0 : break;
855 :
856 0 : case BLKSSZGET:
857 0 : SPDK_DEBUGLOG(nvme_cuse, "BLKSSZGET\n");
858 0 : cuse_blkgetsectorsize(req, cmd, arg, fi, flags, in_buf, in_bufsz, out_bufsz);
859 0 : break;
860 :
861 0 : case BLKGETSIZE:
862 0 : SPDK_DEBUGLOG(nvme_cuse, "BLKGETSIZE\n");
863 : /* Returns the device size as a number of 512-byte blocks (returns pointer to long) */
864 0 : cuse_blkgetsize(req, cmd, arg, fi, flags, in_buf, in_bufsz, out_bufsz);
865 0 : break;
866 :
867 0 : case BLKGETSIZE64:
868 0 : SPDK_DEBUGLOG(nvme_cuse, "BLKGETSIZE64\n");
869 : /* Returns the device size in sectors (returns pointer to uint64_t) */
870 0 : cuse_blkgetsize64(req, cmd, arg, fi, flags, in_buf, in_bufsz, out_bufsz);
871 0 : break;
872 :
873 0 : default:
874 0 : SPDK_ERRLOG("Unsupported IOCTL 0x%X.\n", cmd);
875 0 : fuse_reply_err(req, ENOTTY);
876 : }
877 : }
878 :
879 : /*****************************************************************************
880 : * CUSE threads initialization.
881 : */
882 :
883 : static void
884 0 : cuse_open(fuse_req_t req, struct fuse_file_info *fi)
885 : {
886 0 : fuse_reply_open(req, fi);
887 0 : }
888 :
889 : static const struct cuse_lowlevel_ops cuse_ctrlr_clop = {
890 : .open = cuse_open,
891 : .ioctl = cuse_ctrlr_ioctl,
892 : };
893 :
894 : static const struct cuse_lowlevel_ops cuse_ns_clop = {
895 : .open = cuse_open,
896 : .ioctl = cuse_ns_ioctl,
897 : };
898 :
899 : static int
900 4 : cuse_session_create(struct cuse_device *cuse_device)
901 : {
902 4 : char *cuse_argv[] = { "cuse", "-f" };
903 4 : int multithreaded;
904 4 : int cuse_argc = SPDK_COUNTOF(cuse_argv);
905 4 : struct cuse_info ci;
906 4 : char devname_arg[128 + 8];
907 4 : const char *dev_info_argv[] = { devname_arg };
908 :
909 4 : snprintf(devname_arg, sizeof(devname_arg), "DEVNAME=%s", cuse_device->dev_name);
910 :
911 4 : memset(&ci, 0, sizeof(ci));
912 4 : ci.dev_info_argc = 1;
913 4 : ci.dev_info_argv = dev_info_argv;
914 4 : ci.flags = CUSE_UNRESTRICTED_IOCTL;
915 :
916 4 : if (cuse_device->nsid) {
917 3 : cuse_device->session = cuse_lowlevel_setup(cuse_argc, cuse_argv, &ci, &cuse_ns_clop,
918 : &multithreaded, cuse_device);
919 : } else {
920 1 : cuse_device->session = cuse_lowlevel_setup(cuse_argc, cuse_argv, &ci, &cuse_ctrlr_clop,
921 : &multithreaded, cuse_device);
922 : }
923 :
924 4 : if (!cuse_device->session) {
925 0 : SPDK_ERRLOG("Cannot create cuse session\n");
926 0 : return -1;
927 : }
928 4 : SPDK_NOTICELOG("fuse session for device %s created\n", cuse_device->dev_name);
929 4 : cuse_device->fuse_efd = fuse_session_fd(cuse_device->session);
930 :
931 4 : pthread_mutex_lock(&g_pending_device_mtx);
932 4 : TAILQ_INSERT_TAIL(&g_pending_device_head, cuse_device, cuse_thread_tailq);
933 4 : if (eventfd_write(g_cuse_thread_msg_fd, 1) != 0) {
934 0 : TAILQ_REMOVE(&g_pending_device_head, cuse_device, cuse_thread_tailq);
935 0 : pthread_mutex_unlock(&g_pending_device_mtx);
936 0 : SPDK_ERRLOG("eventfd_write failed: (%s).\n", spdk_strerror(errno));
937 0 : return -errno;
938 : }
939 4 : pthread_mutex_unlock(&g_pending_device_mtx);
940 4 : return 0;
941 : }
942 :
943 : static int
944 0 : process_cuse_event(void *arg)
945 : {
946 0 : struct fuse_session *session = arg;
947 0 : struct fuse_buf buf = { .mem = NULL };
948 0 : int rc = fuse_session_receive_buf(session, &buf);
949 :
950 0 : if (rc > 0) {
951 0 : fuse_session_process_buf(session, &buf);
952 : }
953 0 : free(buf.mem);
954 0 : return 0;
955 : }
956 :
957 : static int
958 4 : cuse_thread_add_session(void *arg)
959 : {
960 : struct cuse_device *cuse_device, *tmp;
961 : int ret;
962 4 : eventfd_t val;
963 :
964 4 : eventfd_read(g_cuse_thread_msg_fd, &val);
965 :
966 4 : pthread_mutex_lock(&g_pending_device_mtx);
967 8 : TAILQ_FOREACH_SAFE(cuse_device, &g_pending_device_head, cuse_thread_tailq, tmp) {
968 4 : ret = spdk_fd_group_add(g_device_fdgrp, cuse_device->fuse_efd, process_cuse_event,
969 4 : cuse_device->session, cuse_device->dev_name);
970 4 : if (ret < 0) {
971 0 : SPDK_ERRLOG("Failed to add fd %d: (%s).\n", cuse_device->fuse_efd,
972 : spdk_strerror(-ret));
973 0 : TAILQ_REMOVE(&g_pending_device_head, cuse_device, cuse_thread_tailq);
974 0 : free(cuse_device);
975 0 : assert(false);
976 : }
977 : }
978 4 : TAILQ_CONCAT(&g_active_device_head, &g_pending_device_head, cuse_thread_tailq);
979 4 : pthread_mutex_unlock(&g_pending_device_mtx);
980 4 : return 0;
981 : }
982 :
/*
 * Entry point of the thread created by start_cuse_thread().
 *
 * The thread repeatedly waits on g_device_fdgrp and retires every device on
 * g_active_device_head whose fuse session has exited.  Once the active list
 * is empty and no device is pending, it tears down the message eventfd and
 * the fd group and returns.
 *
 * \param unused Not used.
 * \return Always NULL.
 */
static void *
cuse_thread(void *unused)
{
	struct cuse_device *cuse_device, *tmp;
	int timeout_msecs = 500;
	bool retry;

	spdk_unaffinitize_thread();

	do {
		retry = false;
		/* First wait gives pending devices a chance to be moved to the active list. */
		spdk_fd_group_wait(g_device_fdgrp, timeout_msecs);
		while (!TAILQ_EMPTY(&g_active_device_head)) {
			TAILQ_FOREACH_SAFE(cuse_device, &g_active_device_head, cuse_thread_tailq, tmp) {
				if (fuse_session_exited(cuse_device->session)) {
					/* Stop polling the session and drop it from the active list. */
					spdk_fd_group_remove(g_device_fdgrp, cuse_device->fuse_efd);
					fuse_session_reset(cuse_device->session);
					TAILQ_REMOVE(&g_active_device_head, cuse_device, cuse_thread_tailq);
					/* Only devices flagged by a stop routine are destroyed here. */
					if (cuse_device->force_exit) {
						cuse_lowlevel_teardown(cuse_device->session);
						free(cuse_device);
					}
				}
			}
			/* Receive and process fuse event and new cuse device addition requests. */
			spdk_fd_group_wait(g_device_fdgrp, timeout_msecs);
		}
		/*
		 * g_cuse_mtx is taken before deciding to exit and, on the exit
		 * path, stays held until g_device_fdgrp has been reset to NULL
		 * below.  spdk_nvme_cuse_register() tests g_device_fdgrp under
		 * the same mutex.
		 */
		pthread_mutex_lock(&g_cuse_mtx);
		if (!TAILQ_EMPTY(&g_pending_device_head)) {
			pthread_mutex_unlock(&g_cuse_mtx);
			/* Retry as we have some cuse devices pending to be polled on. */
			retry = true;
		}
	} while (retry);

	/* Still holding g_cuse_mtx: release the thread's resources. */
	spdk_fd_group_remove(g_device_fdgrp, g_cuse_thread_msg_fd);
	close(g_cuse_thread_msg_fd);
	spdk_fd_group_destroy(g_device_fdgrp);
	g_device_fdgrp = NULL;
	pthread_mutex_unlock(&g_cuse_mtx);
	SPDK_NOTICELOG("Cuse thread exited.\n");
	return NULL;
}
1026 :
/* Forward declaration: defined further down, but already needed by cuse_nvme_ns_start(). */
static struct cuse_device *nvme_cuse_get_cuse_ns_device(struct spdk_nvme_ctrlr *ctrlr,
		uint32_t nsid);
1029 :
1030 : /*****************************************************************************
1031 : * CUSE devices management
1032 : */
1033 :
1034 : static int
1035 3 : cuse_nvme_ns_start(struct cuse_device *ctrlr_device, uint32_t nsid)
1036 : {
1037 3 : struct cuse_device *ns_device = NULL;
1038 : int rv;
1039 :
1040 3 : ns_device = nvme_cuse_get_cuse_ns_device(ctrlr_device->ctrlr, nsid);
1041 3 : if (ns_device != NULL) {
1042 0 : return 0;
1043 : }
1044 :
1045 3 : ns_device = calloc(1, sizeof(struct cuse_device));
1046 3 : if (ns_device == NULL) {
1047 0 : return -ENOMEM;
1048 : }
1049 :
1050 3 : ns_device->ctrlr = ctrlr_device->ctrlr;
1051 3 : ns_device->ctrlr_device = ctrlr_device;
1052 3 : ns_device->nsid = nsid;
1053 3 : rv = snprintf(ns_device->dev_name, sizeof(ns_device->dev_name), "%sn%d",
1054 3 : ctrlr_device->dev_name, ns_device->nsid);
1055 3 : if (rv < 0) {
1056 0 : SPDK_ERRLOG("Device name too long.\n");
1057 0 : rv = -ENAMETOOLONG;
1058 0 : goto free_device;
1059 : }
1060 :
1061 3 : rv = cuse_session_create(ns_device);
1062 3 : if (rv != 0) {
1063 0 : goto free_device;
1064 : }
1065 :
1066 3 : TAILQ_INSERT_TAIL(&ctrlr_device->ns_devices, ns_device, tailq);
1067 :
1068 3 : return 0;
1069 :
1070 0 : free_device:
1071 0 : free(ns_device);
1072 0 : return rv;
1073 : }
1074 :
1075 : static void
1076 3 : cuse_nvme_ns_stop(struct cuse_device *ctrlr_device, struct cuse_device *ns_device)
1077 : {
1078 3 : TAILQ_REMOVE(&ctrlr_device->ns_devices, ns_device, tailq);
1079 : /* ns_device will be freed by cuse_thread */
1080 3 : if (ns_device->session != NULL) {
1081 3 : ns_device->force_exit = true;
1082 3 : fuse_session_exit(ns_device->session);
1083 : }
1084 3 : }
1085 :
1086 : static int
1087 1 : nvme_cuse_claim(struct cuse_device *ctrlr_device, uint32_t index)
1088 : {
1089 : int dev_fd;
1090 : int pid;
1091 : void *dev_map;
1092 1 : struct flock cusedev_lock = {
1093 : .l_type = F_WRLCK,
1094 : .l_whence = SEEK_SET,
1095 : .l_start = 0,
1096 : .l_len = 0,
1097 : };
1098 :
1099 1 : snprintf(ctrlr_device->lock_name, sizeof(ctrlr_device->lock_name),
1100 : "/var/tmp/spdk_nvme_cuse_lock_%" PRIu32, index);
1101 :
1102 1 : dev_fd = open(ctrlr_device->lock_name, O_RDWR | O_CREAT, S_IRUSR | S_IWUSR);
1103 1 : if (dev_fd == -1) {
1104 0 : SPDK_ERRLOG("could not open %s\n", ctrlr_device->lock_name);
1105 0 : return -errno;
1106 : }
1107 :
1108 1 : if (ftruncate(dev_fd, sizeof(int)) != 0) {
1109 0 : SPDK_ERRLOG("could not truncate %s\n", ctrlr_device->lock_name);
1110 0 : close(dev_fd);
1111 0 : return -errno;
1112 : }
1113 :
1114 1 : dev_map = mmap(NULL, sizeof(int), PROT_READ | PROT_WRITE,
1115 : MAP_SHARED, dev_fd, 0);
1116 1 : if (dev_map == MAP_FAILED) {
1117 0 : SPDK_ERRLOG("could not mmap dev %s (%d)\n", ctrlr_device->lock_name, errno);
1118 0 : close(dev_fd);
1119 0 : return -errno;
1120 : }
1121 :
1122 1 : if (fcntl(dev_fd, F_SETLK, &cusedev_lock) != 0) {
1123 0 : pid = *(int *)dev_map;
1124 0 : SPDK_ERRLOG("Cannot create lock on device %s, probably"
1125 : " process %d has claimed it\n", ctrlr_device->lock_name, pid);
1126 0 : munmap(dev_map, sizeof(int));
1127 0 : close(dev_fd);
1128 : /* F_SETLK returns unspecified errnos, normalize them */
1129 0 : return -EACCES;
1130 : }
1131 :
1132 1 : *(int *)dev_map = (int)getpid();
1133 1 : munmap(dev_map, sizeof(int));
1134 1 : ctrlr_device->claim_fd = dev_fd;
1135 1 : ctrlr_device->index = index;
1136 : /* Keep dev_fd open to maintain the lock. */
1137 1 : return 0;
1138 : }
1139 :
1140 : static void
1141 1 : nvme_cuse_unclaim(struct cuse_device *ctrlr_device)
1142 : {
1143 1 : close(ctrlr_device->claim_fd);
1144 1 : ctrlr_device->claim_fd = -1;
1145 1 : unlink(ctrlr_device->lock_name);
1146 1 : }
1147 :
1148 : static void
1149 1 : cuse_nvme_ctrlr_stop(struct cuse_device *ctrlr_device)
1150 : {
1151 : struct cuse_device *ns_device, *tmp;
1152 :
1153 4 : TAILQ_FOREACH_SAFE(ns_device, &ctrlr_device->ns_devices, tailq, tmp) {
1154 3 : cuse_nvme_ns_stop(ctrlr_device, ns_device);
1155 : }
1156 :
1157 1 : assert(TAILQ_EMPTY(&ctrlr_device->ns_devices));
1158 :
1159 1 : spdk_bit_array_clear(g_ctrlr_started, ctrlr_device->index);
1160 1 : if (spdk_bit_array_count_set(g_ctrlr_started) == 0) {
1161 1 : spdk_bit_array_free(&g_ctrlr_started);
1162 : }
1163 1 : nvme_cuse_unclaim(ctrlr_device);
1164 :
1165 1 : TAILQ_REMOVE(&g_ctrlr_ctx_head, ctrlr_device, tailq);
1166 : /* ctrlr_device will be freed by cuse_thread */
1167 1 : ctrlr_device->force_exit = true;
1168 1 : fuse_session_exit(ctrlr_device->session);
1169 1 : }
1170 :
1171 : static int
1172 1 : cuse_nvme_ctrlr_update_namespaces(struct cuse_device *ctrlr_device)
1173 : {
1174 : struct cuse_device *ns_device, *tmp;
1175 : uint32_t nsid;
1176 :
1177 : /* Remove namespaces that have disappeared */
1178 1 : TAILQ_FOREACH_SAFE(ns_device, &ctrlr_device->ns_devices, tailq, tmp) {
1179 0 : if (!spdk_nvme_ctrlr_is_active_ns(ctrlr_device->ctrlr, ns_device->nsid)) {
1180 0 : cuse_nvme_ns_stop(ctrlr_device, ns_device);
1181 : }
1182 : }
1183 :
1184 : /* Add new namespaces */
1185 1 : nsid = spdk_nvme_ctrlr_get_first_active_ns(ctrlr_device->ctrlr);
1186 4 : while (nsid != 0) {
1187 3 : if (cuse_nvme_ns_start(ctrlr_device, nsid) < 0) {
1188 0 : SPDK_ERRLOG("Cannot start CUSE namespace device.");
1189 0 : return -1;
1190 : }
1191 :
1192 3 : nsid = spdk_nvme_ctrlr_get_next_active_ns(ctrlr_device->ctrlr, nsid);
1193 : }
1194 :
1195 1 : return 0;
1196 : }
1197 :
#ifdef FUSE_LOG_H_
/*
 * Log handler installed via fuse_set_log_func().
 *
 * fuse will unnecessarily print a "fuse_remove_signal_handlers: unknown
 * session" message when tearing down sessions, once for every session after
 * the first.  That message is dropped here; everything else is written to
 * stderr.
 */
static void
nvme_fuse_log_func(enum fuse_log_level level, const char *fmt, va_list ap)
{
	if (strstr(fmt, "fuse_remove_signal_handlers: unknown session") == NULL) {
		vfprintf(stderr, fmt, ap);
	}
}
#endif
1213 :
1214 : static int
1215 1 : nvme_cuse_start(struct spdk_nvme_ctrlr *ctrlr)
1216 : {
1217 1 : int rv = 0;
1218 : struct cuse_device *ctrlr_device;
1219 :
1220 1 : SPDK_NOTICELOG("Creating cuse device for controller\n");
1221 :
1222 1 : if (g_ctrlr_started == NULL) {
1223 1 : g_ctrlr_started = spdk_bit_array_create(128);
1224 1 : if (g_ctrlr_started == NULL) {
1225 0 : SPDK_ERRLOG("Cannot create bit array\n");
1226 0 : return -ENOMEM;
1227 : }
1228 : #ifdef FUSE_LOG_H_
1229 : /* Older versions of libfuse don't have fuse_set_log_func nor
1230 : * fuse_log.h, so this is the easiest way to check for it
1231 : * without adding a separate CONFIG flag.
1232 : */
1233 1 : fuse_set_log_func(nvme_fuse_log_func);
1234 : #endif
1235 : }
1236 :
1237 1 : ctrlr_device = (struct cuse_device *)calloc(1, sizeof(struct cuse_device));
1238 1 : if (!ctrlr_device) {
1239 0 : SPDK_ERRLOG("Cannot allocate memory for ctrlr_device.");
1240 0 : rv = -ENOMEM;
1241 0 : goto free_device;
1242 : }
1243 :
1244 1 : ctrlr_device->ctrlr = ctrlr;
1245 :
1246 : /* Check if device already exists, if not increment index until success */
1247 1 : ctrlr_device->index = 0;
1248 : while (1) {
1249 1 : ctrlr_device->index = spdk_bit_array_find_first_clear(g_ctrlr_started, ctrlr_device->index);
1250 1 : if (ctrlr_device->index == UINT32_MAX) {
1251 0 : SPDK_ERRLOG("Too many registered controllers\n");
1252 0 : goto free_device;
1253 : }
1254 :
1255 1 : if (nvme_cuse_claim(ctrlr_device, ctrlr_device->index) == 0) {
1256 1 : break;
1257 : }
1258 0 : ctrlr_device->index++;
1259 : }
1260 1 : spdk_bit_array_set(g_ctrlr_started, ctrlr_device->index);
1261 1 : snprintf(ctrlr_device->dev_name, sizeof(ctrlr_device->dev_name), "spdk/nvme%d",
1262 : ctrlr_device->index);
1263 :
1264 1 : rv = cuse_session_create(ctrlr_device);
1265 1 : if (rv != 0) {
1266 0 : goto clear_and_free;
1267 : }
1268 :
1269 1 : TAILQ_INSERT_TAIL(&g_ctrlr_ctx_head, ctrlr_device, tailq);
1270 :
1271 1 : TAILQ_INIT(&ctrlr_device->ns_devices);
1272 :
1273 : /* Start all active namespaces */
1274 1 : if (cuse_nvme_ctrlr_update_namespaces(ctrlr_device) < 0) {
1275 0 : SPDK_ERRLOG("Cannot start CUSE namespace devices.");
1276 0 : cuse_nvme_ctrlr_stop(ctrlr_device);
1277 0 : return -1;
1278 : }
1279 :
1280 1 : return 0;
1281 :
1282 0 : clear_and_free:
1283 0 : spdk_bit_array_clear(g_ctrlr_started, ctrlr_device->index);
1284 0 : free_device:
1285 0 : free(ctrlr_device);
1286 0 : if (spdk_bit_array_count_set(g_ctrlr_started) == 0) {
1287 0 : spdk_bit_array_free(&g_ctrlr_started);
1288 : }
1289 0 : return rv;
1290 : }
1291 :
1292 : static struct cuse_device *
1293 14 : nvme_cuse_get_cuse_ctrlr_device(struct spdk_nvme_ctrlr *ctrlr)
1294 : {
1295 14 : struct cuse_device *ctrlr_device = NULL;
1296 :
1297 16 : TAILQ_FOREACH(ctrlr_device, &g_ctrlr_ctx_head, tailq) {
1298 14 : if (ctrlr_device->ctrlr == ctrlr) {
1299 12 : break;
1300 : }
1301 : }
1302 :
1303 14 : return ctrlr_device;
1304 : }
1305 :
1306 : static struct cuse_device *
1307 10 : nvme_cuse_get_cuse_ns_device(struct spdk_nvme_ctrlr *ctrlr, uint32_t nsid)
1308 : {
1309 10 : struct cuse_device *ctrlr_device = NULL;
1310 : struct cuse_device *ns_device;
1311 :
1312 10 : ctrlr_device = nvme_cuse_get_cuse_ctrlr_device(ctrlr);
1313 10 : if (!ctrlr_device) {
1314 1 : return NULL;
1315 : }
1316 :
1317 16 : TAILQ_FOREACH(ns_device, &ctrlr_device->ns_devices, tailq) {
1318 10 : if (ns_device->nsid == nsid) {
1319 3 : return ns_device;
1320 : }
1321 : }
1322 :
1323 6 : return NULL;
1324 : }
1325 :
1326 : static void
1327 1 : nvme_cuse_stop(struct spdk_nvme_ctrlr *ctrlr)
1328 : {
1329 : struct cuse_device *ctrlr_device;
1330 :
1331 1 : assert(spdk_process_is_primary());
1332 :
1333 1 : pthread_mutex_lock(&g_cuse_mtx);
1334 :
1335 1 : ctrlr_device = nvme_cuse_get_cuse_ctrlr_device(ctrlr);
1336 1 : if (!ctrlr_device) {
1337 0 : SPDK_ERRLOG("Cannot find associated CUSE device\n");
1338 0 : pthread_mutex_unlock(&g_cuse_mtx);
1339 0 : return;
1340 : }
1341 :
1342 1 : cuse_nvme_ctrlr_stop(ctrlr_device);
1343 :
1344 1 : pthread_mutex_unlock(&g_cuse_mtx);
1345 : }
1346 :
1347 : static void
1348 0 : nvme_cuse_update(struct spdk_nvme_ctrlr *ctrlr)
1349 : {
1350 : struct cuse_device *ctrlr_device;
1351 :
1352 0 : assert(spdk_process_is_primary());
1353 :
1354 0 : pthread_mutex_lock(&g_cuse_mtx);
1355 :
1356 0 : ctrlr_device = nvme_cuse_get_cuse_ctrlr_device(ctrlr);
1357 0 : if (!ctrlr_device) {
1358 0 : pthread_mutex_unlock(&g_cuse_mtx);
1359 0 : return;
1360 : }
1361 :
1362 0 : cuse_nvme_ctrlr_update_namespaces(ctrlr_device);
1363 :
1364 0 : pthread_mutex_unlock(&g_cuse_mtx);
1365 : }
1366 :
/*
 * io_msg producer descriptor for CUSE; passed to nvme_io_msg_ctrlr_register()
 * and nvme_io_msg_ctrlr_unregister() by the public register/unregister calls.
 */
static struct nvme_io_msg_producer cuse_nvme_io_msg_producer = {
	.name = "cuse",
	.stop = nvme_cuse_stop,
	.update = nvme_cuse_update,
};
1372 :
1373 : static int
1374 1 : start_cuse_thread(void)
1375 : {
1376 1 : int rc = 0;
1377 1 : pthread_t tid;
1378 :
1379 1 : rc = spdk_fd_group_create(&g_device_fdgrp);
1380 1 : if (rc < 0) {
1381 0 : SPDK_ERRLOG("Failed to create fd group: (%s).\n", spdk_strerror(-rc));
1382 0 : return rc;
1383 : }
1384 :
1385 1 : g_cuse_thread_msg_fd = eventfd(0, EFD_NONBLOCK | EFD_CLOEXEC);
1386 1 : if (g_cuse_thread_msg_fd < 0) {
1387 0 : SPDK_ERRLOG("Failed to create eventfd: (%s).\n", spdk_strerror(errno));
1388 0 : rc = -errno;
1389 0 : goto destroy_fd_group;
1390 : }
1391 :
1392 1 : rc = SPDK_FD_GROUP_ADD(g_device_fdgrp, g_cuse_thread_msg_fd,
1393 : cuse_thread_add_session, NULL);
1394 1 : if (rc < 0) {
1395 0 : SPDK_ERRLOG("Failed to add fd %d: %s.\n", g_cuse_thread_msg_fd,
1396 : spdk_strerror(-rc));
1397 0 : goto close_and_destroy_fd;
1398 : }
1399 :
1400 1 : rc = pthread_create(&tid, NULL, cuse_thread, NULL);
1401 1 : if (rc != 0) {
1402 0 : SPDK_ERRLOG("pthread_create failed\n");
1403 0 : rc = -rc;
1404 0 : goto remove_close_and_destroy_fd;
1405 : }
1406 1 : pthread_detach(tid);
1407 1 : pthread_setname_np(tid, "cuse_thread");
1408 1 : SPDK_NOTICELOG("Successfully started cuse thread to poll for admin commands\n");
1409 1 : return rc;
1410 :
1411 0 : remove_close_and_destroy_fd:
1412 0 : spdk_fd_group_remove(g_device_fdgrp, g_cuse_thread_msg_fd);
1413 0 : close_and_destroy_fd:
1414 0 : close(g_cuse_thread_msg_fd);
1415 0 : destroy_fd_group:
1416 0 : spdk_fd_group_destroy(g_device_fdgrp);
1417 0 : g_device_fdgrp = NULL;
1418 0 : return rc;
1419 : }
1420 :
1421 : int
1422 1 : spdk_nvme_cuse_register(struct spdk_nvme_ctrlr *ctrlr)
1423 : {
1424 : int rc;
1425 :
1426 1 : if (!spdk_process_is_primary()) {
1427 0 : SPDK_ERRLOG("only allowed from primary process\n");
1428 0 : return -EINVAL;
1429 : }
1430 :
1431 1 : rc = nvme_io_msg_ctrlr_register(ctrlr, &cuse_nvme_io_msg_producer);
1432 1 : if (rc) {
1433 0 : return rc;
1434 : }
1435 :
1436 1 : pthread_mutex_lock(&g_cuse_mtx);
1437 :
1438 1 : if (g_device_fdgrp == NULL) {
1439 1 : rc = start_cuse_thread();
1440 1 : if (rc < 0) {
1441 0 : SPDK_ERRLOG("Failed to start cuse thread to poll for admin commands\n");
1442 0 : pthread_mutex_unlock(&g_cuse_mtx);
1443 0 : return rc;
1444 : }
1445 : }
1446 :
1447 1 : rc = nvme_cuse_start(ctrlr);
1448 1 : if (rc) {
1449 0 : nvme_io_msg_ctrlr_unregister(ctrlr, &cuse_nvme_io_msg_producer);
1450 : }
1451 :
1452 1 : pthread_mutex_unlock(&g_cuse_mtx);
1453 :
1454 1 : return rc;
1455 : }
1456 :
1457 : int
1458 0 : spdk_nvme_cuse_unregister(struct spdk_nvme_ctrlr *ctrlr)
1459 : {
1460 : struct cuse_device *ctrlr_device;
1461 :
1462 0 : if (!spdk_process_is_primary()) {
1463 0 : SPDK_ERRLOG("only allowed from primary process\n");
1464 0 : return -EINVAL;
1465 : }
1466 :
1467 0 : pthread_mutex_lock(&g_cuse_mtx);
1468 :
1469 0 : ctrlr_device = nvme_cuse_get_cuse_ctrlr_device(ctrlr);
1470 0 : if (!ctrlr_device) {
1471 0 : SPDK_ERRLOG("Cannot find associated CUSE device\n");
1472 0 : pthread_mutex_unlock(&g_cuse_mtx);
1473 0 : return -ENODEV;
1474 : }
1475 :
1476 0 : cuse_nvme_ctrlr_stop(ctrlr_device);
1477 :
1478 0 : pthread_mutex_unlock(&g_cuse_mtx);
1479 :
1480 0 : nvme_io_msg_ctrlr_unregister(ctrlr, &cuse_nvme_io_msg_producer);
1481 :
1482 0 : return 0;
1483 : }
1484 :
/*
 * Public entry point that resynchronizes the namespace devices of ctrlr; it
 * simply forwards to nvme_cuse_update().
 */
void
spdk_nvme_cuse_update_namespaces(struct spdk_nvme_ctrlr *ctrlr)
{
	nvme_cuse_update(ctrlr);
}
1490 :
1491 : int
1492 3 : spdk_nvme_cuse_get_ctrlr_name(struct spdk_nvme_ctrlr *ctrlr, char *name, size_t *size)
1493 : {
1494 : struct cuse_device *ctrlr_device;
1495 : size_t req_len;
1496 :
1497 3 : pthread_mutex_lock(&g_cuse_mtx);
1498 :
1499 3 : ctrlr_device = nvme_cuse_get_cuse_ctrlr_device(ctrlr);
1500 3 : if (!ctrlr_device) {
1501 1 : pthread_mutex_unlock(&g_cuse_mtx);
1502 1 : return -ENODEV;
1503 : }
1504 :
1505 2 : req_len = strnlen(ctrlr_device->dev_name, sizeof(ctrlr_device->dev_name));
1506 2 : if (*size < req_len) {
1507 1 : *size = req_len;
1508 1 : pthread_mutex_unlock(&g_cuse_mtx);
1509 1 : return -ENOSPC;
1510 : }
1511 1 : snprintf(name, req_len + 1, "%s", ctrlr_device->dev_name);
1512 :
1513 1 : pthread_mutex_unlock(&g_cuse_mtx);
1514 :
1515 1 : return 0;
1516 : }
1517 :
1518 : int
1519 4 : spdk_nvme_cuse_get_ns_name(struct spdk_nvme_ctrlr *ctrlr, uint32_t nsid, char *name, size_t *size)
1520 : {
1521 : struct cuse_device *ns_device;
1522 : size_t req_len;
1523 :
1524 4 : pthread_mutex_lock(&g_cuse_mtx);
1525 :
1526 4 : ns_device = nvme_cuse_get_cuse_ns_device(ctrlr, nsid);
1527 4 : if (!ns_device) {
1528 2 : pthread_mutex_unlock(&g_cuse_mtx);
1529 2 : return -ENODEV;
1530 : }
1531 :
1532 2 : req_len = strnlen(ns_device->dev_name, sizeof(ns_device->dev_name));
1533 2 : if (*size < req_len) {
1534 1 : *size = req_len;
1535 1 : pthread_mutex_unlock(&g_cuse_mtx);
1536 1 : return -ENOSPC;
1537 : }
1538 1 : snprintf(name, req_len + 1, "%s", ns_device->dev_name);
1539 :
1540 1 : pthread_mutex_unlock(&g_cuse_mtx);
1541 :
1542 1 : return 0;
1543 : }
1544 :
/* Log component for this file (presumably selects the "nvme_cuse" log flag; confirm in spdk/log.h). */
SPDK_LOG_REGISTER_COMPONENT(nvme_cuse)
|