LCOV - code coverage report
Current view: top level - module/bdev/uring - bdev_uring.c (source / functions) Hit Total Coverage
Test: ut_cov_unit.info Lines: 0 444 0.0 %
Date: 2024-12-06 22:28:46 Functions: 0 34 0.0 %

          Line data    Source code
       1             : /*   SPDX-License-Identifier: BSD-3-Clause
       2             :  *   Copyright (C) 2019 Intel Corporation.
       3             :  *   All rights reserved.
       4             :  */
       5             : 
       6             : #include "bdev_uring.h"
       7             : 
       8             : #include "spdk/stdinc.h"
       9             : #include "spdk/config.h"
      10             : #include "spdk/barrier.h"
      11             : #include "spdk/bdev.h"
      12             : #include "spdk/env.h"
      13             : #include "spdk/fd.h"
      14             : #include "spdk/likely.h"
      15             : #include "spdk/thread.h"
      16             : #include "spdk/json.h"
      17             : #include "spdk/util.h"
      18             : #include "spdk/string.h"
      19             : #include "spdk/file.h"
      20             : 
      21             : #include "spdk/log.h"
      22             : #include "spdk_internal/uring.h"
      23             : 
      24             : #ifdef SPDK_CONFIG_URING_ZNS
      25             : #include <linux/blkzoned.h>
      26             : #define SECTOR_SHIFT 9
      27             : #endif
      28             : 
      29             : struct bdev_uring_zoned_dev {
      30             :         uint64_t                num_zones;
      31             :         uint32_t                zone_shift;
      32             :         uint32_t                lba_shift;
      33             : };
      34             : 
      35             : struct bdev_uring_io_channel {
      36             :         struct bdev_uring_group_channel         *group_ch;
      37             : };
      38             : 
      39             : struct bdev_uring_group_channel {
      40             :         uint64_t                                io_inflight;
      41             :         uint64_t                                io_pending;
      42             :         struct spdk_poller                      *poller;
      43             :         struct io_uring                         uring;
      44             : };
      45             : 
      46             : struct bdev_uring_task {
      47             :         uint64_t                        len;
      48             :         struct bdev_uring_io_channel    *ch;
      49             :         TAILQ_ENTRY(bdev_uring_task)    link;
      50             : };
      51             : 
      52             : struct bdev_uring {
      53             :         struct spdk_bdev        bdev;
      54             :         struct bdev_uring_zoned_dev     zd;
      55             :         char                    *filename;
      56             :         int                     fd;
      57             :         TAILQ_ENTRY(bdev_uring)  link;
      58             : };
      59             : 
      60             : static int bdev_uring_init(void);
      61             : static void bdev_uring_fini(void);
      62             : static void uring_free_bdev(struct bdev_uring *uring);
      63             : static TAILQ_HEAD(, bdev_uring) g_uring_bdev_head = TAILQ_HEAD_INITIALIZER(g_uring_bdev_head);
      64             : 
      65             : #define SPDK_URING_QUEUE_DEPTH 512
      66             : #define MAX_EVENTS_PER_POLL 32
      67             : 
      68             : static int
      69           0 : bdev_uring_get_ctx_size(void)
      70             : {
      71           0 :         return sizeof(struct bdev_uring_task);
      72             : }
      73             : 
      74             : static struct spdk_bdev_module uring_if = {
      75             :         .name           = "uring",
      76             :         .module_init    = bdev_uring_init,
      77             :         .module_fini    = bdev_uring_fini,
      78             :         .get_ctx_size   = bdev_uring_get_ctx_size,
      79             : };
      80             : 
      81           0 : SPDK_BDEV_MODULE_REGISTER(uring, &uring_if)
      82             : 
      83             : static int
      84           0 : bdev_uring_open(struct bdev_uring *bdev)
      85             : {
      86             :         int fd;
      87             : 
      88           0 :         fd = open(bdev->filename, O_RDWR | O_DIRECT | O_NOATIME);
      89           0 :         if (fd < 0) {
      90             :                 /* Try without O_DIRECT for non-disk files */
      91           0 :                 fd = open(bdev->filename, O_RDWR | O_NOATIME);
      92           0 :                 if (fd < 0) {
      93           0 :                         SPDK_ERRLOG("open() failed (file:%s), errno %d: %s\n",
      94             :                                     bdev->filename, errno, spdk_strerror(errno));
      95           0 :                         bdev->fd = -1;
      96           0 :                         return -1;
      97             :                 }
      98             :         }
      99             : 
     100           0 :         bdev->fd = fd;
     101             : 
     102           0 :         return 0;
     103             : }
     104             : 
     105             : static void
     106           0 : dummy_bdev_event_cb(enum spdk_bdev_event_type type, struct spdk_bdev *bdev, void *ctx)
     107             : {
     108           0 : }
     109             : 
     110             : int
     111           0 : bdev_uring_rescan(const char *name)
     112             : {
     113             :         struct spdk_bdev_desc *desc;
     114             :         struct spdk_bdev *bdev;
     115             :         struct bdev_uring *uring;
     116             :         uint64_t uring_size, blockcnt;
     117             :         int rc;
     118             : 
     119           0 :         rc = spdk_bdev_open_ext(name, false, dummy_bdev_event_cb, NULL, &desc);
     120           0 :         if (rc != 0) {
     121           0 :                 return rc;
     122             :         }
     123             : 
     124           0 :         bdev = spdk_bdev_desc_get_bdev(desc);
     125           0 :         if (bdev->module != &uring_if) {
     126           0 :                 rc = -ENODEV;
     127           0 :                 goto exit;
     128             :         }
     129             : 
     130           0 :         uring = SPDK_CONTAINEROF(bdev, struct bdev_uring, bdev);
     131           0 :         uring_size = spdk_fd_get_size(uring->fd);
     132           0 :         blockcnt = uring_size / bdev->blocklen;
     133             : 
     134           0 :         if (bdev->blockcnt != blockcnt) {
     135           0 :                 SPDK_NOTICELOG("URING device is resized: bdev name %s, old block count %" PRIu64
     136             :                                ", new block count %"
     137             :                                PRIu64 "\n",
     138             :                                uring->filename,
     139             :                                bdev->blockcnt,
     140             :                                blockcnt);
     141           0 :                 rc = spdk_bdev_notify_blockcnt_change(bdev, blockcnt);
     142           0 :                 if (rc != 0) {
     143           0 :                         SPDK_ERRLOG("Could not change num blocks for uring bdev: name %s, errno: %d.\n",
     144             :                                     uring->filename, rc);
     145           0 :                         goto exit;
     146             :                 }
     147             :         }
     148             : 
     149           0 : exit:
     150           0 :         spdk_bdev_close(desc);
     151           0 :         return rc;
     152             : }
     153             : 
     154             : static int
     155           0 : bdev_uring_close(struct bdev_uring *bdev)
     156             : {
     157             :         int rc;
     158             : 
     159           0 :         if (bdev->fd == -1) {
     160           0 :                 return 0;
     161             :         }
     162             : 
     163           0 :         rc = close(bdev->fd);
     164           0 :         if (rc < 0) {
     165           0 :                 SPDK_ERRLOG("close() failed (fd=%d), errno %d: %s\n",
     166             :                             bdev->fd, errno, spdk_strerror(errno));
     167           0 :                 return -1;
     168             :         }
     169             : 
     170           0 :         bdev->fd = -1;
     171             : 
     172           0 :         return 0;
     173             : }
     174             : 
     175             : static int64_t
     176           0 : bdev_uring_readv(struct bdev_uring *uring, struct spdk_io_channel *ch,
     177             :                  struct bdev_uring_task *uring_task,
     178             :                  struct iovec *iov, int iovcnt, uint64_t nbytes, uint64_t offset)
     179             : {
     180           0 :         struct bdev_uring_io_channel *uring_ch = spdk_io_channel_get_ctx(ch);
     181           0 :         struct bdev_uring_group_channel *group_ch = uring_ch->group_ch;
     182             :         struct io_uring_sqe *sqe;
     183             : 
     184           0 :         sqe = io_uring_get_sqe(&group_ch->uring);
     185           0 :         if (!sqe) {
     186           0 :                 SPDK_DEBUGLOG(uring, "get sqe failed as out of resource\n");
     187           0 :                 return -ENOMEM;
     188             :         }
     189             : 
     190           0 :         io_uring_prep_readv(sqe, uring->fd, iov, iovcnt, offset);
     191           0 :         io_uring_sqe_set_data(sqe, uring_task);
     192           0 :         uring_task->len = nbytes;
     193           0 :         uring_task->ch = uring_ch;
     194             : 
     195           0 :         SPDK_DEBUGLOG(uring, "read %d iovs size %lu to off: %#lx\n",
     196             :                       iovcnt, nbytes, offset);
     197             : 
     198           0 :         group_ch->io_pending++;
     199           0 :         return nbytes;
     200             : }
     201             : 
     202             : static int64_t
     203           0 : bdev_uring_writev(struct bdev_uring *uring, struct spdk_io_channel *ch,
     204             :                   struct bdev_uring_task *uring_task,
     205             :                   struct iovec *iov, int iovcnt, size_t nbytes, uint64_t offset)
     206             : {
     207           0 :         struct bdev_uring_io_channel *uring_ch = spdk_io_channel_get_ctx(ch);
     208           0 :         struct bdev_uring_group_channel *group_ch = uring_ch->group_ch;
     209             :         struct io_uring_sqe *sqe;
     210             : 
     211           0 :         sqe = io_uring_get_sqe(&group_ch->uring);
     212           0 :         if (!sqe) {
     213           0 :                 SPDK_DEBUGLOG(uring, "get sqe failed as out of resource\n");
     214           0 :                 return -ENOMEM;
     215             :         }
     216             : 
     217           0 :         io_uring_prep_writev(sqe, uring->fd, iov, iovcnt, offset);
     218           0 :         io_uring_sqe_set_data(sqe, uring_task);
     219           0 :         uring_task->len = nbytes;
     220           0 :         uring_task->ch = uring_ch;
     221             : 
     222           0 :         SPDK_DEBUGLOG(uring, "write %d iovs size %lu from off: %#lx\n",
     223             :                       iovcnt, nbytes, offset);
     224             : 
     225           0 :         group_ch->io_pending++;
     226           0 :         return nbytes;
     227             : }
     228             : 
     229             : static int
     230           0 : bdev_uring_destruct(void *ctx)
     231             : {
     232           0 :         struct bdev_uring *uring = ctx;
     233           0 :         int rc = 0;
     234             : 
     235           0 :         TAILQ_REMOVE(&g_uring_bdev_head, uring, link);
     236           0 :         rc = bdev_uring_close(uring);
     237           0 :         if (rc < 0) {
     238           0 :                 SPDK_ERRLOG("bdev_uring_close() failed\n");
     239             :         }
     240           0 :         spdk_io_device_unregister(uring, NULL);
     241           0 :         uring_free_bdev(uring);
     242           0 :         return rc;
     243             : }
     244             : 
     245             : static int
     246           0 : bdev_uring_reap(struct io_uring *ring, int max)
     247             : {
     248             :         int i, count, ret;
     249             :         struct io_uring_cqe *cqe;
     250             :         struct bdev_uring_task *uring_task;
     251             :         enum spdk_bdev_io_status status;
     252             : 
     253           0 :         count = 0;
     254           0 :         for (i = 0; i < max; i++) {
     255           0 :                 ret = io_uring_peek_cqe(ring, &cqe);
     256           0 :                 if (ret != 0) {
     257           0 :                         return ret;
     258             :                 }
     259             : 
     260           0 :                 if (cqe == NULL) {
     261           0 :                         return count;
     262             :                 }
     263             : 
     264           0 :                 uring_task = (struct bdev_uring_task *)cqe->user_data;
     265           0 :                 if (cqe->res != (signed)uring_task->len) {
     266           0 :                         status = SPDK_BDEV_IO_STATUS_FAILED;
     267             :                 } else {
     268           0 :                         status = SPDK_BDEV_IO_STATUS_SUCCESS;
     269             :                 }
     270             : 
     271           0 :                 uring_task->ch->group_ch->io_inflight--;
     272           0 :                 io_uring_cqe_seen(ring, cqe);
     273           0 :                 spdk_bdev_io_complete(spdk_bdev_io_from_ctx(uring_task), status);
     274           0 :                 count++;
     275             :         }
     276             : 
     277           0 :         return count;
     278             : }
     279             : 
     280             : static int
     281           0 : bdev_uring_group_poll(void *arg)
     282             : {
     283           0 :         struct bdev_uring_group_channel *group_ch = arg;
     284             :         int to_complete, to_submit;
     285             :         int count, ret;
     286             : 
     287           0 :         to_submit = group_ch->io_pending;
     288             : 
     289           0 :         if (to_submit > 0) {
     290             :                 /* If there are I/O to submit, use io_uring_submit here.
     291             :                  * It will automatically call spdk_io_uring_enter appropriately. */
     292           0 :                 ret = io_uring_submit(&group_ch->uring);
     293           0 :                 if (ret < 0) {
     294           0 :                         return SPDK_POLLER_BUSY;
     295             :                 }
     296             : 
     297           0 :                 group_ch->io_pending = 0;
     298           0 :                 group_ch->io_inflight += to_submit;
     299             :         }
     300             : 
     301           0 :         to_complete = group_ch->io_inflight;
     302           0 :         count = 0;
     303           0 :         if (to_complete > 0) {
     304           0 :                 count = bdev_uring_reap(&group_ch->uring, to_complete);
     305             :         }
     306             : 
     307           0 :         if (count + to_submit > 0) {
     308           0 :                 return SPDK_POLLER_BUSY;
     309             :         } else {
     310           0 :                 return SPDK_POLLER_IDLE;
     311             :         }
     312             : }
     313             : 
     314             : static void
     315           0 : bdev_uring_get_buf_cb(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io,
     316             :                       bool success)
     317             : {
     318           0 :         int64_t ret = 0;
     319             : 
     320           0 :         if (!success) {
     321           0 :                 spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED);
     322           0 :                 return;
     323             :         }
     324             : 
     325           0 :         switch (bdev_io->type) {
     326           0 :         case SPDK_BDEV_IO_TYPE_READ:
     327           0 :                 ret = bdev_uring_readv((struct bdev_uring *)bdev_io->bdev->ctxt,
     328             :                                        ch,
     329           0 :                                        (struct bdev_uring_task *)bdev_io->driver_ctx,
     330             :                                        bdev_io->u.bdev.iovs,
     331             :                                        bdev_io->u.bdev.iovcnt,
     332           0 :                                        bdev_io->u.bdev.num_blocks * bdev_io->bdev->blocklen,
     333           0 :                                        bdev_io->u.bdev.offset_blocks * bdev_io->bdev->blocklen);
     334           0 :                 break;
     335           0 :         case SPDK_BDEV_IO_TYPE_WRITE:
     336           0 :                 ret = bdev_uring_writev((struct bdev_uring *)bdev_io->bdev->ctxt,
     337             :                                         ch,
     338           0 :                                         (struct bdev_uring_task *)bdev_io->driver_ctx,
     339             :                                         bdev_io->u.bdev.iovs,
     340             :                                         bdev_io->u.bdev.iovcnt,
     341           0 :                                         bdev_io->u.bdev.num_blocks * bdev_io->bdev->blocklen,
     342           0 :                                         bdev_io->u.bdev.offset_blocks * bdev_io->bdev->blocklen);
     343           0 :                 break;
     344           0 :         default:
     345           0 :                 SPDK_ERRLOG("Wrong io type\n");
     346           0 :                 break;
     347             :         }
     348             : 
     349           0 :         if (ret == -ENOMEM) {
     350           0 :                 spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_NOMEM);
     351             :         }
     352             : }
     353             : 
     354             : #ifdef SPDK_CONFIG_URING_ZNS
     355             : static int
     356           0 : bdev_uring_fill_zone_type(struct spdk_bdev_zone_info *zone_info, struct blk_zone *zones_rep)
     357             : {
     358           0 :         switch (zones_rep->type) {
     359           0 :         case BLK_ZONE_TYPE_CONVENTIONAL:
     360           0 :                 zone_info->type = SPDK_BDEV_ZONE_TYPE_CNV;
     361           0 :                 break;
     362           0 :         case BLK_ZONE_TYPE_SEQWRITE_REQ:
     363           0 :                 zone_info->type = SPDK_BDEV_ZONE_TYPE_SEQWR;
     364           0 :                 break;
     365           0 :         case BLK_ZONE_TYPE_SEQWRITE_PREF:
     366           0 :                 zone_info->type = SPDK_BDEV_ZONE_TYPE_SEQWP;
     367           0 :                 break;
     368           0 :         default:
     369           0 :                 SPDK_ERRLOG("Invalid zone type: %#x in zone report\n", zones_rep->type);
     370           0 :                 return -EIO;
     371             :         }
     372           0 :         return 0;
     373             : }
     374             : 
     375             : static int
     376           0 : bdev_uring_fill_zone_state(struct spdk_bdev_zone_info *zone_info, struct blk_zone *zones_rep)
     377             : {
     378           0 :         switch (zones_rep->cond) {
     379           0 :         case BLK_ZONE_COND_EMPTY:
     380           0 :                 zone_info->state = SPDK_BDEV_ZONE_STATE_EMPTY;
     381           0 :                 break;
     382           0 :         case BLK_ZONE_COND_IMP_OPEN:
     383           0 :                 zone_info->state = SPDK_BDEV_ZONE_STATE_IMP_OPEN;
     384           0 :                 break;
     385           0 :         case BLK_ZONE_COND_EXP_OPEN:
     386           0 :                 zone_info->state = SPDK_BDEV_ZONE_STATE_EXP_OPEN;
     387           0 :                 break;
     388           0 :         case BLK_ZONE_COND_CLOSED:
     389           0 :                 zone_info->state = SPDK_BDEV_ZONE_STATE_CLOSED;
     390           0 :                 break;
     391           0 :         case BLK_ZONE_COND_READONLY:
     392           0 :                 zone_info->state = SPDK_BDEV_ZONE_STATE_READ_ONLY;
     393           0 :                 break;
     394           0 :         case BLK_ZONE_COND_FULL:
     395           0 :                 zone_info->state = SPDK_BDEV_ZONE_STATE_FULL;
     396           0 :                 break;
     397           0 :         case BLK_ZONE_COND_OFFLINE:
     398           0 :                 zone_info->state = SPDK_BDEV_ZONE_STATE_OFFLINE;
     399           0 :                 break;
     400           0 :         case BLK_ZONE_COND_NOT_WP:
     401           0 :                 zone_info->state = SPDK_BDEV_ZONE_STATE_NOT_WP;
     402           0 :                 break;
     403           0 :         default:
     404           0 :                 SPDK_ERRLOG("Invalid zone state: %#x in zone report\n", zones_rep->cond);
     405           0 :                 return -EIO;
     406             :         }
     407           0 :         return 0;
     408             : }
     409             : 
     410             : static int
     411           0 : bdev_uring_zone_management_op(struct spdk_bdev_io *bdev_io)
     412             : {
     413             :         struct bdev_uring *uring;
     414             :         struct blk_zone_range range;
     415             :         long unsigned zone_mgmt_op;
     416           0 :         uint64_t zone_id = bdev_io->u.zone_mgmt.zone_id;
     417             : 
     418           0 :         uring = (struct bdev_uring *)bdev_io->bdev->ctxt;
     419             : 
     420           0 :         switch (bdev_io->u.zone_mgmt.zone_action) {
     421           0 :         case SPDK_BDEV_ZONE_RESET:
     422           0 :                 zone_mgmt_op = BLKRESETZONE;
     423           0 :                 break;
     424           0 :         case SPDK_BDEV_ZONE_OPEN:
     425           0 :                 zone_mgmt_op = BLKOPENZONE;
     426           0 :                 break;
     427           0 :         case SPDK_BDEV_ZONE_CLOSE:
     428           0 :                 zone_mgmt_op = BLKCLOSEZONE;
     429           0 :                 break;
     430           0 :         case SPDK_BDEV_ZONE_FINISH:
     431           0 :                 zone_mgmt_op = BLKFINISHZONE;
     432           0 :                 break;
     433           0 :         default:
     434           0 :                 return -EINVAL;
     435             :         }
     436             : 
     437           0 :         range.sector = (zone_id << uring->zd.lba_shift);
     438           0 :         range.nr_sectors = (uring->bdev.zone_size << uring->zd.lba_shift);
     439             : 
     440           0 :         if (ioctl(uring->fd, zone_mgmt_op, &range)) {
     441           0 :                 SPDK_ERRLOG("Ioctl BLKXXXZONE(%#x) failed errno: %d(%s)\n",
     442             :                             bdev_io->u.zone_mgmt.zone_action, errno, strerror(errno));
     443           0 :                 return -EINVAL;
     444             :         }
     445             : 
     446           0 :         spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_SUCCESS);
     447             : 
     448           0 :         return 0;
     449             : }
     450             : 
     451             : static int
     452           0 : bdev_uring_zone_get_info(struct spdk_bdev_io *bdev_io)
     453             : {
     454             :         struct bdev_uring *uring;
     455             :         struct blk_zone *zones;
     456             :         struct blk_zone_report *rep;
     457           0 :         struct spdk_bdev_zone_info *zone_info = bdev_io->u.zone_mgmt.buf;
     458             :         size_t repsize;
     459             :         uint32_t i, shift;
     460           0 :         uint32_t num_zones = bdev_io->u.zone_mgmt.num_zones;
     461           0 :         uint64_t zone_id = bdev_io->u.zone_mgmt.zone_id;
     462             : 
     463           0 :         uring = (struct bdev_uring *)bdev_io->bdev->ctxt;
     464           0 :         shift = uring->zd.lba_shift;
     465             : 
     466           0 :         if ((num_zones > uring->zd.num_zones) || !num_zones) {
     467           0 :                 return -EINVAL;
     468             :         }
     469             : 
     470           0 :         repsize = sizeof(struct blk_zone_report) + (sizeof(struct blk_zone) * num_zones);
     471           0 :         rep = (struct blk_zone_report *)malloc(repsize);
     472           0 :         if (!rep) {
     473           0 :                 return -ENOMEM;
     474             :         }
     475             : 
     476           0 :         zones = (struct blk_zone *)(rep + 1);
     477             : 
     478           0 :         while (num_zones && ((zone_id >> uring->zd.zone_shift) <= num_zones)) {
     479           0 :                 memset(rep, 0, repsize);
     480           0 :                 rep->sector = zone_id;
     481           0 :                 rep->nr_zones = num_zones;
     482             : 
     483           0 :                 if (ioctl(uring->fd, BLKREPORTZONE, rep)) {
     484           0 :                         SPDK_ERRLOG("Ioctl BLKREPORTZONE failed errno: %d(%s)\n",
     485             :                                     errno, strerror(errno));
     486           0 :                         free(rep);
     487           0 :                         return -EINVAL;
     488             :                 }
     489             : 
     490           0 :                 if (!rep->nr_zones) {
     491           0 :                         break;
     492             :                 }
     493             : 
     494           0 :                 for (i = 0; i < rep->nr_zones; i++) {
     495           0 :                         zone_info->zone_id = ((zones + i)->start >> shift);
     496           0 :                         zone_info->write_pointer = ((zones + i)->wp >> shift);
     497           0 :                         zone_info->capacity = ((zones + i)->capacity >> shift);
     498             : 
     499           0 :                         bdev_uring_fill_zone_state(zone_info, zones + i);
     500           0 :                         bdev_uring_fill_zone_type(zone_info, zones + i);
     501             : 
     502           0 :                         zone_id = ((zones + i)->start + (zones + i)->len) >> shift;
     503           0 :                         zone_info++;
     504           0 :                         num_zones--;
     505             :                 }
     506             :         }
     507             : 
     508           0 :         spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_SUCCESS);
     509           0 :         free(rep);
     510           0 :         return 0;
     511             : }
     512             : 
     513             : static int
     514           0 : bdev_uring_check_zoned_support(struct bdev_uring *uring, const char *name, const char *filename)
     515             : {
     516           0 :         char *filename_dup = NULL, *base;
     517           0 :         char *str = NULL;
     518             :         uint32_t val;
     519             :         uint32_t zinfo;
     520           0 :         int retval = -1;
     521             :         struct stat sb;
     522             :         char resolved_path[PATH_MAX], *rp;
     523           0 :         char *sysfs_path = NULL;
     524             : 
     525           0 :         uring->bdev.zoned = false;
     526             : 
     527             :         /* Follow symlink */
     528           0 :         if ((rp = realpath(filename, resolved_path))) {
     529           0 :                 filename = rp;
     530             :         }
     531             : 
     532             :         /* Perform check on block devices only */
     533           0 :         if (stat(filename, &sb) == 0 && S_ISBLK(sb.st_mode)) {
     534           0 :                 return 0;
     535             :         }
     536             : 
     537             :         /* strdup() because basename() may modify the passed parameter */
     538           0 :         filename_dup = strdup(filename);
     539           0 :         if (filename_dup == NULL) {
     540           0 :                 SPDK_ERRLOG("Could not duplicate string %s\n", filename);
     541           0 :                 return -1;
     542             :         }
     543             : 
     544           0 :         base = basename(filename_dup);
     545           0 :         free(filename_dup);
     546           0 :         sysfs_path = spdk_sprintf_alloc("/sys/block/%s/queue/zoned", base);
     547           0 :         retval = spdk_read_sysfs_attribute(&str, "%s", sysfs_path);
     548             :         /* Check if this is a zoned block device */
     549           0 :         if (retval < 0) {
     550           0 :                 SPDK_ERRLOG("Unable to open file %s. errno: %d\n", sysfs_path, retval);
     551           0 :         } else if (strcmp(str, "host-aware") == 0 || strcmp(str, "host-managed") == 0) {
     552             :                 /* Only host-aware & host-managed zns devices */
     553           0 :                 uring->bdev.zoned = true;
     554             : 
     555           0 :                 if (ioctl(uring->fd, BLKGETNRZONES, &zinfo)) {
     556           0 :                         SPDK_ERRLOG("ioctl BLKNRZONES failed %d (%s)\n", errno, strerror(errno));
     557           0 :                         goto err_ret;
     558             :                 }
     559           0 :                 uring->zd.num_zones = zinfo;
     560             : 
     561           0 :                 if (ioctl(uring->fd, BLKGETZONESZ, &zinfo)) {
     562           0 :                         SPDK_ERRLOG("ioctl BLKGETZONESZ failed %d (%s)\n", errno, strerror(errno));
     563           0 :                         goto err_ret;
     564             :                 }
     565             : 
     566           0 :                 uring->zd.lba_shift = uring->bdev.required_alignment - SECTOR_SHIFT;
     567           0 :                 uring->bdev.zone_size = (zinfo >> uring->zd.lba_shift);
     568           0 :                 uring->zd.zone_shift = spdk_u32log2(zinfo >> uring->zd.lba_shift);
     569             : 
     570           0 :                 retval = spdk_read_sysfs_attribute_uint32(&val, "/sys/block/%s/queue/max_open_zones", base);
     571           0 :                 if (retval < 0) {
     572           0 :                         SPDK_ERRLOG("Failed to get max open zones %d (%s)\n", retval, strerror(-retval));
     573           0 :                         goto err_ret;
     574             :                 }
     575           0 :                 uring->bdev.max_open_zones = uring->bdev.optimal_open_zones = val;
     576             : 
     577           0 :                 retval = spdk_read_sysfs_attribute_uint32(&val, "/sys/block/%s/queue/max_active_zones", base);
     578           0 :                 if (retval < 0) {
     579           0 :                         SPDK_ERRLOG("Failed to get max active zones %d (%s)\n", retval, strerror(-retval));
     580           0 :                         goto err_ret;
     581             :                 }
     582           0 :                 uring->bdev.max_active_zones = val;
     583           0 :                 retval = 0;
     584             :         } else {
     585           0 :                 retval = 0;        /* queue/zoned=none */
     586             :         }
     587           0 : err_ret:
     588           0 :         free(str);
     589           0 :         free(sysfs_path);
     590           0 :         return retval;
     591             : }
     592             : #else
     593             : /* No support for zoned devices */
     594             : static int
     595             : bdev_uring_zone_management_op(struct spdk_bdev_io *bdev_io)
     596             : {
     597             :         return -1;
     598             : }
     599             : 
     600             : static int
     601             : bdev_uring_zone_get_info(struct spdk_bdev_io *bdev_io)
     602             : {
     603             :         return -1;
     604             : }
     605             : 
     606             : static int
     607             : bdev_uring_check_zoned_support(struct bdev_uring *uring, const char *name, const char *filename)
     608             : {
     609             :         return 0;
     610             : }
     611             : #endif
     612             : 
     613             : static int
     614           0 : _bdev_uring_submit_request(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io)
     615             : {
     616             : 
     617           0 :         switch (bdev_io->type) {
     618           0 :         case SPDK_BDEV_IO_TYPE_GET_ZONE_INFO:
     619           0 :                 return bdev_uring_zone_get_info(bdev_io);
     620           0 :         case SPDK_BDEV_IO_TYPE_ZONE_MANAGEMENT:
     621           0 :                 return bdev_uring_zone_management_op(bdev_io);
     622             :         /* Read and write operations must be performed on buffers aligned to
     623             :          * bdev->required_alignment. If user specified unaligned buffers,
     624             :          * get the aligned buffer from the pool by calling spdk_bdev_io_get_buf. */
     625           0 :         case SPDK_BDEV_IO_TYPE_READ:
     626             :         case SPDK_BDEV_IO_TYPE_WRITE:
     627           0 :                 spdk_bdev_io_get_buf(bdev_io, bdev_uring_get_buf_cb,
     628           0 :                                      bdev_io->u.bdev.num_blocks * bdev_io->bdev->blocklen);
     629           0 :                 return 0;
     630           0 :         default:
     631           0 :                 return -1;
     632             :         }
     633             : }
     634             : 
     635             : static void
     636           0 : bdev_uring_submit_request(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io)
     637             : {
     638           0 :         if (_bdev_uring_submit_request(ch, bdev_io) < 0) {
     639           0 :                 spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED);
     640             :         }
     641           0 : }
     642             : 
     643             : static bool
     644           0 : bdev_uring_io_type_supported(void *ctx, enum spdk_bdev_io_type io_type)
     645             : {
     646           0 :         switch (io_type) {
     647             : #ifdef SPDK_CONFIG_URING_ZNS
     648           0 :         case SPDK_BDEV_IO_TYPE_GET_ZONE_INFO:
     649             :         case SPDK_BDEV_IO_TYPE_ZONE_MANAGEMENT:
     650             : #endif
     651             :         case SPDK_BDEV_IO_TYPE_READ:
     652             :         case SPDK_BDEV_IO_TYPE_WRITE:
     653           0 :                 return true;
     654           0 :         default:
     655           0 :                 return false;
     656             :         }
     657             : }
     658             : 
     659             : static int
     660           0 : bdev_uring_create_cb(void *io_device, void *ctx_buf)
     661             : {
     662           0 :         struct bdev_uring_io_channel *ch = ctx_buf;
     663             : 
     664           0 :         ch->group_ch = spdk_io_channel_get_ctx(spdk_get_io_channel(&uring_if));
     665             : 
     666           0 :         return 0;
     667             : }
     668             : 
     669             : static void
     670           0 : bdev_uring_destroy_cb(void *io_device, void *ctx_buf)
     671             : {
     672           0 :         struct bdev_uring_io_channel *ch = ctx_buf;
     673             : 
     674           0 :         spdk_put_io_channel(spdk_io_channel_from_ctx(ch->group_ch));
     675           0 : }
     676             : 
     677             : static struct spdk_io_channel *
     678           0 : bdev_uring_get_io_channel(void *ctx)
     679             : {
     680           0 :         struct bdev_uring *uring = ctx;
     681             : 
     682           0 :         return spdk_get_io_channel(uring);
     683             : }
     684             : 
     685             : static int
     686           0 : bdev_uring_dump_info_json(void *ctx, struct spdk_json_write_ctx *w)
     687             : {
     688           0 :         struct bdev_uring *uring = ctx;
     689             : 
     690           0 :         spdk_json_write_named_object_begin(w, "uring");
     691             : 
     692           0 :         spdk_json_write_named_string(w, "filename", uring->filename);
     693             : 
     694           0 :         spdk_json_write_object_end(w);
     695             : 
     696           0 :         return 0;
     697             : }
     698             : 
     699             : static void
     700           0 : bdev_uring_write_json_config(struct spdk_bdev *bdev, struct spdk_json_write_ctx *w)
     701             : {
     702           0 :         struct bdev_uring *uring = bdev->ctxt;
     703             :         char uuid_str[SPDK_UUID_STRING_LEN];
     704             : 
     705           0 :         spdk_json_write_object_begin(w);
     706             : 
     707           0 :         spdk_json_write_named_string(w, "method", "bdev_uring_create");
     708             : 
     709           0 :         spdk_json_write_named_object_begin(w, "params");
     710           0 :         spdk_json_write_named_string(w, "name", bdev->name);
     711           0 :         spdk_json_write_named_uint32(w, "block_size", bdev->blocklen);
     712           0 :         spdk_json_write_named_string(w, "filename", uring->filename);
     713           0 :         spdk_uuid_fmt_lower(uuid_str, sizeof(uuid_str), &bdev->uuid);
     714           0 :         spdk_json_write_named_string(w, "uuid", uuid_str);
     715           0 :         spdk_json_write_object_end(w);
     716             : 
     717           0 :         spdk_json_write_object_end(w);
     718           0 : }
     719             : 
     720             : static const struct spdk_bdev_fn_table uring_fn_table = {
     721             :         .destruct               = bdev_uring_destruct,
     722             :         .submit_request         = bdev_uring_submit_request,
     723             :         .io_type_supported      = bdev_uring_io_type_supported,
     724             :         .get_io_channel         = bdev_uring_get_io_channel,
     725             :         .dump_info_json         = bdev_uring_dump_info_json,
     726             :         .write_config_json      = bdev_uring_write_json_config,
     727             : };
     728             : 
     729             : static void
     730           0 : uring_free_bdev(struct bdev_uring *uring)
     731             : {
     732           0 :         if (uring == NULL) {
     733           0 :                 return;
     734             :         }
     735           0 :         free(uring->filename);
     736           0 :         free(uring->bdev.name);
     737           0 :         free(uring);
     738             : }
     739             : 
     740             : static int
     741           0 : bdev_uring_group_create_cb(void *io_device, void *ctx_buf)
     742             : {
     743           0 :         struct bdev_uring_group_channel *ch = ctx_buf;
     744             : 
     745             :         /* Do not use IORING_SETUP_IOPOLL until the Linux kernel can support not only
     746             :          * local devices but also devices attached from remote target */
     747           0 :         if (io_uring_queue_init(SPDK_URING_QUEUE_DEPTH, &ch->uring, 0) < 0) {
     748           0 :                 SPDK_ERRLOG("uring I/O context setup failure\n");
     749           0 :                 return -1;
     750             :         }
     751             : 
     752           0 :         ch->poller = SPDK_POLLER_REGISTER(bdev_uring_group_poll, ch, 0);
     753           0 :         return 0;
     754             : }
     755             : 
     756             : static void
     757           0 : bdev_uring_group_destroy_cb(void *io_device, void *ctx_buf)
     758             : {
     759           0 :         struct bdev_uring_group_channel *ch = ctx_buf;
     760             : 
     761           0 :         io_uring_queue_exit(&ch->uring);
     762             : 
     763           0 :         spdk_poller_unregister(&ch->poller);
     764           0 : }
     765             : 
     766             : struct spdk_bdev *
     767           0 : create_uring_bdev(const struct bdev_uring_opts *opts)
     768             : {
     769             :         struct bdev_uring *uring;
     770             :         uint32_t detected_block_size;
     771             :         uint64_t bdev_size;
     772             :         int rc;
     773           0 :         uint32_t block_size = opts->block_size;
     774             : 
     775           0 :         uring = calloc(1, sizeof(*uring));
     776           0 :         if (!uring) {
     777           0 :                 SPDK_ERRLOG("Unable to allocate enough memory for uring backend\n");
     778           0 :                 return NULL;
     779             :         }
     780             : 
     781           0 :         uring->filename = strdup(opts->filename);
     782           0 :         if (!uring->filename) {
     783           0 :                 goto error_return;
     784             :         }
     785             : 
     786           0 :         if (bdev_uring_open(uring)) {
     787           0 :                 SPDK_ERRLOG("Unable to open file %s. fd: %d errno: %d\n", opts->filename, uring->fd, errno);
     788           0 :                 goto error_return;
     789             :         }
     790             : 
     791           0 :         bdev_size = spdk_fd_get_size(uring->fd);
     792             : 
     793           0 :         uring->bdev.name = strdup(opts->name);
     794           0 :         if (!uring->bdev.name) {
     795           0 :                 goto error_return;
     796             :         }
     797           0 :         uring->bdev.product_name = "URING bdev";
     798           0 :         uring->bdev.module = &uring_if;
     799             : 
     800           0 :         uring->bdev.write_cache = 0;
     801             : 
     802           0 :         detected_block_size = spdk_fd_get_blocklen(uring->fd);
     803           0 :         if (block_size == 0) {
     804             :                 /* User did not specify block size - use autodetected block size. */
     805           0 :                 if (detected_block_size == 0) {
     806           0 :                         SPDK_ERRLOG("Block size could not be auto-detected\n");
     807           0 :                         goto error_return;
     808             :                 }
     809           0 :                 block_size = detected_block_size;
     810             :         } else {
     811           0 :                 if (block_size < detected_block_size) {
     812           0 :                         SPDK_ERRLOG("Specified block size %" PRIu32 " is smaller than "
     813             :                                     "auto-detected block size %" PRIu32 "\n",
     814             :                                     block_size, detected_block_size);
     815           0 :                         goto error_return;
     816           0 :                 } else if (detected_block_size != 0 && block_size != detected_block_size) {
     817           0 :                         SPDK_WARNLOG("Specified block size %" PRIu32 " does not match "
     818             :                                      "auto-detected block size %" PRIu32 "\n",
     819             :                                      block_size, detected_block_size);
     820             :                 }
     821             :         }
     822             : 
     823           0 :         if (block_size < 512) {
     824           0 :                 SPDK_ERRLOG("Invalid block size %" PRIu32 " (must be at least 512).\n", block_size);
     825           0 :                 goto error_return;
     826             :         }
     827             : 
     828           0 :         if (!spdk_u32_is_pow2(block_size)) {
     829           0 :                 SPDK_ERRLOG("Invalid block size %" PRIu32 " (must be a power of 2.)\n", block_size);
     830           0 :                 goto error_return;
     831             :         }
     832             : 
     833           0 :         uring->bdev.blocklen = block_size;
     834           0 :         uring->bdev.required_alignment = spdk_u32log2(block_size);
     835             : 
     836           0 :         rc = bdev_uring_check_zoned_support(uring, opts->name, opts->filename);
     837           0 :         if (rc) {
     838           0 :                 goto error_return;
     839             :         }
     840             : 
     841           0 :         if (bdev_size % uring->bdev.blocklen != 0) {
     842           0 :                 SPDK_ERRLOG("Disk size %" PRIu64 " is not a multiple of block size %" PRIu32 "\n",
     843             :                             bdev_size, uring->bdev.blocklen);
     844           0 :                 goto error_return;
     845             :         }
     846             : 
     847           0 :         uring->bdev.blockcnt = bdev_size / uring->bdev.blocklen;
     848           0 :         uring->bdev.ctxt = uring;
     849             : 
     850           0 :         uring->bdev.fn_table = &uring_fn_table;
     851             : 
     852           0 :         if (!spdk_mem_all_zero(&opts->uuid, sizeof(opts->uuid))) {
     853           0 :                 spdk_uuid_copy(&uring->bdev.uuid, &opts->uuid);
     854             :         }
     855             : 
     856           0 :         spdk_io_device_register(uring, bdev_uring_create_cb, bdev_uring_destroy_cb,
     857             :                                 sizeof(struct bdev_uring_io_channel),
     858           0 :                                 uring->bdev.name);
     859           0 :         rc = spdk_bdev_register(&uring->bdev);
     860           0 :         if (rc) {
     861           0 :                 spdk_io_device_unregister(uring, NULL);
     862           0 :                 goto error_return;
     863             :         }
     864             : 
     865           0 :         TAILQ_INSERT_TAIL(&g_uring_bdev_head, uring, link);
     866           0 :         return &uring->bdev;
     867             : 
     868           0 : error_return:
     869           0 :         bdev_uring_close(uring);
     870           0 :         uring_free_bdev(uring);
     871           0 :         return NULL;
     872             : }
     873             : 
     874             : struct delete_uring_bdev_ctx {
     875             :         spdk_delete_uring_complete cb_fn;
     876             :         void *cb_arg;
     877             : };
     878             : 
     879             : static void
     880           0 : uring_bdev_unregister_cb(void *arg, int bdeverrno)
     881             : {
     882           0 :         struct delete_uring_bdev_ctx *ctx = arg;
     883             : 
     884           0 :         ctx->cb_fn(ctx->cb_arg, bdeverrno);
     885           0 :         free(ctx);
     886           0 : }
     887             : 
     888             : void
     889           0 : delete_uring_bdev(const char *name, spdk_delete_uring_complete cb_fn, void *cb_arg)
     890             : {
     891             :         struct delete_uring_bdev_ctx *ctx;
     892             :         int rc;
     893             : 
     894           0 :         ctx = calloc(1, sizeof(*ctx));
     895           0 :         if (ctx == NULL) {
     896           0 :                 cb_fn(cb_arg, -ENOMEM);
     897           0 :                 return;
     898             :         }
     899             : 
     900           0 :         ctx->cb_fn = cb_fn;
     901           0 :         ctx->cb_arg = cb_arg;
     902           0 :         rc = spdk_bdev_unregister_by_name(name, &uring_if, uring_bdev_unregister_cb, ctx);
     903           0 :         if (rc != 0) {
     904           0 :                 uring_bdev_unregister_cb(ctx, rc);
     905             :         }
     906             : }
     907             : 
     908             : static int
     909           0 : bdev_uring_init(void)
     910             : {
     911           0 :         spdk_io_device_register(&uring_if, bdev_uring_group_create_cb, bdev_uring_group_destroy_cb,
     912             :                                 sizeof(struct bdev_uring_group_channel), "uring_module");
     913             : 
     914           0 :         return 0;
     915             : }
     916             : 
     917             : static void
     918           0 : bdev_uring_fini(void)
     919             : {
     920           0 :         spdk_io_device_unregister(&uring_if, NULL);
     921           0 : }
     922             : 
     923           0 : SPDK_LOG_REGISTER_COMPONENT(uring)

Generated by: LCOV version 1.15