LCOV - code coverage report
Current view: top level - module/bdev/aio - bdev_aio.c (source / functions) Hit Total Coverage
Test: ut_cov_unit.info Lines: 0 402 0.0 %
Date: 2024-07-14 18:22:36 Functions: 0 42 0.0 %

          Line data    Source code
       1             : /*   SPDX-License-Identifier: BSD-3-Clause
       2             :  *   Copyright (C) 2017 Intel Corporation.
       3             :  *   All rights reserved.
       4             :  *   Copyright (c) 2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
       5             :  */
       6             : 
       7             : #include "bdev_aio.h"
       8             : 
       9             : #include "spdk/stdinc.h"
      10             : 
      11             : #include "spdk/barrier.h"
      12             : #include "spdk/bdev.h"
      13             : #include "spdk/bdev_module.h"
      14             : #include "spdk/env.h"
      15             : #include "spdk/fd.h"
      16             : #include "spdk/likely.h"
      17             : #include "spdk/thread.h"
      18             : #include "spdk/json.h"
      19             : #include "spdk/util.h"
      20             : #include "spdk/string.h"
      21             : 
      22             : #include "spdk/log.h"
      23             : 
      24             : #include <sys/eventfd.h>
      25             : 
      26             : #ifndef __FreeBSD__
      27             : #include <libaio.h>
      28             : #endif
      29             : 
      30             : struct bdev_aio_io_channel {
      31             :         uint64_t                                io_inflight;
      32             : #ifdef __FreeBSD__
      33             :         int                                     kqfd;
      34             : #else
      35             :         io_context_t                            io_ctx;
      36             : #endif
      37             :         struct bdev_aio_group_channel           *group_ch;
      38             :         TAILQ_ENTRY(bdev_aio_io_channel)        link;
      39             : };
      40             : 
      41             : struct bdev_aio_group_channel {
      42             :         /* eventfd for io completion notification in interrupt mode.
      43             :          * Negative value like '-1' indicates it is invalid or unused.
      44             :          */
      45             :         int                                     efd;
      46             :         struct spdk_interrupt                   *intr;
      47             :         struct spdk_poller                      *poller;
      48             :         TAILQ_HEAD(, bdev_aio_io_channel)       io_ch_head;
      49             : };
      50             : 
      51             : struct bdev_aio_task {
      52             : #ifdef __FreeBSD__
      53             :         struct aiocb                    aiocb;
      54             : #else
      55             :         struct iocb                     iocb;
      56             : #endif
      57             :         uint64_t                        len;
      58             :         struct bdev_aio_io_channel      *ch;
      59             : };
      60             : 
      61             : struct file_disk {
      62             :         struct bdev_aio_task    *reset_task;
      63             :         struct spdk_poller      *reset_retry_timer;
      64             :         struct spdk_bdev        disk;
      65             :         char                    *filename;
      66             :         int                     fd;
      67             :         TAILQ_ENTRY(file_disk)  link;
      68             :         bool                    block_size_override;
      69             :         bool                    readonly;
      70             : };
      71             : 
      72             : /* For user space reaping of completions */
      73             : struct spdk_aio_ring {
      74             :         uint32_t id;
      75             :         uint32_t size;
      76             :         uint32_t head;
      77             :         uint32_t tail;
      78             : 
      79             :         uint32_t version;
      80             :         uint32_t compat_features;
      81             :         uint32_t incompat_features;
      82             :         uint32_t header_length;
      83             : };
      84             : 
      85             : #define SPDK_AIO_RING_VERSION   0xa10a10a1
      86             : 
      87             : static int bdev_aio_initialize(void);
      88             : static void bdev_aio_fini(void);
      89             : static void aio_free_disk(struct file_disk *fdisk);
      90             : static TAILQ_HEAD(, file_disk) g_aio_disk_head = TAILQ_HEAD_INITIALIZER(g_aio_disk_head);
      91             : 
      92             : #define SPDK_AIO_QUEUE_DEPTH 128
      93             : #define MAX_EVENTS_PER_POLL 32
      94             : 
      95             : static int
      96           0 : bdev_aio_get_ctx_size(void)
      97             : {
      98           0 :         return sizeof(struct bdev_aio_task);
      99             : }
     100             : 
     101             : static struct spdk_bdev_module aio_if = {
     102             :         .name           = "aio",
     103             :         .module_init    = bdev_aio_initialize,
     104             :         .module_fini    = bdev_aio_fini,
     105             :         .get_ctx_size   = bdev_aio_get_ctx_size,
     106             : };
     107             : 
     108           0 : SPDK_BDEV_MODULE_REGISTER(aio, &aio_if)
     109             : 
     110             : static int
     111           0 : bdev_aio_open(struct file_disk *disk)
     112             : {
     113             :         int fd;
     114           0 :         int io_flag = disk->readonly ? O_RDONLY : O_RDWR;
     115             : 
     116           0 :         fd = open(disk->filename, io_flag | O_DIRECT);
     117           0 :         if (fd < 0) {
     118             :                 /* Try without O_DIRECT for non-disk files */
     119           0 :                 fd = open(disk->filename, io_flag);
     120           0 :                 if (fd < 0) {
     121           0 :                         SPDK_ERRLOG("open() failed (file:%s), errno %d: %s\n",
     122             :                                     disk->filename, errno, spdk_strerror(errno));
     123           0 :                         disk->fd = -1;
     124           0 :                         return -1;
     125             :                 }
     126             :         }
     127             : 
     128           0 :         disk->fd = fd;
     129             : 
     130           0 :         return 0;
     131             : }
     132             : 
     133             : static int
     134           0 : bdev_aio_close(struct file_disk *disk)
     135             : {
     136             :         int rc;
     137             : 
     138           0 :         if (disk->fd == -1) {
     139           0 :                 return 0;
     140             :         }
     141             : 
     142           0 :         rc = close(disk->fd);
     143           0 :         if (rc < 0) {
     144           0 :                 SPDK_ERRLOG("close() failed (fd=%d), errno %d: %s\n",
     145             :                             disk->fd, errno, spdk_strerror(errno));
     146           0 :                 return -1;
     147             :         }
     148             : 
     149           0 :         disk->fd = -1;
     150             : 
     151           0 :         return 0;
     152             : }
     153             : 
     154             : #ifdef __FreeBSD__
     155             : static int
     156             : bdev_aio_submit_io(enum spdk_bdev_io_type type, struct file_disk *fdisk,
     157             :                    struct spdk_io_channel *ch, struct bdev_aio_task *aio_task,
     158             :                    struct iovec *iov, int iovcnt, uint64_t nbytes, uint64_t offset)
     159             : {
     160             :         struct aiocb *aiocb = &aio_task->aiocb;
     161             :         struct bdev_aio_io_channel *aio_ch = spdk_io_channel_get_ctx(ch);
     162             : 
     163             :         memset(aiocb, 0, sizeof(struct aiocb));
     164             :         aiocb->aio_fildes = fdisk->fd;
     165             :         aiocb->aio_iov = iov;
     166             :         aiocb->aio_iovcnt = iovcnt;
     167             :         aiocb->aio_offset = offset;
     168             :         aiocb->aio_sigevent.sigev_notify_kqueue = aio_ch->kqfd;
     169             :         aiocb->aio_sigevent.sigev_value.sival_ptr = aio_task;
     170             :         aiocb->aio_sigevent.sigev_notify = SIGEV_KEVENT;
     171             : 
     172             :         aio_task->len = nbytes;
     173             :         aio_task->ch = aio_ch;
     174             : 
     175             :         if (type == SPDK_BDEV_IO_TYPE_READ) {
     176             :                 return aio_readv(aiocb);
     177             :         }
     178             : 
     179             :         return aio_writev(aiocb);
     180             : }
     181             : #else
     182             : static int
     183           0 : bdev_aio_submit_io(enum spdk_bdev_io_type type, struct file_disk *fdisk,
     184             :                    struct spdk_io_channel *ch, struct bdev_aio_task *aio_task,
     185             :                    struct iovec *iov, int iovcnt, uint64_t nbytes, uint64_t offset)
     186             : {
     187           0 :         struct iocb *iocb = &aio_task->iocb;
     188           0 :         struct bdev_aio_io_channel *aio_ch = spdk_io_channel_get_ctx(ch);
     189             : 
     190           0 :         if (type == SPDK_BDEV_IO_TYPE_READ) {
     191           0 :                 io_prep_preadv(iocb, fdisk->fd, iov, iovcnt, offset);
     192             :         } else {
     193           0 :                 io_prep_pwritev(iocb, fdisk->fd, iov, iovcnt, offset);
     194             :         }
     195             : 
     196           0 :         if (aio_ch->group_ch->efd >= 0) {
     197           0 :                 io_set_eventfd(iocb, aio_ch->group_ch->efd);
     198             :         }
     199           0 :         iocb->data = aio_task;
     200           0 :         aio_task->len = nbytes;
     201           0 :         aio_task->ch = aio_ch;
     202             : 
     203           0 :         return io_submit(aio_ch->io_ctx, 1, &iocb);
     204             : }
     205             : #endif
     206             : 
     207             : static void
     208           0 : bdev_aio_rw(enum spdk_bdev_io_type type, struct file_disk *fdisk,
     209             :             struct spdk_io_channel *ch, struct bdev_aio_task *aio_task,
     210             :             struct iovec *iov, int iovcnt, uint64_t nbytes, uint64_t offset)
     211             : {
     212           0 :         struct bdev_aio_io_channel *aio_ch = spdk_io_channel_get_ctx(ch);
     213             :         int rc;
     214             : 
     215           0 :         if (type == SPDK_BDEV_IO_TYPE_READ) {
     216           0 :                 SPDK_DEBUGLOG(aio, "read %d iovs size %lu to off: %#lx\n",
     217             :                               iovcnt, nbytes, offset);
     218             :         } else {
     219           0 :                 SPDK_DEBUGLOG(aio, "write %d iovs size %lu from off: %#lx\n",
     220             :                               iovcnt, nbytes, offset);
     221             :         }
     222             : 
     223           0 :         rc = bdev_aio_submit_io(type, fdisk, ch, aio_task, iov, iovcnt, nbytes, offset);
     224           0 :         if (spdk_unlikely(rc < 0)) {
     225           0 :                 if (rc == -EAGAIN) {
     226           0 :                         spdk_bdev_io_complete(spdk_bdev_io_from_ctx(aio_task), SPDK_BDEV_IO_STATUS_NOMEM);
     227             :                 } else {
     228           0 :                         spdk_bdev_io_complete_aio_status(spdk_bdev_io_from_ctx(aio_task), rc);
     229           0 :                         SPDK_ERRLOG("%s: io_submit returned %d\n", __func__, rc);
     230             :                 }
     231             :         } else {
     232           0 :                 aio_ch->io_inflight++;
     233             :         }
     234           0 : }
     235             : 
     236             : static void
     237           0 : bdev_aio_flush(struct file_disk *fdisk, struct bdev_aio_task *aio_task)
     238             : {
     239           0 :         int rc = fsync(fdisk->fd);
     240             : 
     241           0 :         if (rc == 0) {
     242           0 :                 spdk_bdev_io_complete(spdk_bdev_io_from_ctx(aio_task), SPDK_BDEV_IO_STATUS_SUCCESS);
     243             :         } else {
     244           0 :                 spdk_bdev_io_complete_aio_status(spdk_bdev_io_from_ctx(aio_task), -errno);
     245             :         }
     246           0 : }
     247             : 
     248             : static void
     249           0 : bdev_aio_destruct_cb(void *io_device)
     250             : {
     251           0 :         struct file_disk *fdisk = io_device;
     252           0 :         int rc = 0;
     253             : 
     254           0 :         TAILQ_REMOVE(&g_aio_disk_head, fdisk, link);
     255           0 :         rc = bdev_aio_close(fdisk);
     256           0 :         if (rc < 0) {
     257           0 :                 SPDK_ERRLOG("bdev_aio_close() failed\n");
     258             :         }
     259           0 :         aio_free_disk(fdisk);
     260           0 : }
     261             : 
     262             : static int
     263           0 : bdev_aio_destruct(void *ctx)
     264             : {
     265           0 :         struct file_disk *fdisk = ctx;
     266             : 
     267           0 :         spdk_io_device_unregister(fdisk, bdev_aio_destruct_cb);
     268             : 
     269           0 :         return 0;
     270             : }
     271             : 
     272             : #ifdef __FreeBSD__
     273             : static int
     274             : bdev_user_io_getevents(int kq, unsigned int max, struct kevent *events)
     275             : {
     276             :         struct timespec ts;
     277             :         int count;
     278             : 
     279             :         memset(events, 0, max * sizeof(struct kevent));
     280             :         memset(&ts, 0, sizeof(ts));
     281             : 
     282             :         count = kevent(kq, NULL, 0, events, max, &ts);
     283             :         if (count < 0) {
     284             :                 SPDK_ERRLOG("failed to get kevents: %s.\n", spdk_strerror(errno));
     285             :                 return -errno;
     286             :         }
     287             : 
     288             :         return count;
     289             : }
     290             : 
     291             : static int
     292             : bdev_aio_io_channel_poll(struct bdev_aio_io_channel *io_ch)
     293             : {
     294             :         int nr, i, res = 0;
     295             :         struct bdev_aio_task *aio_task;
     296             :         struct kevent events[SPDK_AIO_QUEUE_DEPTH];
     297             : 
     298             :         nr = bdev_user_io_getevents(io_ch->kqfd, SPDK_AIO_QUEUE_DEPTH, events);
     299             :         if (nr < 0) {
     300             :                 return 0;
     301             :         }
     302             : 
     303             :         for (i = 0; i < nr; i++) {
     304             :                 aio_task = events[i].udata;
     305             :                 aio_task->ch->io_inflight--;
     306             :                 if (aio_task == NULL) {
     307             :                         spdk_bdev_io_complete(spdk_bdev_io_from_ctx(aio_task), SPDK_BDEV_IO_STATUS_FAILED);
     308             :                         break;
     309             :                 } else if ((uint64_t)aio_return(&aio_task->aiocb) == aio_task->len) {
     310             :                         spdk_bdev_io_complete(spdk_bdev_io_from_ctx(aio_task), SPDK_BDEV_IO_STATUS_SUCCESS);
     311             :                 } else {
     312             :                         SPDK_ERRLOG("failed to complete aio: rc %d\n", aio_error(&aio_task->aiocb));
     313             :                         res = aio_error(&aio_task->aiocb);
     314             :                         if (res != 0) {
     315             :                                 spdk_bdev_io_complete_aio_status(spdk_bdev_io_from_ctx(aio_task), res);
     316             :                         } else {
     317             :                                 spdk_bdev_io_complete(spdk_bdev_io_from_ctx(aio_task), SPDK_BDEV_IO_STATUS_FAILED);
     318             :                         }
     319             :                 }
     320             :         }
     321             : 
     322             :         return nr;
     323             : }
     324             : #else
     325             : static int
     326           0 : bdev_user_io_getevents(io_context_t io_ctx, unsigned int max, struct io_event *uevents)
     327             : {
     328             :         uint32_t head, tail, count;
     329             :         struct spdk_aio_ring *ring;
     330           0 :         struct timespec timeout;
     331             :         struct io_event *kevents;
     332             : 
     333           0 :         ring = (struct spdk_aio_ring *)io_ctx;
     334             : 
     335           0 :         if (spdk_unlikely(ring->version != SPDK_AIO_RING_VERSION || ring->incompat_features != 0)) {
     336           0 :                 timeout.tv_sec = 0;
     337           0 :                 timeout.tv_nsec = 0;
     338             : 
     339           0 :                 return io_getevents(io_ctx, 0, max, uevents, &timeout);
     340             :         }
     341             : 
     342             :         /* Read the current state out of the ring */
     343           0 :         head = ring->head;
     344           0 :         tail = ring->tail;
     345             : 
     346             :         /* This memory barrier is required to prevent the loads above
     347             :          * from being re-ordered with stores to the events array
     348             :          * potentially occurring on other threads. */
     349           0 :         spdk_smp_rmb();
     350             : 
     351             :         /* Calculate how many items are in the circular ring */
     352           0 :         count = tail - head;
     353           0 :         if (tail < head) {
     354           0 :                 count += ring->size;
     355             :         }
     356             : 
     357             :         /* Reduce the count to the limit provided by the user */
     358           0 :         count = spdk_min(max, count);
     359             : 
     360             :         /* Grab the memory location of the event array */
     361           0 :         kevents = (struct io_event *)((uintptr_t)ring + ring->header_length);
     362             : 
     363             :         /* Copy the events out of the ring. */
     364           0 :         if ((head + count) <= ring->size) {
     365             :                 /* Only one copy is required */
     366           0 :                 memcpy(uevents, &kevents[head], count * sizeof(struct io_event));
     367             :         } else {
     368           0 :                 uint32_t first_part = ring->size - head;
     369             :                 /* Two copies are required */
     370           0 :                 memcpy(uevents, &kevents[head], first_part * sizeof(struct io_event));
     371           0 :                 memcpy(&uevents[first_part], &kevents[0], (count - first_part) * sizeof(struct io_event));
     372             :         }
     373             : 
     374             :         /* Update the head pointer. On x86, stores will not be reordered with older loads,
     375             :          * so the copies out of the event array will always be complete prior to this
     376             :          * update becoming visible. On other architectures this is not guaranteed, so
     377             :          * add a barrier. */
     378             : #if defined(__i386__) || defined(__x86_64__)
     379           0 :         spdk_compiler_barrier();
     380             : #else
     381             :         spdk_smp_mb();
     382             : #endif
     383           0 :         ring->head = (head + count) % ring->size;
     384             : 
     385           0 :         return count;
     386             : }
     387             : 
     388             : static int
     389           0 : bdev_aio_io_channel_poll(struct bdev_aio_io_channel *io_ch)
     390             : {
     391           0 :         int nr, i, res = 0;
     392             :         struct bdev_aio_task *aio_task;
     393           0 :         struct io_event events[SPDK_AIO_QUEUE_DEPTH];
     394             : 
     395           0 :         nr = bdev_user_io_getevents(io_ch->io_ctx, SPDK_AIO_QUEUE_DEPTH, events);
     396           0 :         if (nr < 0) {
     397           0 :                 return 0;
     398             :         }
     399             : 
     400           0 :         for (i = 0; i < nr; i++) {
     401           0 :                 aio_task = events[i].data;
     402           0 :                 aio_task->ch->io_inflight--;
     403           0 :                 if (events[i].res == aio_task->len) {
     404           0 :                         spdk_bdev_io_complete(spdk_bdev_io_from_ctx(aio_task), SPDK_BDEV_IO_STATUS_SUCCESS);
     405             :                 } else {
     406             :                         /* From aio_abi.h, io_event.res is defined __s64, negative errno
     407             :                          * will be assigned to io_event.res for error situation.
     408             :                          * But from libaio.h, io_event.res is defined unsigned long, so
     409             :                          * convert it to signed value for error detection.
     410             :                          */
     411           0 :                         SPDK_ERRLOG("failed to complete aio: rc %"PRId64"\n", events[i].res);
     412           0 :                         res = (int)events[i].res;
     413           0 :                         if (res < 0) {
     414           0 :                                 spdk_bdev_io_complete_aio_status(spdk_bdev_io_from_ctx(aio_task), res);
     415             :                         } else {
     416           0 :                                 spdk_bdev_io_complete(spdk_bdev_io_from_ctx(aio_task), SPDK_BDEV_IO_STATUS_FAILED);
     417             :                         }
     418             :                 }
     419             :         }
     420             : 
     421           0 :         return nr;
     422             : }
     423             : #endif
     424             : 
     425             : static int
     426           0 : bdev_aio_group_poll(void *arg)
     427             : {
     428           0 :         struct bdev_aio_group_channel *group_ch = arg;
     429             :         struct bdev_aio_io_channel *io_ch;
     430           0 :         int nr = 0;
     431             : 
     432           0 :         TAILQ_FOREACH(io_ch, &group_ch->io_ch_head, link) {
     433           0 :                 nr += bdev_aio_io_channel_poll(io_ch);
     434             :         }
     435             : 
     436           0 :         return nr > 0 ? SPDK_POLLER_BUSY : SPDK_POLLER_IDLE;
     437             : }
     438             : 
     439             : static int
     440           0 : bdev_aio_group_interrupt(void *arg)
     441             : {
     442           0 :         struct bdev_aio_group_channel *group_ch = arg;
     443             :         int rc;
     444           0 :         uint64_t num_events;
     445             : 
     446           0 :         assert(group_ch->efd >= 0);
     447             : 
     448             :         /* if completed IO number is larger than SPDK_AIO_QUEUE_DEPTH,
     449             :          * io_getevent should be called again to ensure all completed IO are processed.
     450             :          */
     451           0 :         rc = read(group_ch->efd, &num_events, sizeof(num_events));
     452           0 :         if (rc < 0) {
     453           0 :                 SPDK_ERRLOG("failed to acknowledge aio group: %s.\n", spdk_strerror(errno));
     454           0 :                 return -errno;
     455             :         }
     456             : 
     457           0 :         if (num_events > SPDK_AIO_QUEUE_DEPTH) {
     458           0 :                 num_events -= SPDK_AIO_QUEUE_DEPTH;
     459           0 :                 rc = write(group_ch->efd, &num_events, sizeof(num_events));
     460           0 :                 if (rc < 0) {
     461           0 :                         SPDK_ERRLOG("failed to notify aio group: %s.\n", spdk_strerror(errno));
     462             :                 }
     463             :         }
     464             : 
     465           0 :         return bdev_aio_group_poll(group_ch);
     466             : }
     467             : 
     468             : static void
     469           0 : _bdev_aio_get_io_inflight(struct spdk_io_channel_iter *i)
     470             : {
     471           0 :         struct spdk_io_channel *ch = spdk_io_channel_iter_get_channel(i);
     472           0 :         struct bdev_aio_io_channel *aio_ch = spdk_io_channel_get_ctx(ch);
     473             : 
     474           0 :         if (aio_ch->io_inflight) {
     475           0 :                 spdk_for_each_channel_continue(i, -1);
     476           0 :                 return;
     477             :         }
     478             : 
     479           0 :         spdk_for_each_channel_continue(i, 0);
     480             : }
     481             : 
     482             : static int bdev_aio_reset_retry_timer(void *arg);
     483             : 
     484             : static void
     485           0 : _bdev_aio_get_io_inflight_done(struct spdk_io_channel_iter *i, int status)
     486             : {
     487           0 :         struct file_disk *fdisk = spdk_io_channel_iter_get_ctx(i);
     488             : 
     489           0 :         if (status == -1) {
     490           0 :                 fdisk->reset_retry_timer = SPDK_POLLER_REGISTER(bdev_aio_reset_retry_timer, fdisk, 500);
     491           0 :                 return;
     492             :         }
     493             : 
     494           0 :         spdk_bdev_io_complete(spdk_bdev_io_from_ctx(fdisk->reset_task), SPDK_BDEV_IO_STATUS_SUCCESS);
     495             : }
     496             : 
     497             : static int
     498           0 : bdev_aio_reset_retry_timer(void *arg)
     499             : {
     500           0 :         struct file_disk *fdisk = arg;
     501             : 
     502           0 :         if (fdisk->reset_retry_timer) {
     503           0 :                 spdk_poller_unregister(&fdisk->reset_retry_timer);
     504             :         }
     505             : 
     506           0 :         spdk_for_each_channel(fdisk,
     507             :                               _bdev_aio_get_io_inflight,
     508             :                               fdisk,
     509             :                               _bdev_aio_get_io_inflight_done);
     510             : 
     511           0 :         return SPDK_POLLER_BUSY;
     512             : }
     513             : 
     514             : static void
     515           0 : bdev_aio_reset(struct file_disk *fdisk, struct bdev_aio_task *aio_task)
     516             : {
     517           0 :         fdisk->reset_task = aio_task;
     518             : 
     519           0 :         bdev_aio_reset_retry_timer(fdisk);
     520           0 : }
     521             : 
     522             : static void
     523           0 : bdev_aio_get_buf_cb(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io,
     524             :                     bool success)
     525             : {
     526           0 :         if (!success) {
     527           0 :                 spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED);
     528           0 :                 return;
     529             :         }
     530             : 
     531           0 :         switch (bdev_io->type) {
     532           0 :         case SPDK_BDEV_IO_TYPE_READ:
     533             :         case SPDK_BDEV_IO_TYPE_WRITE:
     534           0 :                 bdev_aio_rw(bdev_io->type,
     535           0 :                             (struct file_disk *)bdev_io->bdev->ctxt,
     536             :                             ch,
     537           0 :                             (struct bdev_aio_task *)bdev_io->driver_ctx,
     538             :                             bdev_io->u.bdev.iovs,
     539             :                             bdev_io->u.bdev.iovcnt,
     540           0 :                             bdev_io->u.bdev.num_blocks * bdev_io->bdev->blocklen,
     541           0 :                             bdev_io->u.bdev.offset_blocks * bdev_io->bdev->blocklen);
     542           0 :                 break;
     543           0 :         default:
     544           0 :                 SPDK_ERRLOG("Wrong io type\n");
     545           0 :                 break;
     546             :         }
     547             : }
     548             : 
     549             : static int
     550           0 : _bdev_aio_submit_request(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io)
     551             : {
     552           0 :         struct file_disk *fdisk = (struct file_disk *)bdev_io->bdev->ctxt;
     553             : 
     554           0 :         switch (bdev_io->type) {
     555             :         /* Read and write operations must be performed on buffers aligned to
     556             :          * bdev->required_alignment. If user specified unaligned buffers,
     557             :          * get the aligned buffer from the pool by calling spdk_bdev_io_get_buf. */
     558           0 :         case SPDK_BDEV_IO_TYPE_READ:
     559           0 :                 spdk_bdev_io_get_buf(bdev_io, bdev_aio_get_buf_cb,
     560           0 :                                      bdev_io->u.bdev.num_blocks * bdev_io->bdev->blocklen);
     561           0 :                 return 0;
     562           0 :         case SPDK_BDEV_IO_TYPE_WRITE:
     563           0 :                 if (fdisk->readonly) {
     564           0 :                         spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED);
     565             :                 } else {
     566           0 :                         spdk_bdev_io_get_buf(bdev_io, bdev_aio_get_buf_cb,
     567           0 :                                              bdev_io->u.bdev.num_blocks * bdev_io->bdev->blocklen);
     568             :                 }
     569           0 :                 return 0;
     570             : 
     571           0 :         case SPDK_BDEV_IO_TYPE_FLUSH:
     572           0 :                 bdev_aio_flush((struct file_disk *)bdev_io->bdev->ctxt,
     573           0 :                                (struct bdev_aio_task *)bdev_io->driver_ctx);
     574           0 :                 return 0;
     575             : 
     576           0 :         case SPDK_BDEV_IO_TYPE_RESET:
     577           0 :                 bdev_aio_reset((struct file_disk *)bdev_io->bdev->ctxt,
     578           0 :                                (struct bdev_aio_task *)bdev_io->driver_ctx);
     579           0 :                 return 0;
     580           0 :         default:
     581           0 :                 return -1;
     582             :         }
     583             : }
     584             : 
     585             : static void
     586           0 : bdev_aio_submit_request(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io)
     587             : {
     588           0 :         if (_bdev_aio_submit_request(ch, bdev_io) < 0) {
     589           0 :                 spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED);
     590             :         }
     591           0 : }
     592             : 
     593             : static bool
     594           0 : bdev_aio_io_type_supported(void *ctx, enum spdk_bdev_io_type io_type)
     595             : {
     596           0 :         switch (io_type) {
     597           0 :         case SPDK_BDEV_IO_TYPE_READ:
     598             :         case SPDK_BDEV_IO_TYPE_WRITE:
     599             :         case SPDK_BDEV_IO_TYPE_FLUSH:
     600             :         case SPDK_BDEV_IO_TYPE_RESET:
     601           0 :                 return true;
     602             : 
     603           0 :         default:
     604           0 :                 return false;
     605             :         }
     606             : }
     607             : 
     608             : #ifdef __FreeBSD__
     609             : static int
     610             : bdev_aio_create_io(struct bdev_aio_io_channel *ch)
     611             : {
     612             :         ch->kqfd = kqueue();
     613             :         if (ch->kqfd < 0) {
     614             :                 SPDK_ERRLOG("async I/O context setup failure: %s.\n", spdk_strerror(errno));
     615             :                 return -1;
     616             :         }
     617             : 
     618             :         return 0;
     619             : }
     620             : 
     621             : static void
     622             : bdev_aio_destroy_io(struct bdev_aio_io_channel *ch)
     623             : {
     624             :         close(ch->kqfd);
     625             : }
     626             : #else
     627             : static int
     628           0 : bdev_aio_create_io(struct bdev_aio_io_channel *ch)
     629             : {
     630           0 :         if (io_setup(SPDK_AIO_QUEUE_DEPTH, &ch->io_ctx) < 0) {
     631           0 :                 SPDK_ERRLOG("Async I/O context setup failure, likely due to exceeding kernel limit.\n");
     632           0 :                 SPDK_ERRLOG("This limit may be increased using 'sysctl -w fs.aio-max-nr'.\n");
     633           0 :                 return -1;
     634             :         }
     635             : 
     636           0 :         return 0;
     637             : }
     638             : 
     639             : static void
     640           0 : bdev_aio_destroy_io(struct bdev_aio_io_channel *ch)
     641             : {
     642           0 :         io_destroy(ch->io_ctx);
     643           0 : }
     644             : #endif
     645             : 
     646             : static int
     647           0 : bdev_aio_create_cb(void *io_device, void *ctx_buf)
     648             : {
     649           0 :         struct bdev_aio_io_channel *ch = ctx_buf;
     650             :         int rc;
     651             : 
     652           0 :         rc = bdev_aio_create_io(ch);
     653           0 :         if (rc < 0) {
     654           0 :                 return rc;
     655             :         }
     656             : 
     657           0 :         ch->group_ch = spdk_io_channel_get_ctx(spdk_get_io_channel(&aio_if));
     658           0 :         TAILQ_INSERT_TAIL(&ch->group_ch->io_ch_head, ch, link);
     659             : 
     660           0 :         return 0;
     661             : }
     662             : 
     663             : static void
     664           0 : bdev_aio_destroy_cb(void *io_device, void *ctx_buf)
     665             : {
     666           0 :         struct bdev_aio_io_channel *ch = ctx_buf;
     667             : 
     668           0 :         bdev_aio_destroy_io(ch);
     669             : 
     670           0 :         assert(ch->group_ch);
     671           0 :         TAILQ_REMOVE(&ch->group_ch->io_ch_head, ch, link);
     672             : 
     673           0 :         spdk_put_io_channel(spdk_io_channel_from_ctx(ch->group_ch));
     674           0 : }
     675             : 
     676             : static struct spdk_io_channel *
     677           0 : bdev_aio_get_io_channel(void *ctx)
     678             : {
     679           0 :         struct file_disk *fdisk = ctx;
     680             : 
     681           0 :         return spdk_get_io_channel(fdisk);
     682             : }
     683             : 
     684             : 
     685             : static int
     686           0 : bdev_aio_dump_info_json(void *ctx, struct spdk_json_write_ctx *w)
     687             : {
     688           0 :         struct file_disk *fdisk = ctx;
     689             : 
     690           0 :         spdk_json_write_named_object_begin(w, "aio");
     691             : 
     692           0 :         spdk_json_write_named_string(w, "filename", fdisk->filename);
     693             : 
     694           0 :         spdk_json_write_named_bool(w, "block_size_override", fdisk->block_size_override);
     695             : 
     696           0 :         spdk_json_write_named_bool(w, "readonly", fdisk->readonly);
     697             : 
     698           0 :         spdk_json_write_object_end(w);
     699             : 
     700           0 :         return 0;
     701             : }
     702             : 
     703             : static void
     704           0 : bdev_aio_write_json_config(struct spdk_bdev *bdev, struct spdk_json_write_ctx *w)
     705             : {
     706           0 :         struct file_disk *fdisk = bdev->ctxt;
     707             : 
     708           0 :         spdk_json_write_object_begin(w);
     709             : 
     710           0 :         spdk_json_write_named_string(w, "method", "bdev_aio_create");
     711             : 
     712           0 :         spdk_json_write_named_object_begin(w, "params");
     713           0 :         spdk_json_write_named_string(w, "name", bdev->name);
     714           0 :         if (fdisk->block_size_override) {
     715           0 :                 spdk_json_write_named_uint32(w, "block_size", bdev->blocklen);
     716             :         }
     717           0 :         spdk_json_write_named_string(w, "filename", fdisk->filename);
     718           0 :         spdk_json_write_named_bool(w, "readonly", fdisk->readonly);
     719           0 :         spdk_json_write_object_end(w);
     720             : 
     721           0 :         spdk_json_write_object_end(w);
     722           0 : }
     723             : 
     724             : static const struct spdk_bdev_fn_table aio_fn_table = {
     725             :         .destruct               = bdev_aio_destruct,
     726             :         .submit_request         = bdev_aio_submit_request,
     727             :         .io_type_supported      = bdev_aio_io_type_supported,
     728             :         .get_io_channel         = bdev_aio_get_io_channel,
     729             :         .dump_info_json         = bdev_aio_dump_info_json,
     730             :         .write_config_json      = bdev_aio_write_json_config,
     731             : };
     732             : 
     733             : static void
     734           0 : aio_free_disk(struct file_disk *fdisk)
     735             : {
     736           0 :         if (fdisk == NULL) {
     737           0 :                 return;
     738             :         }
     739           0 :         free(fdisk->filename);
     740           0 :         free(fdisk->disk.name);
     741           0 :         free(fdisk);
     742             : }
     743             : 
     744             : static int
     745           0 : bdev_aio_register_interrupt(struct bdev_aio_group_channel *ch)
     746             : {
     747             :         int efd;
     748             : 
     749           0 :         efd = eventfd(0, EFD_NONBLOCK | EFD_CLOEXEC);
     750           0 :         if (efd < 0) {
     751           0 :                 return -1;
     752             :         }
     753             : 
     754           0 :         ch->intr = SPDK_INTERRUPT_REGISTER(efd, bdev_aio_group_interrupt, ch);
     755           0 :         if (ch->intr == NULL) {
     756           0 :                 close(efd);
     757           0 :                 return -1;
     758             :         }
     759           0 :         ch->efd = efd;
     760             : 
     761           0 :         return 0;
     762             : }
     763             : 
     764             : static void
     765           0 : bdev_aio_unregister_interrupt(struct bdev_aio_group_channel *ch)
     766             : {
     767           0 :         spdk_interrupt_unregister(&ch->intr);
     768           0 :         close(ch->efd);
     769           0 :         ch->efd = -1;
     770           0 : }
     771             : 
     772             : static void
     773           0 : bdev_aio_poller_set_interrupt_mode(struct spdk_poller *poller, void *cb_arg, bool interrupt_mode)
     774             : {
     775           0 :         return;
     776             : }
     777             : 
     778             : static int
     779           0 : bdev_aio_group_create_cb(void *io_device, void *ctx_buf)
     780             : {
     781           0 :         struct bdev_aio_group_channel *ch = ctx_buf;
     782             :         int rc;
     783             : 
     784           0 :         TAILQ_INIT(&ch->io_ch_head);
     785             :         /* Initialize ch->efd to be invalid and unused. */
     786           0 :         ch->efd = -1;
     787           0 :         if (spdk_interrupt_mode_is_enabled()) {
     788           0 :                 rc = bdev_aio_register_interrupt(ch);
     789           0 :                 if (rc < 0) {
     790           0 :                         SPDK_ERRLOG("Failed to prepare intr resource to bdev_aio\n");
     791           0 :                         return rc;
     792             :                 }
     793             :         }
     794             : 
     795           0 :         ch->poller = SPDK_POLLER_REGISTER(bdev_aio_group_poll, ch, 0);
     796           0 :         spdk_poller_register_interrupt(ch->poller, bdev_aio_poller_set_interrupt_mode, NULL);
     797             : 
     798           0 :         return 0;
     799             : }
     800             : 
     801             : static void
     802           0 : bdev_aio_group_destroy_cb(void *io_device, void *ctx_buf)
     803             : {
     804           0 :         struct bdev_aio_group_channel *ch = ctx_buf;
     805             : 
     806           0 :         if (!TAILQ_EMPTY(&ch->io_ch_head)) {
     807           0 :                 SPDK_ERRLOG("Group channel of bdev aio has uncleared io channel\n");
     808             :         }
     809             : 
     810           0 :         spdk_poller_unregister(&ch->poller);
     811           0 :         if (spdk_interrupt_mode_is_enabled()) {
     812           0 :                 bdev_aio_unregister_interrupt(ch);
     813             :         }
     814           0 : }
     815             : 
     816             : int
     817           0 : create_aio_bdev(const char *name, const char *filename, uint32_t block_size, bool readonly)
     818             : {
     819             :         struct file_disk *fdisk;
     820             :         uint32_t detected_block_size;
     821             :         uint64_t disk_size;
     822             :         int rc;
     823             : 
     824           0 :         fdisk = calloc(1, sizeof(*fdisk));
     825           0 :         if (!fdisk) {
     826           0 :                 SPDK_ERRLOG("Unable to allocate enough memory for aio backend\n");
     827           0 :                 return -ENOMEM;
     828             :         }
     829           0 :         fdisk->readonly = readonly;
     830             : 
     831           0 :         fdisk->filename = strdup(filename);
     832           0 :         if (!fdisk->filename) {
     833           0 :                 rc = -ENOMEM;
     834           0 :                 goto error_return;
     835             :         }
     836             : 
     837           0 :         if (bdev_aio_open(fdisk)) {
     838           0 :                 SPDK_ERRLOG("Unable to open file %s. fd: %d errno: %d\n", filename, fdisk->fd, errno);
     839           0 :                 rc = -errno;
     840           0 :                 goto error_return;
     841             :         }
     842             : 
     843           0 :         disk_size = spdk_fd_get_size(fdisk->fd);
     844             : 
     845           0 :         fdisk->disk.name = strdup(name);
     846           0 :         if (!fdisk->disk.name) {
     847           0 :                 rc = -ENOMEM;
     848           0 :                 goto error_return;
     849             :         }
     850           0 :         fdisk->disk.product_name = "AIO disk";
     851           0 :         fdisk->disk.module = &aio_if;
     852             : 
     853           0 :         fdisk->disk.write_cache = 1;
     854             : 
     855           0 :         detected_block_size = spdk_fd_get_blocklen(fdisk->fd);
     856           0 :         if (block_size == 0) {
     857             :                 /* User did not specify block size - use autodetected block size. */
     858           0 :                 if (detected_block_size == 0) {
     859           0 :                         SPDK_ERRLOG("Block size could not be auto-detected\n");
     860           0 :                         rc = -EINVAL;
     861           0 :                         goto error_return;
     862             :                 }
     863           0 :                 fdisk->block_size_override = false;
     864           0 :                 block_size = detected_block_size;
     865             :         } else {
     866           0 :                 if (block_size < detected_block_size) {
     867           0 :                         SPDK_ERRLOG("Specified block size %" PRIu32 " is smaller than "
     868             :                                     "auto-detected block size %" PRIu32 "\n",
     869             :                                     block_size, detected_block_size);
     870           0 :                         rc = -EINVAL;
     871           0 :                         goto error_return;
     872           0 :                 } else if (detected_block_size != 0 && block_size != detected_block_size) {
     873           0 :                         SPDK_WARNLOG("Specified block size %" PRIu32 " does not match "
     874             :                                      "auto-detected block size %" PRIu32 "\n",
     875             :                                      block_size, detected_block_size);
     876             :                 }
     877           0 :                 fdisk->block_size_override = true;
     878             :         }
     879             : 
     880           0 :         if (block_size < 512) {
     881           0 :                 SPDK_ERRLOG("Invalid block size %" PRIu32 " (must be at least 512).\n", block_size);
     882           0 :                 rc = -EINVAL;
     883           0 :                 goto error_return;
     884             :         }
     885             : 
     886           0 :         if (!spdk_u32_is_pow2(block_size)) {
     887           0 :                 SPDK_ERRLOG("Invalid block size %" PRIu32 " (must be a power of 2.)\n", block_size);
     888           0 :                 rc = -EINVAL;
     889           0 :                 goto error_return;
     890             :         }
     891             : 
     892           0 :         fdisk->disk.blocklen = block_size;
     893           0 :         if (fdisk->block_size_override && detected_block_size) {
     894           0 :                 fdisk->disk.required_alignment = spdk_u32log2(detected_block_size);
     895             :         } else {
     896           0 :                 fdisk->disk.required_alignment = spdk_u32log2(block_size);
     897             :         }
     898             : 
     899           0 :         if (disk_size % fdisk->disk.blocklen != 0) {
     900           0 :                 SPDK_ERRLOG("Disk size %" PRIu64 " is not a multiple of block size %" PRIu32 "\n",
     901             :                             disk_size, fdisk->disk.blocklen);
     902           0 :                 rc = -EINVAL;
     903           0 :                 goto error_return;
     904             :         }
     905             : 
     906           0 :         fdisk->disk.blockcnt = disk_size / fdisk->disk.blocklen;
     907           0 :         fdisk->disk.ctxt = fdisk;
     908             : 
     909           0 :         fdisk->disk.fn_table = &aio_fn_table;
     910             : 
     911           0 :         spdk_io_device_register(fdisk, bdev_aio_create_cb, bdev_aio_destroy_cb,
     912             :                                 sizeof(struct bdev_aio_io_channel),
     913           0 :                                 fdisk->disk.name);
     914           0 :         rc = spdk_bdev_register(&fdisk->disk);
     915           0 :         if (rc) {
     916           0 :                 spdk_io_device_unregister(fdisk, NULL);
     917           0 :                 goto error_return;
     918             :         }
     919             : 
     920           0 :         TAILQ_INSERT_TAIL(&g_aio_disk_head, fdisk, link);
     921           0 :         return 0;
     922             : 
     923           0 : error_return:
     924           0 :         bdev_aio_close(fdisk);
     925           0 :         aio_free_disk(fdisk);
     926           0 :         return rc;
     927             : }
     928             : 
     929             : static void
     930           0 : dummy_bdev_event_cb(enum spdk_bdev_event_type type, struct spdk_bdev *bdev, void *ctx)
     931             : {
     932           0 : }
     933             : 
     934             : int
     935           0 : bdev_aio_rescan(const char *name)
     936             : {
     937           0 :         struct spdk_bdev_desc *desc;
     938             :         struct spdk_bdev *bdev;
     939             :         struct file_disk *fdisk;
     940             :         uint64_t disk_size, blockcnt;
     941             :         int rc;
     942             : 
     943           0 :         rc = spdk_bdev_open_ext(name, false, dummy_bdev_event_cb, NULL, &desc);
     944           0 :         if (rc != 0) {
     945           0 :                 return rc;
     946             :         }
     947             : 
     948           0 :         bdev = spdk_bdev_desc_get_bdev(desc);
     949           0 :         if (bdev->module != &aio_if) {
     950           0 :                 rc = -ENODEV;
     951           0 :                 goto exit;
     952             :         }
     953             : 
     954           0 :         fdisk = SPDK_CONTAINEROF(bdev, struct file_disk, disk);
     955           0 :         disk_size = spdk_fd_get_size(fdisk->fd);
     956           0 :         blockcnt = disk_size / bdev->blocklen;
     957             : 
     958           0 :         if (bdev->blockcnt != blockcnt) {
     959           0 :                 SPDK_NOTICELOG("AIO device is resized: bdev name %s, old block count %" PRIu64 ", new block count %"
     960             :                                PRIu64 "\n",
     961             :                                fdisk->filename,
     962             :                                bdev->blockcnt,
     963             :                                blockcnt);
     964           0 :                 rc = spdk_bdev_notify_blockcnt_change(bdev, blockcnt);
     965           0 :                 if (rc != 0) {
     966           0 :                         SPDK_ERRLOG("Could not change num blocks for aio bdev: name %s, errno: %d.\n",
     967             :                                     fdisk->filename, rc);
     968           0 :                         goto exit;
     969             :                 }
     970             :         }
     971             : 
     972           0 : exit:
     973           0 :         spdk_bdev_close(desc);
     974           0 :         return rc;
     975             : }
     976             : 
     977             : struct delete_aio_bdev_ctx {
     978             :         delete_aio_bdev_complete cb_fn;
     979             :         void *cb_arg;
     980             : };
     981             : 
     982             : static void
     983           0 : aio_bdev_unregister_cb(void *arg, int bdeverrno)
     984             : {
     985           0 :         struct delete_aio_bdev_ctx *ctx = arg;
     986             : 
     987           0 :         ctx->cb_fn(ctx->cb_arg, bdeverrno);
     988           0 :         free(ctx);
     989           0 : }
     990             : 
     991             : void
     992           0 : bdev_aio_delete(const char *name, delete_aio_bdev_complete cb_fn, void *cb_arg)
     993             : {
     994             :         struct delete_aio_bdev_ctx *ctx;
     995             :         int rc;
     996             : 
     997           0 :         ctx = calloc(1, sizeof(*ctx));
     998           0 :         if (ctx == NULL) {
     999           0 :                 cb_fn(cb_arg, -ENOMEM);
    1000           0 :                 return;
    1001             :         }
    1002             : 
    1003           0 :         ctx->cb_fn = cb_fn;
    1004           0 :         ctx->cb_arg = cb_arg;
    1005           0 :         rc = spdk_bdev_unregister_by_name(name, &aio_if, aio_bdev_unregister_cb, ctx);
    1006           0 :         if (rc != 0) {
    1007           0 :                 aio_bdev_unregister_cb(ctx, rc);
    1008             :         }
    1009             : }
    1010             : 
    1011             : static int
    1012           0 : bdev_aio_initialize(void)
    1013             : {
    1014           0 :         spdk_io_device_register(&aio_if, bdev_aio_group_create_cb, bdev_aio_group_destroy_cb,
    1015             :                                 sizeof(struct bdev_aio_group_channel), "aio_module");
    1016             : 
    1017           0 :         return 0;
    1018             : }
    1019             : 
    1020             : static void
    1021           0 : bdev_aio_fini(void)
    1022             : {
    1023           0 :         spdk_io_device_unregister(&aio_if, NULL);
    1024           0 : }
    1025             : 
    1026           0 : SPDK_LOG_REGISTER_COMPONENT(aio)

Generated by: LCOV version 1.15