LCOV - code coverage report
Current view: top level - module/bdev/raid - raid0.c (source / functions) Hit Total Coverage
Test: ut_cov_unit.info Lines: 140 195 71.8 %
Date: 2024-12-15 03:46:39 Functions: 9 12 75.0 %

          Line data    Source code
       1             : /*   SPDX-License-Identifier: BSD-3-Clause
       2             :  *   Copyright (C) 2019 Intel Corporation.
       3             :  *   All rights reserved.
       4             :  *   Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
       5             :  */
       6             : 
       7             : #include "bdev_raid.h"
       8             : 
       9             : #include "spdk/env.h"
      10             : #include "spdk/thread.h"
      11             : #include "spdk/string.h"
      12             : #include "spdk/util.h"
      13             : 
      14             : #include "spdk/log.h"
      15             : 
      16             : /*
      17             :  * brief:
      18             :  * raid0_bdev_io_completion function is called by lower layers to notify raid
      19             :  * module that particular bdev_io is completed.
      20             :  * params:
      21             :  * bdev_io - pointer to bdev io submitted to lower layers, like child io
      22             :  * success - bdev_io status
      23             :  * cb_arg - function callback context (parent raid_bdev_io)
      24             :  * returns:
      25             :  * none
      26             :  */
      27             : static void
      28          10 : raid0_bdev_io_completion(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg)
      29             : {
      30          10 :         struct raid_bdev_io *raid_io = cb_arg;
      31             :         int rc;
      32             : 
      33          10 :         if (success) {
      34           8 :                 if (spdk_unlikely(bdev_io->type == SPDK_BDEV_IO_TYPE_READ &&
      35             :                                   spdk_bdev_get_dif_type(bdev_io->bdev) != SPDK_DIF_DISABLE &&
      36             :                                   bdev_io->bdev->dif_check_flags & SPDK_DIF_FLAGS_REFTAG_CHECK)) {
      37             : 
      38           4 :                         rc = raid_bdev_verify_dix_reftag(bdev_io->u.bdev.iovs, bdev_io->u.bdev.iovcnt,
      39           2 :                                                          bdev_io->u.bdev.md_buf, bdev_io->u.bdev.num_blocks, bdev_io->bdev,
      40           2 :                                                          bdev_io->u.bdev.offset_blocks);
      41           2 :                         if (rc != 0) {
      42           0 :                                 SPDK_ERRLOG("Reftag verify failed.\n");
      43           0 :                                 raid_bdev_io_complete(raid_io, SPDK_BDEV_IO_STATUS_FAILED);
      44           0 :                                 return;
      45             :                         }
      46           2 :                 }
      47             : 
      48           8 :                 raid_bdev_io_complete(raid_io, SPDK_BDEV_IO_STATUS_SUCCESS);
      49           8 :         } else {
      50           2 :                 raid_bdev_io_complete(raid_io, SPDK_BDEV_IO_STATUS_FAILED);
      51             :         }
      52             : 
      53          10 :         spdk_bdev_free_io(bdev_io);
      54          10 : }
      55             : 
      56             : static void raid0_submit_rw_request(struct raid_bdev_io *raid_io);
      57             : 
      58             : static void
      59           0 : _raid0_submit_rw_request(void *_raid_io)
      60             : {
      61           0 :         struct raid_bdev_io *raid_io = _raid_io;
      62             : 
      63           0 :         raid0_submit_rw_request(raid_io);
      64           0 : }
      65             : 
      66             : /*
      67             :  * brief:
      68             :  * raid0_submit_rw_request function is used to submit I/O to the correct
      69             :  * member disk for raid0 bdevs.
      70             :  * params:
      71             :  * raid_io
      72             :  * returns:
      73             :  * none
      74             :  */
      75             : static void
      76          10 : raid0_submit_rw_request(struct raid_bdev_io *raid_io)
      77             : {
      78          10 :         struct spdk_bdev_ext_io_opts    io_opts = {};
      79          10 :         struct raid_bdev_io_channel     *raid_ch = raid_io->raid_ch;
      80          10 :         struct raid_bdev                *raid_bdev = raid_io->raid_bdev;
      81             :         uint64_t                        pd_strip;
      82             :         uint32_t                        offset_in_strip;
      83             :         uint64_t                        pd_lba;
      84             :         uint64_t                        pd_blocks;
      85             :         uint8_t                         pd_idx;
      86          10 :         int                             ret = 0;
      87             :         uint64_t                        start_strip;
      88             :         uint64_t                        end_strip;
      89             :         struct raid_base_bdev_info      *base_info;
      90             :         struct spdk_io_channel          *base_ch;
      91             : 
      92          10 :         start_strip = raid_io->offset_blocks >> raid_bdev->strip_size_shift;
      93          20 :         end_strip = (raid_io->offset_blocks + raid_io->num_blocks - 1) >>
      94          10 :                     raid_bdev->strip_size_shift;
      95          10 :         if (start_strip != end_strip && raid_bdev->num_base_bdevs > 1) {
      96           0 :                 assert(false);
      97             :                 SPDK_ERRLOG("I/O spans strip boundary!\n");
      98             :                 raid_bdev_io_complete(raid_io, SPDK_BDEV_IO_STATUS_FAILED);
      99             :                 return;
     100             :         }
     101             : 
     102          10 :         pd_strip = start_strip / raid_bdev->num_base_bdevs;
     103          10 :         pd_idx = start_strip % raid_bdev->num_base_bdevs;
     104          10 :         offset_in_strip = raid_io->offset_blocks & (raid_bdev->strip_size - 1);
     105          10 :         pd_lba = (pd_strip << raid_bdev->strip_size_shift) + offset_in_strip;
     106          10 :         pd_blocks = raid_io->num_blocks;
     107          10 :         base_info = &raid_bdev->base_bdev_info[pd_idx];
     108          10 :         if (base_info->desc == NULL) {
     109           0 :                 SPDK_ERRLOG("base bdev desc null for pd_idx %u\n", pd_idx);
     110           0 :                 assert(0);
     111             :         }
     112             : 
     113             :         /*
     114             :          * Submit child io to bdev layer with using base bdev descriptors, base
     115             :          * bdev lba, base bdev child io length in blocks, buffer, completion
     116             :          * function and function callback context
     117             :          */
     118          10 :         assert(raid_ch != NULL);
     119          10 :         base_ch = raid_bdev_channel_get_base_channel(raid_ch, pd_idx);
     120             : 
     121          10 :         io_opts.size = sizeof(io_opts);
     122          10 :         io_opts.memory_domain = raid_io->memory_domain;
     123          10 :         io_opts.memory_domain_ctx = raid_io->memory_domain_ctx;
     124          10 :         io_opts.metadata = raid_io->md_buf;
     125             : 
     126          10 :         if (raid_io->type == SPDK_BDEV_IO_TYPE_READ) {
     127           8 :                 ret = raid_bdev_readv_blocks_ext(base_info, base_ch,
     128           4 :                                                  raid_io->iovs, raid_io->iovcnt,
     129           4 :                                                  pd_lba, pd_blocks, raid0_bdev_io_completion,
     130           4 :                                                  raid_io, &io_opts);
     131          10 :         } else if (raid_io->type == SPDK_BDEV_IO_TYPE_WRITE) {
     132           6 :                 struct spdk_bdev *bdev = &base_info->raid_bdev->bdev;
     133             : 
     134           6 :                 if (spdk_unlikely(spdk_bdev_get_dif_type(bdev) != SPDK_DIF_DISABLE &&
     135             :                                   bdev->dif_check_flags & SPDK_DIF_FLAGS_REFTAG_CHECK)) {
     136           6 :                         ret = raid_bdev_verify_dix_reftag(raid_io->iovs, raid_io->iovcnt, io_opts.metadata,
     137           3 :                                                           pd_blocks, bdev, raid_io->offset_blocks);
     138           3 :                         if (ret != 0) {
     139           0 :                                 SPDK_ERRLOG("bdev io submit error due to DIX verify failure\n");
     140           0 :                                 raid_bdev_io_complete(raid_io, SPDK_BDEV_IO_STATUS_FAILED);
     141           0 :                                 return;
     142             :                         }
     143           3 :                 }
     144             : 
     145          12 :                 ret = raid_bdev_writev_blocks_ext(base_info, base_ch,
     146           6 :                                                   raid_io->iovs, raid_io->iovcnt,
     147           6 :                                                   pd_lba, pd_blocks, raid0_bdev_io_completion,
     148           6 :                                                   raid_io, &io_opts);
     149           6 :         } else {
     150           0 :                 SPDK_ERRLOG("Recvd not supported io type %u\n", raid_io->type);
     151           0 :                 assert(0);
     152             :         }
     153             : 
     154          10 :         if (ret == -ENOMEM) {
     155           0 :                 raid_bdev_queue_io_wait(raid_io, spdk_bdev_desc_get_bdev(base_info->desc),
     156           0 :                                         base_ch, _raid0_submit_rw_request);
     157          10 :         } else if (ret != 0) {
     158           0 :                 SPDK_ERRLOG("bdev io submit error not due to ENOMEM, it should not happen\n");
     159           0 :                 assert(false);
     160             :                 raid_bdev_io_complete(raid_io, SPDK_BDEV_IO_STATUS_FAILED);
     161             :         }
     162          10 : }
     163             : 
     164             : /* raid0 IO range */
     165             : struct raid_bdev_io_range {
     166             :         uint64_t        strip_size;
     167             :         uint64_t        start_strip_in_disk;
     168             :         uint64_t        end_strip_in_disk;
     169             :         uint64_t        start_offset_in_strip;
     170             :         uint64_t        end_offset_in_strip;
     171             :         uint8_t         start_disk;
     172             :         uint8_t         end_disk;
     173             :         uint8_t         n_disks_involved;
     174             : };
     175             : 
     176             : static inline void
     177        1926 : _raid0_get_io_range(struct raid_bdev_io_range *io_range,
     178             :                     uint8_t num_base_bdevs, uint64_t strip_size, uint64_t strip_size_shift,
     179             :                     uint64_t offset_blocks, uint64_t num_blocks)
     180             : {
     181             :         uint64_t        start_strip;
     182             :         uint64_t        end_strip;
     183             :         uint64_t        total_blocks;
     184             : 
     185        1926 :         io_range->strip_size = strip_size;
     186        1926 :         total_blocks = offset_blocks + num_blocks - (num_blocks > 0);
     187             : 
     188             :         /* The start and end strip index in raid0 bdev scope */
     189        1926 :         start_strip = offset_blocks >> strip_size_shift;
     190        1926 :         end_strip = total_blocks >> strip_size_shift;
     191        1926 :         io_range->start_strip_in_disk = start_strip / num_base_bdevs;
     192        1926 :         io_range->end_strip_in_disk = end_strip / num_base_bdevs;
     193             : 
     194             :         /* The first strip may have unaligned start LBA offset.
     195             :          * The end strip may have unaligned end LBA offset.
     196             :          * Strips between them certainly have aligned offset and length to boundaries.
     197             :          */
     198        1926 :         io_range->start_offset_in_strip = offset_blocks % strip_size;
     199        1926 :         io_range->end_offset_in_strip = total_blocks % strip_size;
     200             : 
     201             :         /* The base bdev indexes in which start and end strips are located */
     202        1926 :         io_range->start_disk = start_strip % num_base_bdevs;
     203        1926 :         io_range->end_disk = end_strip % num_base_bdevs;
     204             : 
     205             :         /* Calculate how many base_bdevs are involved in io operation.
     206             :          * Number of base bdevs involved is between 1 and num_base_bdevs.
     207             :          * It will be 1 if the first strip and last strip are the same one.
     208             :          */
     209        1926 :         io_range->n_disks_involved = spdk_min((end_strip - start_strip + 1), num_base_bdevs);
     210        1926 : }
     211             : 
     212             : static inline void
     213       35406 : _raid0_split_io_range(struct raid_bdev_io_range *io_range, uint8_t disk_idx,
     214             :                       uint64_t *_offset_in_disk, uint64_t *_nblocks_in_disk)
     215             : {
     216             :         uint64_t n_strips_in_disk;
     217             :         uint64_t start_offset_in_disk;
     218             :         uint64_t end_offset_in_disk;
     219             :         uint64_t offset_in_disk;
     220             :         uint64_t nblocks_in_disk;
     221             :         uint64_t start_strip_in_disk;
     222             :         uint64_t end_strip_in_disk;
     223             : 
     224       35406 :         start_strip_in_disk = io_range->start_strip_in_disk;
     225       35406 :         if (disk_idx < io_range->start_disk) {
     226       14760 :                 start_strip_in_disk += 1;
     227       14760 :         }
     228             : 
     229       35406 :         end_strip_in_disk = io_range->end_strip_in_disk;
     230       35406 :         if (disk_idx > io_range->end_disk) {
     231        6966 :                 end_strip_in_disk -= 1;
     232        6966 :         }
     233             : 
     234       35406 :         assert(end_strip_in_disk >= start_strip_in_disk);
     235       35406 :         n_strips_in_disk = end_strip_in_disk - start_strip_in_disk + 1;
     236             : 
     237       35406 :         if (disk_idx == io_range->start_disk) {
     238        1926 :                 start_offset_in_disk = io_range->start_offset_in_strip;
     239        1926 :         } else {
     240       33480 :                 start_offset_in_disk = 0;
     241             :         }
     242             : 
     243       35406 :         if (disk_idx == io_range->end_disk) {
     244        1926 :                 end_offset_in_disk = io_range->end_offset_in_strip;
     245        1926 :         } else {
     246       33480 :                 end_offset_in_disk = io_range->strip_size - 1;
     247             :         }
     248             : 
     249       35406 :         offset_in_disk = start_offset_in_disk + start_strip_in_disk * io_range->strip_size;
     250       70812 :         nblocks_in_disk = (n_strips_in_disk - 1) * io_range->strip_size
     251       35406 :                           + end_offset_in_disk - start_offset_in_disk + 1;
     252             : 
     253       35406 :         SPDK_DEBUGLOG(bdev_raid0,
     254             :                       "raid_bdev (strip_size 0x%" PRIx64 ") splits IO to base_bdev (%u) at (0x%" PRIx64 ", 0x%" PRIx64
     255             :                       ").\n",
     256             :                       io_range->strip_size, disk_idx, offset_in_disk, nblocks_in_disk);
     257             : 
     258       35406 :         *_offset_in_disk = offset_in_disk;
     259       35406 :         *_nblocks_in_disk = nblocks_in_disk;
     260       35406 : }
     261             : 
     262             : static void raid0_submit_null_payload_request(struct raid_bdev_io *raid_io);
     263             : 
     264             : static void
     265           0 : _raid0_submit_null_payload_request(void *_raid_io)
     266             : {
     267           0 :         struct raid_bdev_io *raid_io = _raid_io;
     268             : 
     269           0 :         raid0_submit_null_payload_request(raid_io);
     270           0 : }
     271             : 
     272             : static void
     273       35406 : raid0_base_io_complete(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg)
     274             : {
     275       35406 :         struct raid_bdev_io *raid_io = cb_arg;
     276             : 
     277       35406 :         raid_bdev_io_complete_part(raid_io, 1, success ?
     278             :                                    SPDK_BDEV_IO_STATUS_SUCCESS :
     279             :                                    SPDK_BDEV_IO_STATUS_FAILED);
     280             : 
     281       35406 :         spdk_bdev_free_io(bdev_io);
     282       35406 : }
     283             : 
     284             : /*
     285             :  * brief:
     286             :  * raid0_submit_null_payload_request function submits the next batch of
     287             :  * io requests with range but without payload, like FLUSH and UNMAP, to member disks;
     288             :  * it will submit as many as possible unless one base io request fails with -ENOMEM,
     289             :  * in which case it will queue itself for later submission.
     290             :  * params:
     291             :  * bdev_io - pointer to parent bdev_io on raid bdev device
     292             :  * returns:
     293             :  * none
     294             :  */
     295             : static void
     296        1926 : raid0_submit_null_payload_request(struct raid_bdev_io *raid_io)
     297             : {
     298             :         struct raid_bdev                *raid_bdev;
     299             :         struct raid_bdev_io_range       io_range;
     300             :         int                             ret;
     301             :         struct raid_base_bdev_info      *base_info;
     302             :         struct spdk_io_channel          *base_ch;
     303             : 
     304        1926 :         raid_bdev = raid_io->raid_bdev;
     305             : 
     306        3852 :         _raid0_get_io_range(&io_range, raid_bdev->num_base_bdevs,
     307        1926 :                             raid_bdev->strip_size, raid_bdev->strip_size_shift,
     308        1926 :                             raid_io->offset_blocks, raid_io->num_blocks);
     309             : 
     310        1926 :         if (raid_io->base_bdev_io_remaining == 0) {
     311        1926 :                 raid_io->base_bdev_io_remaining = io_range.n_disks_involved;
     312        1926 :         }
     313             : 
     314       37332 :         while (raid_io->base_bdev_io_submitted < io_range.n_disks_involved) {
     315             :                 uint8_t disk_idx;
     316             :                 uint64_t offset_in_disk;
     317             :                 uint64_t nblocks_in_disk;
     318             : 
     319             :                 /* base_bdev is started from start_disk to end_disk.
     320             :                  * It is possible that index of start_disk is larger than end_disk's.
     321             :                  */
     322       35406 :                 disk_idx = (io_range.start_disk + raid_io->base_bdev_io_submitted) % raid_bdev->num_base_bdevs;
     323       35406 :                 base_info = &raid_bdev->base_bdev_info[disk_idx];
     324       35406 :                 base_ch = raid_bdev_channel_get_base_channel(raid_io->raid_ch, disk_idx);
     325             : 
     326       35406 :                 _raid0_split_io_range(&io_range, disk_idx, &offset_in_disk, &nblocks_in_disk);
     327             : 
     328       35406 :                 switch (raid_io->type) {
     329             :                 case SPDK_BDEV_IO_TYPE_UNMAP:
     330       70812 :                         ret = raid_bdev_unmap_blocks(base_info, base_ch,
     331       35406 :                                                      offset_in_disk, nblocks_in_disk,
     332       35406 :                                                      raid0_base_io_complete, raid_io);
     333       35406 :                         break;
     334             : 
     335             :                 case SPDK_BDEV_IO_TYPE_FLUSH:
     336           0 :                         ret = raid_bdev_flush_blocks(base_info, base_ch,
     337           0 :                                                      offset_in_disk, nblocks_in_disk,
     338           0 :                                                      raid0_base_io_complete, raid_io);
     339           0 :                         break;
     340             : 
     341             :                 default:
     342           0 :                         SPDK_ERRLOG("submit request, invalid io type with null payload %u\n", raid_io->type);
     343           0 :                         assert(false);
     344             :                         ret = -EIO;
     345             :                 }
     346             : 
     347       35406 :                 if (ret == 0) {
     348       35406 :                         raid_io->base_bdev_io_submitted++;
     349       35406 :                 } else if (ret == -ENOMEM) {
     350           0 :                         raid_bdev_queue_io_wait(raid_io, spdk_bdev_desc_get_bdev(base_info->desc),
     351           0 :                                                 base_ch, _raid0_submit_null_payload_request);
     352           0 :                         return;
     353             :                 } else {
     354           0 :                         SPDK_ERRLOG("bdev io submit error not due to ENOMEM, it should not happen\n");
     355           0 :                         assert(false);
     356             :                         raid_bdev_io_complete(raid_io, SPDK_BDEV_IO_STATUS_FAILED);
     357             :                         return;
     358             :                 }
     359             :         }
     360        1926 : }
     361             : 
     362             : static int
     363           8 : raid0_start(struct raid_bdev *raid_bdev)
     364             : {
     365           8 :         uint64_t min_blockcnt = UINT64_MAX;
     366             :         uint64_t base_bdev_data_size;
     367             :         struct raid_base_bdev_info *base_info;
     368             : 
     369         264 :         RAID_FOR_EACH_BASE_BDEV(raid_bdev, base_info) {
     370             :                 /* Calculate minimum block count from all base bdevs */
     371         256 :                 min_blockcnt = spdk_min(min_blockcnt, base_info->data_size);
     372         256 :         }
     373             : 
     374           8 :         base_bdev_data_size = (min_blockcnt >> raid_bdev->strip_size_shift) << raid_bdev->strip_size_shift;
     375             : 
     376         264 :         RAID_FOR_EACH_BASE_BDEV(raid_bdev, base_info) {
     377         256 :                 base_info->data_size = base_bdev_data_size;
     378         256 :         }
     379             : 
     380             :         /*
     381             :          * Take the minimum block count based approach where total block count
     382             :          * of raid bdev is the number of base bdev times the minimum block count
     383             :          * of any base bdev.
     384             :          */
     385           8 :         SPDK_DEBUGLOG(bdev_raid0, "min blockcount %" PRIu64 ",  numbasedev %u, strip size shift %u\n",
     386             :                       min_blockcnt, raid_bdev->num_base_bdevs, raid_bdev->strip_size_shift);
     387             : 
     388           8 :         raid_bdev->bdev.blockcnt = base_bdev_data_size * raid_bdev->num_base_bdevs;
     389             : 
     390           8 :         if (raid_bdev->num_base_bdevs > 1) {
     391           8 :                 raid_bdev->bdev.optimal_io_boundary = raid_bdev->strip_size;
     392           8 :                 raid_bdev->bdev.split_on_optimal_io_boundary = true;
     393           8 :         } else {
     394             :                 /* Do not need to split reads/writes on single bdev RAID modules. */
     395           0 :                 raid_bdev->bdev.optimal_io_boundary = 0;
     396           0 :                 raid_bdev->bdev.split_on_optimal_io_boundary = false;
     397             :         }
     398             : 
     399           8 :         return 0;
     400             : }
     401             : 
     402             : static bool
     403           0 : raid0_resize(struct raid_bdev *raid_bdev)
     404             : {
     405             :         uint64_t blockcnt;
     406             :         int rc;
     407           0 :         uint64_t min_blockcnt = UINT64_MAX;
     408             :         struct raid_base_bdev_info *base_info;
     409             :         uint64_t base_bdev_data_size;
     410             : 
     411           0 :         RAID_FOR_EACH_BASE_BDEV(raid_bdev, base_info) {
     412           0 :                 struct spdk_bdev *base_bdev = spdk_bdev_desc_get_bdev(base_info->desc);
     413             : 
     414           0 :                 min_blockcnt = spdk_min(min_blockcnt, base_bdev->blockcnt - base_info->data_offset);
     415           0 :         }
     416             : 
     417           0 :         base_bdev_data_size = (min_blockcnt >> raid_bdev->strip_size_shift) << raid_bdev->strip_size_shift;
     418           0 :         blockcnt = base_bdev_data_size * raid_bdev->num_base_bdevs;
     419             : 
     420           0 :         if (blockcnt == raid_bdev->bdev.blockcnt) {
     421           0 :                 return false;
     422             :         }
     423             : 
     424           0 :         rc = spdk_bdev_notify_blockcnt_change(&raid_bdev->bdev, blockcnt);
     425           0 :         if (rc != 0) {
     426           0 :                 SPDK_ERRLOG("Failed to notify blockcount change\n");
     427           0 :                 return false;
     428             :         }
     429             : 
     430           0 :         RAID_FOR_EACH_BASE_BDEV(raid_bdev, base_info) {
     431           0 :                 base_info->data_size = base_bdev_data_size;
     432           0 :         }
     433             : 
     434           0 :         return true;
     435           0 : }
     436             : 
     437             : static struct raid_bdev_module g_raid0_module = {
     438             :         .level = RAID0,
     439             :         .base_bdevs_min = 1,
     440             :         .memory_domains_supported = true,
     441             :         .dif_supported = true,
     442             :         .start = raid0_start,
     443             :         .submit_rw_request = raid0_submit_rw_request,
     444             :         .submit_null_payload_request = raid0_submit_null_payload_request,
     445             :         .resize = raid0_resize,
     446             : };
     447           1 : RAID_MODULE_REGISTER(&g_raid0_module)
     448             : 
     449           1 : SPDK_LOG_REGISTER_COMPONENT(bdev_raid0)

Generated by: LCOV version 1.15