LCOV - code coverage report
Current view: top level - module/bdev/raid - raid5f.c (source / functions) Hit Total Coverage
Test: ut_cov_unit.info Lines: 567 708 80.1 %
Date: 2024-12-14 20:55:11 Functions: 39 46 84.8 %

          Line data    Source code
       1             : /*   SPDX-License-Identifier: BSD-3-Clause
       2             :  *   Copyright (C) 2022 Intel Corporation.
       3             :  *   All rights reserved.
       4             :  */
       5             : 
       6             : #include "bdev_raid.h"
       7             : 
       8             : #include "spdk/env.h"
       9             : #include "spdk/thread.h"
      10             : #include "spdk/string.h"
      11             : #include "spdk/util.h"
      12             : #include "spdk/likely.h"
      13             : #include "spdk/log.h"
      14             : #include "spdk/accel.h"
      15             : 
      16             : /* Maximum concurrent full stripe writes per io channel */
      17             : #define RAID5F_MAX_STRIPES 32
      18             : 
      19             : struct chunk {
                               /*
                                * Per-base-bdev part of a stripe request; elements live in
                                * stripe_request::chunks[]. raid5f_chunk_stripe_req() relies on 'index'
                                * being this element's position in that array.
                                */
      20             :         /* Corresponds to base_bdev index */
      21             :         uint8_t index;
      22             : 
      23             :         /* Array of iovecs */
      24             :         struct iovec *iovs;
      25             : 
      26             :         /* Number of used iovecs */
      27             :         int iovcnt;
      28             : 
      29             :         /* Total number of available iovecs in the array */
                               /* Grown (never shrunk) by raid5f_chunk_set_iovcnt() via realloc(). */
      30             :         int iovcnt_max;
      31             : 
      32             :         /* Pointer to buffer with I/O metadata */
      33             :         void *md_buf;
      34             : };
      35             : 
      36             : struct stripe_request;
      37             : typedef void (*stripe_req_xor_cb)(struct stripe_request *stripe_req, int status);
                       /*
                        * stripe_req_xor_cb is invoked by raid5f_xor_stripe_done() with the request's
                        * accumulated xor.status once the xor of a stripe has finished or failed.
                        */
      38             : 
      39             : struct stripe_request {
                               /*
                                * Context for one stripe-sized operation. 'type' selects which free list
                                * the request returns to and how its chunks are submitted and completed.
                                */
      40             :         enum stripe_request_type {
      41             :                 STRIPE_REQ_WRITE,
      42             :                 STRIPE_REQ_RECONSTRUCT,
      43             :         } type;
      44             : 
                               /* Channel owning this request (free lists, accel channel, xor scratch arrays) */
      45             :         struct raid5f_io_channel *r5ch;
      46             : 
      47             :         /* The associated raid_bdev_io */
      48             :         struct raid_bdev_io *raid_io;
      49             : 
      50             :         /* The stripe's index in the raid array. */
      51             :         uint64_t stripe_index;
      52             : 
      53             :         /* The stripe's parity chunk */
      54             :         struct chunk *parity_chunk;
      55             : 
                               /*
                                * Type-specific state sharing storage: the code reads 'reconstruct' when
                                * type is STRIPE_REQ_RECONSTRUCT; 'write' is presumably the
                                * STRIPE_REQ_WRITE counterpart (TODO confirm against the allocation code).
                                */
      56             :         union {
      57             :                 struct {
      58             :                         /* Buffer for stripe parity */
      59             :                         void *parity_buf;
      60             : 
      61             :                         /* Buffer for stripe io metadata parity */
      62             :                         void *parity_md_buf;
      63             :                 } write;
      64             : 
      65             :                 struct {
      66             :                         /* Array of buffers for reading chunk data */
      67             :                         void **chunk_buffers;
      68             : 
      69             :                         /* Array of buffers for reading chunk metadata */
      70             :                         void **chunk_md_buffers;
      71             : 
      72             :                         /* Chunk to reconstruct from parity */
      73             :                         struct chunk *chunk;
      74             : 
      75             :                         /* Offset from chunk start */
      76             :                         uint64_t chunk_offset;
      77             :                 } reconstruct;
      78             :         };
      79             : 
      80             :         /* Array of iovec iterators for each chunk */
      81             :         struct spdk_ioviter *chunk_iov_iters;
      82             : 
      83             :         /* Array of source buffer pointers for parity calculation */
      84             :         void **chunk_xor_buffers;
      85             : 
      86             :         /* Array of source buffer pointers for parity calculation of io metadata */
      87             :         void **chunk_xor_md_buffers;
      88             : 
                               /* Progress of the xor operation currently running for this request */
      89             :         struct {
                                       /* Byte length of the data segment currently submitted for xor */
      90             :                 size_t len;
                                       /* Data bytes not yet confirmed by raid5f_xor_stripe_cb() */
      91             :                 size_t remaining;
                                       /* Metadata bytes pending; zeroed by raid5f_xor_stripe_md_cb() */
      92             :                 size_t remaining_md;
                                       /* 0, or the most recent non-zero status reported for this xor */
      93             :                 int status;
                                       /* Completion callback supplied to raid5f_xor_stripe() */
      94             :                 stripe_req_xor_cb cb;
      95             :         } xor;
      96             : 
                               /* Links the request into a free list or into the channel's xor_retry_queue */
      97             :         TAILQ_ENTRY(stripe_request) link;
      98             : 
      99             :         /* Array of chunks corresponding to base_bdevs */
                               /*
                                * Trailing array; the iteration macros treat it as holding
                                * raid_bdev->num_base_bdevs elements (allocation is not visible here).
                                */
     100             :         struct chunk chunks[0];
     101             : };
     102             : 
     103             : struct raid5f_info {
                               /*
                                * raid5f module state for one raid bdev. raid5f_ch_to_r5f_info() obtains it
                                * as the io_device of an io channel.
                                */
     104             :         /* The parent raid bdev */
     105             :         struct raid_bdev *raid_bdev;
     106             : 
     107             :         /* Number of data blocks in a stripe (without parity) */
     108             :         uint64_t stripe_blocks;
     109             : 
     110             :         /* Number of stripes on this array */
     111             :         uint64_t total_stripes;
     112             : 
     113             :         /* Alignment for buffer allocation */
     114             :         size_t buf_alignment;
     115             : 
     116             :         /* block length bit shift for optimized calculation, only valid when no interleaved md */
     117             :         uint32_t blocklen_shift;
     118             : };
     119             : 
     120             : struct raid5f_io_channel {
                               /*
                                * Per-io-channel context; raid5f_ch_to_r5f_info() converts it back to its
                                * spdk_io_channel with spdk_io_channel_from_ctx().
                                */
     121             :         /* All available stripe requests on this channel */
     122             :         struct {
     123             :                 TAILQ_HEAD(, stripe_request) write;
     124             :                 TAILQ_HEAD(, stripe_request) reconstruct;
     125             :         } free_stripe_requests;
     126             : 
     127             :         /* accel_fw channel */
     128             :         struct spdk_io_channel *accel_ch;
     129             : 
     130             :         /* For retrying xor if accel_ch runs out of resources */
     131             :         TAILQ_HEAD(, stripe_request) xor_retry_queue;
     132             : 
     133             :         /* For iterating over chunk iovecs during xor calculation */
                               /*
                                * raid5f_xor_stripe() fills these with the source chunks first and the
                                * destination chunk last; raid5f_xor_stripe_continue() uses
                                * chunk_xor_buffers[n_src] as the xor destination.
                                */
     134             :         void **chunk_xor_buffers;
     135             :         struct iovec **chunk_xor_iovs;
     136             :         size_t *chunk_xor_iovcnt;
     137             : };
     138             : 
     139             : #define __CHUNK_IN_RANGE(req, c) \
     140             :         c < req->chunks + raid5f_ch_to_r5f_info(req->r5ch)->raid_bdev->num_base_bdevs
                       /*
                        * __CHUNK_IN_RANGE (above): true while chunk pointer c lies before
                        * req->chunks + num_base_bdevs. The arguments are expanded without
                        * parentheses, so callers should pass plain identifiers.
                        */
     141             : 
                       /* Iterate c over the chunks of req, starting at chunk pointer 'from'. */
     142             : #define FOR_EACH_CHUNK_FROM(req, c, from) \
     143             :         for (c = from; __CHUNK_IN_RANGE(req, c); c++)
     144             : 
                       /* Iterate c over every chunk of req, parity chunk included. */
     145             : #define FOR_EACH_CHUNK(req, c) \
     146             :         FOR_EACH_CHUNK_FROM(req, c, req->chunks)
     147             : 
                       /* Yields c, or c+1 when c is the request's parity chunk. */
     148             : #define __NEXT_DATA_CHUNK(req, c) \
     149             :         c == req->parity_chunk ? c+1 : c
     150             : 
                       /* Iterate c over every chunk of req except req->parity_chunk. */
     151             : #define FOR_EACH_DATA_CHUNK(req, c) \
     152             :         for (c = __NEXT_DATA_CHUNK(req, req->chunks); __CHUNK_IN_RANGE(req, c); \
     153             :              c = __NEXT_DATA_CHUNK(req, c+1))
     154             : 
     155             : static inline struct raid5f_info *
     156      653960 : raid5f_ch_to_r5f_info(struct raid5f_io_channel *r5ch)
     157             : {
                               /*
                                * Returns the io_device registered for the spdk_io_channel that owns r5ch;
                                * callers use it as the array's raid5f_info.
                                */
     158      653960 :         return spdk_io_channel_get_io_device(spdk_io_channel_from_ctx(r5ch));
     159             : }
     160             : 
     161             : static inline struct stripe_request *
     162      105666 : raid5f_chunk_stripe_req(struct chunk *chunk)
     163             : {
                               /*
                                * Steps back chunk->index elements to reach chunks[0], then recovers the
                                * enclosing stripe_request with SPDK_CONTAINEROF.
                                */
     164      105666 :         return SPDK_CONTAINEROF((chunk - chunk->index), struct stripe_request, chunks);
     165             : }
     166             : 
     167             : static inline uint8_t
     168      338831 : raid5f_stripe_data_chunks_num(const struct raid_bdev *raid_bdev)
     169             : {
                               /* The data chunk count per stripe is taken from min_base_bdevs_operational. */
     170      338831 :         return raid_bdev->min_base_bdevs_operational;
     171             : }
     172             : 
     173             : static inline uint8_t
     174       17244 : raid5f_stripe_parity_chunk_index(const struct raid_bdev *raid_bdev, uint64_t stripe_index)
     175             : {
                               /*
                                * Computes data_chunks_num - (stripe_index % num_base_bdevs), so the result
                                * decreases by one for each successive stripe within a group of
                                * num_base_bdevs stripes.
                                * NOTE(review): the uint8_t result stays a valid chunk index only if
                                * min_base_bdevs_operational == num_base_bdevs - 1 -- TODO confirm.
                                */
     176       17244 :         return raid5f_stripe_data_chunks_num(raid_bdev) - stripe_index % raid_bdev->num_base_bdevs;
     177             : }
     178             : 
     179             : static inline void
     180        8604 : raid5f_stripe_request_release(struct stripe_request *stripe_req)
     181             : {
                               /*
                                * Puts stripe_req back at the head of its channel's free list that matches
                                * the request type; any other type value trips the assert.
                                */
     182        8604 :         if (spdk_likely(stripe_req->type == STRIPE_REQ_WRITE)) {
     183        7605 :                 TAILQ_INSERT_HEAD(&stripe_req->r5ch->free_stripe_requests.write, stripe_req, link);
     184        8604 :         } else if (stripe_req->type == STRIPE_REQ_RECONSTRUCT) {
     185         999 :                 TAILQ_INSERT_HEAD(&stripe_req->r5ch->free_stripe_requests.reconstruct, stripe_req, link);
     186         999 :         } else {
     187           0 :                 assert(false);
     188             :         }
     189        8604 : }
     190             : 
     191             : static void raid5f_xor_stripe_retry(struct stripe_request *stripe_req);
     192             : 
     193             : static void
     194        8517 : raid5f_xor_stripe_done(struct stripe_request *stripe_req)
     195             : {
                               /*
                                * Finishes the xor phase of stripe_req: logs a failure, hands the status to
                                * the request's xor callback, then restarts one queued request, if any.
                                */
                               /* Read the channel up front; stripe_req is not dereferenced after the callback. */
     196        8517 :         struct raid5f_io_channel *r5ch = stripe_req->r5ch;
     197             : 
     198        8517 :         if (stripe_req->xor.status != 0) {
     199           0 :                 SPDK_ERRLOG("stripe xor failed: %s\n", spdk_strerror(-stripe_req->xor.status));
     200           0 :         }
     201             : 
     202        8517 :         stripe_req->xor.cb(stripe_req, stripe_req->xor.status);
     203             : 
                               /* Give the first request waiting on the retry queue another attempt. */
     204        8517 :         if (!TAILQ_EMPTY(&r5ch->xor_retry_queue)) {
     205           0 :                 stripe_req = TAILQ_FIRST(&r5ch->xor_retry_queue);
     206           0 :                 TAILQ_REMOVE(&r5ch->xor_retry_queue, stripe_req, link);
     207           0 :                 raid5f_xor_stripe_retry(stripe_req);
     208           0 :         }
     209        8517 : }
     210             : 
     211             : static void raid5f_xor_stripe_continue(struct stripe_request *stripe_req);
     212             : 
     213             : static void
     214       62458 : _raid5f_xor_stripe_cb(struct stripe_request *stripe_req, int status)
     215             : {
                               /*
                                * Common tail of the data and metadata xor completions: records a non-zero
                                * status and finishes the xor once no data or metadata bytes remain.
                                */
     216       62458 :         if (status != 0) {
     217           0 :                 stripe_req->xor.status = status;
     218           0 :         }
     219             : 
     220       62458 :         if (stripe_req->xor.remaining + stripe_req->xor.remaining_md == 0) {
     221        8517 :                 raid5f_xor_stripe_done(stripe_req);
     222        8517 :         }
     223       62458 : }
     224             : 
     225             : static void
     226       59619 : raid5f_xor_stripe_cb(void *_stripe_req, int status)
     227             : {
                               /*
                                * Completion callback given to spdk_accel_submit_xor() for a data segment.
                                * _stripe_req is the stripe_request passed as the callback argument.
                                */
     228       59619 :         struct stripe_request *stripe_req = _stripe_req;
     229             : 
                               /* Account for the segment that just finished. */
     230       59619 :         stripe_req->xor.remaining -= stripe_req->xor.len;
     231             : 
                               /* More data left: advance the iov iterators and submit the next segment. */
     232       59619 :         if (stripe_req->xor.remaining > 0) {
     233      102204 :                 stripe_req->xor.len = spdk_ioviter_nextv(stripe_req->chunk_iov_iters,
     234       51102 :                                       stripe_req->r5ch->chunk_xor_buffers);
     235       51102 :                 raid5f_xor_stripe_continue(stripe_req);
     236       51102 :         }
     237             : 
     238       59619 :         _raid5f_xor_stripe_cb(stripe_req, status);
     239       59619 : }
     240             : 
     241             : static void
     242        2839 : raid5f_xor_stripe_md_cb(void *_stripe_req, int status)
     243             : {
                               /*
                                * Completion callback for the metadata xor, which is submitted as a single
                                * operation: marks all metadata as done and runs the common handler.
                                */
     244        2839 :         struct stripe_request *stripe_req = _stripe_req;
     245             : 
     246        2839 :         stripe_req->xor.remaining_md = 0;
     247             : 
     248        2839 :         _raid5f_xor_stripe_cb(stripe_req, status);
     249        2839 : }
     250             : 
     251             : static void
     252       59619 : raid5f_xor_stripe_continue(struct stripe_request *stripe_req)
     253             : {
                               /*
                                * Submits the xor of the current data segment (xor.len bytes) to the accel
                                * channel. The buffer pointers come from the channel's chunk_xor_buffers
                                * array, which the iov iterator calls fill in.
                                */
     254       59619 :         struct raid5f_io_channel *r5ch = stripe_req->r5ch;
     255       59619 :         struct raid_bdev_io *raid_io = stripe_req->raid_io;
     256       59619 :         struct raid_bdev *raid_bdev = raid_io->raid_bdev;
     257       59619 :         uint8_t n_src = raid5f_stripe_data_chunks_num(raid_bdev);
     258       59619 :         uint8_t i;
     259       59619 :         int ret;
     260             : 
     261       59619 :         assert(stripe_req->xor.len > 0);
     262             : 
                               /* Copy the source pointers into the request's own array before submitting. */
     263      256494 :         for (i = 0; i < n_src; i++) {
     264      196875 :                 stripe_req->chunk_xor_buffers[i] = r5ch->chunk_xor_buffers[i];
     265      196875 :         }
     266             : 
                               /* Entry n_src of the channel array is the destination buffer. */
     267      119238 :         ret = spdk_accel_submit_xor(r5ch->accel_ch, r5ch->chunk_xor_buffers[n_src],
     268       59619 :                                     stripe_req->chunk_xor_buffers, n_src, stripe_req->xor.len,
     269       59619 :                                     raid5f_xor_stripe_cb, stripe_req);
     270       59619 :         if (spdk_unlikely(ret)) {
                                       /* -ENOMEM: park the request for a later retry; other errors end the xor. */
     271           0 :                 if (ret == -ENOMEM) {
     272           0 :                         TAILQ_INSERT_HEAD(&r5ch->xor_retry_queue, stripe_req, link);
     273           0 :                 } else {
     274           0 :                         stripe_req->xor.status = ret;
     275           0 :                         raid5f_xor_stripe_done(stripe_req);
     276             :                 }
     277           0 :         }
     278       59619 : }
     279             : 
     280             : static void
     281        8517 : raid5f_xor_stripe(struct stripe_request *stripe_req, stripe_req_xor_cb cb)
     282             : {
                               /*
                                * Starts the xor for stripe_req and arranges for cb to be called when it
                                * completes. For a write the destination is the parity chunk and the length
                                * is one strip; for a reconstruct the destination is the chunk being
                                * rebuilt and the length is raid_io->num_blocks. Metadata, if present, is
                                * xor-ed in one separate accel operation before the data xor is started.
                                */
     283        8517 :         struct raid5f_io_channel *r5ch = stripe_req->r5ch;
     284        8517 :         struct raid_bdev_io *raid_io = stripe_req->raid_io;
     285        8517 :         struct raid_bdev *raid_bdev = raid_io->raid_bdev;
     286        8517 :         struct chunk *chunk;
     287        8517 :         struct chunk *dest_chunk = NULL;
     288        8517 :         uint64_t num_blocks = 0;
     289        8517 :         uint8_t c;
     290             : 
     291        8517 :         assert(cb != NULL);
     292             : 
     293        8517 :         if (spdk_likely(stripe_req->type == STRIPE_REQ_WRITE)) {
     294        7518 :                 num_blocks = raid_bdev->strip_size;
     295        7518 :                 dest_chunk = stripe_req->parity_chunk;
     296        8517 :         } else if (stripe_req->type == STRIPE_REQ_RECONSTRUCT) {
     297         999 :                 num_blocks = raid_io->num_blocks;
     298         999 :                 dest_chunk = stripe_req->reconstruct.chunk;
     299         999 :         } else {
     300           0 :                 assert(false);
     301             :         }
     302             : 
                               /* Source chunks occupy the leading slots; the destination goes last. */
     303        8517 :         c = 0;
     304       45159 :         FOR_EACH_CHUNK(stripe_req, chunk) {
     305       36642 :                 if (chunk == dest_chunk) {
     306        8517 :                         continue;
     307             :                 }
     308       28125 :                 r5ch->chunk_xor_iovs[c] = chunk->iovs;
     309       28125 :                 r5ch->chunk_xor_iovcnt[c] = chunk->iovcnt;
     310       28125 :                 c++;
     311       28125 :         }
     312        8517 :         r5ch->chunk_xor_iovs[c] = dest_chunk->iovs;
     313        8517 :         r5ch->chunk_xor_iovcnt[c] = dest_chunk->iovcnt;
     314             : 
                               /* Prime the iterators; xor.len becomes the length of the first segment. */
     315       17034 :         stripe_req->xor.len = spdk_ioviter_firstv(stripe_req->chunk_iov_iters,
     316        8517 :                               raid_bdev->num_base_bdevs,
     317        8517 :                               r5ch->chunk_xor_iovs,
     318        8517 :                               r5ch->chunk_xor_iovcnt,
     319        8517 :                               r5ch->chunk_xor_buffers);
     320        8517 :         stripe_req->xor.remaining = num_blocks * raid_bdev->bdev.blocklen;
     321        8517 :         stripe_req->xor.status = 0;
     322        8517 :         stripe_req->xor.cb = cb;
     323             : 
     324        8517 :         if (raid_io->md_buf != NULL) {
     325        2839 :                 uint8_t n_src = raid5f_stripe_data_chunks_num(raid_bdev);
     326        2839 :                 uint64_t len = num_blocks * raid_bdev->bdev.md_len;
     327        2839 :                 int ret;
     328             : 
     329        2839 :                 stripe_req->xor.remaining_md = len;
     330             : 
                                       /* Gather the metadata buffers of all chunks except the destination. */
     331        2839 :                 c = 0;
     332       15053 :                 FOR_EACH_CHUNK(stripe_req, chunk) {
     333       12214 :                         if (chunk != dest_chunk) {
     334        9375 :                                 stripe_req->chunk_xor_md_buffers[c] = chunk->md_buf;
     335        9375 :                                 c++;
     336        9375 :                         }
     337       12214 :                 }
     338             : 
     339        5678 :                 ret = spdk_accel_submit_xor(stripe_req->r5ch->accel_ch, dest_chunk->md_buf,
     340        2839 :                                             stripe_req->chunk_xor_md_buffers, n_src, len,
     341        2839 :                                             raid5f_xor_stripe_md_cb, stripe_req);
     342        2839 :                 if (spdk_unlikely(ret)) {
                                               /*
                                                * remaining_md is still non-zero here, so a retry from the queue
                                                * re-enters this function (see raid5f_xor_stripe_retry()).
                                                */
     343           0 :                         if (ret == -ENOMEM) {
     344           0 :                                 TAILQ_INSERT_HEAD(&stripe_req->r5ch->xor_retry_queue, stripe_req, link);
     345           0 :                         } else {
     346           0 :                                 stripe_req->xor.status = ret;
     347           0 :                                 raid5f_xor_stripe_done(stripe_req);
     348             :                         }
     349           0 :                         return;
     350             :                 }
     351        2839 :         }
     352             : 
     353        8517 :         raid5f_xor_stripe_continue(stripe_req);
     354        8517 : }
     355             : 
     356             : static void
     357           0 : raid5f_xor_stripe_retry(struct stripe_request *stripe_req)
     358             : {
                               /*
                                * Re-attempts an xor that was queued on xor_retry_queue. While metadata is
                                * still pending the whole xor is restarted with the saved callback;
                                * otherwise only the current data segment is resubmitted.
                                */
     359           0 :         if (stripe_req->xor.remaining_md) {
     360           0 :                 raid5f_xor_stripe(stripe_req, stripe_req->xor.cb);
     361           0 :         } else {
     362           0 :                 raid5f_xor_stripe_continue(stripe_req);
     363             :         }
     364           0 : }
     365             : 
     366             : static void
     367       27216 : raid5f_stripe_request_chunk_write_complete(struct stripe_request *stripe_req,
     368             :                 enum spdk_bdev_io_status status)
     369             : {
                               /*
                                * Accounts for one finished chunk write. The stripe request is released when
                                * raid_bdev_io_complete_part() returns true (presumably meaning the whole
                                * raid_io is now complete -- verify against bdev_raid).
                                */
     370       27216 :         if (raid_bdev_io_complete_part(stripe_req->raid_io, 1, status)) {
     371        7239 :                 raid5f_stripe_request_release(stripe_req);
     372        7239 :         }
     373       27216 : }
     374             : 
     375             : static void
     376        3213 : raid5f_stripe_request_chunk_read_complete(struct stripe_request *stripe_req,
     377             :                 enum spdk_bdev_io_status status)
     378             : {
                               /*
                                * Accounts for one finished chunk read of a reconstruct request. Unlike the
                                * write path, the stripe request is not released here.
                                */
     379        3213 :         struct raid_bdev_io *raid_io = stripe_req->raid_io;
     380             : 
     381        3213 :         raid_bdev_io_complete_part(raid_io, 1, status);
     382        3213 : }
     383             : 
     384             : static void
     385       30429 : raid5f_chunk_complete_bdev_io(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg)
     386             : {
                               /*
                                * Completion callback for the base bdev I/O issued by raid5f_chunk_submit();
                                * cb_arg is the chunk that was submitted. Frees the bdev_io and forwards the
                                * resulting status to the write or reconstruct-read completion handler.
                                */
     387       30429 :         struct chunk *chunk = cb_arg;
     388       30429 :         struct stripe_request *stripe_req = raid5f_chunk_stripe_req(chunk);
     389       30429 :         enum spdk_bdev_io_status status = success ? SPDK_BDEV_IO_STATUS_SUCCESS :
     390             :                                           SPDK_BDEV_IO_STATUS_FAILED;
     391             : 
     392       30429 :         spdk_bdev_free_io(bdev_io);
     393             : 
     394       30429 :         if (spdk_likely(stripe_req->type == STRIPE_REQ_WRITE)) {
     395       27216 :                 raid5f_stripe_request_chunk_write_complete(stripe_req, status);
     396       30429 :         } else if (stripe_req->type == STRIPE_REQ_RECONSTRUCT) {
     397        3213 :                 raid5f_stripe_request_chunk_read_complete(stripe_req, status);
     398        3213 :         } else {
     399           0 :                 assert(false);
     400             :         }
     401       30429 : }
     402             : 
     403             : static void raid5f_stripe_request_submit_chunks(struct stripe_request *stripe_req);
     404             : 
     405             : static void
     406        3831 : raid5f_chunk_submit_retry(void *_raid_io)
     407             : {
                               /*
                                * Callback registered through raid_bdev_queue_io_wait() after an -ENOMEM
                                * submission: resumes chunk submission for the stripe request stored in
                                * raid_io->module_private.
                                */
     408        3831 :         struct raid_bdev_io *raid_io = _raid_io;
     409        3831 :         struct stripe_request *stripe_req = raid_io->module_private;
     410             : 
     411        3831 :         raid5f_stripe_request_submit_chunks(stripe_req);
     412        3831 : }
     413             : 
     414             : static inline void
     415       46575 : raid5f_init_ext_io_opts(struct spdk_bdev_ext_io_opts *opts, struct raid_bdev_io *raid_io)
     416             : {
                               /*
                                * Zeroes *opts, sets its size field, and copies the memory domain, memory
                                * domain context and metadata buffer from raid_io. Callers may override
                                * opts->metadata afterwards, as raid5f_chunk_submit() does.
                                */
     417       46575 :         memset(opts, 0, sizeof(*opts));
     418       46575 :         opts->size = sizeof(*opts);
     419       46575 :         opts->memory_domain = raid_io->memory_domain;
     420       46575 :         opts->memory_domain_ctx = raid_io->memory_domain_ctx;
     421       46575 :         opts->metadata = raid_io->md_buf;
     422       46575 : }
     423             : 
     424             : static int
     425       38301 : raid5f_chunk_submit(struct chunk *chunk)
     426             : {
                               /*
                                * Issues the base bdev I/O for one chunk of its stripe request: a write of
                                * one strip for STRIPE_REQ_WRITE, or a read of raid_io->num_blocks blocks for
                                * STRIPE_REQ_RECONSTRUCT.
                                *
                                * Returns 0 when the I/O was submitted or the chunk was completed without
                                * I/O; otherwise the non-zero submission error. On -ENOMEM the raid_io is
                                * queued to retry via raid5f_chunk_submit_retry().
                                */
     427       38301 :         struct stripe_request *stripe_req = raid5f_chunk_stripe_req(chunk);
     428       38301 :         struct raid_bdev_io *raid_io = stripe_req->raid_io;
     429       38301 :         struct raid_bdev *raid_bdev = raid_io->raid_bdev;
     430       38301 :         struct raid_base_bdev_info *base_info = &raid_bdev->base_bdev_info[chunk->index];
     431       76602 :         struct spdk_io_channel *base_ch = raid_bdev_channel_get_base_channel(raid_io->raid_ch,
     432       38301 :                                           chunk->index);
                               /* Start of this stripe on the base bdev: stripe_index scaled by the strip size. */
     433       38301 :         uint64_t base_offset_blocks = (stripe_req->stripe_index << raid_bdev->strip_size_shift);
     434       38301 :         struct spdk_bdev_ext_io_opts io_opts;
     435       38301 :         int ret;
     436             : 
                               /* Use this chunk's metadata buffer instead of the raid_io-wide one. */
     437       38301 :         raid5f_init_ext_io_opts(&io_opts, raid_io);
     438       38301 :         io_opts.metadata = chunk->md_buf;
     439             : 
                               /* Counted before submitting; undone below if the submission fails. */
     440       38301 :         raid_io->base_bdev_io_submitted++;
     441             : 
     442       38301 :         switch (stripe_req->type) {
     443             :         case STRIPE_REQ_WRITE:
                                       /* No channel for this base bdev: count the chunk as successfully written. */
     444       34089 :                 if (base_ch == NULL) {
     445         366 :                         raid_bdev_io_complete_part(raid_io, 1, SPDK_BDEV_IO_STATUS_SUCCESS);
     446         366 :                         return 0;
     447             :                 }
     448             : 
     449       67446 :                 ret = raid_bdev_writev_blocks_ext(base_info, base_ch, chunk->iovs, chunk->iovcnt,
     450       33723 :                                                   base_offset_blocks, raid_bdev->strip_size,
     451       33723 :                                                   raid5f_chunk_complete_bdev_io, chunk, &io_opts);
     452       33723 :                 break;
     453             :         case STRIPE_REQ_RECONSTRUCT:
                                       /* The chunk being reconstructed is not read; complete its part right away. */
     454        4212 :                 if (chunk == stripe_req->reconstruct.chunk) {
     455         999 :                         raid_bdev_io_complete_part(raid_io, 1, SPDK_BDEV_IO_STATUS_SUCCESS);
     456         999 :                         return 0;
     457             :                 }
     458             : 
     459        3213 :                 base_offset_blocks += stripe_req->reconstruct.chunk_offset;
     460             : 
     461        6426 :                 ret = raid_bdev_readv_blocks_ext(base_info, base_ch, chunk->iovs, chunk->iovcnt,
     462        3213 :                                                  base_offset_blocks, raid_io->num_blocks,
     463        3213 :                                                  raid5f_chunk_complete_bdev_io, chunk, &io_opts);
     464        3213 :                 break;
     465             :         default:
     466           0 :                 assert(false);
     467             :                 ret = -EINVAL;
     468             :                 break;
     469             :         }
     470             : 
     471       36936 :         if (spdk_unlikely(ret)) {
     472        6507 :                 raid_io->base_bdev_io_submitted--;
     473        6507 :                 if (ret == -ENOMEM) {
     474        3831 :                         raid_bdev_queue_io_wait(raid_io, spdk_bdev_desc_get_bdev(base_info->desc),
     475        3831 :                                                 base_ch, raid5f_chunk_submit_retry);
     476        3831 :                 } else {
     477             :                         /*
     478             :                          * Implicitly complete any I/Os not yet submitted as FAILED. If completing
     479             :                          * these means there are no more to complete for the stripe request, we can
     480             :                          * release the stripe request as well.
     481             :                          */
     482        2676 :                         uint64_t base_bdev_io_not_submitted;
     483             : 
                                               /*
                                                * The expected total is num_base_bdevs for a write and the data
                                                * chunk count for a reconstruct.
                                                */
     484        2676 :                         if (stripe_req->type == STRIPE_REQ_WRITE) {
     485        5352 :                                 base_bdev_io_not_submitted = raid_bdev->num_base_bdevs -
     486        2676 :                                                              raid_io->base_bdev_io_submitted;
     487        2676 :                         } else {
     488           0 :                                 base_bdev_io_not_submitted = raid5f_stripe_data_chunks_num(raid_bdev) -
     489           0 :                                                              raid_io->base_bdev_io_submitted;
     490             :                         }
     491             : 
     492        2676 :                         if (raid_bdev_io_complete_part(raid_io, base_bdev_io_not_submitted,
     493             :                                                        SPDK_BDEV_IO_STATUS_FAILED)) {
     494         366 :                                 raid5f_stripe_request_release(stripe_req);
     495         366 :                         }
     496        2676 :                 }
     497        6507 :         }
     498             : 
     499       36936 :         return ret;
     500       38301 : }
     501             : 
     502             : static int
     503       26466 : raid5f_chunk_set_iovcnt(struct chunk *chunk, int iovcnt)
     504             : {
                               /*
                                * Sets the number of used iovecs of chunk, growing the iovs array with
                                * realloc() when iovcnt exceeds its current capacity. The array is never
                                * shrunk.
                                *
                                * Returns 0 on success, or -ENOMEM if the array could not be grown, in which
                                * case chunk is left unmodified.
                                */
     505       26466 :         if (iovcnt > chunk->iovcnt_max) {
     506          99 :                 struct iovec *iovs = chunk->iovs;
     507             : 
                                       /* The result goes into a local first, so chunk->iovs stays valid on failure. */
     508          99 :                 iovs = realloc(iovs, iovcnt * sizeof(*iovs));
     509          99 :                 if (!iovs) {
     510           0 :                         return -ENOMEM;
     511             :                 }
     512          99 :                 chunk->iovs = iovs;
     513          99 :                 chunk->iovcnt_max = iovcnt;
     514          99 :         }
     515       26466 :         chunk->iovcnt = iovcnt;
     516             : 
     517       26466 :         return 0;
     518       26466 : }
     519             : 
     520             : static int
     521        7704 : raid5f_stripe_request_map_iovecs(struct stripe_request *stripe_req)
     522             : {
     523        7704 :         struct raid_bdev_io *raid_io = stripe_req->raid_io;
     524        7704 :         struct raid_bdev *raid_bdev = raid_io->raid_bdev;
     525        7704 :         struct raid5f_info *r5f_info = raid_bdev->module_private;
     526        7704 :         struct chunk *chunk;
     527        7704 :         int raid_io_iov_idx = 0;
     528        7704 :         size_t raid_io_offset = 0;
     529        7704 :         size_t raid_io_iov_offset = 0;
     530        7704 :         int i;
     531             : 
     532       33171 :         FOR_EACH_DATA_CHUNK(stripe_req, chunk) {
     533       25467 :                 int chunk_iovcnt = 0;
     534       25467 :                 uint64_t len = raid_bdev->strip_size * raid_bdev->bdev.blocklen;
     535       25467 :                 size_t off = raid_io_iov_offset;
     536       25467 :                 int ret;
     537             : 
     538       71229 :                 for (i = raid_io_iov_idx; i < raid_io->iovcnt; i++) {
     539       71229 :                         chunk_iovcnt++;
     540       71229 :                         off += raid_io->iovs[i].iov_len;
     541       71229 :                         if (off >= raid_io_offset + len) {
     542       25467 :                                 break;
     543             :                         }
     544       45762 :                 }
     545             : 
     546       25467 :                 assert(raid_io_iov_idx + chunk_iovcnt <= raid_io->iovcnt);
     547             : 
     548       25467 :                 ret = raid5f_chunk_set_iovcnt(chunk, chunk_iovcnt);
     549       25467 :                 if (ret) {
     550           0 :                         return ret;
     551             :                 }
     552             : 
     553       25467 :                 if (raid_io->md_buf != NULL) {
     554       16780 :                         chunk->md_buf = raid_io->md_buf +
     555        8390 :                                         (raid_io_offset >> r5f_info->blocklen_shift) * raid_bdev->bdev.md_len;
     556        8390 :                 }
     557             : 
     558       96696 :                 for (i = 0; i < chunk_iovcnt; i++) {
     559       71229 :                         struct iovec *chunk_iov = &chunk->iovs[i];
     560       71229 :                         const struct iovec *raid_io_iov = &raid_io->iovs[raid_io_iov_idx];
     561       71229 :                         size_t chunk_iov_offset = raid_io_offset - raid_io_iov_offset;
     562             : 
     563       71229 :                         chunk_iov->iov_base = raid_io_iov->iov_base + chunk_iov_offset;
     564       71229 :                         chunk_iov->iov_len = spdk_min(len, raid_io_iov->iov_len - chunk_iov_offset);
     565       71229 :                         raid_io_offset += chunk_iov->iov_len;
     566       71229 :                         len -= chunk_iov->iov_len;
     567             : 
     568       71229 :                         if (raid_io_offset >= raid_io_iov_offset + raid_io_iov->iov_len) {
     569       53466 :                                 raid_io_iov_idx++;
     570       53466 :                                 raid_io_iov_offset += raid_io_iov->iov_len;
     571       53466 :                         }
     572       71229 :                 }
     573             : 
     574       25467 :                 if (spdk_unlikely(len > 0)) {
     575           0 :                         return -EINVAL;
     576             :                 }
     577       25467 :         }
     578             : 
     579        7704 :         stripe_req->parity_chunk->iovs[0].iov_base = stripe_req->write.parity_buf;
     580        7704 :         stripe_req->parity_chunk->iovs[0].iov_len = raid_bdev->strip_size * raid_bdev->bdev.blocklen;
     581        7704 :         stripe_req->parity_chunk->iovcnt = 1;
     582        7704 :         stripe_req->parity_chunk->md_buf = stripe_req->write.parity_md_buf;
     583             : 
     584        7704 :         return 0;
     585        7704 : }
     586             : 
     587             : static void
     588       12435 : raid5f_stripe_request_submit_chunks(struct stripe_request *stripe_req)
     589             : {
     590       12435 :         struct raid_bdev_io *raid_io = stripe_req->raid_io;
     591       12435 :         struct chunk *start = &stripe_req->chunks[raid_io->base_bdev_io_submitted];
     592       12435 :         struct chunk *chunk;
     593             : 
     594       44229 :         FOR_EACH_CHUNK_FROM(stripe_req, chunk, start) {
     595       38301 :                 if (spdk_unlikely(raid5f_chunk_submit(chunk) != 0)) {
     596        6507 :                         break;
     597             :                 }
     598       31794 :         }
     599       12435 : }
     600             : 
     601             : static inline void
     602        8604 : raid5f_stripe_request_init(struct stripe_request *stripe_req, struct raid_bdev_io *raid_io,
     603             :                            uint64_t stripe_index)
     604             : {
     605        8604 :         stripe_req->raid_io = raid_io;
     606        8604 :         stripe_req->stripe_index = stripe_index;
     607        8604 :         stripe_req->parity_chunk = &stripe_req->chunks[raid5f_stripe_parity_chunk_index(raid_io->raid_bdev,
     608        8604 :                                    stripe_index)];
     609        8604 : }
     610             : 
     611             : static void
     612        7605 : raid5f_stripe_write_request_xor_done(struct stripe_request *stripe_req, int status)
     613             : {
     614        7605 :         struct raid_bdev_io *raid_io = stripe_req->raid_io;
     615             : 
     616        7605 :         if (status != 0) {
     617           0 :                 raid5f_stripe_request_release(stripe_req);
     618           0 :                 raid_bdev_io_complete(raid_io, SPDK_BDEV_IO_STATUS_FAILED);
     619           0 :         } else {
     620        7605 :                 raid5f_stripe_request_submit_chunks(stripe_req);
     621             :         }
     622        7605 : }
     623             : 
     624             : static int
     625        7605 : raid5f_submit_write_request(struct raid_bdev_io *raid_io, uint64_t stripe_index)
     626             : {
     627        7605 :         struct raid_bdev *raid_bdev = raid_io->raid_bdev;
     628        7605 :         struct raid5f_io_channel *r5ch = raid_bdev_channel_get_module_ctx(raid_io->raid_ch);
     629        7605 :         struct stripe_request *stripe_req;
     630        7605 :         int ret;
     631             : 
     632        7605 :         stripe_req = TAILQ_FIRST(&r5ch->free_stripe_requests.write);
     633        7605 :         if (!stripe_req) {
     634           0 :                 return -ENOMEM;
     635             :         }
     636             : 
     637        7605 :         raid5f_stripe_request_init(stripe_req, raid_io, stripe_index);
     638             : 
     639        7605 :         ret = raid5f_stripe_request_map_iovecs(stripe_req);
     640        7605 :         if (spdk_unlikely(ret)) {
     641           0 :                 return ret;
     642             :         }
     643             : 
     644        7605 :         TAILQ_REMOVE(&r5ch->free_stripe_requests.write, stripe_req, link);
     645             : 
     646        7605 :         raid_io->module_private = stripe_req;
     647        7605 :         raid_io->base_bdev_io_remaining = raid_bdev->num_base_bdevs;
     648             : 
     649        7605 :         if (raid_bdev_channel_get_base_channel(raid_io->raid_ch, stripe_req->parity_chunk->index) != NULL) {
     650        7518 :                 raid5f_xor_stripe(stripe_req, raid5f_stripe_write_request_xor_done);
     651        7518 :         } else {
     652          87 :                 raid5f_stripe_write_request_xor_done(stripe_req, 0);
     653             :         }
     654             : 
     655        7605 :         return 0;
     656        7605 : }
     657             : 
     658             : static void
     659        7275 : raid5f_chunk_read_complete(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg)
     660             : {
     661        7275 :         struct raid_bdev_io *raid_io = cb_arg;
     662             : 
     663        7275 :         spdk_bdev_free_io(bdev_io);
     664             : 
     665        7275 :         raid_bdev_io_complete(raid_io, success ? SPDK_BDEV_IO_STATUS_SUCCESS :
     666             :                               SPDK_BDEV_IO_STATUS_FAILED);
     667        7275 : }
     668             : 
     669             : static void raid5f_submit_rw_request(struct raid_bdev_io *raid_io);
     670             : 
     671             : static void
     672           0 : _raid5f_submit_rw_request(void *_raid_io)
     673             : {
     674           0 :         struct raid_bdev_io *raid_io = _raid_io;
     675             : 
     676           0 :         raid5f_submit_rw_request(raid_io);
     677           0 : }
     678             : 
     679             : static void
     680         999 : raid5f_stripe_request_reconstruct_xor_done(struct stripe_request *stripe_req, int status)
     681             : {
     682         999 :         struct raid_bdev_io *raid_io = stripe_req->raid_io;
     683             : 
     684         999 :         raid5f_stripe_request_release(stripe_req);
     685             : 
     686        1998 :         raid_bdev_io_complete(raid_io,
     687         999 :                               status == 0 ? SPDK_BDEV_IO_STATUS_SUCCESS : SPDK_BDEV_IO_STATUS_FAILED);
     688         999 : }
     689             : 
     690             : static void
     691         999 : raid5f_reconstruct_reads_completed_cb(struct raid_bdev_io *raid_io, enum spdk_bdev_io_status status)
     692             : {
     693         999 :         struct stripe_request *stripe_req = raid_io->module_private;
     694             : 
     695         999 :         raid_io->completion_cb = NULL;
     696             : 
     697         999 :         if (status != SPDK_BDEV_IO_STATUS_SUCCESS) {
     698           0 :                 stripe_req->xor.cb(stripe_req, -EIO);
     699           0 :                 return;
     700             :         }
     701             : 
     702         999 :         raid5f_xor_stripe(stripe_req, stripe_req->xor.cb);
     703         999 : }
     704             : 
     705             : static int
     706         999 : raid5f_submit_reconstruct_read(struct raid_bdev_io *raid_io, uint64_t stripe_index,
     707             :                                uint8_t chunk_idx, uint64_t chunk_offset, stripe_req_xor_cb cb)
     708             : {
     709         999 :         struct raid_bdev *raid_bdev = raid_io->raid_bdev;
     710         999 :         struct raid5f_io_channel *r5ch = raid_bdev_channel_get_module_ctx(raid_io->raid_ch);
     711         999 :         void *raid_io_md = raid_io->md_buf;
     712         999 :         struct stripe_request *stripe_req;
     713         999 :         struct chunk *chunk;
     714         999 :         int buf_idx;
     715             : 
     716         999 :         assert(cb != NULL);
     717             : 
     718         999 :         stripe_req = TAILQ_FIRST(&r5ch->free_stripe_requests.reconstruct);
     719         999 :         if (!stripe_req) {
     720           0 :                 return -ENOMEM;
     721             :         }
     722             : 
     723         999 :         raid5f_stripe_request_init(stripe_req, raid_io, stripe_index);
     724             : 
     725         999 :         stripe_req->reconstruct.chunk = &stripe_req->chunks[chunk_idx];
     726         999 :         stripe_req->reconstruct.chunk_offset = chunk_offset;
     727         999 :         stripe_req->xor.cb = cb;
     728         999 :         buf_idx = 0;
     729             : 
     730        5211 :         FOR_EACH_CHUNK(stripe_req, chunk) {
     731        4212 :                 if (chunk == stripe_req->reconstruct.chunk) {
     732         999 :                         int i;
     733         999 :                         int ret;
     734             : 
     735         999 :                         ret = raid5f_chunk_set_iovcnt(chunk, raid_io->iovcnt);
     736         999 :                         if (ret) {
     737           0 :                                 return ret;
     738             :                         }
     739             : 
     740        7992 :                         for (i = 0; i < raid_io->iovcnt; i++) {
     741        6993 :                                 chunk->iovs[i] = raid_io->iovs[i];
     742        6993 :                         }
     743             : 
     744         999 :                         chunk->md_buf = raid_io_md;
     745         999 :                 } else {
     746        3213 :                         struct iovec *iov = &chunk->iovs[0];
     747             : 
     748        3213 :                         iov->iov_base = stripe_req->reconstruct.chunk_buffers[buf_idx];
     749        3213 :                         iov->iov_len = raid_io->num_blocks * raid_bdev->bdev.blocklen;
     750        3213 :                         chunk->iovcnt = 1;
     751             : 
     752        3213 :                         if (raid_io_md) {
     753        1071 :                                 chunk->md_buf = stripe_req->reconstruct.chunk_md_buffers[buf_idx];
     754        1071 :                         }
     755             : 
     756        3213 :                         buf_idx++;
     757        3213 :                 }
     758        4212 :         }
     759             : 
     760         999 :         raid_io->module_private = stripe_req;
     761         999 :         raid_io->base_bdev_io_remaining = raid_bdev->num_base_bdevs;
     762         999 :         raid_io->completion_cb = raid5f_reconstruct_reads_completed_cb;
     763             : 
     764         999 :         TAILQ_REMOVE(&r5ch->free_stripe_requests.reconstruct, stripe_req, link);
     765             : 
     766         999 :         raid5f_stripe_request_submit_chunks(stripe_req);
     767             : 
     768         999 :         return 0;
     769         999 : }
     770             : 
     771             : static int
     772        8274 : raid5f_submit_read_request(struct raid_bdev_io *raid_io, uint64_t stripe_index,
     773             :                            uint64_t stripe_offset)
     774             : {
     775        8274 :         struct raid_bdev *raid_bdev = raid_io->raid_bdev;
     776        8274 :         uint8_t chunk_data_idx = stripe_offset >> raid_bdev->strip_size_shift;
     777        8274 :         uint8_t p_idx = raid5f_stripe_parity_chunk_index(raid_bdev, stripe_index);
     778        8274 :         uint8_t chunk_idx = chunk_data_idx < p_idx ? chunk_data_idx : chunk_data_idx + 1;
     779        8274 :         struct raid_base_bdev_info *base_info = &raid_bdev->base_bdev_info[chunk_idx];
     780        8274 :         struct spdk_io_channel *base_ch = raid_bdev_channel_get_base_channel(raid_io->raid_ch, chunk_idx);
     781        8274 :         uint64_t chunk_offset = stripe_offset - (chunk_data_idx << raid_bdev->strip_size_shift);
     782        8274 :         uint64_t base_offset_blocks = (stripe_index << raid_bdev->strip_size_shift) + chunk_offset;
     783        8274 :         struct spdk_bdev_ext_io_opts io_opts;
     784        8274 :         int ret;
     785             : 
     786        8274 :         raid5f_init_ext_io_opts(&io_opts, raid_io);
     787        8274 :         if (base_ch == NULL) {
     788         999 :                 return raid5f_submit_reconstruct_read(raid_io, stripe_index, chunk_idx, chunk_offset,
     789             :                                                       raid5f_stripe_request_reconstruct_xor_done);
     790             :         }
     791             : 
     792       14550 :         ret = raid_bdev_readv_blocks_ext(base_info, base_ch, raid_io->iovs, raid_io->iovcnt,
     793        7275 :                                          base_offset_blocks, raid_io->num_blocks,
     794        7275 :                                          raid5f_chunk_read_complete, raid_io, &io_opts);
     795        7275 :         if (spdk_unlikely(ret == -ENOMEM)) {
     796           0 :                 raid_bdev_queue_io_wait(raid_io, spdk_bdev_desc_get_bdev(base_info->desc),
     797           0 :                                         base_ch, _raid5f_submit_rw_request);
     798           0 :                 return 0;
     799             :         }
     800             : 
     801        7275 :         return ret;
     802        8274 : }
     803             : 
     804             : static void
     805       15879 : raid5f_submit_rw_request(struct raid_bdev_io *raid_io)
     806             : {
     807       15879 :         struct raid_bdev *raid_bdev = raid_io->raid_bdev;
     808       15879 :         struct raid5f_info *r5f_info = raid_bdev->module_private;
     809       15879 :         uint64_t stripe_index = raid_io->offset_blocks / r5f_info->stripe_blocks;
     810       15879 :         uint64_t stripe_offset = raid_io->offset_blocks % r5f_info->stripe_blocks;
     811       15879 :         int ret;
     812             : 
     813       15879 :         switch (raid_io->type) {
     814             :         case SPDK_BDEV_IO_TYPE_READ:
     815        8274 :                 assert(raid_io->num_blocks <= raid_bdev->strip_size);
     816        8274 :                 ret = raid5f_submit_read_request(raid_io, stripe_index, stripe_offset);
     817        8274 :                 break;
     818             :         case SPDK_BDEV_IO_TYPE_WRITE:
     819        7605 :                 assert(stripe_offset == 0);
     820        7605 :                 assert(raid_io->num_blocks == r5f_info->stripe_blocks);
     821        7605 :                 ret = raid5f_submit_write_request(raid_io, stripe_index);
     822        7605 :                 break;
     823             :         default:
     824           0 :                 ret = -EINVAL;
     825           0 :                 break;
     826             :         }
     827             : 
     828       15879 :         if (spdk_unlikely(ret)) {
     829           0 :                 raid_bdev_io_complete(raid_io, ret == -ENOMEM ? SPDK_BDEV_IO_STATUS_NOMEM :
     830             :                                       SPDK_BDEV_IO_STATUS_FAILED);
     831           0 :         }
     832       15879 : }
     833             : 
     834             : static void
     835       44451 : raid5f_stripe_request_free(struct stripe_request *stripe_req)
     836             : {
     837       44451 :         struct chunk *chunk;
     838             : 
     839      222255 :         FOR_EACH_CHUNK(stripe_req, chunk) {
     840      177804 :                 free(chunk->iovs);
     841      177804 :         }
     842             : 
     843       44451 :         if (stripe_req->type == STRIPE_REQ_WRITE) {
     844       22275 :                 spdk_dma_free(stripe_req->write.parity_buf);
     845       22275 :                 spdk_dma_free(stripe_req->write.parity_md_buf);
     846       44451 :         } else if (stripe_req->type == STRIPE_REQ_RECONSTRUCT) {
     847       22176 :                 struct raid5f_info *r5f_info = raid5f_ch_to_r5f_info(stripe_req->r5ch);
     848       22176 :                 struct raid_bdev *raid_bdev = r5f_info->raid_bdev;
     849       22176 :                 uint8_t i;
     850             : 
     851       22176 :                 if (stripe_req->reconstruct.chunk_buffers) {
     852       88704 :                         for (i = 0; i < raid5f_stripe_data_chunks_num(raid_bdev); i++) {
     853       66528 :                                 spdk_dma_free(stripe_req->reconstruct.chunk_buffers[i]);
     854       66528 :                         }
     855       22176 :                         free(stripe_req->reconstruct.chunk_buffers);
     856       22176 :                 }
     857             : 
     858       22176 :                 if (stripe_req->reconstruct.chunk_md_buffers) {
     859       29568 :                         for (i = 0; i < raid5f_stripe_data_chunks_num(raid_bdev); i++) {
     860       22176 :                                 spdk_dma_free(stripe_req->reconstruct.chunk_md_buffers[i]);
     861       22176 :                         }
     862        7392 :                         free(stripe_req->reconstruct.chunk_md_buffers);
     863        7392 :                 }
     864       22176 :         } else {
     865           0 :                 assert(false);
     866             :         }
     867             : 
     868       44451 :         free(stripe_req->chunk_xor_buffers);
     869       44451 :         free(stripe_req->chunk_xor_md_buffers);
     870       44451 :         free(stripe_req->chunk_iov_iters);
     871             : 
     872       44451 :         free(stripe_req);
     873       44451 : }
     874             : 
     875             : static struct stripe_request *
     876       44451 : raid5f_stripe_request_alloc(struct raid5f_io_channel *r5ch, enum stripe_request_type type)
     877             : {
     878       44451 :         struct raid5f_info *r5f_info = raid5f_ch_to_r5f_info(r5ch);
     879       44451 :         struct raid_bdev *raid_bdev = r5f_info->raid_bdev;
     880       44451 :         uint32_t raid_io_md_size = raid_bdev->bdev.md_interleave ? 0 : raid_bdev->bdev.md_len;
     881       44451 :         struct stripe_request *stripe_req;
     882       44451 :         struct chunk *chunk;
     883       44451 :         size_t chunk_len;
     884             : 
     885       44451 :         stripe_req = calloc(1, sizeof(*stripe_req) + sizeof(*chunk) * raid_bdev->num_base_bdevs);
     886       44451 :         if (!stripe_req) {
     887           0 :                 return NULL;
     888             :         }
     889             : 
     890       44451 :         stripe_req->r5ch = r5ch;
     891       44451 :         stripe_req->type = type;
     892             : 
     893      222255 :         FOR_EACH_CHUNK(stripe_req, chunk) {
     894      177804 :                 chunk->index = chunk - stripe_req->chunks;
     895      177804 :                 chunk->iovcnt_max = 4;
     896      177804 :                 chunk->iovs = calloc(chunk->iovcnt_max, sizeof(chunk->iovs[0]));
     897      177804 :                 if (!chunk->iovs) {
     898           0 :                         goto err;
     899             :                 }
     900      177804 :         }
     901             : 
     902       44451 :         chunk_len = raid_bdev->strip_size * raid_bdev->bdev.blocklen;
     903             : 
     904       44451 :         if (type == STRIPE_REQ_WRITE) {
     905       22275 :                 stripe_req->write.parity_buf = spdk_dma_malloc(chunk_len, r5f_info->buf_alignment, NULL);
     906       22275 :                 if (!stripe_req->write.parity_buf) {
     907           0 :                         goto err;
     908             :                 }
     909             : 
     910       22275 :                 if (raid_io_md_size != 0) {
     911       14850 :                         stripe_req->write.parity_md_buf = spdk_dma_malloc(raid_bdev->strip_size * raid_io_md_size,
     912        7425 :                                                           r5f_info->buf_alignment, NULL);
     913        7425 :                         if (!stripe_req->write.parity_md_buf) {
     914           0 :                                 goto err;
     915             :                         }
     916        7425 :                 }
     917       44451 :         } else if (type == STRIPE_REQ_RECONSTRUCT) {
     918       22176 :                 uint8_t n = raid5f_stripe_data_chunks_num(raid_bdev);
     919       22176 :                 void *buf;
     920       22176 :                 uint8_t i;
     921             : 
     922       22176 :                 stripe_req->reconstruct.chunk_buffers = calloc(n, sizeof(void *));
     923       22176 :                 if (!stripe_req->reconstruct.chunk_buffers) {
     924           0 :                         goto err;
     925             :                 }
     926             : 
     927       88704 :                 for (i = 0; i < n; i++) {
     928       66528 :                         buf = spdk_dma_malloc(chunk_len, r5f_info->buf_alignment, NULL);
     929       66528 :                         if (!buf) {
     930           0 :                                 goto err;
     931             :                         }
     932       66528 :                         stripe_req->reconstruct.chunk_buffers[i] = buf;
     933       66528 :                 }
     934             : 
     935       22176 :                 if (raid_io_md_size != 0) {
     936        7392 :                         stripe_req->reconstruct.chunk_md_buffers = calloc(n, sizeof(void *));
     937        7392 :                         if (!stripe_req->reconstruct.chunk_md_buffers) {
     938           0 :                                 goto err;
     939             :                         }
     940             : 
     941       29568 :                         for (i = 0; i < n; i++) {
     942       22176 :                                 buf = spdk_dma_malloc(raid_bdev->strip_size * raid_io_md_size, r5f_info->buf_alignment, NULL);
     943       22176 :                                 if (!buf) {
     944           0 :                                         goto err;
     945             :                                 }
     946       22176 :                                 stripe_req->reconstruct.chunk_md_buffers[i] = buf;
     947       22176 :                         }
     948        7392 :                 }
     949       22176 :         } else {
     950           0 :                 assert(false);
     951             :                 return NULL;
     952             :         }
     953             : 
     954       44451 :         stripe_req->chunk_iov_iters = malloc(SPDK_IOVITER_SIZE(raid_bdev->num_base_bdevs));
     955       44451 :         if (!stripe_req->chunk_iov_iters) {
     956           0 :                 goto err;
     957             :         }
     958             : 
     959       44451 :         stripe_req->chunk_xor_buffers = calloc(raid5f_stripe_data_chunks_num(raid_bdev),
     960             :                                                sizeof(stripe_req->chunk_xor_buffers[0]));
     961       44451 :         if (!stripe_req->chunk_xor_buffers) {
     962           0 :                 goto err;
     963             :         }
     964             : 
     965       44451 :         stripe_req->chunk_xor_md_buffers = calloc(raid5f_stripe_data_chunks_num(raid_bdev),
     966             :                                            sizeof(stripe_req->chunk_xor_md_buffers[0]));
     967       44451 :         if (!stripe_req->chunk_xor_md_buffers) {
     968           0 :                 goto err;
     969             :         }
     970             : 
     971       44451 :         return stripe_req;
     972             : err:
     973           0 :         raid5f_stripe_request_free(stripe_req);
     974           0 :         return NULL;
     975       44451 : }
     976             : 
     977             : static void
     978         693 : raid5f_ioch_destroy(void *io_device, void *ctx_buf)
     979             : {
     980         693 :         struct raid5f_io_channel *r5ch = ctx_buf;
     981         693 :         struct stripe_request *stripe_req;
     982             : 
     983         693 :         assert(TAILQ_EMPTY(&r5ch->xor_retry_queue));
     984             : 
     985       22869 :         while ((stripe_req = TAILQ_FIRST(&r5ch->free_stripe_requests.write))) {
     986       22176 :                 TAILQ_REMOVE(&r5ch->free_stripe_requests.write, stripe_req, link);
     987       22176 :                 raid5f_stripe_request_free(stripe_req);
     988             :         }
     989             : 
     990       22869 :         while ((stripe_req = TAILQ_FIRST(&r5ch->free_stripe_requests.reconstruct))) {
     991       22176 :                 TAILQ_REMOVE(&r5ch->free_stripe_requests.reconstruct, stripe_req, link);
     992       22176 :                 raid5f_stripe_request_free(stripe_req);
     993             :         }
     994             : 
     995         693 :         if (r5ch->accel_ch) {
     996         693 :                 spdk_put_io_channel(r5ch->accel_ch);
     997         693 :         }
     998             : 
     999         693 :         free(r5ch->chunk_xor_buffers);
    1000         693 :         free(r5ch->chunk_xor_iovs);
    1001         693 :         free(r5ch->chunk_xor_iovcnt);
    1002         693 : }
    1003             : 
    1004             : static int
    1005         693 : raid5f_ioch_create(void *io_device, void *ctx_buf)
    1006             : {
    1007         693 :         struct raid5f_io_channel *r5ch = ctx_buf;
    1008         693 :         struct raid5f_info *r5f_info = io_device;
    1009         693 :         struct raid_bdev *raid_bdev = r5f_info->raid_bdev;
    1010         693 :         struct stripe_request *stripe_req;
    1011         693 :         int i;
    1012             : 
    1013         693 :         TAILQ_INIT(&r5ch->free_stripe_requests.write);
    1014         693 :         TAILQ_INIT(&r5ch->free_stripe_requests.reconstruct);
    1015         693 :         TAILQ_INIT(&r5ch->xor_retry_queue);
    1016             : 
    1017       22869 :         for (i = 0; i < RAID5F_MAX_STRIPES; i++) {
    1018       22176 :                 stripe_req = raid5f_stripe_request_alloc(r5ch, STRIPE_REQ_WRITE);
    1019       22176 :                 if (!stripe_req) {
    1020           0 :                         goto err;
    1021             :                 }
    1022             : 
    1023       22176 :                 TAILQ_INSERT_HEAD(&r5ch->free_stripe_requests.write, stripe_req, link);
    1024       22176 :         }
    1025             : 
    1026       22869 :         for (i = 0; i < RAID5F_MAX_STRIPES; i++) {
    1027       22176 :                 stripe_req = raid5f_stripe_request_alloc(r5ch, STRIPE_REQ_RECONSTRUCT);
    1028       22176 :                 if (!stripe_req) {
    1029           0 :                         goto err;
    1030             :                 }
    1031             : 
    1032       22176 :                 TAILQ_INSERT_HEAD(&r5ch->free_stripe_requests.reconstruct, stripe_req, link);
    1033       22176 :         }
    1034             : 
    1035         693 :         r5ch->accel_ch = spdk_accel_get_io_channel();
    1036         693 :         if (!r5ch->accel_ch) {
    1037           0 :                 SPDK_ERRLOG("Failed to get accel framework's IO channel\n");
    1038           0 :                 goto err;
    1039             :         }
    1040             : 
    1041         693 :         r5ch->chunk_xor_buffers = calloc(raid_bdev->num_base_bdevs, sizeof(*r5ch->chunk_xor_buffers));
    1042         693 :         if (!r5ch->chunk_xor_buffers) {
    1043           0 :                 goto err;
    1044             :         }
    1045             : 
    1046         693 :         r5ch->chunk_xor_iovs = calloc(raid_bdev->num_base_bdevs, sizeof(*r5ch->chunk_xor_iovs));
    1047         693 :         if (!r5ch->chunk_xor_iovs) {
    1048           0 :                 goto err;
    1049             :         }
    1050             : 
    1051         693 :         r5ch->chunk_xor_iovcnt = calloc(raid_bdev->num_base_bdevs, sizeof(*r5ch->chunk_xor_iovcnt));
    1052         693 :         if (!r5ch->chunk_xor_iovcnt) {
    1053           0 :                 goto err;
    1054             :         }
    1055             : 
    1056         693 :         return 0;
    1057             : err:
    1058           0 :         SPDK_ERRLOG("Failed to initialize io channel\n");
    1059           0 :         raid5f_ioch_destroy(r5f_info, r5ch);
    1060           0 :         return -ENOMEM;
    1061         693 : }
    1062             : 
    1063             : static int
    1064         792 : raid5f_start(struct raid_bdev *raid_bdev)
    1065             : {
    1066         792 :         uint64_t min_blockcnt = UINT64_MAX;
    1067         792 :         uint64_t base_bdev_data_size;
    1068         792 :         struct raid_base_bdev_info *base_info;
    1069         792 :         struct spdk_bdev *base_bdev;
    1070         792 :         struct raid5f_info *r5f_info;
    1071         792 :         size_t alignment = 0;
    1072             : 
    1073         792 :         r5f_info = calloc(1, sizeof(*r5f_info));
    1074         792 :         if (!r5f_info) {
    1075           0 :                 SPDK_ERRLOG("Failed to allocate r5f_info\n");
    1076           0 :                 return -ENOMEM;
    1077             :         }
    1078         792 :         r5f_info->raid_bdev = raid_bdev;
    1079             : 
    1080        3960 :         RAID_FOR_EACH_BASE_BDEV(raid_bdev, base_info) {
    1081        3168 :                 min_blockcnt = spdk_min(min_blockcnt, base_info->data_size);
    1082        3168 :                 if (base_info->desc) {
    1083        3168 :                         base_bdev = spdk_bdev_desc_get_bdev(base_info->desc);
    1084        3168 :                         alignment = spdk_max(alignment, spdk_bdev_get_buf_align(base_bdev));
    1085        3168 :                 }
    1086        3168 :         }
    1087             : 
    1088         792 :         base_bdev_data_size = (min_blockcnt / raid_bdev->strip_size) * raid_bdev->strip_size;
    1089             : 
    1090        3960 :         RAID_FOR_EACH_BASE_BDEV(raid_bdev, base_info) {
    1091        3168 :                 base_info->data_size = base_bdev_data_size;
    1092        3168 :         }
    1093             : 
    1094         792 :         r5f_info->total_stripes = min_blockcnt / raid_bdev->strip_size;
    1095         792 :         r5f_info->stripe_blocks = raid_bdev->strip_size * raid5f_stripe_data_chunks_num(raid_bdev);
    1096         792 :         r5f_info->buf_alignment = alignment;
    1097         792 :         if (!raid_bdev->bdev.md_interleave) {
    1098         528 :                 r5f_info->blocklen_shift = spdk_u32log2(raid_bdev->bdev.blocklen);
    1099         528 :         }
    1100             : 
    1101         792 :         raid_bdev->bdev.blockcnt = r5f_info->stripe_blocks * r5f_info->total_stripes;
    1102         792 :         raid_bdev->bdev.optimal_io_boundary = raid_bdev->strip_size;
    1103         792 :         raid_bdev->bdev.split_on_optimal_io_boundary = true;
    1104         792 :         raid_bdev->bdev.write_unit_size = r5f_info->stripe_blocks;
    1105         792 :         raid_bdev->bdev.split_on_write_unit = true;
    1106             : 
    1107         792 :         raid_bdev->module_private = r5f_info;
    1108             : 
    1109         792 :         spdk_io_device_register(r5f_info, raid5f_ioch_create, raid5f_ioch_destroy,
    1110             :                                 sizeof(struct raid5f_io_channel), NULL);
    1111             : 
    1112         792 :         return 0;
    1113         792 : }
    1114             : 
    1115             : static void
    1116         792 : raid5f_io_device_unregister_done(void *io_device)
    1117             : {
    1118         792 :         struct raid5f_info *r5f_info = io_device;
    1119             : 
    1120         792 :         raid_bdev_module_stop_done(r5f_info->raid_bdev);
    1121             : 
    1122         792 :         free(r5f_info);
    1123         792 : }
    1124             : 
    1125             : static bool
    1126         792 : raid5f_stop(struct raid_bdev *raid_bdev)
    1127             : {
    1128         792 :         struct raid5f_info *r5f_info = raid_bdev->module_private;
    1129             : 
    1130         792 :         spdk_io_device_unregister(r5f_info, raid5f_io_device_unregister_done);
    1131             : 
    1132         792 :         return false;
    1133         792 : }
    1134             : 
    1135             : static struct spdk_io_channel *
    1136         693 : raid5f_get_io_channel(struct raid_bdev *raid_bdev)
    1137             : {
    1138         693 :         struct raid5f_info *r5f_info = raid_bdev->module_private;
    1139             : 
    1140        1386 :         return spdk_get_io_channel(r5f_info);
    1141         693 : }
    1142             : 
    1143             : static void
    1144           0 : raid5f_process_write_completed(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg)
    1145             : {
    1146           0 :         struct raid_bdev_process_request *process_req = cb_arg;
    1147             : 
    1148           0 :         spdk_bdev_free_io(bdev_io);
    1149             : 
    1150           0 :         raid_bdev_process_request_complete(process_req, success ? 0 : -EIO);
    1151           0 : }
    1152             : 
    1153             : static void raid5f_process_submit_write(struct raid_bdev_process_request *process_req);
    1154             : 
    1155             : static void
    1156           0 : _raid5f_process_submit_write(void *ctx)
    1157             : {
    1158           0 :         struct raid_bdev_process_request *process_req = ctx;
    1159             : 
    1160           0 :         raid5f_process_submit_write(process_req);
    1161           0 : }
    1162             : 
    1163             : static void
    1164           0 : raid5f_process_submit_write(struct raid_bdev_process_request *process_req)
    1165             : {
    1166           0 :         struct raid_bdev_io *raid_io = &process_req->raid_io;
    1167           0 :         struct raid_bdev *raid_bdev = raid_io->raid_bdev;
    1168           0 :         struct raid5f_info *r5f_info = raid_bdev->module_private;
    1169           0 :         uint64_t stripe_index = process_req->offset_blocks / r5f_info->stripe_blocks;
    1170           0 :         struct spdk_bdev_ext_io_opts io_opts;
    1171           0 :         int ret;
    1172             : 
    1173           0 :         raid5f_init_ext_io_opts(&io_opts, raid_io);
    1174           0 :         ret = raid_bdev_writev_blocks_ext(process_req->target, process_req->target_ch,
    1175           0 :                                           raid_io->iovs, raid_io->iovcnt,
    1176           0 :                                           stripe_index << raid_bdev->strip_size_shift, raid_bdev->strip_size,
    1177           0 :                                           raid5f_process_write_completed, process_req, &io_opts);
    1178           0 :         if (spdk_unlikely(ret != 0)) {
    1179           0 :                 if (ret == -ENOMEM) {
    1180           0 :                         raid_bdev_queue_io_wait(raid_io, spdk_bdev_desc_get_bdev(process_req->target->desc),
    1181           0 :                                                 process_req->target_ch, _raid5f_process_submit_write);
    1182           0 :                 } else {
    1183           0 :                         raid_bdev_process_request_complete(process_req, ret);
    1184             :                 }
    1185           0 :         }
    1186           0 : }
    1187             : 
    1188             : static void
    1189           0 : raid5f_process_stripe_request_reconstruct_xor_done(struct stripe_request *stripe_req, int status)
    1190             : {
    1191           0 :         struct raid_bdev_io *raid_io = stripe_req->raid_io;
    1192           0 :         struct raid_bdev_process_request *process_req = SPDK_CONTAINEROF(raid_io,
    1193             :                         struct raid_bdev_process_request, raid_io);
    1194             : 
    1195           0 :         raid5f_stripe_request_release(stripe_req);
    1196             : 
    1197           0 :         if (status != 0) {
    1198           0 :                 raid_bdev_process_request_complete(process_req, status);
    1199           0 :                 return;
    1200             :         }
    1201             : 
    1202           0 :         raid5f_process_submit_write(process_req);
    1203           0 : }
    1204             : 
    1205             : static int
    1206           0 : raid5f_submit_process_request(struct raid_bdev_process_request *process_req,
    1207             :                               struct raid_bdev_io_channel *raid_ch)
    1208             : {
    1209           0 :         struct spdk_io_channel *ch = spdk_io_channel_from_ctx(raid_ch);
    1210           0 :         struct raid_bdev *raid_bdev = spdk_io_channel_get_io_device(ch);
    1211           0 :         struct raid5f_info *r5f_info = raid_bdev->module_private;
    1212           0 :         struct raid_bdev_io *raid_io = &process_req->raid_io;
    1213           0 :         uint8_t chunk_idx = raid_bdev_base_bdev_slot(process_req->target);
    1214           0 :         uint64_t stripe_index = process_req->offset_blocks / r5f_info->stripe_blocks;
    1215           0 :         int ret;
    1216             : 
    1217           0 :         assert((process_req->offset_blocks % r5f_info->stripe_blocks) == 0);
    1218             : 
    1219           0 :         if (process_req->num_blocks < r5f_info->stripe_blocks) {
    1220           0 :                 return 0;
    1221             :         }
    1222             : 
    1223           0 :         raid_bdev_io_init(raid_io, raid_ch, SPDK_BDEV_IO_TYPE_READ,
    1224           0 :                           process_req->offset_blocks, raid_bdev->strip_size,
    1225           0 :                           &process_req->iov, 1, process_req->md_buf, NULL, NULL);
    1226             : 
    1227           0 :         ret = raid5f_submit_reconstruct_read(raid_io, stripe_index, chunk_idx, 0,
    1228             :                                              raid5f_process_stripe_request_reconstruct_xor_done);
    1229           0 :         if (spdk_likely(ret == 0)) {
    1230           0 :                 return r5f_info->stripe_blocks;
    1231           0 :         } else if (ret < 0) {
    1232           0 :                 return ret;
    1233             :         } else {
    1234           0 :                 return -EINVAL;
    1235             :         }
    1236           0 : }
    1237             : 
    1238             : static struct raid_bdev_module g_raid5f_module = {
    1239             :         .level = RAID5F,
    1240             :         .base_bdevs_min = 3,
    1241             :         .base_bdevs_constraint = {CONSTRAINT_MAX_BASE_BDEVS_REMOVED, 1},
    1242             :         .start = raid5f_start,
    1243             :         .stop = raid5f_stop,
    1244             :         .submit_rw_request = raid5f_submit_rw_request,
    1245             :         .get_io_channel = raid5f_get_io_channel,
    1246             :         .submit_process_request = raid5f_submit_process_request,
    1247             : };
    1248           1 : RAID_MODULE_REGISTER(&g_raid5f_module)
    1249             : 
    1250           1 : SPDK_LOG_REGISTER_COMPONENT(bdev_raid5f)

Generated by: LCOV version 1.15