Line data Source code
1 : /* SPDX-License-Identifier: BSD-3-Clause
2 : * Copyright (C) 2022 Intel Corporation.
3 : * All rights reserved.
4 : */
5 :
6 : #include "bdev_raid.h"
7 :
8 : #include "spdk/env.h"
9 : #include "spdk/thread.h"
10 : #include "spdk/string.h"
11 : #include "spdk/util.h"
12 : #include "spdk/likely.h"
13 : #include "spdk/log.h"
14 : #include "spdk/accel.h"
15 :
16 : /* Maximum concurrent full stripe writes per io channel */
17 : #define RAID5F_MAX_STRIPES 32
18 :
struct chunk {
	/* Position of this chunk within the stripe; corresponds to the base_bdev index */
	uint8_t index;

	/* iovec array describing the buffers of this chunk */
	struct iovec *iovs;

	/* Count of entries in iovs that are currently in use */
	int iovcnt;

	/* Allocated capacity of the iovs array, in entries */
	int iovcnt_max;

	/* Buffer holding the I/O metadata of this chunk */
	void *md_buf;
};
35 :
36 : struct stripe_request;
37 : typedef void (*stripe_req_xor_cb)(struct stripe_request *stripe_req, int status);
38 :
39 : struct stripe_request {
40 : enum stripe_request_type {
41 : STRIPE_REQ_WRITE,
42 : STRIPE_REQ_RECONSTRUCT,
43 : } type;
44 :
45 : struct raid5f_io_channel *r5ch;
46 :
47 : /* The associated raid_bdev_io */
48 : struct raid_bdev_io *raid_io;
49 :
50 : /* The stripe's index in the raid array. */
51 : uint64_t stripe_index;
52 :
53 : /* The stripe's parity chunk */
54 : struct chunk *parity_chunk;
55 :
56 : union {
57 : struct {
58 : /* Buffer for stripe parity */
59 : void *parity_buf;
60 :
61 : /* Buffer for stripe io metadata parity */
62 : void *parity_md_buf;
63 : } write;
64 :
65 : struct {
66 : /* Array of buffers for reading chunk data */
67 : void **chunk_buffers;
68 :
69 : /* Array of buffers for reading chunk metadata */
70 : void **chunk_md_buffers;
71 :
72 : /* Chunk to reconstruct from parity */
73 : struct chunk *chunk;
74 :
75 : /* Offset from chunk start */
76 : uint64_t chunk_offset;
77 : } reconstruct;
78 : };
79 :
80 : /* Array of iovec iterators for each chunk */
81 : struct spdk_ioviter *chunk_iov_iters;
82 :
83 : /* Array of source buffer pointers for parity calculation */
84 : void **chunk_xor_buffers;
85 :
86 : /* Array of source buffer pointers for parity calculation of io metadata */
87 : void **chunk_xor_md_buffers;
88 :
89 : struct {
90 : size_t len;
91 : size_t remaining;
92 : size_t remaining_md;
93 : int status;
94 : stripe_req_xor_cb cb;
95 : } xor;
96 :
97 : TAILQ_ENTRY(stripe_request) link;
98 :
99 : /* Array of chunks corresponding to base_bdevs */
100 : struct chunk chunks[0];
101 : };
102 :
struct raid5f_info {
	/* Back-pointer to the raid bdev this raid5f instance belongs to */
	struct raid_bdev *raid_bdev;

	/* Data blocks per stripe, parity excluded */
	uint64_t stripe_blocks;

	/* How many stripes the array holds */
	uint64_t total_stripes;

	/* Alignment used when allocating buffers */
	size_t buf_alignment;
};
116 :
struct raid5f_io_channel {
	/* Free lists of stripe requests on this channel, one list per request type */
	struct {
		TAILQ_HEAD(, stripe_request) write;
		TAILQ_HEAD(, stripe_request) reconstruct;
	} free_stripe_requests;

	/* Channel of the accel framework */
	struct spdk_io_channel *accel_ch;

	/* Stripe requests waiting to retry xor after accel_ch ran out of resources */
	TAILQ_HEAD(, stripe_request) xor_retry_queue;

	/* Scratch arrays used to walk the chunk iovecs while calculating xor */
	void **chunk_xor_buffers;
	struct iovec **chunk_xor_iovs;
	size_t *chunk_xor_iovcnt;
};
135 :
/*
 * Chunk iteration helpers. Every macro argument and every expansion is
 * parenthesized so that arbitrary expressions can be passed safely.
 */

/* True while chunk pointer c still lies inside the chunks array of req */
#define __CHUNK_IN_RANGE(req, c) \
	((c) < (req)->chunks + raid5f_ch_to_r5f_info((req)->r5ch)->raid_bdev->num_base_bdevs)

/* Iterate c over the chunks of req, starting at chunk pointer from */
#define FOR_EACH_CHUNK_FROM(req, c, from) \
	for ((c) = (from); __CHUNK_IN_RANGE(req, c); (c)++)

/* Iterate c over all chunks of req, parity included */
#define FOR_EACH_CHUNK(req, c) \
	FOR_EACH_CHUNK_FROM(req, c, (req)->chunks)

/* Yields c, or the chunk following it when c is the parity chunk of req */
#define __NEXT_DATA_CHUNK(req, c) \
	((c) == (req)->parity_chunk ? (c) + 1 : (c))

/* Iterate c over the chunks of req, skipping the parity chunk */
#define FOR_EACH_DATA_CHUNK(req, c) \
	for ((c) = __NEXT_DATA_CHUNK(req, (req)->chunks); __CHUNK_IN_RANGE(req, c); \
	     (c) = __NEXT_DATA_CHUNK(req, (c) + 1))
151 :
/* Look up the raid5f_info that is the io_device of the channel owning r5ch. */
static inline struct raid5f_info *
raid5f_ch_to_r5f_info(struct raid5f_io_channel *r5ch)
{
	struct spdk_io_channel *ch = spdk_io_channel_from_ctx(r5ch);

	return spdk_io_channel_get_io_device(ch);
}
157 :
158 : static inline struct stripe_request *
159 70444 : raid5f_chunk_stripe_req(struct chunk *chunk)
160 : {
161 70444 : return SPDK_CONTAINEROF((chunk - chunk->index), struct stripe_request, chunks);
162 : }
163 :
164 : static inline uint8_t
165 239031 : raid5f_stripe_data_chunks_num(const struct raid_bdev *raid_bdev)
166 : {
167 239031 : return raid_bdev->min_base_bdevs_operational;
168 : }
169 :
170 : static inline uint8_t
171 11496 : raid5f_stripe_parity_chunk_index(const struct raid_bdev *raid_bdev, uint64_t stripe_index)
172 : {
173 11496 : return raid5f_stripe_data_chunks_num(raid_bdev) - stripe_index % raid_bdev->num_base_bdevs;
174 : }
175 :
176 : static inline void
177 5736 : raid5f_stripe_request_release(struct stripe_request *stripe_req)
178 : {
179 5736 : if (spdk_likely(stripe_req->type == STRIPE_REQ_WRITE)) {
180 5070 : TAILQ_INSERT_HEAD(&stripe_req->r5ch->free_stripe_requests.write, stripe_req, link);
181 5736 : } else if (stripe_req->type == STRIPE_REQ_RECONSTRUCT) {
182 666 : TAILQ_INSERT_HEAD(&stripe_req->r5ch->free_stripe_requests.reconstruct, stripe_req, link);
183 666 : } else {
184 0 : assert(false);
185 : }
186 5736 : }
187 :
188 : static void raid5f_xor_stripe_retry(struct stripe_request *stripe_req);
189 :
190 : static void
191 5678 : raid5f_xor_stripe_done(struct stripe_request *stripe_req)
192 : {
193 5678 : struct raid5f_io_channel *r5ch = stripe_req->r5ch;
194 :
195 5678 : if (stripe_req->xor.status != 0) {
196 0 : SPDK_ERRLOG("stripe xor failed: %s\n", spdk_strerror(-stripe_req->xor.status));
197 0 : }
198 :
199 5678 : stripe_req->xor.cb(stripe_req, stripe_req->xor.status);
200 :
201 5678 : if (!TAILQ_EMPTY(&r5ch->xor_retry_queue)) {
202 0 : stripe_req = TAILQ_FIRST(&r5ch->xor_retry_queue);
203 0 : TAILQ_REMOVE(&r5ch->xor_retry_queue, stripe_req, link);
204 0 : raid5f_xor_stripe_retry(stripe_req);
205 0 : }
206 5678 : }
207 :
208 : static void raid5f_xor_stripe_continue(struct stripe_request *stripe_req);
209 :
210 : static void
211 42585 : _raid5f_xor_stripe_cb(struct stripe_request *stripe_req, int status)
212 : {
213 42585 : if (status != 0) {
214 0 : stripe_req->xor.status = status;
215 0 : }
216 :
217 42585 : if (stripe_req->xor.remaining + stripe_req->xor.remaining_md == 0) {
218 5678 : raid5f_xor_stripe_done(stripe_req);
219 5678 : }
220 42585 : }
221 :
222 : static void
223 39746 : raid5f_xor_stripe_cb(void *_stripe_req, int status)
224 : {
225 39746 : struct stripe_request *stripe_req = _stripe_req;
226 :
227 39746 : stripe_req->xor.remaining -= stripe_req->xor.len;
228 :
229 39746 : if (stripe_req->xor.remaining > 0) {
230 68136 : stripe_req->xor.len = spdk_ioviter_nextv(stripe_req->chunk_iov_iters,
231 34068 : stripe_req->r5ch->chunk_xor_buffers);
232 34068 : raid5f_xor_stripe_continue(stripe_req);
233 34068 : }
234 :
235 39746 : _raid5f_xor_stripe_cb(stripe_req, status);
236 39746 : }
237 :
238 : static void
239 2839 : raid5f_xor_stripe_md_cb(void *_stripe_req, int status)
240 : {
241 2839 : struct stripe_request *stripe_req = _stripe_req;
242 :
243 2839 : stripe_req->xor.remaining_md = 0;
244 :
245 2839 : _raid5f_xor_stripe_cb(stripe_req, status);
246 2839 : }
247 :
248 : static void
249 39746 : raid5f_xor_stripe_continue(struct stripe_request *stripe_req)
250 : {
251 39746 : struct raid5f_io_channel *r5ch = stripe_req->r5ch;
252 39746 : struct raid_bdev_io *raid_io = stripe_req->raid_io;
253 39746 : struct raid_bdev *raid_bdev = raid_io->raid_bdev;
254 39746 : uint8_t n_src = raid5f_stripe_data_chunks_num(raid_bdev);
255 39746 : uint8_t i;
256 39746 : int ret;
257 :
258 39746 : assert(stripe_req->xor.len > 0);
259 :
260 170996 : for (i = 0; i < n_src; i++) {
261 131250 : stripe_req->chunk_xor_buffers[i] = r5ch->chunk_xor_buffers[i];
262 131250 : }
263 :
264 79492 : ret = spdk_accel_submit_xor(r5ch->accel_ch, r5ch->chunk_xor_buffers[n_src],
265 39746 : stripe_req->chunk_xor_buffers, n_src, stripe_req->xor.len,
266 39746 : raid5f_xor_stripe_cb, stripe_req);
267 39746 : if (spdk_unlikely(ret)) {
268 0 : if (ret == -ENOMEM) {
269 0 : TAILQ_INSERT_HEAD(&r5ch->xor_retry_queue, stripe_req, link);
270 0 : } else {
271 0 : stripe_req->xor.status = ret;
272 0 : raid5f_xor_stripe_done(stripe_req);
273 : }
274 0 : }
275 39746 : }
276 :
277 : static void
278 5678 : raid5f_xor_stripe(struct stripe_request *stripe_req, stripe_req_xor_cb cb)
279 : {
280 5678 : struct raid5f_io_channel *r5ch = stripe_req->r5ch;
281 5678 : struct raid_bdev_io *raid_io = stripe_req->raid_io;
282 5678 : struct raid_bdev *raid_bdev = raid_io->raid_bdev;
283 5678 : struct chunk *chunk;
284 5678 : struct chunk *dest_chunk = NULL;
285 5678 : uint64_t num_blocks = 0;
286 5678 : uint8_t c;
287 :
288 5678 : assert(cb != NULL);
289 :
290 5678 : if (spdk_likely(stripe_req->type == STRIPE_REQ_WRITE)) {
291 5012 : num_blocks = raid_bdev->strip_size;
292 5012 : dest_chunk = stripe_req->parity_chunk;
293 5678 : } else if (stripe_req->type == STRIPE_REQ_RECONSTRUCT) {
294 666 : num_blocks = raid_io->num_blocks;
295 666 : dest_chunk = stripe_req->reconstruct.chunk;
296 666 : } else {
297 0 : assert(false);
298 : }
299 :
300 5678 : c = 0;
301 30106 : FOR_EACH_CHUNK(stripe_req, chunk) {
302 24428 : if (chunk == dest_chunk) {
303 5678 : continue;
304 : }
305 18750 : r5ch->chunk_xor_iovs[c] = chunk->iovs;
306 18750 : r5ch->chunk_xor_iovcnt[c] = chunk->iovcnt;
307 18750 : c++;
308 18750 : }
309 5678 : r5ch->chunk_xor_iovs[c] = dest_chunk->iovs;
310 5678 : r5ch->chunk_xor_iovcnt[c] = dest_chunk->iovcnt;
311 :
312 11356 : stripe_req->xor.len = spdk_ioviter_firstv(stripe_req->chunk_iov_iters,
313 5678 : raid_bdev->num_base_bdevs,
314 5678 : r5ch->chunk_xor_iovs,
315 5678 : r5ch->chunk_xor_iovcnt,
316 5678 : r5ch->chunk_xor_buffers);
317 5678 : stripe_req->xor.remaining = num_blocks << raid_bdev->blocklen_shift;
318 5678 : stripe_req->xor.status = 0;
319 5678 : stripe_req->xor.cb = cb;
320 :
321 5678 : if (raid_io->md_buf != NULL) {
322 2839 : uint8_t n_src = raid5f_stripe_data_chunks_num(raid_bdev);
323 2839 : uint64_t len = num_blocks * spdk_bdev_get_md_size(&raid_bdev->bdev);
324 2839 : int ret;
325 :
326 2839 : stripe_req->xor.remaining_md = len;
327 :
328 2839 : c = 0;
329 15053 : FOR_EACH_CHUNK(stripe_req, chunk) {
330 12214 : if (chunk != dest_chunk) {
331 9375 : stripe_req->chunk_xor_md_buffers[c] = chunk->md_buf;
332 9375 : c++;
333 9375 : }
334 12214 : }
335 :
336 5678 : ret = spdk_accel_submit_xor(stripe_req->r5ch->accel_ch, dest_chunk->md_buf,
337 2839 : stripe_req->chunk_xor_md_buffers, n_src, len,
338 2839 : raid5f_xor_stripe_md_cb, stripe_req);
339 2839 : if (spdk_unlikely(ret)) {
340 0 : if (ret == -ENOMEM) {
341 0 : TAILQ_INSERT_HEAD(&stripe_req->r5ch->xor_retry_queue, stripe_req, link);
342 0 : } else {
343 0 : stripe_req->xor.status = ret;
344 0 : raid5f_xor_stripe_done(stripe_req);
345 : }
346 0 : return;
347 : }
348 2839 : }
349 :
350 5678 : raid5f_xor_stripe_continue(stripe_req);
351 5678 : }
352 :
353 : static void
354 0 : raid5f_xor_stripe_retry(struct stripe_request *stripe_req)
355 : {
356 0 : if (stripe_req->xor.remaining_md) {
357 0 : raid5f_xor_stripe(stripe_req, stripe_req->xor.cb);
358 0 : } else {
359 0 : raid5f_xor_stripe_continue(stripe_req);
360 : }
361 0 : }
362 :
363 : static void
364 18144 : raid5f_stripe_request_chunk_write_complete(struct stripe_request *stripe_req,
365 : enum spdk_bdev_io_status status)
366 : {
367 18144 : if (raid_bdev_io_complete_part(stripe_req->raid_io, 1, status)) {
368 4826 : raid5f_stripe_request_release(stripe_req);
369 4826 : }
370 18144 : }
371 :
372 : static void
373 2142 : raid5f_stripe_request_chunk_read_complete(struct stripe_request *stripe_req,
374 : enum spdk_bdev_io_status status)
375 : {
376 2142 : struct raid_bdev_io *raid_io = stripe_req->raid_io;
377 :
378 2142 : raid_bdev_io_complete_part(raid_io, 1, status);
379 2142 : }
380 :
381 : static void
382 20286 : raid5f_chunk_complete_bdev_io(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg)
383 : {
384 20286 : struct chunk *chunk = cb_arg;
385 20286 : struct stripe_request *stripe_req = raid5f_chunk_stripe_req(chunk);
386 20286 : enum spdk_bdev_io_status status = success ? SPDK_BDEV_IO_STATUS_SUCCESS :
387 : SPDK_BDEV_IO_STATUS_FAILED;
388 :
389 20286 : spdk_bdev_free_io(bdev_io);
390 :
391 20286 : if (spdk_likely(stripe_req->type == STRIPE_REQ_WRITE)) {
392 18144 : raid5f_stripe_request_chunk_write_complete(stripe_req, status);
393 20286 : } else if (stripe_req->type == STRIPE_REQ_RECONSTRUCT) {
394 2142 : raid5f_stripe_request_chunk_read_complete(stripe_req, status);
395 2142 : } else {
396 0 : assert(false);
397 : }
398 20286 : }
399 :
400 : static void raid5f_stripe_request_submit_chunks(struct stripe_request *stripe_req);
401 :
402 : static void
403 2554 : raid5f_chunk_submit_retry(void *_raid_io)
404 : {
405 2554 : struct raid_bdev_io *raid_io = _raid_io;
406 2554 : struct stripe_request *stripe_req = raid_io->module_private;
407 :
408 2554 : raid5f_stripe_request_submit_chunks(stripe_req);
409 2554 : }
410 :
411 : static inline void
412 31050 : raid5f_init_ext_io_opts(struct spdk_bdev_ext_io_opts *opts, struct raid_bdev_io *raid_io)
413 : {
414 31050 : memset(opts, 0, sizeof(*opts));
415 31050 : opts->size = sizeof(*opts);
416 31050 : opts->memory_domain = raid_io->memory_domain;
417 31050 : opts->memory_domain_ctx = raid_io->memory_domain_ctx;
418 31050 : opts->metadata = raid_io->md_buf;
419 31050 : }
420 :
421 : static int
422 25534 : raid5f_chunk_submit(struct chunk *chunk)
423 : {
424 25534 : struct stripe_request *stripe_req = raid5f_chunk_stripe_req(chunk);
425 25534 : struct raid_bdev_io *raid_io = stripe_req->raid_io;
426 25534 : struct raid_bdev *raid_bdev = raid_io->raid_bdev;
427 25534 : struct raid_base_bdev_info *base_info = &raid_bdev->base_bdev_info[chunk->index];
428 51068 : struct spdk_io_channel *base_ch = raid_bdev_channel_get_base_channel(raid_io->raid_ch,
429 25534 : chunk->index);
430 25534 : uint64_t base_offset_blocks = (stripe_req->stripe_index << raid_bdev->strip_size_shift);
431 25534 : struct spdk_bdev_ext_io_opts io_opts;
432 25534 : int ret;
433 :
434 25534 : raid5f_init_ext_io_opts(&io_opts, raid_io);
435 25534 : io_opts.metadata = chunk->md_buf;
436 :
437 25534 : raid_io->base_bdev_io_submitted++;
438 :
439 25534 : switch (stripe_req->type) {
440 : case STRIPE_REQ_WRITE:
441 22726 : if (base_ch == NULL) {
442 244 : raid_bdev_io_complete_part(raid_io, 1, SPDK_BDEV_IO_STATUS_SUCCESS);
443 244 : return 0;
444 : }
445 :
446 44964 : ret = raid_bdev_writev_blocks_ext(base_info, base_ch, chunk->iovs, chunk->iovcnt,
447 22482 : base_offset_blocks, raid_bdev->strip_size,
448 22482 : raid5f_chunk_complete_bdev_io, chunk, &io_opts);
449 22482 : break;
450 : case STRIPE_REQ_RECONSTRUCT:
451 2808 : if (chunk == stripe_req->reconstruct.chunk) {
452 666 : raid_bdev_io_complete_part(raid_io, 1, SPDK_BDEV_IO_STATUS_SUCCESS);
453 666 : return 0;
454 : }
455 :
456 2142 : base_offset_blocks += stripe_req->reconstruct.chunk_offset;
457 :
458 4284 : ret = raid_bdev_readv_blocks_ext(base_info, base_ch, chunk->iovs, chunk->iovcnt,
459 2142 : base_offset_blocks, raid_io->num_blocks,
460 2142 : raid5f_chunk_complete_bdev_io, chunk, &io_opts);
461 2142 : break;
462 : default:
463 0 : assert(false);
464 : ret = -EINVAL;
465 : break;
466 : }
467 :
468 24624 : if (spdk_unlikely(ret)) {
469 4338 : raid_io->base_bdev_io_submitted--;
470 4338 : if (ret == -ENOMEM) {
471 2554 : raid_bdev_queue_io_wait(raid_io, spdk_bdev_desc_get_bdev(base_info->desc),
472 2554 : base_ch, raid5f_chunk_submit_retry);
473 2554 : } else {
474 : /*
475 : * Implicitly complete any I/Os not yet submitted as FAILED. If completing
476 : * these means there are no more to complete for the stripe request, we can
477 : * release the stripe request as well.
478 : */
479 1784 : uint64_t base_bdev_io_not_submitted;
480 :
481 1784 : if (stripe_req->type == STRIPE_REQ_WRITE) {
482 3568 : base_bdev_io_not_submitted = raid_bdev->num_base_bdevs -
483 1784 : raid_io->base_bdev_io_submitted;
484 1784 : } else {
485 0 : base_bdev_io_not_submitted = raid5f_stripe_data_chunks_num(raid_bdev) -
486 0 : raid_io->base_bdev_io_submitted;
487 : }
488 :
489 1784 : if (raid_bdev_io_complete_part(raid_io, base_bdev_io_not_submitted,
490 : SPDK_BDEV_IO_STATUS_FAILED)) {
491 244 : raid5f_stripe_request_release(stripe_req);
492 244 : }
493 1784 : }
494 4338 : }
495 :
496 24624 : return ret;
497 25534 : }
498 :
499 : static int
500 17644 : raid5f_chunk_set_iovcnt(struct chunk *chunk, int iovcnt)
501 : {
502 17644 : if (iovcnt > chunk->iovcnt_max) {
503 66 : struct iovec *iovs = chunk->iovs;
504 :
505 66 : iovs = realloc(iovs, iovcnt * sizeof(*iovs));
506 66 : if (!iovs) {
507 0 : return -ENOMEM;
508 : }
509 66 : chunk->iovs = iovs;
510 66 : chunk->iovcnt_max = iovcnt;
511 66 : }
512 17644 : chunk->iovcnt = iovcnt;
513 :
514 17644 : return 0;
515 17644 : }
516 :
517 : static int
518 5136 : raid5f_stripe_request_map_iovecs(struct stripe_request *stripe_req)
519 : {
520 5136 : struct raid_bdev_io *raid_io = stripe_req->raid_io;
521 5136 : struct raid_bdev *raid_bdev = raid_io->raid_bdev;
522 5136 : void *raid_io_md = raid_io->md_buf;
523 5136 : uint32_t raid_io_md_size = spdk_bdev_get_md_size(&raid_bdev->bdev);
524 5136 : struct chunk *chunk;
525 5136 : int raid_io_iov_idx = 0;
526 5136 : size_t raid_io_offset = 0;
527 5136 : size_t raid_io_iov_offset = 0;
528 5136 : int i;
529 :
530 22114 : FOR_EACH_DATA_CHUNK(stripe_req, chunk) {
531 16978 : int chunk_iovcnt = 0;
532 16978 : uint64_t len = raid_bdev->strip_size << raid_bdev->blocklen_shift;
533 16978 : size_t off = raid_io_iov_offset;
534 16978 : int ret;
535 :
536 47486 : for (i = raid_io_iov_idx; i < raid_io->iovcnt; i++) {
537 47486 : chunk_iovcnt++;
538 47486 : off += raid_io->iovs[i].iov_len;
539 47486 : if (off >= raid_io_offset + len) {
540 16978 : break;
541 : }
542 30508 : }
543 :
544 16978 : assert(raid_io_iov_idx + chunk_iovcnt <= raid_io->iovcnt);
545 :
546 16978 : ret = raid5f_chunk_set_iovcnt(chunk, chunk_iovcnt);
547 16978 : if (ret) {
548 0 : return ret;
549 : }
550 :
551 16978 : if (raid_io_md) {
552 16780 : chunk->md_buf = raid_io_md +
553 8390 : (raid_io_offset >> raid_bdev->blocklen_shift) * raid_io_md_size;
554 8390 : }
555 :
556 64464 : for (i = 0; i < chunk_iovcnt; i++) {
557 47486 : struct iovec *chunk_iov = &chunk->iovs[i];
558 47486 : const struct iovec *raid_io_iov = &raid_io->iovs[raid_io_iov_idx];
559 47486 : size_t chunk_iov_offset = raid_io_offset - raid_io_iov_offset;
560 :
561 47486 : chunk_iov->iov_base = raid_io_iov->iov_base + chunk_iov_offset;
562 47486 : chunk_iov->iov_len = spdk_min(len, raid_io_iov->iov_len - chunk_iov_offset);
563 47486 : raid_io_offset += chunk_iov->iov_len;
564 47486 : len -= chunk_iov->iov_len;
565 :
566 47486 : if (raid_io_offset >= raid_io_iov_offset + raid_io_iov->iov_len) {
567 35644 : raid_io_iov_idx++;
568 35644 : raid_io_iov_offset += raid_io_iov->iov_len;
569 35644 : }
570 47486 : }
571 :
572 16978 : if (spdk_unlikely(len > 0)) {
573 0 : return -EINVAL;
574 : }
575 16978 : }
576 :
577 5136 : stripe_req->parity_chunk->iovs[0].iov_base = stripe_req->write.parity_buf;
578 5136 : stripe_req->parity_chunk->iovs[0].iov_len = raid_bdev->strip_size << raid_bdev->blocklen_shift;
579 5136 : stripe_req->parity_chunk->iovcnt = 1;
580 5136 : stripe_req->parity_chunk->md_buf = stripe_req->write.parity_md_buf;
581 :
582 5136 : return 0;
583 5136 : }
584 :
585 : static void
586 8290 : raid5f_stripe_request_submit_chunks(struct stripe_request *stripe_req)
587 : {
588 8290 : struct raid_bdev_io *raid_io = stripe_req->raid_io;
589 8290 : struct chunk *start = &stripe_req->chunks[raid_io->base_bdev_io_submitted];
590 8290 : struct chunk *chunk;
591 :
592 29486 : FOR_EACH_CHUNK_FROM(stripe_req, chunk, start) {
593 25534 : if (spdk_unlikely(raid5f_chunk_submit(chunk) != 0)) {
594 4338 : break;
595 : }
596 21196 : }
597 8290 : }
598 :
599 : static inline void
600 5736 : raid5f_stripe_request_init(struct stripe_request *stripe_req, struct raid_bdev_io *raid_io,
601 : uint64_t stripe_index)
602 : {
603 5736 : stripe_req->raid_io = raid_io;
604 5736 : stripe_req->stripe_index = stripe_index;
605 5736 : stripe_req->parity_chunk = &stripe_req->chunks[raid5f_stripe_parity_chunk_index(raid_io->raid_bdev,
606 5736 : stripe_index)];
607 5736 : }
608 :
609 : static void
610 5070 : raid5f_stripe_write_request_xor_done(struct stripe_request *stripe_req, int status)
611 : {
612 5070 : struct raid_bdev_io *raid_io = stripe_req->raid_io;
613 :
614 5070 : if (status != 0) {
615 0 : raid5f_stripe_request_release(stripe_req);
616 0 : raid_bdev_io_complete(raid_io, SPDK_BDEV_IO_STATUS_FAILED);
617 0 : } else {
618 5070 : raid5f_stripe_request_submit_chunks(stripe_req);
619 : }
620 5070 : }
621 :
622 : static int
623 5070 : raid5f_submit_write_request(struct raid_bdev_io *raid_io, uint64_t stripe_index)
624 : {
625 5070 : struct raid_bdev *raid_bdev = raid_io->raid_bdev;
626 5070 : struct raid5f_io_channel *r5ch = raid_bdev_channel_get_module_ctx(raid_io->raid_ch);
627 5070 : struct stripe_request *stripe_req;
628 5070 : int ret;
629 :
630 5070 : stripe_req = TAILQ_FIRST(&r5ch->free_stripe_requests.write);
631 5070 : if (!stripe_req) {
632 0 : return -ENOMEM;
633 : }
634 :
635 5070 : raid5f_stripe_request_init(stripe_req, raid_io, stripe_index);
636 :
637 5070 : ret = raid5f_stripe_request_map_iovecs(stripe_req);
638 5070 : if (spdk_unlikely(ret)) {
639 0 : return ret;
640 : }
641 :
642 5070 : TAILQ_REMOVE(&r5ch->free_stripe_requests.write, stripe_req, link);
643 :
644 5070 : raid_io->module_private = stripe_req;
645 5070 : raid_io->base_bdev_io_remaining = raid_bdev->num_base_bdevs;
646 :
647 5070 : if (raid_bdev_channel_get_base_channel(raid_io->raid_ch, stripe_req->parity_chunk->index) != NULL) {
648 5012 : raid5f_xor_stripe(stripe_req, raid5f_stripe_write_request_xor_done);
649 5012 : } else {
650 58 : raid5f_stripe_write_request_xor_done(stripe_req, 0);
651 : }
652 :
653 5070 : return 0;
654 5070 : }
655 :
656 : static void
657 4850 : raid5f_chunk_read_complete(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg)
658 : {
659 4850 : struct raid_bdev_io *raid_io = cb_arg;
660 :
661 4850 : spdk_bdev_free_io(bdev_io);
662 :
663 4850 : raid_bdev_io_complete(raid_io, success ? SPDK_BDEV_IO_STATUS_SUCCESS :
664 : SPDK_BDEV_IO_STATUS_FAILED);
665 4850 : }
666 :
667 : static void raid5f_submit_rw_request(struct raid_bdev_io *raid_io);
668 :
/* void-pointer adapter that lets raid5f_submit_rw_request() be used as an io_wait callback. */
static void
_raid5f_submit_rw_request(void *_raid_io)
{
	raid5f_submit_rw_request(_raid_io);
}
676 :
677 : static void
678 666 : raid5f_stripe_request_reconstruct_xor_done(struct stripe_request *stripe_req, int status)
679 : {
680 666 : struct raid_bdev_io *raid_io = stripe_req->raid_io;
681 :
682 666 : raid5f_stripe_request_release(stripe_req);
683 :
684 1332 : raid_bdev_io_complete(raid_io,
685 666 : status == 0 ? SPDK_BDEV_IO_STATUS_SUCCESS : SPDK_BDEV_IO_STATUS_FAILED);
686 666 : }
687 :
688 : static void
689 666 : raid5f_reconstruct_reads_completed_cb(struct raid_bdev_io *raid_io, enum spdk_bdev_io_status status)
690 : {
691 666 : struct stripe_request *stripe_req = raid_io->module_private;
692 :
693 666 : raid_io->completion_cb = NULL;
694 :
695 666 : if (status != SPDK_BDEV_IO_STATUS_SUCCESS) {
696 0 : raid5f_stripe_request_release(stripe_req);
697 0 : raid_bdev_io_complete(raid_io, status);
698 0 : return;
699 : }
700 :
701 666 : raid5f_xor_stripe(stripe_req, raid5f_stripe_request_reconstruct_xor_done);
702 666 : }
703 :
704 : static int
705 666 : raid5f_submit_reconstruct_read(struct raid_bdev_io *raid_io, uint64_t stripe_index,
706 : uint8_t chunk_idx, uint64_t chunk_offset,
707 : raid_bdev_io_completion_cb completion_cb)
708 : {
709 666 : struct raid_bdev *raid_bdev = raid_io->raid_bdev;
710 666 : struct raid5f_io_channel *r5ch = raid_bdev_channel_get_module_ctx(raid_io->raid_ch);
711 666 : void *raid_io_md = raid_io->md_buf;
712 666 : struct stripe_request *stripe_req;
713 666 : struct chunk *chunk;
714 666 : int buf_idx;
715 :
716 666 : stripe_req = TAILQ_FIRST(&r5ch->free_stripe_requests.reconstruct);
717 666 : if (!stripe_req) {
718 0 : return -ENOMEM;
719 : }
720 :
721 666 : raid5f_stripe_request_init(stripe_req, raid_io, stripe_index);
722 :
723 666 : stripe_req->reconstruct.chunk = &stripe_req->chunks[chunk_idx];
724 666 : stripe_req->reconstruct.chunk_offset = chunk_offset;
725 666 : buf_idx = 0;
726 :
727 3474 : FOR_EACH_CHUNK(stripe_req, chunk) {
728 2808 : if (chunk == stripe_req->reconstruct.chunk) {
729 666 : int i;
730 666 : int ret;
731 :
732 666 : ret = raid5f_chunk_set_iovcnt(chunk, raid_io->iovcnt);
733 666 : if (ret) {
734 0 : return ret;
735 : }
736 :
737 5328 : for (i = 0; i < raid_io->iovcnt; i++) {
738 4662 : chunk->iovs[i] = raid_io->iovs[i];
739 4662 : }
740 :
741 666 : chunk->md_buf = raid_io_md;
742 666 : } else {
743 2142 : struct iovec *iov = &chunk->iovs[0];
744 :
745 2142 : iov->iov_base = stripe_req->reconstruct.chunk_buffers[buf_idx];
746 2142 : iov->iov_len = raid_io->num_blocks << raid_bdev->blocklen_shift;
747 2142 : chunk->iovcnt = 1;
748 :
749 2142 : if (raid_io_md) {
750 1071 : chunk->md_buf = stripe_req->reconstruct.chunk_md_buffers[buf_idx];
751 1071 : }
752 :
753 2142 : buf_idx++;
754 2142 : }
755 2808 : }
756 :
757 666 : raid_io->module_private = stripe_req;
758 666 : raid_io->base_bdev_io_remaining = raid_bdev->num_base_bdevs;
759 666 : raid_io->completion_cb = completion_cb;
760 :
761 666 : TAILQ_REMOVE(&r5ch->free_stripe_requests.reconstruct, stripe_req, link);
762 :
763 666 : raid5f_stripe_request_submit_chunks(stripe_req);
764 :
765 666 : return 0;
766 666 : }
767 :
768 : static int
769 5516 : raid5f_submit_read_request(struct raid_bdev_io *raid_io, uint64_t stripe_index,
770 : uint64_t stripe_offset)
771 : {
772 5516 : struct raid_bdev *raid_bdev = raid_io->raid_bdev;
773 5516 : uint8_t chunk_data_idx = stripe_offset >> raid_bdev->strip_size_shift;
774 5516 : uint8_t p_idx = raid5f_stripe_parity_chunk_index(raid_bdev, stripe_index);
775 5516 : uint8_t chunk_idx = chunk_data_idx < p_idx ? chunk_data_idx : chunk_data_idx + 1;
776 5516 : struct raid_base_bdev_info *base_info = &raid_bdev->base_bdev_info[chunk_idx];
777 5516 : struct spdk_io_channel *base_ch = raid_bdev_channel_get_base_channel(raid_io->raid_ch, chunk_idx);
778 5516 : uint64_t chunk_offset = stripe_offset - (chunk_data_idx << raid_bdev->strip_size_shift);
779 5516 : uint64_t base_offset_blocks = (stripe_index << raid_bdev->strip_size_shift) + chunk_offset;
780 5516 : struct spdk_bdev_ext_io_opts io_opts;
781 5516 : int ret;
782 :
783 5516 : raid5f_init_ext_io_opts(&io_opts, raid_io);
784 5516 : if (base_ch == NULL) {
785 666 : return raid5f_submit_reconstruct_read(raid_io, stripe_index, chunk_idx, chunk_offset,
786 : raid5f_reconstruct_reads_completed_cb);
787 : }
788 :
789 9700 : ret = raid_bdev_readv_blocks_ext(base_info, base_ch, raid_io->iovs, raid_io->iovcnt,
790 4850 : base_offset_blocks, raid_io->num_blocks,
791 4850 : raid5f_chunk_read_complete, raid_io, &io_opts);
792 4850 : if (spdk_unlikely(ret == -ENOMEM)) {
793 0 : raid_bdev_queue_io_wait(raid_io, spdk_bdev_desc_get_bdev(base_info->desc),
794 0 : base_ch, _raid5f_submit_rw_request);
795 0 : return 0;
796 : }
797 :
798 4850 : return ret;
799 5516 : }
800 :
801 : static void
802 10586 : raid5f_submit_rw_request(struct raid_bdev_io *raid_io)
803 : {
804 10586 : struct raid_bdev *raid_bdev = raid_io->raid_bdev;
805 10586 : struct raid5f_info *r5f_info = raid_bdev->module_private;
806 10586 : uint64_t stripe_index = raid_io->offset_blocks / r5f_info->stripe_blocks;
807 10586 : uint64_t stripe_offset = raid_io->offset_blocks % r5f_info->stripe_blocks;
808 10586 : int ret;
809 :
810 10586 : switch (raid_io->type) {
811 : case SPDK_BDEV_IO_TYPE_READ:
812 5516 : assert(raid_io->num_blocks <= raid_bdev->strip_size);
813 5516 : ret = raid5f_submit_read_request(raid_io, stripe_index, stripe_offset);
814 5516 : break;
815 : case SPDK_BDEV_IO_TYPE_WRITE:
816 5070 : assert(stripe_offset == 0);
817 5070 : assert(raid_io->num_blocks == r5f_info->stripe_blocks);
818 5070 : ret = raid5f_submit_write_request(raid_io, stripe_index);
819 5070 : break;
820 : default:
821 0 : ret = -EINVAL;
822 0 : break;
823 : }
824 :
825 10586 : if (spdk_unlikely(ret)) {
826 0 : raid_bdev_io_complete(raid_io, ret == -ENOMEM ? SPDK_BDEV_IO_STATUS_NOMEM :
827 : SPDK_BDEV_IO_STATUS_FAILED);
828 0 : }
829 10586 : }
830 :
831 : static void
832 29634 : raid5f_stripe_request_free(struct stripe_request *stripe_req)
833 : {
834 29634 : struct chunk *chunk;
835 :
836 148170 : FOR_EACH_CHUNK(stripe_req, chunk) {
837 118536 : free(chunk->iovs);
838 118536 : }
839 :
840 29634 : if (stripe_req->type == STRIPE_REQ_WRITE) {
841 14850 : spdk_dma_free(stripe_req->write.parity_buf);
842 14850 : spdk_dma_free(stripe_req->write.parity_md_buf);
843 29634 : } else if (stripe_req->type == STRIPE_REQ_RECONSTRUCT) {
844 14784 : struct raid5f_info *r5f_info = raid5f_ch_to_r5f_info(stripe_req->r5ch);
845 14784 : struct raid_bdev *raid_bdev = r5f_info->raid_bdev;
846 14784 : uint8_t i;
847 :
848 14784 : if (stripe_req->reconstruct.chunk_buffers) {
849 59136 : for (i = 0; i < raid5f_stripe_data_chunks_num(raid_bdev); i++) {
850 44352 : spdk_dma_free(stripe_req->reconstruct.chunk_buffers[i]);
851 44352 : }
852 14784 : free(stripe_req->reconstruct.chunk_buffers);
853 14784 : }
854 :
855 14784 : if (stripe_req->reconstruct.chunk_md_buffers) {
856 29568 : for (i = 0; i < raid5f_stripe_data_chunks_num(raid_bdev); i++) {
857 22176 : spdk_dma_free(stripe_req->reconstruct.chunk_md_buffers[i]);
858 22176 : }
859 7392 : free(stripe_req->reconstruct.chunk_md_buffers);
860 7392 : }
861 14784 : } else {
862 0 : assert(false);
863 : }
864 :
865 29634 : free(stripe_req->chunk_xor_buffers);
866 29634 : free(stripe_req->chunk_xor_md_buffers);
867 29634 : free(stripe_req->chunk_iov_iters);
868 :
869 29634 : free(stripe_req);
870 29634 : }
871 :
872 : static struct stripe_request *
873 29634 : raid5f_stripe_request_alloc(struct raid5f_io_channel *r5ch, enum stripe_request_type type)
874 : {
875 29634 : struct raid5f_info *r5f_info = raid5f_ch_to_r5f_info(r5ch);
876 29634 : struct raid_bdev *raid_bdev = r5f_info->raid_bdev;
877 29634 : uint32_t raid_io_md_size = spdk_bdev_get_md_size(&raid_bdev->bdev);
878 29634 : struct stripe_request *stripe_req;
879 29634 : struct chunk *chunk;
880 29634 : size_t chunk_len;
881 :
882 29634 : stripe_req = calloc(1, sizeof(*stripe_req) + sizeof(*chunk) * raid_bdev->num_base_bdevs);
883 29634 : if (!stripe_req) {
884 0 : return NULL;
885 : }
886 :
887 29634 : stripe_req->r5ch = r5ch;
888 29634 : stripe_req->type = type;
889 :
890 148170 : FOR_EACH_CHUNK(stripe_req, chunk) {
891 118536 : chunk->index = chunk - stripe_req->chunks;
892 118536 : chunk->iovcnt_max = 4;
893 118536 : chunk->iovs = calloc(chunk->iovcnt_max, sizeof(chunk->iovs[0]));
894 118536 : if (!chunk->iovs) {
895 0 : goto err;
896 : }
897 118536 : }
898 :
899 29634 : chunk_len = raid_bdev->strip_size << raid_bdev->blocklen_shift;
900 :
901 29634 : if (type == STRIPE_REQ_WRITE) {
902 14850 : stripe_req->write.parity_buf = spdk_dma_malloc(chunk_len, r5f_info->buf_alignment, NULL);
903 14850 : if (!stripe_req->write.parity_buf) {
904 0 : goto err;
905 : }
906 :
907 14850 : if (raid_io_md_size != 0) {
908 14850 : stripe_req->write.parity_md_buf = spdk_dma_malloc(raid_bdev->strip_size * raid_io_md_size,
909 7425 : r5f_info->buf_alignment, NULL);
910 7425 : if (!stripe_req->write.parity_md_buf) {
911 0 : goto err;
912 : }
913 7425 : }
914 29634 : } else if (type == STRIPE_REQ_RECONSTRUCT) {
915 14784 : uint8_t n = raid5f_stripe_data_chunks_num(raid_bdev);
916 14784 : void *buf;
917 14784 : uint8_t i;
918 :
919 14784 : stripe_req->reconstruct.chunk_buffers = calloc(n, sizeof(void *));
920 14784 : if (!stripe_req->reconstruct.chunk_buffers) {
921 0 : goto err;
922 : }
923 :
924 59136 : for (i = 0; i < n; i++) {
925 44352 : buf = spdk_dma_malloc(chunk_len, r5f_info->buf_alignment, NULL);
926 44352 : if (!buf) {
927 0 : goto err;
928 : }
929 44352 : stripe_req->reconstruct.chunk_buffers[i] = buf;
930 44352 : }
931 :
932 14784 : if (raid_io_md_size != 0) {
933 7392 : stripe_req->reconstruct.chunk_md_buffers = calloc(n, sizeof(void *));
934 7392 : if (!stripe_req->reconstruct.chunk_md_buffers) {
935 0 : goto err;
936 : }
937 :
938 29568 : for (i = 0; i < n; i++) {
939 22176 : buf = spdk_dma_malloc(raid_bdev->strip_size * raid_io_md_size, r5f_info->buf_alignment, NULL);
940 22176 : if (!buf) {
941 0 : goto err;
942 : }
943 22176 : stripe_req->reconstruct.chunk_md_buffers[i] = buf;
944 22176 : }
945 7392 : }
946 14784 : } else {
947 0 : assert(false);
948 : return NULL;
949 : }
950 :
951 29634 : stripe_req->chunk_iov_iters = malloc(SPDK_IOVITER_SIZE(raid_bdev->num_base_bdevs));
952 29634 : if (!stripe_req->chunk_iov_iters) {
953 0 : goto err;
954 : }
955 :
956 29634 : stripe_req->chunk_xor_buffers = calloc(raid5f_stripe_data_chunks_num(raid_bdev),
957 : sizeof(stripe_req->chunk_xor_buffers[0]));
958 29634 : if (!stripe_req->chunk_xor_buffers) {
959 0 : goto err;
960 : }
961 :
962 29634 : stripe_req->chunk_xor_md_buffers = calloc(raid5f_stripe_data_chunks_num(raid_bdev),
963 : sizeof(stripe_req->chunk_xor_md_buffers[0]));
964 29634 : if (!stripe_req->chunk_xor_md_buffers) {
965 0 : goto err;
966 : }
967 :
968 29634 : return stripe_req;
969 : err:
970 0 : raid5f_stripe_request_free(stripe_req);
971 0 : return NULL;
972 29634 : }
973 :
974 : static void
975 462 : raid5f_ioch_destroy(void *io_device, void *ctx_buf)
976 : {
977 462 : struct raid5f_io_channel *r5ch = ctx_buf;
978 462 : struct stripe_request *stripe_req;
979 :
980 462 : assert(TAILQ_EMPTY(&r5ch->xor_retry_queue));
981 :
982 15246 : while ((stripe_req = TAILQ_FIRST(&r5ch->free_stripe_requests.write))) {
983 14784 : TAILQ_REMOVE(&r5ch->free_stripe_requests.write, stripe_req, link);
984 14784 : raid5f_stripe_request_free(stripe_req);
985 : }
986 :
987 15246 : while ((stripe_req = TAILQ_FIRST(&r5ch->free_stripe_requests.reconstruct))) {
988 14784 : TAILQ_REMOVE(&r5ch->free_stripe_requests.reconstruct, stripe_req, link);
989 14784 : raid5f_stripe_request_free(stripe_req);
990 : }
991 :
992 462 : if (r5ch->accel_ch) {
993 462 : spdk_put_io_channel(r5ch->accel_ch);
994 462 : }
995 :
996 462 : free(r5ch->chunk_xor_buffers);
997 462 : free(r5ch->chunk_xor_iovs);
998 462 : free(r5ch->chunk_xor_iovcnt);
999 462 : }
1000 :
1001 : static int
1002 462 : raid5f_ioch_create(void *io_device, void *ctx_buf)
1003 : {
1004 462 : struct raid5f_io_channel *r5ch = ctx_buf;
1005 462 : struct raid5f_info *r5f_info = io_device;
1006 462 : struct raid_bdev *raid_bdev = r5f_info->raid_bdev;
1007 462 : struct stripe_request *stripe_req;
1008 462 : int i;
1009 :
1010 462 : TAILQ_INIT(&r5ch->free_stripe_requests.write);
1011 462 : TAILQ_INIT(&r5ch->free_stripe_requests.reconstruct);
1012 462 : TAILQ_INIT(&r5ch->xor_retry_queue);
1013 :
1014 15246 : for (i = 0; i < RAID5F_MAX_STRIPES; i++) {
1015 14784 : stripe_req = raid5f_stripe_request_alloc(r5ch, STRIPE_REQ_WRITE);
1016 14784 : if (!stripe_req) {
1017 0 : goto err;
1018 : }
1019 :
1020 14784 : TAILQ_INSERT_HEAD(&r5ch->free_stripe_requests.write, stripe_req, link);
1021 14784 : }
1022 :
1023 15246 : for (i = 0; i < RAID5F_MAX_STRIPES; i++) {
1024 14784 : stripe_req = raid5f_stripe_request_alloc(r5ch, STRIPE_REQ_RECONSTRUCT);
1025 14784 : if (!stripe_req) {
1026 0 : goto err;
1027 : }
1028 :
1029 14784 : TAILQ_INSERT_HEAD(&r5ch->free_stripe_requests.reconstruct, stripe_req, link);
1030 14784 : }
1031 :
1032 462 : r5ch->accel_ch = spdk_accel_get_io_channel();
1033 462 : if (!r5ch->accel_ch) {
1034 0 : SPDK_ERRLOG("Failed to get accel framework's IO channel\n");
1035 0 : goto err;
1036 : }
1037 :
1038 462 : r5ch->chunk_xor_buffers = calloc(raid_bdev->num_base_bdevs, sizeof(*r5ch->chunk_xor_buffers));
1039 462 : if (!r5ch->chunk_xor_buffers) {
1040 0 : goto err;
1041 : }
1042 :
1043 462 : r5ch->chunk_xor_iovs = calloc(raid_bdev->num_base_bdevs, sizeof(*r5ch->chunk_xor_iovs));
1044 462 : if (!r5ch->chunk_xor_iovs) {
1045 0 : goto err;
1046 : }
1047 :
1048 462 : r5ch->chunk_xor_iovcnt = calloc(raid_bdev->num_base_bdevs, sizeof(*r5ch->chunk_xor_iovcnt));
1049 462 : if (!r5ch->chunk_xor_iovcnt) {
1050 0 : goto err;
1051 : }
1052 :
1053 462 : return 0;
1054 : err:
1055 0 : SPDK_ERRLOG("Failed to initialize io channel\n");
1056 0 : raid5f_ioch_destroy(r5f_info, r5ch);
1057 0 : return -ENOMEM;
1058 462 : }
1059 :
1060 : static int
1061 528 : raid5f_start(struct raid_bdev *raid_bdev)
1062 : {
1063 528 : uint64_t min_blockcnt = UINT64_MAX;
1064 528 : uint64_t base_bdev_data_size;
1065 528 : struct raid_base_bdev_info *base_info;
1066 528 : struct spdk_bdev *base_bdev;
1067 528 : struct raid5f_info *r5f_info;
1068 528 : size_t alignment = 0;
1069 :
1070 528 : r5f_info = calloc(1, sizeof(*r5f_info));
1071 528 : if (!r5f_info) {
1072 0 : SPDK_ERRLOG("Failed to allocate r5f_info\n");
1073 0 : return -ENOMEM;
1074 : }
1075 528 : r5f_info->raid_bdev = raid_bdev;
1076 :
1077 2640 : RAID_FOR_EACH_BASE_BDEV(raid_bdev, base_info) {
1078 2112 : min_blockcnt = spdk_min(min_blockcnt, base_info->data_size);
1079 2112 : if (base_info->desc) {
1080 2112 : base_bdev = spdk_bdev_desc_get_bdev(base_info->desc);
1081 2112 : alignment = spdk_max(alignment, spdk_bdev_get_buf_align(base_bdev));
1082 2112 : }
1083 2112 : }
1084 :
1085 528 : base_bdev_data_size = (min_blockcnt / raid_bdev->strip_size) * raid_bdev->strip_size;
1086 :
1087 2640 : RAID_FOR_EACH_BASE_BDEV(raid_bdev, base_info) {
1088 2112 : base_info->data_size = base_bdev_data_size;
1089 2112 : }
1090 :
1091 528 : r5f_info->total_stripes = min_blockcnt / raid_bdev->strip_size;
1092 528 : r5f_info->stripe_blocks = raid_bdev->strip_size * raid5f_stripe_data_chunks_num(raid_bdev);
1093 528 : r5f_info->buf_alignment = alignment;
1094 :
1095 528 : raid_bdev->bdev.blockcnt = r5f_info->stripe_blocks * r5f_info->total_stripes;
1096 528 : raid_bdev->bdev.optimal_io_boundary = raid_bdev->strip_size;
1097 528 : raid_bdev->bdev.split_on_optimal_io_boundary = true;
1098 528 : raid_bdev->bdev.write_unit_size = r5f_info->stripe_blocks;
1099 528 : raid_bdev->bdev.split_on_write_unit = true;
1100 :
1101 528 : raid_bdev->module_private = r5f_info;
1102 :
1103 528 : spdk_io_device_register(r5f_info, raid5f_ioch_create, raid5f_ioch_destroy,
1104 : sizeof(struct raid5f_io_channel), NULL);
1105 :
1106 528 : return 0;
1107 528 : }
1108 :
1109 : static void
1110 528 : raid5f_io_device_unregister_done(void *io_device)
1111 : {
1112 528 : struct raid5f_info *r5f_info = io_device;
1113 :
1114 528 : raid_bdev_module_stop_done(r5f_info->raid_bdev);
1115 :
1116 528 : free(r5f_info);
1117 528 : }
1118 :
1119 : static bool
1120 528 : raid5f_stop(struct raid_bdev *raid_bdev)
1121 : {
1122 528 : struct raid5f_info *r5f_info = raid_bdev->module_private;
1123 :
1124 528 : spdk_io_device_unregister(r5f_info, raid5f_io_device_unregister_done);
1125 :
1126 528 : return false;
1127 528 : }
1128 :
1129 : static struct spdk_io_channel *
1130 462 : raid5f_get_io_channel(struct raid_bdev *raid_bdev)
1131 : {
1132 462 : struct raid5f_info *r5f_info = raid_bdev->module_private;
1133 :
1134 924 : return spdk_get_io_channel(r5f_info);
1135 462 : }
1136 :
/*
 * Completion callback for the write issued by raid5f_process_submit_write().
 * Frees the bdev_io and completes the process request with 0 on success or
 * -EIO on failure.
 *
 * \param bdev_io completed bdev I/O.
 * \param success whether the write succeeded.
 * \param cb_arg the struct raid_bdev_process_request being served.
 */
static void
raid5f_process_write_completed(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg)
{
	struct raid_bdev_process_request *process_req = cb_arg;
	int status;

	spdk_bdev_free_io(bdev_io);

	if (success) {
		status = 0;
	} else {
		status = -EIO;
	}

	raid_bdev_process_request_complete(process_req, status);
}
1146 :
1147 : static void raid5f_process_submit_write(struct raid_bdev_process_request *process_req);
1148 :
/*
 * void-pointer adapter around raid5f_process_submit_write(), used as the
 * callback passed to raid_bdev_queue_io_wait() so the write can be resubmitted.
 *
 * \param ctx the struct raid_bdev_process_request to resubmit.
 */
static void
_raid5f_process_submit_write(void *ctx)
{
	raid5f_process_submit_write((struct raid_bdev_process_request *)ctx);
}
1156 :
1157 : static void
1158 0 : raid5f_process_submit_write(struct raid_bdev_process_request *process_req)
1159 : {
1160 0 : struct raid_bdev_io *raid_io = &process_req->raid_io;
1161 0 : struct raid_bdev *raid_bdev = raid_io->raid_bdev;
1162 0 : struct raid5f_info *r5f_info = raid_bdev->module_private;
1163 0 : uint64_t stripe_index = process_req->offset_blocks / r5f_info->stripe_blocks;
1164 0 : struct spdk_bdev_ext_io_opts io_opts;
1165 0 : int ret;
1166 :
1167 0 : raid5f_init_ext_io_opts(&io_opts, raid_io);
1168 0 : ret = raid_bdev_writev_blocks_ext(process_req->target, process_req->target_ch,
1169 0 : raid_io->iovs, raid_io->iovcnt,
1170 0 : stripe_index << raid_bdev->strip_size_shift, raid_bdev->strip_size,
1171 0 : raid5f_process_write_completed, process_req, &io_opts);
1172 0 : if (spdk_unlikely(ret != 0)) {
1173 0 : if (ret == -ENOMEM) {
1174 0 : raid_bdev_queue_io_wait(raid_io, spdk_bdev_desc_get_bdev(process_req->target->desc),
1175 0 : process_req->target_ch, _raid5f_process_submit_write);
1176 0 : } else {
1177 0 : raid_bdev_process_request_complete(process_req, ret);
1178 : }
1179 0 : }
1180 0 : }
1181 :
1182 : static void
1183 0 : raid5f_process_stripe_request_reconstruct_xor_done(struct stripe_request *stripe_req, int status)
1184 : {
1185 0 : struct raid_bdev_io *raid_io = stripe_req->raid_io;
1186 0 : struct raid_bdev_process_request *process_req = SPDK_CONTAINEROF(raid_io,
1187 : struct raid_bdev_process_request, raid_io);
1188 :
1189 0 : raid5f_stripe_request_release(stripe_req);
1190 :
1191 0 : if (status != 0) {
1192 0 : raid_bdev_process_request_complete(process_req, status);
1193 0 : return;
1194 : }
1195 :
1196 0 : raid5f_process_submit_write(process_req);
1197 0 : }
1198 :
1199 : static void
1200 0 : raid5f_process_read_completed(struct raid_bdev_io *raid_io, enum spdk_bdev_io_status status)
1201 : {
1202 0 : struct raid_bdev_process_request *process_req = SPDK_CONTAINEROF(raid_io,
1203 : struct raid_bdev_process_request, raid_io);
1204 0 : struct stripe_request *stripe_req = raid_io->module_private;
1205 :
1206 0 : if (status != SPDK_BDEV_IO_STATUS_SUCCESS) {
1207 0 : raid5f_stripe_request_release(stripe_req);
1208 0 : raid_bdev_process_request_complete(process_req, -EIO);
1209 0 : return;
1210 : }
1211 :
1212 0 : raid5f_xor_stripe(stripe_req, raid5f_process_stripe_request_reconstruct_xor_done);
1213 0 : }
1214 :
1215 : static int
1216 0 : raid5f_submit_process_request(struct raid_bdev_process_request *process_req,
1217 : struct raid_bdev_io_channel *raid_ch)
1218 : {
1219 0 : struct spdk_io_channel *ch = spdk_io_channel_from_ctx(raid_ch);
1220 0 : struct raid_bdev *raid_bdev = spdk_io_channel_get_io_device(ch);
1221 0 : struct raid5f_info *r5f_info = raid_bdev->module_private;
1222 0 : struct raid_bdev_io *raid_io = &process_req->raid_io;
1223 0 : uint8_t chunk_idx = raid_bdev_base_bdev_slot(process_req->target);
1224 0 : uint64_t stripe_index = process_req->offset_blocks / r5f_info->stripe_blocks;
1225 0 : int ret;
1226 :
1227 0 : assert((process_req->offset_blocks % r5f_info->stripe_blocks) == 0);
1228 :
1229 0 : if (process_req->num_blocks < r5f_info->stripe_blocks) {
1230 0 : return 0;
1231 : }
1232 :
1233 0 : raid_bdev_io_init(raid_io, raid_ch, SPDK_BDEV_IO_TYPE_READ,
1234 0 : process_req->offset_blocks, raid_bdev->strip_size,
1235 0 : &process_req->iov, 1, process_req->md_buf, NULL, NULL);
1236 :
1237 0 : ret = raid5f_submit_reconstruct_read(raid_io, stripe_index, chunk_idx, 0,
1238 : raid5f_process_read_completed);
1239 0 : if (spdk_likely(ret == 0)) {
1240 0 : return r5f_info->stripe_blocks;
1241 0 : } else if (ret < 0) {
1242 0 : return ret;
1243 : } else {
1244 0 : return -EINVAL;
1245 : }
1246 0 : }
1247 :
1248 : static struct raid_bdev_module g_raid5f_module = {
1249 : .level = RAID5F,
1250 : .base_bdevs_min = 3,
1251 : .base_bdevs_constraint = {CONSTRAINT_MAX_BASE_BDEVS_REMOVED, 1},
1252 : .start = raid5f_start,
1253 : .stop = raid5f_stop,
1254 : .submit_rw_request = raid5f_submit_rw_request,
1255 : .get_io_channel = raid5f_get_io_channel,
1256 : .submit_process_request = raid5f_submit_process_request,
1257 : };
1258 1 : RAID_MODULE_REGISTER(&g_raid5f_module)
1259 :
1260 1 : SPDK_LOG_REGISTER_COMPONENT(bdev_raid5f)
|