Branch data Line data Source code
1 : : /* SPDX-License-Identifier: BSD-3-Clause
2 : : * Copyright (C) 2019 Intel Corporation.
3 : : * All rights reserved.
4 : : * Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
5 : : */
6 : :
7 : : #include "bdev_raid.h"
8 : :
9 : : #include "spdk/env.h"
10 : : #include "spdk/thread.h"
11 : : #include "spdk/string.h"
12 : : #include "spdk/util.h"
13 : :
14 : : #include "spdk/log.h"
15 : :
16 : : /*
17 : : * brief:
18 : : * raid0_bdev_io_completion function is called by lower layers to notify raid
19 : : * module that particular bdev_io is completed.
20 : : * params:
21 : : * bdev_io - pointer to bdev io submitted to lower layers, like child io
22 : : * success - bdev_io status
23 : : * cb_arg - function callback context (parent raid_bdev_io)
24 : : * returns:
25 : : * none
26 : : */
27 : : static void
28 : 8819611 : raid0_bdev_io_completion(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg)
29 : : {
30 : 8819611 : struct raid_bdev_io *raid_io = cb_arg;
31 : :
32 : 8819611 : spdk_bdev_free_io(bdev_io);
33 : :
34 [ + + ]: 8819611 : if (success) {
35 : 8819605 : raid_bdev_io_complete(raid_io, SPDK_BDEV_IO_STATUS_SUCCESS);
36 : : } else {
37 : 6 : raid_bdev_io_complete(raid_io, SPDK_BDEV_IO_STATUS_FAILED);
38 : : }
39 : 8819611 : }
40 : :
static void raid0_submit_rw_request(struct raid_bdev_io *raid_io);

/*
 * Adapter taking a void * context. It is passed as the callback to
 * raid_bdev_queue_io_wait() when a read/write submission returns -ENOMEM and
 * simply forwards to raid0_submit_rw_request().
 */
static void
_raid0_submit_rw_request(void *_raid_io)
{
	raid0_submit_rw_request((struct raid_bdev_io *)_raid_io);
}
50 : :
51 : : /*
52 : : * brief:
53 : : * raid0_submit_rw_request function is used to submit I/O to the correct
54 : : * member disk for raid0 bdevs.
55 : : * params:
56 : : * raid_io
57 : : * returns:
58 : : * none
59 : : */
60 : : static void
61 : 8819611 : raid0_submit_rw_request(struct raid_bdev_io *raid_io)
62 : : {
63 : 8819611 : struct spdk_bdev_ext_io_opts io_opts = {};
64 : 8819611 : struct raid_bdev_io_channel *raid_ch = raid_io->raid_ch;
65 : 8819611 : struct raid_bdev *raid_bdev = raid_io->raid_bdev;
66 : : uint64_t pd_strip;
67 : : uint32_t offset_in_strip;
68 : : uint64_t pd_lba;
69 : : uint64_t pd_blocks;
70 : : uint8_t pd_idx;
71 : 8819611 : int ret = 0;
72 : : uint64_t start_strip;
73 : : uint64_t end_strip;
74 : : struct raid_base_bdev_info *base_info;
75 : : struct spdk_io_channel *base_ch;
76 : :
77 [ - + ]: 8819611 : start_strip = raid_io->offset_blocks >> raid_bdev->strip_size_shift;
78 [ - + ]: 8819611 : end_strip = (raid_io->offset_blocks + raid_io->num_blocks - 1) >>
79 : 8819611 : raid_bdev->strip_size_shift;
80 [ - + - - ]: 8819611 : if (start_strip != end_strip && raid_bdev->num_base_bdevs > 1) {
81 : 0 : assert(false);
82 : : SPDK_ERRLOG("I/O spans strip boundary!\n");
83 : : raid_bdev_io_complete(raid_io, SPDK_BDEV_IO_STATUS_FAILED);
84 : : return;
85 : : }
86 : :
87 [ - + ]: 8819611 : pd_strip = start_strip / raid_bdev->num_base_bdevs;
88 [ - + ]: 8819611 : pd_idx = start_strip % raid_bdev->num_base_bdevs;
89 : 8819611 : offset_in_strip = raid_io->offset_blocks & (raid_bdev->strip_size - 1);
90 [ - + ]: 8819611 : pd_lba = (pd_strip << raid_bdev->strip_size_shift) + offset_in_strip;
91 : 8819611 : pd_blocks = raid_io->num_blocks;
92 : 8819611 : base_info = &raid_bdev->base_bdev_info[pd_idx];
93 [ - + ]: 8819611 : if (base_info->desc == NULL) {
94 : 0 : SPDK_ERRLOG("base bdev desc null for pd_idx %u\n", pd_idx);
95 : 0 : assert(0);
96 : : }
97 : :
98 : : /*
99 : : * Submit child io to bdev layer with using base bdev descriptors, base
100 : : * bdev lba, base bdev child io length in blocks, buffer, completion
101 : : * function and function callback context
102 : : */
103 [ - + ]: 8819611 : assert(raid_ch != NULL);
104 : 8819611 : base_ch = raid_bdev_channel_get_base_channel(raid_ch, pd_idx);
105 : :
106 : 8819611 : io_opts.size = sizeof(io_opts);
107 : 8819611 : io_opts.memory_domain = raid_io->memory_domain;
108 : 8819611 : io_opts.memory_domain_ctx = raid_io->memory_domain_ctx;
109 : 8819611 : io_opts.metadata = raid_io->md_buf;
110 : :
111 [ + + ]: 8819611 : if (raid_io->type == SPDK_BDEV_IO_TYPE_READ) {
112 : 3594790 : ret = raid_bdev_readv_blocks_ext(base_info, base_ch,
113 : : raid_io->iovs, raid_io->iovcnt,
114 : : pd_lba, pd_blocks, raid0_bdev_io_completion,
115 : : raid_io, &io_opts);
116 [ + - ]: 5224821 : } else if (raid_io->type == SPDK_BDEV_IO_TYPE_WRITE) {
117 : 5224821 : ret = raid_bdev_writev_blocks_ext(base_info, base_ch,
118 : : raid_io->iovs, raid_io->iovcnt,
119 : : pd_lba, pd_blocks, raid0_bdev_io_completion,
120 : : raid_io, &io_opts);
121 : : } else {
122 : 0 : SPDK_ERRLOG("Recvd not supported io type %u\n", raid_io->type);
123 : 0 : assert(0);
124 : : }
125 : :
126 [ - + ]: 8819611 : if (ret == -ENOMEM) {
127 : 0 : raid_bdev_queue_io_wait(raid_io, spdk_bdev_desc_get_bdev(base_info->desc),
128 : : base_ch, _raid0_submit_rw_request);
129 [ - + ]: 8819611 : } else if (ret != 0) {
130 : 0 : SPDK_ERRLOG("bdev io submit error not due to ENOMEM, it should not happen\n");
131 : 0 : assert(false);
132 : : raid_bdev_io_complete(raid_io, SPDK_BDEV_IO_STATUS_FAILED);
133 : : }
134 : : }
135 : :
/*
 * raid0 IO range: an I/O request translated into per-disk strip coordinates.
 * Filled by _raid0_get_io_range() and consumed by _raid0_split_io_range().
 */
struct raid_bdev_io_range {
	/* Strip size in blocks */
	uint64_t strip_size;
	/* Strip index, within a base bdev, of the first and last strip touched */
	uint64_t start_strip_in_disk;
	uint64_t end_strip_in_disk;
	/* Block offset of the first and last block inside their strips */
	uint64_t start_offset_in_strip;
	uint64_t end_offset_in_strip;
	/* Index of the base bdev holding the first and the last strip */
	uint8_t start_disk;
	uint8_t end_disk;
	/* Number of base bdevs receiving a part of the request */
	uint8_t n_disks_involved;
};
147 : :
148 : : static inline void
149 : 777617 : _raid0_get_io_range(struct raid_bdev_io_range *io_range,
150 : : uint8_t num_base_bdevs, uint64_t strip_size, uint64_t strip_size_shift,
151 : : uint64_t offset_blocks, uint64_t num_blocks)
152 : : {
153 : : uint64_t start_strip;
154 : : uint64_t end_strip;
155 : : uint64_t total_blocks;
156 : :
157 : 777617 : io_range->strip_size = strip_size;
158 : 777617 : total_blocks = offset_blocks + num_blocks - (num_blocks > 0);
159 : :
160 : : /* The start and end strip index in raid0 bdev scope */
161 [ - + ]: 777617 : start_strip = offset_blocks >> strip_size_shift;
162 [ - + ]: 777617 : end_strip = total_blocks >> strip_size_shift;
163 [ - + ]: 777617 : io_range->start_strip_in_disk = start_strip / num_base_bdevs;
164 [ - + ]: 777617 : io_range->end_strip_in_disk = end_strip / num_base_bdevs;
165 : :
166 : : /* The first strip may have unaligned start LBA offset.
167 : : * The end strip may have unaligned end LBA offset.
168 : : * Strips between them certainly have aligned offset and length to boundaries.
169 : : */
170 [ - + ]: 777617 : io_range->start_offset_in_strip = offset_blocks % strip_size;
171 [ - + ]: 777617 : io_range->end_offset_in_strip = total_blocks % strip_size;
172 : :
173 : : /* The base bdev indexes in which start and end strips are located */
174 [ - + ]: 777617 : io_range->start_disk = start_strip % num_base_bdevs;
175 [ - + ]: 777617 : io_range->end_disk = end_strip % num_base_bdevs;
176 : :
177 : : /* Calculate how many base_bdevs are involved in io operation.
178 : : * Number of base bdevs involved is between 1 and num_base_bdevs.
179 : : * It will be 1 if the first strip and last strip are the same one.
180 : : */
181 : 777617 : io_range->n_disks_involved = spdk_min((end_strip - start_strip + 1), num_base_bdevs);
182 : 777617 : }
183 : :
184 : : static inline void
185 : 878132 : _raid0_split_io_range(struct raid_bdev_io_range *io_range, uint8_t disk_idx,
186 : : uint64_t *_offset_in_disk, uint64_t *_nblocks_in_disk)
187 : : {
188 : : uint64_t n_strips_in_disk;
189 : : uint64_t start_offset_in_disk;
190 : : uint64_t end_offset_in_disk;
191 : : uint64_t offset_in_disk;
192 : : uint64_t nblocks_in_disk;
193 : : uint64_t start_strip_in_disk;
194 : : uint64_t end_strip_in_disk;
195 : :
196 : 878132 : start_strip_in_disk = io_range->start_strip_in_disk;
197 [ + + ]: 878132 : if (disk_idx < io_range->start_disk) {
198 : 44282 : start_strip_in_disk += 1;
199 : : }
200 : :
201 : 878132 : end_strip_in_disk = io_range->end_strip_in_disk;
202 [ + + ]: 878132 : if (disk_idx > io_range->end_disk) {
203 : 20902 : end_strip_in_disk -= 1;
204 : : }
205 : :
206 [ - + ]: 878132 : assert(end_strip_in_disk >= start_strip_in_disk);
207 : 878132 : n_strips_in_disk = end_strip_in_disk - start_strip_in_disk + 1;
208 : :
209 [ + + ]: 878132 : if (disk_idx == io_range->start_disk) {
210 : 777617 : start_offset_in_disk = io_range->start_offset_in_strip;
211 : : } else {
212 : 100515 : start_offset_in_disk = 0;
213 : : }
214 : :
215 [ + + ]: 878132 : if (disk_idx == io_range->end_disk) {
216 : 777617 : end_offset_in_disk = io_range->end_offset_in_strip;
217 : : } else {
218 : 100515 : end_offset_in_disk = io_range->strip_size - 1;
219 : : }
220 : :
221 : 878132 : offset_in_disk = start_offset_in_disk + start_strip_in_disk * io_range->strip_size;
222 : 878132 : nblocks_in_disk = (n_strips_in_disk - 1) * io_range->strip_size
223 : 878132 : + end_offset_in_disk - start_offset_in_disk + 1;
224 : :
225 [ - + - + ]: 878132 : SPDK_DEBUGLOG(bdev_raid0,
226 : : "raid_bdev (strip_size 0x%" PRIx64 ") splits IO to base_bdev (%u) at (0x%" PRIx64 ", 0x%" PRIx64
227 : : ").\n",
228 : : io_range->strip_size, disk_idx, offset_in_disk, nblocks_in_disk);
229 : :
230 : 878132 : *_offset_in_disk = offset_in_disk;
231 : 878132 : *_nblocks_in_disk = nblocks_in_disk;
232 : 878132 : }
233 : :
static void raid0_submit_null_payload_request(struct raid_bdev_io *raid_io);

/*
 * Adapter taking a void * context. It is passed as the callback to
 * raid_bdev_queue_io_wait() when a base I/O submission returns -ENOMEM and
 * simply forwards to raid0_submit_null_payload_request().
 */
static void
_raid0_submit_null_payload_request(void *_raid_io)
{
	raid0_submit_null_payload_request((struct raid_bdev_io *)_raid_io);
}
243 : :
244 : : static void
245 : 878132 : raid0_base_io_complete(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg)
246 : : {
247 : 878132 : struct raid_bdev_io *raid_io = cb_arg;
248 : :
249 [ + - ]: 878132 : raid_bdev_io_complete_part(raid_io, 1, success ?
250 : : SPDK_BDEV_IO_STATUS_SUCCESS :
251 : : SPDK_BDEV_IO_STATUS_FAILED);
252 : :
253 : 878132 : spdk_bdev_free_io(bdev_io);
254 : 878132 : }
255 : :
256 : : /*
257 : : * brief:
258 : : * raid0_submit_null_payload_request function submits the next batch of
259 : : * io requests with range but without payload, like FLUSH and UNMAP, to member disks;
260 : : * it will submit as many as possible unless one base io request fails with -ENOMEM,
261 : : * in which case it will queue itself for later submission.
262 : : * params:
263 : : * bdev_io - pointer to parent bdev_io on raid bdev device
264 : : * returns:
265 : : * none
266 : : */
267 : : static void
268 : 777617 : raid0_submit_null_payload_request(struct raid_bdev_io *raid_io)
269 : : {
270 : : struct raid_bdev *raid_bdev;
271 : 648679 : struct raid_bdev_io_range io_range;
272 : : int ret;
273 : : struct raid_base_bdev_info *base_info;
274 : : struct spdk_io_channel *base_ch;
275 : :
276 : 777617 : raid_bdev = raid_io->raid_bdev;
277 : :
278 : 1148237 : _raid0_get_io_range(&io_range, raid_bdev->num_base_bdevs,
279 : 777617 : raid_bdev->strip_size, raid_bdev->strip_size_shift,
280 : : raid_io->offset_blocks, raid_io->num_blocks);
281 : :
282 [ + - ]: 777617 : if (raid_io->base_bdev_io_remaining == 0) {
283 : 777617 : raid_io->base_bdev_io_remaining = io_range.n_disks_involved;
284 : : }
285 : :
286 [ + + ]: 1655749 : while (raid_io->base_bdev_io_submitted < io_range.n_disks_involved) {
287 : : uint8_t disk_idx;
288 : 732420 : uint64_t offset_in_disk;
289 : 732420 : uint64_t nblocks_in_disk;
290 : :
291 : : /* base_bdev is started from start_disk to end_disk.
292 : : * It is possible that index of start_disk is larger than end_disk's.
293 : : */
294 [ - + ]: 878132 : disk_idx = (io_range.start_disk + raid_io->base_bdev_io_submitted) % raid_bdev->num_base_bdevs;
295 : 878132 : base_info = &raid_bdev->base_bdev_info[disk_idx];
296 : 878132 : base_ch = raid_bdev_channel_get_base_channel(raid_io->raid_ch, disk_idx);
297 : :
298 : 878132 : _raid0_split_io_range(&io_range, disk_idx, &offset_in_disk, &nblocks_in_disk);
299 : :
300 [ + + - ]: 878132 : switch (raid_io->type) {
301 : 878070 : case SPDK_BDEV_IO_TYPE_UNMAP:
302 : 878070 : ret = raid_bdev_unmap_blocks(base_info, base_ch,
303 : : offset_in_disk, nblocks_in_disk,
304 : : raid0_base_io_complete, raid_io);
305 : 878070 : break;
306 : :
307 : 62 : case SPDK_BDEV_IO_TYPE_FLUSH:
308 : 62 : ret = raid_bdev_flush_blocks(base_info, base_ch,
309 : : offset_in_disk, nblocks_in_disk,
310 : : raid0_base_io_complete, raid_io);
311 : 62 : break;
312 : :
313 : 0 : default:
314 : 0 : SPDK_ERRLOG("submit request, invalid io type with null payload %u\n", raid_io->type);
315 : 0 : assert(false);
316 : : ret = -EIO;
317 : : }
318 : :
319 [ + - ]: 878132 : if (ret == 0) {
320 : 878132 : raid_io->base_bdev_io_submitted++;
321 [ # # ]: 0 : } else if (ret == -ENOMEM) {
322 : 0 : raid_bdev_queue_io_wait(raid_io, spdk_bdev_desc_get_bdev(base_info->desc),
323 : : base_ch, _raid0_submit_null_payload_request);
324 : 0 : return;
325 : : } else {
326 : 0 : SPDK_ERRLOG("bdev io submit error not due to ENOMEM, it should not happen\n");
327 : 0 : assert(false);
328 : : raid_bdev_io_complete(raid_io, SPDK_BDEV_IO_STATUS_FAILED);
329 : : return;
330 : : }
331 : : }
332 : : }
333 : :
334 : : static int
335 : 250 : raid0_start(struct raid_bdev *raid_bdev)
336 : : {
337 : 250 : uint64_t min_blockcnt = UINT64_MAX;
338 : : uint64_t base_bdev_data_size;
339 : : struct raid_base_bdev_info *base_info;
340 : :
341 [ + + ]: 4230 : RAID_FOR_EACH_BASE_BDEV(raid_bdev, base_info) {
342 : : /* Calculate minimum block count from all base bdevs */
343 : 3980 : min_blockcnt = spdk_min(min_blockcnt, base_info->data_size);
344 : : }
345 : :
346 [ - + - + ]: 250 : base_bdev_data_size = (min_blockcnt >> raid_bdev->strip_size_shift) << raid_bdev->strip_size_shift;
347 : :
348 [ + + ]: 4230 : RAID_FOR_EACH_BASE_BDEV(raid_bdev, base_info) {
349 : 3980 : base_info->data_size = base_bdev_data_size;
350 : : }
351 : :
352 : : /*
353 : : * Take the minimum block count based approach where total block count
354 : : * of raid bdev is the number of base bdev times the minimum block count
355 : : * of any base bdev.
356 : : */
357 [ - + - + ]: 250 : SPDK_DEBUGLOG(bdev_raid0, "min blockcount %" PRIu64 ", numbasedev %u, strip size shift %u\n",
358 : : min_blockcnt, raid_bdev->num_base_bdevs, raid_bdev->strip_size_shift);
359 : :
360 : 250 : raid_bdev->bdev.blockcnt = base_bdev_data_size * raid_bdev->num_base_bdevs;
361 : :
362 [ + - ]: 250 : if (raid_bdev->num_base_bdevs > 1) {
363 : 250 : raid_bdev->bdev.optimal_io_boundary = raid_bdev->strip_size;
364 : 250 : raid_bdev->bdev.split_on_optimal_io_boundary = true;
365 : : } else {
366 : : /* Do not need to split reads/writes on single bdev RAID modules. */
367 : 0 : raid_bdev->bdev.optimal_io_boundary = 0;
368 : 0 : raid_bdev->bdev.split_on_optimal_io_boundary = false;
369 : : }
370 : :
371 : 250 : return 0;
372 : : }
373 : :
374 : : static void
375 : 10 : raid0_resize(struct raid_bdev *raid_bdev)
376 : : {
377 : : uint64_t blockcnt;
378 : : int rc;
379 : 10 : uint64_t min_blockcnt = UINT64_MAX;
380 : : struct raid_base_bdev_info *base_info;
381 : : uint64_t base_bdev_data_size;
382 : :
383 [ + + ]: 30 : RAID_FOR_EACH_BASE_BDEV(raid_bdev, base_info) {
384 : 20 : struct spdk_bdev *base_bdev = spdk_bdev_desc_get_bdev(base_info->desc);
385 : :
386 : 20 : min_blockcnt = spdk_min(min_blockcnt, base_bdev->blockcnt - base_info->data_offset);
387 : : }
388 : :
389 [ - + - + ]: 10 : base_bdev_data_size = (min_blockcnt >> raid_bdev->strip_size_shift) << raid_bdev->strip_size_shift;
390 : 10 : blockcnt = base_bdev_data_size * raid_bdev->num_base_bdevs;
391 : :
392 [ + + ]: 10 : if (blockcnt == raid_bdev->bdev.blockcnt) {
393 : 5 : return;
394 : : }
395 : :
396 : 5 : rc = spdk_bdev_notify_blockcnt_change(&raid_bdev->bdev, blockcnt);
397 [ - + ]: 5 : if (rc != 0) {
398 : 0 : SPDK_ERRLOG("Failed to notify blockcount change\n");
399 : 0 : return;
400 : : }
401 : :
402 : 5 : SPDK_NOTICELOG("raid0 '%s': min blockcount was changed from %" PRIu64 " to %" PRIu64 "\n",
403 : : raid_bdev->bdev.name,
404 : : raid_bdev->bdev.blockcnt,
405 : : blockcnt);
406 : :
407 [ + + ]: 15 : RAID_FOR_EACH_BASE_BDEV(raid_bdev, base_info) {
408 : 10 : base_info->data_size = base_bdev_data_size;
409 : : }
410 : : }
411 : :
412 : : static struct raid_bdev_module g_raid0_module = {
413 : : .level = RAID0,
414 : : .base_bdevs_min = 1,
415 : : .memory_domains_supported = true,
416 : : .start = raid0_start,
417 : : .submit_rw_request = raid0_submit_rw_request,
418 : : .submit_null_payload_request = raid0_submit_null_payload_request,
419 : : .resize = raid0_resize,
420 : : };
421 : 2065 : RAID_MODULE_REGISTER(&g_raid0_module)
422 : :
423 : 2065 : SPDK_LOG_REGISTER_COMPONENT(bdev_raid0)
|