/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright (C) 2022 Intel Corporation.
 * Copyright (c) Peng Yu yupeng0921@gmail.com.
 * All rights reserved.
 */

#include "bdev_raid.h"

#include "spdk/env.h"
#include "spdk/thread.h"
#include "spdk/string.h"
#include "spdk/util.h"

#include "spdk/log.h"

struct concat_block_range {
	uint64_t start;
	uint64_t length;
};
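
/*
 * Each base bdev owns one contiguous range of the concatenated LBA space.
 * Illustrative example (hypothetical sizes): three base bdevs of 100, 200 and
 * 150 usable blocks yield the ranges {start 0, length 100},
 * {start 100, length 200} and {start 300, length 150}, so a request at
 * offset_blocks 250 maps to block 150 of the second base bdev.
 */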

/*
 * brief:
 * concat_bdev_io_completion function is called by the lower layers to notify
 * the raid module that a particular bdev_io has completed.
 * params:
 * bdev_io - pointer to the bdev io submitted to the lower layers, i.e. the child io
 * success - bdev_io status
 * cb_arg - function callback context (parent raid_bdev_io)
 * returns:
 * none
 */
static void
concat_bdev_io_completion(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg)
{
	struct raid_bdev_io *raid_io = cb_arg;

	spdk_bdev_free_io(bdev_io);

	if (success) {
		raid_bdev_io_complete(raid_io, SPDK_BDEV_IO_STATUS_SUCCESS);
	} else {
		raid_bdev_io_complete(raid_io, SPDK_BDEV_IO_STATUS_FAILED);
	}
}

static void concat_submit_rw_request(struct raid_bdev_io *raid_io);

static void
_concat_submit_rw_request(void *_raid_io)
{
	struct raid_bdev_io *raid_io = _raid_io;

	concat_submit_rw_request(raid_io);
}

/*
 * brief:
 * concat_submit_rw_request function submits an I/O request to the correct
 * member disk of a concat bdev.
 * params:
 * raid_io - pointer to the parent raid bdev io
 * returns:
 * none
 */
static void
concat_submit_rw_request(struct raid_bdev_io *raid_io)
{
	struct raid_bdev_io_channel *raid_ch = raid_io->raid_ch;
	struct raid_bdev *raid_bdev = raid_io->raid_bdev;
	struct concat_block_range *block_range = raid_bdev->module_private;
	uint64_t pd_lba;
	uint64_t pd_blocks;
	int pd_idx;
	int ret = 0;
	struct raid_base_bdev_info *base_info;
	struct spdk_io_channel *base_ch;
	struct spdk_bdev_ext_io_opts io_opts = {};
	int i;

	pd_idx = -1;
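	/*
	 * Find the last base bdev whose range starts at or before
	 * offset_blocks. Because the raid bdev is split on the strip-size
	 * optimal_io_boundary (see concat_start()) and every range length is
	 * a whole number of strips, a request never spans two base bdevs.
	 */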
	for (i = 0; i < raid_bdev->num_base_bdevs; i++) {
		if (block_range[i].start > raid_io->offset_blocks) {
			break;
		}
		pd_idx = i;
	}
	assert(pd_idx >= 0);
	assert(raid_io->offset_blocks >= block_range[pd_idx].start);
	pd_lba = raid_io->offset_blocks - block_range[pd_idx].start;
	pd_blocks = raid_io->num_blocks;
	base_info = &raid_bdev->base_bdev_info[pd_idx];
	if (base_info->desc == NULL) {
		SPDK_ERRLOG("base bdev desc null for pd_idx %u\n", pd_idx);
		assert(0);
	}

	/*
	 * Submit the child io to the bdev layer using the base bdev descriptor,
	 * base bdev lba, child io length in blocks, buffer, completion function
	 * and function callback context.
	 */
	assert(raid_ch != NULL);
	base_ch = raid_bdev_channel_get_base_channel(raid_ch, pd_idx);

	io_opts.size = sizeof(io_opts);
	io_opts.memory_domain = raid_io->memory_domain;
	io_opts.memory_domain_ctx = raid_io->memory_domain_ctx;
	io_opts.metadata = raid_io->md_buf;

	if (raid_io->type == SPDK_BDEV_IO_TYPE_READ) {
		ret = raid_bdev_readv_blocks_ext(base_info, base_ch,
						 raid_io->iovs, raid_io->iovcnt,
						 pd_lba, pd_blocks, concat_bdev_io_completion,
						 raid_io, &io_opts);
	} else if (raid_io->type == SPDK_BDEV_IO_TYPE_WRITE) {
		ret = raid_bdev_writev_blocks_ext(base_info, base_ch,
						  raid_io->iovs, raid_io->iovcnt,
						  pd_lba, pd_blocks, concat_bdev_io_completion,
						  raid_io, &io_opts);
	} else {
		SPDK_ERRLOG("Received unsupported io type %u\n", raid_io->type);
		assert(0);
	}

	if (ret == -ENOMEM) {
		raid_bdev_queue_io_wait(raid_io, spdk_bdev_desc_get_bdev(base_info->desc),
					base_ch, _concat_submit_rw_request);
	} else if (ret != 0) {
		SPDK_ERRLOG("bdev io submit error not due to ENOMEM; it should not happen\n");
		assert(false);
		raid_bdev_io_complete(raid_io, SPDK_BDEV_IO_STATUS_FAILED);
	}
}

static void concat_submit_null_payload_request(struct raid_bdev_io *raid_io);

static void
_concat_submit_null_payload_request(void *_raid_io)
{
	struct raid_bdev_io *raid_io = _raid_io;

	concat_submit_null_payload_request(raid_io);
}

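/*
 * brief:
 * concat_base_io_complete is the completion callback for the child I/Os of a
 * null payload request; it counts one completed child against
 * base_bdev_io_remaining and completes the parent raid_io once all children
 * have finished.
 * params:
 * bdev_io - pointer to the completed child bdev_io
 * success - child bdev_io status
 * cb_arg - parent raid_bdev_io
 * returns:
 * none
 */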
static void
concat_base_io_complete(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg)
{
	struct raid_bdev_io *raid_io = cb_arg;

	raid_bdev_io_complete_part(raid_io, 1, success ?
				   SPDK_BDEV_IO_STATUS_SUCCESS :
				   SPDK_BDEV_IO_STATUS_FAILED);

	spdk_bdev_free_io(bdev_io);
}

/*
 * brief:
 * concat_submit_null_payload_request function submits the next batch of
 * io requests with a range but without payload, like FLUSH and UNMAP, to the
 * member disks; it submits as many as possible unless a base io request fails
 * with -ENOMEM, in which case it queues itself for later submission.
 * params:
 * raid_io - pointer to the parent raid bdev io
 * returns:
 * none
 */
static void
concat_submit_null_payload_request(struct raid_bdev_io *raid_io)
{
	struct raid_bdev *raid_bdev;
	int ret;
	struct raid_base_bdev_info *base_info;
	struct spdk_io_channel *base_ch;
	uint64_t pd_lba;
	uint64_t pd_blocks;
	uint64_t offset_blocks;
	uint64_t num_blocks;
	struct concat_block_range *block_range;
	int i, start_idx, stop_idx;

	raid_bdev = raid_io->raid_bdev;
	block_range = raid_bdev->module_private;

	offset_blocks = raid_io->offset_blocks;
	num_blocks = raid_io->num_blocks;
	start_idx = -1;
	stop_idx = -1;
	/*
	 * Go through all base bdevs to find the first and the last bdev
	 * covered by this request.
	 */
	for (i = 0; i < raid_bdev->num_base_bdevs; i++) {
		/* skip the bdevs before offset_blocks */
		if (offset_blocks >= block_range[i].start + block_range[i].length) {
			continue;
		}
		if (start_idx == -1) {
			start_idx = i;
		} else {
			/*
			 * offset_blocks might fall in the middle of the first bdev;
			 * beyond the first bdev, it should always be at the start
			 * of the bdev's range.
			 */
			assert(offset_blocks == block_range[i].start);
		}
		pd_lba = offset_blocks - block_range[i].start;
		pd_blocks = spdk_min(num_blocks, block_range[i].length - pd_lba);
		offset_blocks += pd_blocks;
		num_blocks -= pd_blocks;
		if (num_blocks == 0) {
			stop_idx = i;
			break;
		}
	}
	assert(start_idx >= 0);
	assert(stop_idx >= 0);

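	/*
	 * base_bdev_io_remaining is initialized only on the first submission;
	 * on a resubmission after -ENOMEM it still holds the number of child
	 * I/Os that have not completed yet, and base_bdev_io_submitted below
	 * tells how many children were already sent.
	 */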
	if (raid_io->base_bdev_io_remaining == 0) {
		raid_io->base_bdev_io_remaining = stop_idx - start_idx + 1;
	}
	offset_blocks = raid_io->offset_blocks;
	num_blocks = raid_io->num_blocks;
	for (i = start_idx; i <= stop_idx; i++) {
		assert(offset_blocks >= block_range[i].start);
		assert(offset_blocks < block_range[i].start + block_range[i].length);
		pd_lba = offset_blocks - block_range[i].start;
		pd_blocks = spdk_min(num_blocks, block_range[i].length - pd_lba);
		offset_blocks += pd_blocks;
		num_blocks -= pd_blocks;
		/*
		 * Skip the IOs we have already submitted
		 */
		if (i < start_idx + raid_io->base_bdev_io_submitted) {
			continue;
		}
		base_info = &raid_bdev->base_bdev_info[i];
		base_ch = raid_bdev_channel_get_base_channel(raid_io->raid_ch, i);
		switch (raid_io->type) {
		case SPDK_BDEV_IO_TYPE_UNMAP:
			ret = raid_bdev_unmap_blocks(base_info, base_ch,
						     pd_lba, pd_blocks,
						     concat_base_io_complete, raid_io);
			break;
		case SPDK_BDEV_IO_TYPE_FLUSH:
			ret = raid_bdev_flush_blocks(base_info, base_ch,
						     pd_lba, pd_blocks,
						     concat_base_io_complete, raid_io);
			break;
		default:
			SPDK_ERRLOG("submit request: invalid io type with null payload %u\n", raid_io->type);
			assert(false);
			ret = -EIO;
		}
		if (ret == 0) {
			raid_io->base_bdev_io_submitted++;
		} else if (ret == -ENOMEM) {
			raid_bdev_queue_io_wait(raid_io, spdk_bdev_desc_get_bdev(base_info->desc),
						base_ch, _concat_submit_null_payload_request);
			return;
		} else {
			SPDK_ERRLOG("bdev io submit error not due to ENOMEM; it should not happen\n");
			assert(false);
			raid_bdev_io_complete(raid_io, SPDK_BDEV_IO_STATUS_FAILED);
			return;
		}
	}
}

static int
concat_start(struct raid_bdev *raid_bdev)
{
	uint64_t total_blockcnt = 0;
	struct raid_base_bdev_info *base_info;
	struct concat_block_range *block_range;

	block_range = calloc(raid_bdev->num_base_bdevs, sizeof(struct concat_block_range));
	if (!block_range) {
		SPDK_ERRLOG("Cannot allocate block_range, num_base_bdevs: %u\n",
			    raid_bdev->num_base_bdevs);
		return -ENOMEM;
	}

	int idx = 0;
	RAID_FOR_EACH_BASE_BDEV(raid_bdev, base_info) {
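		/*
		 * Round the usable size of each base bdev down to a whole
		 * number of strips. Illustrative example (hypothetical values):
		 * with strip_size_shift == 7 (128-block strips), a data_size of
		 * 1000 blocks gives strip_cnt == 7 and pd_block_cnt == 896.
		 */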
		uint64_t strip_cnt = base_info->data_size >> raid_bdev->strip_size_shift;
		uint64_t pd_block_cnt = strip_cnt << raid_bdev->strip_size_shift;

		base_info->data_size = pd_block_cnt;

		block_range[idx].start = total_blockcnt;
		block_range[idx].length = pd_block_cnt;
		total_blockcnt += pd_block_cnt;
		idx++;
	}

	raid_bdev->module_private = block_range;

	SPDK_DEBUGLOG(bdev_concat, "total blockcount %" PRIu64 ", numbasedev %u, strip size shift %u\n",
		      total_blockcnt, raid_bdev->num_base_bdevs, raid_bdev->strip_size_shift);
	raid_bdev->bdev.blockcnt = total_blockcnt;

	raid_bdev->bdev.optimal_io_boundary = raid_bdev->strip_size;
	raid_bdev->bdev.split_on_optimal_io_boundary = true;

	return 0;
}

static bool
concat_stop(struct raid_bdev *raid_bdev)
{
	struct concat_block_range *block_range = raid_bdev->module_private;

	free(block_range);

	return true;
}

static struct raid_bdev_module g_concat_module = {
	.level = CONCAT,
	.base_bdevs_min = 1,
	.memory_domains_supported = true,
	.start = concat_start,
	.stop = concat_stop,
	.submit_rw_request = concat_submit_rw_request,
	.submit_null_payload_request = concat_submit_null_payload_request,
};
RAID_MODULE_REGISTER(&g_concat_module)

SPDK_LOG_REGISTER_COMPONENT(bdev_concat)
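
/*
 * Example usage (a sketch, assuming a running SPDK target and the stock
 * scripts/rpc.py; the bdev names below are hypothetical): a concat bdev is
 * created over existing base bdevs with the bdev_raid_create RPC, e.g.
 *
 *   scripts/rpc.py bdev_raid_create -n Concat0 -r concat -z 64 -b "Nvme0n1 Nvme1n1"
 *
 * after which this module services all I/O submitted to the new bdev.
 */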