Line data Source code
1 : /* SPDX-License-Identifier: BSD-3-Clause
2 : * Copyright (C) 2022 Intel Corporation.
3 : * Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES.
4 : * All rights reserved.
5 : */
6 :
7 : #include "accel_dsa.h"
8 :
9 : #include "spdk/stdinc.h"
10 :
11 : #include "spdk/accel_module.h"
12 : #include "spdk/log.h"
13 : #include "spdk_internal/idxd.h"
14 :
15 : #include "spdk/env.h"
16 : #include "spdk/event.h"
17 : #include "spdk/likely.h"
18 : #include "spdk/thread.h"
19 : #include "spdk/idxd.h"
20 : #include "spdk/util.h"
21 : #include "spdk/json.h"
22 : #include "spdk/trace.h"
23 : #include "spdk_internal/trace_defs.h"
24 :
25 : static bool g_dsa_enable = false;
26 : static bool g_kernel_mode = false;
27 :
28 : enum channel_state {
29 : IDXD_CHANNEL_ACTIVE,
30 : IDXD_CHANNEL_ERROR,
31 : };
32 :
33 : static bool g_dsa_initialized = false;
34 :
35 : struct idxd_device {
36 : struct spdk_idxd_device *dsa;
37 : TAILQ_ENTRY(idxd_device) tailq;
38 : };
39 : static TAILQ_HEAD(, idxd_device) g_dsa_devices = TAILQ_HEAD_INITIALIZER(g_dsa_devices);
40 : static struct idxd_device *g_next_dev = NULL;
41 : static uint32_t g_num_devices = 0;
42 : static pthread_mutex_t g_dev_lock = PTHREAD_MUTEX_INITIALIZER;
43 :
44 : struct idxd_task {
45 : struct spdk_accel_task task;
46 : struct idxd_io_channel *chan;
47 : };
48 :
49 : struct idxd_io_channel {
50 : struct spdk_idxd_io_channel *chan;
51 : struct idxd_device *dev;
52 : enum channel_state state;
53 : struct spdk_poller *poller;
54 : uint32_t num_outstanding;
55 : STAILQ_HEAD(, spdk_accel_task) queued_tasks;
56 : };
57 :
58 : static struct spdk_io_channel *dsa_get_io_channel(void);
59 :
60 : static struct idxd_device *
61 0 : idxd_select_device(struct idxd_io_channel *chan)
62 : {
63 0 : uint32_t count = 0;
64 : struct idxd_device *dev;
65 0 : uint32_t socket_id = spdk_env_get_socket_id(spdk_env_get_current_core());
66 :
67 : /*
68 : * We allow channels to share underlying devices;
69 : * selection is round-robin, with a limit
70 : * on how many channels can share one device.
71 : */
72 : do {
73 : /* select next device */
74 0 : pthread_mutex_lock(&g_dev_lock);
75 0 : g_next_dev = TAILQ_NEXT(g_next_dev, tailq);
76 0 : if (g_next_dev == NULL) {
77 0 : g_next_dev = TAILQ_FIRST(&g_dsa_devices);
78 : }
79 0 : dev = g_next_dev;
80 0 : pthread_mutex_unlock(&g_dev_lock);
81 :
82 0 : if (socket_id != spdk_idxd_get_socket(dev->dsa)) {
83 0 : continue;
84 : }
85 :
86 : /*
87 : * Now see if a channel is available on this one. We only
88 : * allow a specific number of channels to share a device
89 : * to limit outstanding IO for flow control purposes.
90 : */
91 0 : chan->chan = spdk_idxd_get_channel(dev->dsa);
92 0 : if (chan->chan != NULL) {
93 0 : SPDK_DEBUGLOG(accel_dsa, "On socket %d using device on socket %d\n",
94 : socket_id, spdk_idxd_get_socket(dev->dsa));
95 0 : return dev;
96 : }
97 0 : } while (++count < g_num_devices);
98 :
99 : /* We are out of available channels and/or devices for the local socket. We fix the number
100 : * of channels that we allocate per device and only allocate devices on the same socket
101 : * that the current thread is on. On a 2-socket system it may be possible to avoid
102 : * this situation by spreading threads across the sockets.
103 : */
104 0 : SPDK_ERRLOG("No more DSA devices available on the local socket.\n");
105 0 : return NULL;
106 : }
107 :
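 : /* Completion callback invoked when a DSA operation finishes. For failed DIF
 :  * verify operations, the software DIF verify is re-run to obtain detailed error
 :  * information before the accel task is completed.
 :  */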
108 : static void
109 0 : dsa_done(void *cb_arg, int status)
110 : {
111 0 : struct idxd_task *idxd_task = cb_arg;
112 : struct idxd_io_channel *chan;
113 : int rc;
114 :
115 0 : chan = idxd_task->chan;
116 :
117 : /* If the DSA DIF Check operation detects an error, detailed info about
118 : * this error (like actual/expected values) needs to be obtained by
119 : * calling the software DIF Verify operation.
120 : */
121 0 : if (spdk_unlikely(status == -EIO)) {
122 0 : if (idxd_task->task.op_code == SPDK_ACCEL_OPC_DIF_VERIFY ||
123 0 : idxd_task->task.op_code == SPDK_ACCEL_OPC_DIF_VERIFY_COPY) {
124 0 : rc = spdk_dif_verify(idxd_task->task.s.iovs, idxd_task->task.s.iovcnt,
125 : idxd_task->task.dif.num_blocks,
126 : idxd_task->task.dif.ctx, idxd_task->task.dif.err);
127 0 : if (rc != 0) {
128 0 : SPDK_ERRLOG("DIF error detected. type=%d, offset=%" PRIu32 "\n",
129 : idxd_task->task.dif.err->err_type,
130 : idxd_task->task.dif.err->err_offset);
131 : }
132 : }
133 : }
134 :
135 0 : assert(chan->num_outstanding > 0);
136 0 : spdk_trace_record(TRACE_ACCEL_DSA_OP_COMPLETE, 0, 0, 0, chan->num_outstanding - 1);
137 0 : chan->num_outstanding--;
138 :
139 0 : spdk_accel_task_complete(&idxd_task->task, status);
140 0 : }
141 :
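 : /* Dualcast is only offloaded when each buffer is a single iovec and the source
 :  * and both destinations have the same length.
 :  */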
142 : static int
143 0 : idxd_submit_dualcast(struct idxd_io_channel *ch, struct idxd_task *idxd_task, int flags)
144 : {
145 0 : struct spdk_accel_task *task = &idxd_task->task;
146 :
147 0 : if (spdk_unlikely(task->d.iovcnt != 1 || task->d2.iovcnt != 1 || task->s.iovcnt != 1)) {
148 0 : return -EINVAL;
149 : }
150 :
151 0 : if (spdk_unlikely(task->d.iovs[0].iov_len != task->s.iovs[0].iov_len ||
152 : task->d.iovs[0].iov_len != task->d2.iovs[0].iov_len)) {
153 0 : return -EINVAL;
154 : }
155 :
156 0 : return spdk_idxd_submit_dualcast(ch->chan, task->d.iovs[0].iov_base,
157 0 : task->d2.iovs[0].iov_base, task->s.iovs[0].iov_base,
158 0 : task->d.iovs[0].iov_len, flags, dsa_done, idxd_task);
159 : }
160 :
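 : /* Check whether the destination buffers, extended to the source transfer size,
 :  * would appear to overlap the source buffers (the layout that makes DSA falsely
 :  * report an overlap error on DIF strip). Returns -EFAULT if the software
 :  * fallback should be used instead.
 :  */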
161 : static int
162 0 : check_dsa_dif_strip_overlap_bufs(struct spdk_accel_task *task)
163 : {
164 : uint64_t src_seg_addr_end_ext;
165 : uint64_t dst_seg_addr_end_ext;
166 : size_t i;
167 :
168 : /* The number of source and destination iovecs must be the same,
169 : * so that a single index can be used to iterate over both vectors
170 : * in the loop below. */
171 0 : if (task->d.iovcnt != task->s.iovcnt) {
172 0 : SPDK_ERRLOG("Mismatched iovcnts: src=%d, dst=%d\n",
173 : task->s.iovcnt, task->d.iovcnt);
174 0 : return -EINVAL;
175 : }
176 :
177 0 : for (i = 0; i < task->s.iovcnt; i++) {
178 0 : src_seg_addr_end_ext = (uint64_t)task->s.iovs[i].iov_base +
179 0 : task->s.iovs[i].iov_len;
180 :
181 0 : dst_seg_addr_end_ext = (uint64_t)task->d.iovs[i].iov_base +
182 0 : task->s.iovs[i].iov_len;
183 :
184 0 : if ((dst_seg_addr_end_ext >= (uint64_t)task->s.iovs[i].iov_base) &&
185 : (dst_seg_addr_end_ext <= src_seg_addr_end_ext)) {
186 0 : return -EFAULT;
187 : }
188 : }
189 :
190 0 : return 0;
191 : }
192 :
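 : /* Thread message handler that completes a task handled by the software fallback. */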
193 : static void
194 0 : spdk_accel_sw_task_complete(void *ctx)
195 : {
196 0 : struct spdk_accel_task *task = (struct spdk_accel_task *)ctx;
197 :
198 0 : spdk_accel_task_complete(task, task->status);
199 0 : }
200 :
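 : /* Map a generic accel task to the corresponding idxd submission call. Returns the
 :  * submission status; -EBUSY indicates the device queue is full and the task
 :  * should be retried later.
 :  */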
201 : static int
202 0 : _process_single_task(struct spdk_io_channel *ch, struct spdk_accel_task *task)
203 : {
204 0 : struct idxd_io_channel *chan = spdk_io_channel_get_ctx(ch);
205 : struct idxd_task *idxd_task;
206 0 : int rc = 0, flags = 0;
207 :
208 0 : idxd_task = SPDK_CONTAINEROF(task, struct idxd_task, task);
209 0 : idxd_task->chan = chan;
210 :
211 0 : switch (task->op_code) {
212 0 : case SPDK_ACCEL_OPC_COPY:
213 0 : rc = spdk_idxd_submit_copy(chan->chan, task->d.iovs, task->d.iovcnt,
214 : task->s.iovs, task->s.iovcnt, flags, dsa_done, idxd_task);
215 0 : break;
216 0 : case SPDK_ACCEL_OPC_DUALCAST:
217 0 : rc = idxd_submit_dualcast(chan, idxd_task, flags);
218 0 : break;
219 0 : case SPDK_ACCEL_OPC_COMPARE:
220 0 : rc = spdk_idxd_submit_compare(chan->chan, task->s.iovs, task->s.iovcnt,
221 0 : task->s2.iovs, task->s2.iovcnt, flags,
222 : dsa_done, idxd_task);
223 0 : break;
224 0 : case SPDK_ACCEL_OPC_FILL:
225 0 : rc = spdk_idxd_submit_fill(chan->chan, task->d.iovs, task->d.iovcnt,
226 : task->fill_pattern, flags, dsa_done, idxd_task);
227 0 : break;
228 0 : case SPDK_ACCEL_OPC_CRC32C:
229 0 : rc = spdk_idxd_submit_crc32c(chan->chan, task->s.iovs, task->s.iovcnt, task->seed,
230 : task->crc_dst, flags, dsa_done, idxd_task);
231 0 : break;
232 0 : case SPDK_ACCEL_OPC_COPY_CRC32C:
233 0 : rc = spdk_idxd_submit_copy_crc32c(chan->chan, task->d.iovs, task->d.iovcnt,
234 0 : task->s.iovs, task->s.iovcnt,
235 : task->seed, task->crc_dst, flags,
236 : dsa_done, idxd_task);
237 0 : break;
238 0 : case SPDK_ACCEL_OPC_DIF_VERIFY:
239 0 : rc = spdk_idxd_submit_dif_check(chan->chan,
240 0 : task->s.iovs, task->s.iovcnt,
241 : task->dif.num_blocks, task->dif.ctx, flags,
242 : dsa_done, idxd_task);
243 0 : break;
244 0 : case SPDK_ACCEL_OPC_DIF_GENERATE_COPY:
245 0 : rc = spdk_idxd_submit_dif_insert(chan->chan,
246 0 : task->d.iovs, task->d.iovcnt,
247 0 : task->s.iovs, task->s.iovcnt,
248 : task->dif.num_blocks, task->dif.ctx, flags,
249 : dsa_done, idxd_task);
250 0 : break;
251 0 : case SPDK_ACCEL_OPC_DIF_VERIFY_COPY:
252 : /* For DIF strip operations, DSA may incorrectly report an overlapping buffer
253 : * error if the destination buffer immediately precedes the source buffer.
254 : * This is because DSA uses the transfer size in the descriptor for both
255 : * the source and destination buffers when checking for buffer overlap.
256 : * Since the transfer size applies to the source buffer, which is larger
257 : * than the destination buffer by metadata, it should not be used as
258 : * the destination buffer size. To avoid reporting errors by DSA, the software
259 : * checks whether such an error condition can occur, and if so the software
260 : * fallback is performed. */
261 0 : rc = check_dsa_dif_strip_overlap_bufs(task);
262 0 : if (rc == 0) {
263 0 : rc = spdk_idxd_submit_dif_strip(chan->chan,
264 0 : task->d.iovs, task->d.iovcnt,
265 0 : task->s.iovs, task->s.iovcnt,
266 : task->dif.num_blocks, task->dif.ctx, flags,
267 : dsa_done, idxd_task);
268 0 : } else if (rc == -EFAULT) {
269 0 : rc = spdk_dif_verify_copy(task->d.iovs,
270 0 : task->d.iovcnt,
271 : task->s.iovs,
272 0 : task->s.iovcnt,
273 : task->dif.num_blocks,
274 : task->dif.ctx,
275 : task->dif.err);
276 0 : idxd_task->task.status = rc;
277 0 : spdk_thread_send_msg(spdk_get_thread(), spdk_accel_sw_task_complete, (void *)&idxd_task->task);
278 0 : rc = 0;
279 : }
280 0 : break;
281 0 : default:
282 0 : assert(false);
283 : rc = -EINVAL;
284 : break;
285 : }
286 :
287 0 : if (rc == 0) {
288 0 : chan->num_outstanding++;
289 0 : spdk_trace_record(TRACE_ACCEL_DSA_OP_SUBMIT, 0, 0, 0, chan->num_outstanding);
290 : }
291 :
292 0 : return rc;
293 : }
294 :
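 : /* Module submit callback. Tasks are queued locally when the device is busy or
 :  * when other tasks are already queued, preserving submission order.
 :  */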
295 : static int
296 0 : dsa_submit_task(struct spdk_io_channel *ch, struct spdk_accel_task *task)
297 : {
298 0 : struct idxd_io_channel *chan = spdk_io_channel_get_ctx(ch);
299 0 : int rc = 0;
300 :
301 0 : assert(STAILQ_NEXT(task, link) == NULL);
302 :
303 0 : if (spdk_unlikely(chan->state == IDXD_CHANNEL_ERROR)) {
304 0 : spdk_accel_task_complete(task, -EINVAL);
305 0 : return 0;
306 : }
307 :
308 0 : if (!STAILQ_EMPTY(&chan->queued_tasks)) {
309 0 : STAILQ_INSERT_TAIL(&chan->queued_tasks, task, link);
310 0 : return 0;
311 : }
312 :
313 0 : rc = _process_single_task(ch, task);
314 0 : if (rc == -EBUSY) {
315 0 : STAILQ_INSERT_TAIL(&chan->queued_tasks, task, link);
316 0 : } else if (rc) {
317 0 : spdk_accel_task_complete(task, rc);
318 : }
319 :
320 0 : return 0;
321 : }
322 :
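 : /* Retry tasks queued while the device was busy. Stops at the first -EBUSY;
 :  * completes all queued tasks with an error if the channel is in the error state.
 :  */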
323 : static int
324 0 : dsa_submit_queued_tasks(struct idxd_io_channel *chan)
325 : {
326 : struct spdk_accel_task *task, *tmp;
327 0 : struct spdk_io_channel *ch = spdk_io_channel_from_ctx(chan);
328 0 : int rc = 0;
329 :
330 0 : if (spdk_unlikely(chan->state == IDXD_CHANNEL_ERROR)) {
331 : /* Complete queued tasks with error and clear the list */
332 0 : while ((task = STAILQ_FIRST(&chan->queued_tasks))) {
333 0 : STAILQ_REMOVE_HEAD(&chan->queued_tasks, link);
334 0 : spdk_accel_task_complete(task, -EINVAL);
335 : }
336 0 : return 0;
337 : }
338 :
339 0 : STAILQ_FOREACH_SAFE(task, &chan->queued_tasks, link, tmp) {
340 0 : rc = _process_single_task(ch, task);
341 0 : if (rc == -EBUSY) {
342 0 : return rc;
343 : }
344 0 : STAILQ_REMOVE_HEAD(&chan->queued_tasks, link);
345 0 : if (rc) {
346 0 : spdk_accel_task_complete(task, rc);
347 : }
348 : }
349 :
350 0 : return 0;
351 : }
352 :
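 : /* Channel poller: reap completions from the device and retry any queued tasks. */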
353 : static int
354 0 : idxd_poll(void *arg)
355 : {
356 0 : struct idxd_io_channel *chan = arg;
357 : int count;
358 :
359 0 : count = spdk_idxd_process_events(chan->chan);
360 :
361 : /* If any tasks were queued while the device was busy, try to submit them now */
362 0 : if (!STAILQ_EMPTY(&chan->queued_tasks)) {
363 0 : dsa_submit_queued_tasks(chan);
364 : }
365 :
366 0 : return count > 0 ? SPDK_POLLER_BUSY : SPDK_POLLER_IDLE;
367 : }
368 :
369 : static size_t
370 0 : accel_dsa_get_ctx_size(void)
371 : {
372 0 : return sizeof(struct idxd_task);
373 : }
374 :
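 : /* Report which accel opcodes this module can handle; the DIF opcodes additionally
 :  * require the IOMMU to be enabled.
 :  */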
375 : static bool
376 0 : dsa_supports_opcode(enum spdk_accel_opcode opc)
377 : {
378 0 : if (!g_dsa_initialized) {
379 0 : assert(0);
380 : return false;
381 : }
382 :
383 0 : switch (opc) {
384 0 : case SPDK_ACCEL_OPC_COPY:
385 : case SPDK_ACCEL_OPC_FILL:
386 : case SPDK_ACCEL_OPC_DUALCAST:
387 : case SPDK_ACCEL_OPC_COMPARE:
388 : case SPDK_ACCEL_OPC_CRC32C:
389 : case SPDK_ACCEL_OPC_COPY_CRC32C:
390 0 : return true;
391 0 : case SPDK_ACCEL_OPC_DIF_VERIFY:
392 : case SPDK_ACCEL_OPC_DIF_GENERATE_COPY:
393 : case SPDK_ACCEL_OPC_DIF_VERIFY_COPY:
394 : /* Supported only if the IOMMU is enabled */
395 0 : return spdk_iommu_is_enabled();
396 0 : default:
397 0 : return false;
398 : }
399 : }
400 :
401 : static int accel_dsa_init(void);
402 : static void accel_dsa_exit(void *ctx);
403 : static void accel_dsa_write_config_json(struct spdk_json_write_ctx *w);
404 :
405 : static struct spdk_accel_module_if g_dsa_module = {
406 : .module_init = accel_dsa_init,
407 : .module_fini = accel_dsa_exit,
408 : .write_config_json = accel_dsa_write_config_json,
409 : .get_ctx_size = accel_dsa_get_ctx_size,
410 : .name = "dsa",
411 : .supports_opcode = dsa_supports_opcode,
412 : .get_io_channel = dsa_get_io_channel,
413 : .submit_tasks = dsa_submit_task
414 : };
415 :
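 : /* I/O channel create callback: bind the channel to a DSA device on the local
 :  * socket and register the completion poller.
 :  */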
416 : static int
417 0 : dsa_create_cb(void *io_device, void *ctx_buf)
418 : {
419 0 : struct idxd_io_channel *chan = ctx_buf;
420 : struct idxd_device *dsa;
421 :
422 0 : dsa = idxd_select_device(chan);
423 0 : if (dsa == NULL) {
424 0 : SPDK_ERRLOG("Failed to get an idxd channel\n");
425 0 : return -EINVAL;
426 : }
427 :
428 0 : chan->dev = dsa;
429 0 : chan->poller = SPDK_POLLER_REGISTER(idxd_poll, chan, 0);
430 0 : STAILQ_INIT(&chan->queued_tasks);
431 0 : chan->num_outstanding = 0;
432 0 : chan->state = IDXD_CHANNEL_ACTIVE;
433 :
434 0 : return 0;
435 : }
436 :
437 : static void
438 0 : dsa_destroy_cb(void *io_device, void *ctx_buf)
439 : {
440 0 : struct idxd_io_channel *chan = ctx_buf;
441 :
442 0 : spdk_poller_unregister(&chan->poller);
443 0 : spdk_idxd_put_channel(chan->chan);
444 0 : }
445 :
446 : static struct spdk_io_channel *
447 0 : dsa_get_io_channel(void)
448 : {
449 0 : return spdk_get_io_channel(&g_dsa_module);
450 : }
451 :
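 : /* Called for each DSA device attached during probe; adds the device to the
 :  * global list used for channel assignment.
 :  */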
452 : static void
453 0 : attach_cb(void *cb_ctx, struct spdk_idxd_device *idxd)
454 : {
455 : struct idxd_device *dev;
456 :
457 0 : dev = calloc(1, sizeof(*dev));
458 0 : if (dev == NULL) {
459 0 : SPDK_ERRLOG("Failed to allocate device struct\n");
460 0 : return;
461 : }
462 :
463 0 : dev->dsa = idxd;
464 0 : if (g_next_dev == NULL) {
465 0 : g_next_dev = dev;
466 : }
467 :
468 0 : TAILQ_INSERT_TAIL(&g_dsa_devices, dev, tailq);
469 0 : g_num_devices++;
470 : }
471 :
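 : /* Enable the DSA module (recorded in the config JSON as the dsa_scan_accel_module
 :  * method) and select the kernel or user-space idxd driver.
 :  */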
472 : int
473 0 : accel_dsa_enable_probe(bool kernel_mode)
474 : {
475 : int rc;
476 :
477 0 : if (g_dsa_enable) {
478 0 : return -EALREADY;
479 : }
480 :
481 0 : rc = spdk_idxd_set_config(kernel_mode);
482 0 : if (rc != 0) {
483 0 : return rc;
484 : }
485 :
486 0 : spdk_accel_module_list_add(&g_dsa_module);
487 0 : g_kernel_mode = kernel_mode;
488 0 : g_dsa_enable = true;
489 :
490 0 : return 0;
491 : }
492 :
493 : static bool
494 0 : probe_cb(void *cb_ctx, struct spdk_pci_device *dev)
495 : {
496 0 : if (dev->id.device_id == PCI_DEVICE_ID_INTEL_DSA) {
497 0 : return true;
498 : }
499 :
500 0 : return false;
501 : }
502 :
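 : /* Module init: probe for DSA devices and register the io_device used to create
 :  * per-thread channels. Returns -ENODEV if no devices are found.
 :  */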
503 : static int
504 0 : accel_dsa_init(void)
505 : {
506 0 : if (!g_dsa_enable) {
507 0 : return -EINVAL;
508 : }
509 :
510 0 : if (spdk_idxd_probe(NULL, attach_cb, probe_cb) != 0) {
511 0 : SPDK_ERRLOG("spdk_idxd_probe() failed\n");
512 0 : return -EINVAL;
513 : }
514 :
515 0 : if (TAILQ_EMPTY(&g_dsa_devices)) {
516 0 : return -ENODEV;
517 : }
518 :
519 0 : g_dsa_initialized = true;
520 0 : spdk_io_device_register(&g_dsa_module, dsa_create_cb, dsa_destroy_cb,
521 : sizeof(struct idxd_io_channel), "dsa_accel_module");
522 0 : return 0;
523 : }
524 :
525 : static void
526 0 : accel_dsa_exit(void *ctx)
527 : {
528 : struct idxd_device *dev;
529 :
530 0 : if (g_dsa_initialized) {
531 0 : spdk_io_device_unregister(&g_dsa_module, NULL);
532 0 : g_dsa_initialized = false;
533 : }
534 :
535 0 : while (!TAILQ_EMPTY(&g_dsa_devices)) {
536 0 : dev = TAILQ_FIRST(&g_dsa_devices);
537 0 : TAILQ_REMOVE(&g_dsa_devices, dev, tailq);
538 0 : spdk_idxd_detach(dev->dsa);
539 0 : free(dev);
540 : }
541 :
542 0 : spdk_accel_module_finish();
543 0 : }
544 :
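 : /* Emit the JSON configuration needed to re-enable this module when the config
 :  * file is replayed.
 :  */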
545 : static void
546 0 : accel_dsa_write_config_json(struct spdk_json_write_ctx *w)
547 : {
548 0 : if (g_dsa_enable) {
549 0 : spdk_json_write_object_begin(w);
550 0 : spdk_json_write_named_string(w, "method", "dsa_scan_accel_module");
551 0 : spdk_json_write_named_object_begin(w, "params");
552 0 : spdk_json_write_named_bool(w, "config_kernel_mode", g_kernel_mode);
553 0 : spdk_json_write_object_end(w);
554 0 : spdk_json_write_object_end(w);
555 : }
556 0 : }
557 :
558 0 : SPDK_TRACE_REGISTER_FN(dsa_trace, "dsa", TRACE_GROUP_ACCEL_DSA)
559 : {
560 0 : spdk_trace_register_description("DSA_OP_SUBMIT", TRACE_ACCEL_DSA_OP_SUBMIT, OWNER_TYPE_NONE,
561 : OBJECT_NONE, 0,
562 : SPDK_TRACE_ARG_TYPE_INT, "count");
563 0 : spdk_trace_register_description("DSA_OP_COMPLETE", TRACE_ACCEL_DSA_OP_COMPLETE, OWNER_TYPE_NONE,
564 : OBJECT_NONE,
565 : 0, SPDK_TRACE_ARG_TYPE_INT, "count");
566 0 : }
567 :
568 0 : SPDK_LOG_REGISTER_COMPONENT(accel_dsa)