Branch data Line data Source code
1 : : /* SPDX-License-Identifier: BSD-3-Clause
2 : : * Copyright (C) 2016 Intel Corporation. All rights reserved.
3 : : * Copyright (c) 2019-2021 Mellanox Technologies LTD. All rights reserved.
4 : : * Copyright (c) 2021-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
5 : : */
6 : :
7 : : #include "spdk/stdinc.h"
8 : :
9 : : #include "spdk/config.h"
10 : : #include "spdk/thread.h"
11 : : #include "spdk/likely.h"
12 : : #include "spdk/nvmf_transport.h"
13 : : #include "spdk/string.h"
14 : : #include "spdk/trace.h"
15 : : #include "spdk/tree.h"
16 : : #include "spdk/util.h"
17 : :
18 : : #include "spdk_internal/assert.h"
19 : : #include "spdk/log.h"
20 : : #include "spdk_internal/rdma.h"
21 : :
22 : : #include "nvmf_internal.h"
23 : : #include "transport.h"
24 : :
25 : : #include "spdk_internal/trace_defs.h"
26 : :
27 : : struct spdk_nvme_rdma_hooks g_nvmf_hooks = {};
28 : : const struct spdk_nvmf_transport_ops spdk_nvmf_transport_rdma;
29 : :
30 : : /*
31 : : RDMA Connection Resource Defaults
32 : : */
33 : : #define NVMF_DEFAULT_MSDBD 16
34 : : #define NVMF_DEFAULT_TX_SGE SPDK_NVMF_MAX_SGL_ENTRIES
35 : : #define NVMF_DEFAULT_RSP_SGE 1
36 : : #define NVMF_DEFAULT_RX_SGE 2
37 : :
38 : : SPDK_STATIC_ASSERT(NVMF_DEFAULT_MSDBD <= SPDK_NVMF_MAX_SGL_ENTRIES,
39 : : "MSDBD must not exceed SPDK_NVMF_MAX_SGL_ENTRIES");
40 : :
41 : : /* The RDMA completion queue size */
42 : : #define DEFAULT_NVMF_RDMA_CQ_SIZE 4096
43 : : #define MAX_WR_PER_QP(queue_depth) (queue_depth * 3 + 2)
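      : : 	/* A rough reading of this sizing (an inference, not stated in the source): each
      : : 	 * queue entry may consume up to two send-queue WRs (a data-transfer WR plus the
      : : 	 * response SEND) and one receive WR, i.e. about three WRs per entry, with a small
      : : 	 * fixed slack of 2. Compare the max_send_wr/max_recv_wr setup in
      : : 	 * nvmf_rdma_qpair_initialize below. */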
44 : :
45 : : static int g_spdk_nvmf_ibv_query_mask =
46 : : IBV_QP_STATE |
47 : : IBV_QP_PKEY_INDEX |
48 : : IBV_QP_PORT |
49 : : IBV_QP_ACCESS_FLAGS |
50 : : IBV_QP_AV |
51 : : IBV_QP_PATH_MTU |
52 : : IBV_QP_DEST_QPN |
53 : : IBV_QP_RQ_PSN |
54 : : IBV_QP_MAX_DEST_RD_ATOMIC |
55 : : IBV_QP_MIN_RNR_TIMER |
56 : : IBV_QP_SQ_PSN |
57 : : IBV_QP_TIMEOUT |
58 : : IBV_QP_RETRY_CNT |
59 : : IBV_QP_RNR_RETRY |
60 : : IBV_QP_MAX_QP_RD_ATOMIC;
61 : :
62 : : enum spdk_nvmf_rdma_request_state {
63 : : /* The request is not currently in use */
64 : : RDMA_REQUEST_STATE_FREE = 0,
65 : :
66 : : /* Initial state when request first received */
67 : : RDMA_REQUEST_STATE_NEW,
68 : :
69 : : /* The request is queued until a data buffer is available. */
70 : : RDMA_REQUEST_STATE_NEED_BUFFER,
71 : :
72 : : /* The request is waiting on RDMA queue depth availability
73 : : * to transfer data from the host to the controller.
74 : : */
75 : : RDMA_REQUEST_STATE_DATA_TRANSFER_TO_CONTROLLER_PENDING,
76 : :
77 : : /* The request is currently transferring data from the host to the controller. */
78 : : RDMA_REQUEST_STATE_TRANSFERRING_HOST_TO_CONTROLLER,
79 : :
80 : : /* The request is ready to execute at the block device */
81 : : RDMA_REQUEST_STATE_READY_TO_EXECUTE,
82 : :
83 : : /* The request is currently executing at the block device */
84 : : RDMA_REQUEST_STATE_EXECUTING,
85 : :
86 : : /* The request finished executing at the block device */
87 : : RDMA_REQUEST_STATE_EXECUTED,
88 : :
89 : : /* The request is waiting on RDMA queue depth availability
90 : : * to transfer data from the controller to the host.
91 : : */
92 : : RDMA_REQUEST_STATE_DATA_TRANSFER_TO_HOST_PENDING,
93 : :
94 : : /* The request is waiting on RDMA queue depth availability
95 : : * to send response to the host.
96 : : */
97 : : RDMA_REQUEST_STATE_READY_TO_COMPLETE_PENDING,
98 : :
99 : : /* The request is ready to send a completion */
100 : : RDMA_REQUEST_STATE_READY_TO_COMPLETE,
101 : :
102 : : /* The request is currently transferring data from the controller to the host. */
103 : : RDMA_REQUEST_STATE_TRANSFERRING_CONTROLLER_TO_HOST,
104 : :
105 : : /* The request currently has an outstanding completion without an
106 : : * associated data transfer.
107 : : */
108 : : RDMA_REQUEST_STATE_COMPLETING,
109 : :
110 : : /* The request completed and can be marked free. */
111 : : RDMA_REQUEST_STATE_COMPLETED,
112 : :
113 : : /* Terminator */
114 : : RDMA_REQUEST_NUM_STATES,
115 : : };
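      : : 	/* Informal sketch of the usual progression, inferred from the state comments above:
      : : 	 * a host-to-controller (write) command moves FREE -> NEW -> NEED_BUFFER ->
      : : 	 * DATA_TRANSFER_TO_CONTROLLER_PENDING -> TRANSFERRING_HOST_TO_CONTROLLER ->
      : : 	 * READY_TO_EXECUTE -> EXECUTING -> EXECUTED -> READY_TO_COMPLETE -> COMPLETING ->
      : : 	 * COMPLETED -> FREE, while a controller-to-host (read) command takes the
      : : 	 * DATA_TRANSFER_TO_HOST_PENDING / TRANSFERRING_CONTROLLER_TO_HOST path after
      : : 	 * EXECUTED. */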
116 : :
117 : : static void
118 : 728 : nvmf_trace(void)
119 : : {
120 : 728 : spdk_trace_register_object(OBJECT_NVMF_RDMA_IO, 'r');
121 : 728 : spdk_trace_register_description("RDMA_REQ_NEW", TRACE_RDMA_REQUEST_STATE_NEW,
122 : : OWNER_NONE, OBJECT_NVMF_RDMA_IO, 1,
123 : : SPDK_TRACE_ARG_TYPE_PTR, "qpair");
124 : 728 : spdk_trace_register_description("RDMA_REQ_NEED_BUFFER", TRACE_RDMA_REQUEST_STATE_NEED_BUFFER,
125 : : OWNER_NONE, OBJECT_NVMF_RDMA_IO, 0,
126 : : SPDK_TRACE_ARG_TYPE_PTR, "qpair");
127 : 728 : spdk_trace_register_description("RDMA_REQ_TX_PENDING_C2H",
128 : : TRACE_RDMA_REQUEST_STATE_DATA_TRANSFER_TO_HOST_PENDING,
129 : : OWNER_NONE, OBJECT_NVMF_RDMA_IO, 0,
130 : : SPDK_TRACE_ARG_TYPE_PTR, "qpair");
131 : 728 : spdk_trace_register_description("RDMA_REQ_TX_PENDING_H2C",
132 : : TRACE_RDMA_REQUEST_STATE_DATA_TRANSFER_TO_CONTROLLER_PENDING,
133 : : OWNER_NONE, OBJECT_NVMF_RDMA_IO, 0,
134 : : SPDK_TRACE_ARG_TYPE_PTR, "qpair");
135 : 728 : spdk_trace_register_description("RDMA_REQ_TX_H2C",
136 : : TRACE_RDMA_REQUEST_STATE_TRANSFERRING_HOST_TO_CONTROLLER,
137 : : OWNER_NONE, OBJECT_NVMF_RDMA_IO, 0,
138 : : SPDK_TRACE_ARG_TYPE_PTR, "qpair");
139 : 728 : spdk_trace_register_description("RDMA_REQ_RDY_TO_EXECUTE",
140 : : TRACE_RDMA_REQUEST_STATE_READY_TO_EXECUTE,
141 : : OWNER_NONE, OBJECT_NVMF_RDMA_IO, 0,
142 : : SPDK_TRACE_ARG_TYPE_PTR, "qpair");
143 : 728 : spdk_trace_register_description("RDMA_REQ_EXECUTING",
144 : : TRACE_RDMA_REQUEST_STATE_EXECUTING,
145 : : OWNER_NONE, OBJECT_NVMF_RDMA_IO, 0,
146 : : SPDK_TRACE_ARG_TYPE_PTR, "qpair");
147 : 728 : spdk_trace_register_description("RDMA_REQ_EXECUTED",
148 : : TRACE_RDMA_REQUEST_STATE_EXECUTED,
149 : : OWNER_NONE, OBJECT_NVMF_RDMA_IO, 0,
150 : : SPDK_TRACE_ARG_TYPE_PTR, "qpair");
151 : 728 : spdk_trace_register_description("RDMA_REQ_RDY_TO_COMPL_PEND",
152 : : TRACE_RDMA_REQUEST_STATE_READY_TO_COMPLETE_PENDING,
153 : : OWNER_NONE, OBJECT_NVMF_RDMA_IO, 0,
154 : : SPDK_TRACE_ARG_TYPE_PTR, "qpair");
155 : 728 : spdk_trace_register_description("RDMA_REQ_RDY_TO_COMPL",
156 : : TRACE_RDMA_REQUEST_STATE_READY_TO_COMPLETE,
157 : : OWNER_NONE, OBJECT_NVMF_RDMA_IO, 0,
158 : : SPDK_TRACE_ARG_TYPE_PTR, "qpair");
159 : 728 : spdk_trace_register_description("RDMA_REQ_COMPLETING_C2H",
160 : : TRACE_RDMA_REQUEST_STATE_TRANSFERRING_CONTROLLER_TO_HOST,
161 : : OWNER_NONE, OBJECT_NVMF_RDMA_IO, 0,
162 : : SPDK_TRACE_ARG_TYPE_PTR, "qpair");
163 : 728 : spdk_trace_register_description("RDMA_REQ_COMPLETING",
164 : : TRACE_RDMA_REQUEST_STATE_COMPLETING,
165 : : OWNER_NONE, OBJECT_NVMF_RDMA_IO, 0,
166 : : SPDK_TRACE_ARG_TYPE_PTR, "qpair");
167 : 728 : spdk_trace_register_description("RDMA_REQ_COMPLETED",
168 : : TRACE_RDMA_REQUEST_STATE_COMPLETED,
169 : : OWNER_NONE, OBJECT_NVMF_RDMA_IO, 0,
170 : : SPDK_TRACE_ARG_TYPE_PTR, "qpair");
171 : :
172 : 728 : spdk_trace_register_description("RDMA_QP_CREATE", TRACE_RDMA_QP_CREATE,
173 : : OWNER_NONE, OBJECT_NONE, 0,
174 : : SPDK_TRACE_ARG_TYPE_INT, "");
175 : 728 : spdk_trace_register_description("RDMA_IBV_ASYNC_EVENT", TRACE_RDMA_IBV_ASYNC_EVENT,
176 : : OWNER_NONE, OBJECT_NONE, 0,
177 : : SPDK_TRACE_ARG_TYPE_INT, "type");
178 : 728 : spdk_trace_register_description("RDMA_CM_ASYNC_EVENT", TRACE_RDMA_CM_ASYNC_EVENT,
179 : : OWNER_NONE, OBJECT_NONE, 0,
180 : : SPDK_TRACE_ARG_TYPE_INT, "type");
181 : 728 : spdk_trace_register_description("RDMA_QP_STATE_CHANGE", TRACE_RDMA_QP_STATE_CHANGE,
182 : : OWNER_NONE, OBJECT_NONE, 0,
183 : : SPDK_TRACE_ARG_TYPE_PTR, "state");
184 : 728 : spdk_trace_register_description("RDMA_QP_DISCONNECT", TRACE_RDMA_QP_DISCONNECT,
185 : : OWNER_NONE, OBJECT_NONE, 0,
186 : : SPDK_TRACE_ARG_TYPE_INT, "");
187 : 728 : spdk_trace_register_description("RDMA_QP_DESTROY", TRACE_RDMA_QP_DESTROY,
188 : : OWNER_NONE, OBJECT_NONE, 0,
189 : : SPDK_TRACE_ARG_TYPE_INT, "");
190 : 728 : }
191 : 783 : SPDK_TRACE_REGISTER_FN(nvmf_trace, "nvmf_rdma", TRACE_GROUP_NVMF_RDMA)
192 : :
193 : : enum spdk_nvmf_rdma_wr_type {
194 : : RDMA_WR_TYPE_RECV,
195 : : RDMA_WR_TYPE_SEND,
196 : : RDMA_WR_TYPE_DATA,
197 : : };
198 : :
199 : : struct spdk_nvmf_rdma_wr {
200 : : /* Uses enum spdk_nvmf_rdma_wr_type */
201 : : uint8_t type;
202 : : };
203 : :
204 : : /* This structure holds commands as they are received off the wire.
205 : : * It must be dynamically paired with a full request object
206 : : * (spdk_nvmf_rdma_request) to service a request. It is separate
207 : : * from the request because RDMA does not appear to order
208 : : * completions, so occasionally we'll get a new incoming
209 : : * command when there aren't any free request objects.
210 : : */
211 : : struct spdk_nvmf_rdma_recv {
212 : : struct ibv_recv_wr wr;
213 : : struct ibv_sge sgl[NVMF_DEFAULT_RX_SGE];
214 : :
215 : : struct spdk_nvmf_rdma_qpair *qpair;
216 : :
217 : : /* In-capsule data buffer */
218 : : uint8_t *buf;
219 : :
220 : : struct spdk_nvmf_rdma_wr rdma_wr;
221 : : uint64_t receive_tsc;
222 : :
223 : : STAILQ_ENTRY(spdk_nvmf_rdma_recv) link;
224 : : };
225 : :
226 : : struct spdk_nvmf_rdma_request_data {
227 : : struct ibv_send_wr wr;
228 : : struct ibv_sge sgl[SPDK_NVMF_MAX_SGL_ENTRIES];
229 : : };
230 : :
231 : : struct spdk_nvmf_rdma_request {
232 : : struct spdk_nvmf_request req;
233 : :
234 : : bool fused_failed;
235 : :
236 : : struct spdk_nvmf_rdma_wr data_wr;
237 : : struct spdk_nvmf_rdma_wr rsp_wr;
238 : :
239 : : /* Uses enum spdk_nvmf_rdma_request_state */
240 : : uint8_t state;
241 : :
242 : : /* Data offset in req.iov */
243 : : uint32_t offset;
244 : :
245 : : struct spdk_nvmf_rdma_recv *recv;
246 : :
247 : : struct {
248 : : struct ibv_send_wr wr;
249 : : struct ibv_sge sgl[NVMF_DEFAULT_RSP_SGE];
250 : : } rsp;
251 : :
252 : : uint16_t iovpos;
253 : : uint16_t num_outstanding_data_wr;
 254 : : 	/* Used to split a write I/O with a multi-SGL payload */
255 : : uint16_t num_remaining_data_wr;
256 : : uint64_t receive_tsc;
257 : : struct spdk_nvmf_rdma_request *fused_pair;
258 : : STAILQ_ENTRY(spdk_nvmf_rdma_request) state_link;
259 : : struct ibv_send_wr *remaining_tranfer_in_wrs;
260 : : struct ibv_send_wr *transfer_wr;
261 : : struct spdk_nvmf_rdma_request_data data;
262 : : };
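      : : 	/* Note on WR ownership, inferred from _nvmf_rdma_request_free_data below: the
      : : 	 * embedded "data" member supplies the request's first data WR and its SGL, so a
      : : 	 * payload that fits in SPDK_NVMF_MAX_SGL_ENTRIES needs no extra allocation;
      : : 	 * additional WRs for larger or multi-SGL transfers are drawn from the transport's
      : : 	 * data_wr_pool and returned there when the request completes. */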
263 : :
264 : : struct spdk_nvmf_rdma_resource_opts {
265 : : struct spdk_nvmf_rdma_qpair *qpair;
266 : : /* qp points either to an ibv_qp object or an ibv_srq object depending on the value of shared. */
267 : : void *qp;
268 : : struct spdk_rdma_mem_map *map;
269 : : uint32_t max_queue_depth;
270 : : uint32_t in_capsule_data_size;
271 : : bool shared;
272 : : };
273 : :
274 : : struct spdk_nvmf_rdma_resources {
275 : : /* Array of size "max_queue_depth" containing RDMA requests. */
276 : : struct spdk_nvmf_rdma_request *reqs;
277 : :
278 : : /* Array of size "max_queue_depth" containing RDMA recvs. */
279 : : struct spdk_nvmf_rdma_recv *recvs;
280 : :
281 : : /* Array of size "max_queue_depth" containing 64 byte capsules
282 : : * used for receive.
283 : : */
284 : : union nvmf_h2c_msg *cmds;
285 : :
286 : : /* Array of size "max_queue_depth" containing 16 byte completions
287 : : * to be sent back to the user.
288 : : */
289 : : union nvmf_c2h_msg *cpls;
290 : :
291 : : /* Array of size "max_queue_depth * InCapsuleDataSize" containing
292 : : * buffers to be used for in capsule data.
293 : : */
294 : : void *bufs;
295 : :
296 : : /* Receives that are waiting for a request object */
297 : : STAILQ_HEAD(, spdk_nvmf_rdma_recv) incoming_queue;
298 : :
299 : : /* Queue to track free requests */
300 : : STAILQ_HEAD(, spdk_nvmf_rdma_request) free_queue;
301 : : };
302 : :
303 : : typedef void (*spdk_nvmf_rdma_qpair_ibv_event)(struct spdk_nvmf_rdma_qpair *rqpair);
304 : :
305 : : typedef void (*spdk_poller_destroy_cb)(void *ctx);
306 : :
307 : : struct spdk_nvmf_rdma_ibv_event_ctx {
308 : : struct spdk_nvmf_rdma_qpair *rqpair;
309 : : spdk_nvmf_rdma_qpair_ibv_event cb_fn;
310 : : /* Link to other ibv events associated with this qpair */
311 : : STAILQ_ENTRY(spdk_nvmf_rdma_ibv_event_ctx) link;
312 : : };
313 : :
314 : : struct spdk_nvmf_rdma_qpair {
315 : : struct spdk_nvmf_qpair qpair;
316 : :
317 : : struct spdk_nvmf_rdma_device *device;
318 : : struct spdk_nvmf_rdma_poller *poller;
319 : :
320 : : struct spdk_rdma_qp *rdma_qp;
321 : : struct rdma_cm_id *cm_id;
322 : : struct spdk_rdma_srq *srq;
323 : : struct rdma_cm_id *listen_id;
324 : :
325 : : /* Cache the QP number to improve QP search by RB tree. */
326 : : uint32_t qp_num;
327 : :
328 : : /* The maximum number of I/O outstanding on this connection at one time */
329 : : uint16_t max_queue_depth;
330 : :
331 : : /* The maximum number of active RDMA READ and ATOMIC operations at one time */
332 : : uint16_t max_read_depth;
333 : :
334 : : /* The maximum number of RDMA SEND operations at one time */
335 : : uint32_t max_send_depth;
336 : :
 337 : : 	/* The current number of outstanding WRs posted to this qpair's
 338 : : 	 * recv queue. Should not exceed the qpair's max_queue_depth.
 339 : : 	 */
340 : : uint16_t current_recv_depth;
341 : :
342 : : /* The current number of active RDMA READ operations */
343 : : uint16_t current_read_depth;
344 : :
345 : : /* The current number of posted WRs from this qpair's
346 : : * send queue. Should not exceed max_send_depth.
347 : : */
348 : : uint32_t current_send_depth;
349 : :
350 : : /* The maximum number of SGEs per WR on the send queue */
351 : : uint32_t max_send_sge;
352 : :
353 : : /* The maximum number of SGEs per WR on the recv queue */
354 : : uint32_t max_recv_sge;
355 : :
356 : : struct spdk_nvmf_rdma_resources *resources;
357 : :
358 : : STAILQ_HEAD(, spdk_nvmf_rdma_request) pending_rdma_read_queue;
359 : :
360 : : STAILQ_HEAD(, spdk_nvmf_rdma_request) pending_rdma_write_queue;
361 : :
362 : : STAILQ_HEAD(, spdk_nvmf_rdma_request) pending_rdma_send_queue;
363 : :
364 : : /* Number of requests not in the free state */
365 : : uint32_t qd;
366 : :
367 : : RB_ENTRY(spdk_nvmf_rdma_qpair) node;
368 : :
369 : : STAILQ_ENTRY(spdk_nvmf_rdma_qpair) recv_link;
370 : :
371 : : STAILQ_ENTRY(spdk_nvmf_rdma_qpair) send_link;
372 : :
373 : : /* IBV queue pair attributes: they are used to manage
374 : : * qp state and recover from errors.
375 : : */
376 : : enum ibv_qp_state ibv_state;
377 : :
378 : : /* Points to the a request that has fuse bits set to
379 : : * SPDK_NVME_CMD_FUSE_FIRST, when the qpair is waiting
380 : : * for the request that has SPDK_NVME_CMD_FUSE_SECOND.
381 : : */
382 : : struct spdk_nvmf_rdma_request *fused_first;
383 : :
384 : : /*
385 : : * io_channel which is used to destroy qpair when it is removed from poll group
386 : : */
387 : : struct spdk_io_channel *destruct_channel;
388 : :
389 : : /* List of ibv async events */
390 : : STAILQ_HEAD(, spdk_nvmf_rdma_ibv_event_ctx) ibv_events;
391 : :
392 : : /* Lets us know that we have received the last_wqe event. */
393 : : bool last_wqe_reached;
394 : :
395 : : /* Indicate that nvmf_rdma_close_qpair is called */
396 : : bool to_close;
397 : : };
398 : :
399 : : struct spdk_nvmf_rdma_poller_stat {
400 : : uint64_t completions;
401 : : uint64_t polls;
402 : : uint64_t idle_polls;
403 : : uint64_t requests;
404 : : uint64_t request_latency;
405 : : uint64_t pending_free_request;
406 : : uint64_t pending_rdma_read;
407 : : uint64_t pending_rdma_write;
408 : : uint64_t pending_rdma_send;
409 : : struct spdk_rdma_qp_stats qp_stats;
410 : : };
411 : :
412 : : struct spdk_nvmf_rdma_poller {
413 : : struct spdk_nvmf_rdma_device *device;
414 : : struct spdk_nvmf_rdma_poll_group *group;
415 : :
416 : : int num_cqe;
417 : : int required_num_wr;
418 : : struct ibv_cq *cq;
419 : :
420 : : /* The maximum number of I/O outstanding on the shared receive queue at one time */
421 : : uint16_t max_srq_depth;
422 : : bool need_destroy;
423 : :
424 : : /* Shared receive queue */
425 : : struct spdk_rdma_srq *srq;
426 : :
427 : : struct spdk_nvmf_rdma_resources *resources;
428 : : struct spdk_nvmf_rdma_poller_stat stat;
429 : :
430 : : spdk_poller_destroy_cb destroy_cb;
431 : : void *destroy_cb_ctx;
432 : :
433 : : RB_HEAD(qpairs_tree, spdk_nvmf_rdma_qpair) qpairs;
434 : :
435 : : STAILQ_HEAD(, spdk_nvmf_rdma_qpair) qpairs_pending_recv;
436 : :
437 : : STAILQ_HEAD(, spdk_nvmf_rdma_qpair) qpairs_pending_send;
438 : :
439 : : TAILQ_ENTRY(spdk_nvmf_rdma_poller) link;
440 : : };
441 : :
442 : : struct spdk_nvmf_rdma_poll_group_stat {
443 : : uint64_t pending_data_buffer;
444 : : };
445 : :
446 : : struct spdk_nvmf_rdma_poll_group {
447 : : struct spdk_nvmf_transport_poll_group group;
448 : : struct spdk_nvmf_rdma_poll_group_stat stat;
449 : : TAILQ_HEAD(, spdk_nvmf_rdma_poller) pollers;
450 : : TAILQ_ENTRY(spdk_nvmf_rdma_poll_group) link;
451 : : };
452 : :
453 : : struct spdk_nvmf_rdma_conn_sched {
454 : : struct spdk_nvmf_rdma_poll_group *next_admin_pg;
455 : : struct spdk_nvmf_rdma_poll_group *next_io_pg;
456 : : };
457 : :
458 : : /* Assuming rdma_cm uses just one protection domain per ibv_context. */
459 : : struct spdk_nvmf_rdma_device {
460 : : struct ibv_device_attr attr;
461 : : struct ibv_context *context;
462 : :
463 : : struct spdk_rdma_mem_map *map;
464 : : struct ibv_pd *pd;
465 : :
466 : : int num_srq;
467 : : bool need_destroy;
468 : : bool ready_to_destroy;
469 : : bool is_ready;
470 : :
471 : : TAILQ_ENTRY(spdk_nvmf_rdma_device) link;
472 : : };
473 : :
474 : : struct spdk_nvmf_rdma_port {
475 : : const struct spdk_nvme_transport_id *trid;
476 : : struct rdma_cm_id *id;
477 : : struct spdk_nvmf_rdma_device *device;
478 : : TAILQ_ENTRY(spdk_nvmf_rdma_port) link;
479 : : };
480 : :
481 : : struct rdma_transport_opts {
482 : : int num_cqe;
483 : : uint32_t max_srq_depth;
484 : : bool no_srq;
485 : : bool no_wr_batching;
486 : : int acceptor_backlog;
487 : : };
488 : :
489 : : struct spdk_nvmf_rdma_transport {
490 : : struct spdk_nvmf_transport transport;
491 : : struct rdma_transport_opts rdma_opts;
492 : :
493 : : struct spdk_nvmf_rdma_conn_sched conn_sched;
494 : :
495 : : struct rdma_event_channel *event_channel;
496 : :
497 : : struct spdk_mempool *data_wr_pool;
498 : :
499 : : struct spdk_poller *accept_poller;
500 : :
501 : : /* fields used to poll RDMA/IB events */
502 : : nfds_t npoll_fds;
503 : : struct pollfd *poll_fds;
504 : :
505 : : TAILQ_HEAD(, spdk_nvmf_rdma_device) devices;
506 : : TAILQ_HEAD(, spdk_nvmf_rdma_port) ports;
507 : : TAILQ_HEAD(, spdk_nvmf_rdma_poll_group) poll_groups;
508 : :
509 : : /* ports that are removed unexpectedly and need retry listen */
510 : : TAILQ_HEAD(, spdk_nvmf_rdma_port) retry_ports;
511 : : };
512 : :
513 : : struct poller_manage_ctx {
514 : : struct spdk_nvmf_rdma_transport *rtransport;
515 : : struct spdk_nvmf_rdma_poll_group *rgroup;
516 : : struct spdk_nvmf_rdma_poller *rpoller;
517 : : struct spdk_nvmf_rdma_device *device;
518 : :
519 : : struct spdk_thread *thread;
520 : : volatile int *inflight_op_counter;
521 : : };
522 : :
523 : : static const struct spdk_json_object_decoder rdma_transport_opts_decoder[] = {
524 : : {
525 : : "num_cqe", offsetof(struct rdma_transport_opts, num_cqe),
526 : : spdk_json_decode_int32, true
527 : : },
528 : : {
529 : : "max_srq_depth", offsetof(struct rdma_transport_opts, max_srq_depth),
530 : : spdk_json_decode_uint32, true
531 : : },
532 : : {
533 : : "no_srq", offsetof(struct rdma_transport_opts, no_srq),
534 : : spdk_json_decode_bool, true
535 : : },
536 : : {
537 : : "no_wr_batching", offsetof(struct rdma_transport_opts, no_wr_batching),
538 : : spdk_json_decode_bool, true
539 : : },
540 : : {
541 : : "acceptor_backlog", offsetof(struct rdma_transport_opts, acceptor_backlog),
542 : : spdk_json_decode_int32, true
543 : : },
544 : : };
545 : :
546 : : static int
547 : 13126871 : nvmf_rdma_qpair_compare(struct spdk_nvmf_rdma_qpair *rqpair1, struct spdk_nvmf_rdma_qpair *rqpair2)
548 : : {
549 [ + + + - : 13126871 : return rqpair1->qp_num < rqpair2->qp_num ? -1 : rqpair1->qp_num > rqpair2->qp_num;
+ - + - +
+ + - + -
+ - + - ]
550 : : }
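      : : /* The expression above is the usual three-way compare: it yields -1 when
      : :  * rqpair1->qp_num is smaller, 1 when it is larger, and 0 when both are equal,
      : :  * which is the ordering contract RB_GENERATE_STATIC expects. */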
551 : :
 552 [ + + ]: 13135957 : RB_GENERATE_STATIC(qpairs_tree, spdk_nvmf_rdma_qpair, node, nvmf_rdma_qpair_compare);
553 : :
554 : : static bool nvmf_rdma_request_process(struct spdk_nvmf_rdma_transport *rtransport,
555 : : struct spdk_nvmf_rdma_request *rdma_req);
556 : :
557 : : static void _poller_submit_sends(struct spdk_nvmf_rdma_transport *rtransport,
558 : : struct spdk_nvmf_rdma_poller *rpoller);
559 : :
560 : : static void _poller_submit_recvs(struct spdk_nvmf_rdma_transport *rtransport,
561 : : struct spdk_nvmf_rdma_poller *rpoller);
562 : :
563 : : static void _nvmf_rdma_remove_destroyed_device(void *c);
564 : :
565 : : static inline int
566 : 2052 : nvmf_rdma_check_ibv_state(enum ibv_qp_state state)
567 : : {
568 [ + + ]: 2052 : switch (state) {
569 : 2045 : case IBV_QPS_RESET:
570 : : case IBV_QPS_INIT:
571 : : case IBV_QPS_RTR:
572 : : case IBV_QPS_RTS:
573 : : case IBV_QPS_SQD:
574 : : case IBV_QPS_SQE:
575 : : case IBV_QPS_ERR:
576 : 2046 : return 0;
577 : 5 : default:
578 : 6 : return -1;
579 : : }
580 : 2 : }
581 : :
582 : : static inline enum spdk_nvme_media_error_status_code
583 : 0 : nvmf_rdma_dif_error_to_compl_status(uint8_t err_type) {
584 : 0 : enum spdk_nvme_media_error_status_code result;
585 [ # # # # ]: 0 : switch (err_type)
586 : : {
587 : 0 : case SPDK_DIF_REFTAG_ERROR:
588 : 0 : result = SPDK_NVME_SC_REFERENCE_TAG_CHECK_ERROR;
589 : 0 : break;
590 : 0 : case SPDK_DIF_APPTAG_ERROR:
591 : 0 : result = SPDK_NVME_SC_APPLICATION_TAG_CHECK_ERROR;
592 : 0 : break;
593 : 0 : case SPDK_DIF_GUARD_ERROR:
594 : 0 : result = SPDK_NVME_SC_GUARD_CHECK_ERROR;
595 : 0 : break;
596 : 0 : default:
597 [ # # ]: 0 : SPDK_UNREACHABLE();
598 : : }
599 : :
600 : 0 : return result;
601 : 0 : }
602 : :
603 : : static enum ibv_qp_state
604 : 2058 : nvmf_rdma_update_ibv_state(struct spdk_nvmf_rdma_qpair *rqpair) {
605 : 3 : enum ibv_qp_state old_state, new_state;
606 : 15 : struct ibv_qp_attr qp_attr;
607 : 15 : struct ibv_qp_init_attr init_attr;
608 : 3 : int rc;
609 : :
610 [ + - + - ]: 2058 : old_state = rqpair->ibv_state;
611 [ + - + - : 2058 : rc = ibv_query_qp(rqpair->rdma_qp->qp, &qp_attr,
+ - + - ]
612 : 3 : g_spdk_nvmf_ibv_query_mask, &init_attr);
613 : :
614 [ + + ]: 2058 : if (rc)
615 : : {
616 : 6 : SPDK_ERRLOG("Failed to get updated RDMA queue pair state!\n");
617 : 6 : return IBV_QPS_ERR + 1;
618 : : }
619 : :
620 : 2052 : new_state = qp_attr.qp_state;
621 [ + - + - ]: 2052 : rqpair->ibv_state = new_state;
622 [ + - + - ]: 2052 : qp_attr.ah_attr.port_num = qp_attr.port_num;
623 : :
624 : 2052 : rc = nvmf_rdma_check_ibv_state(new_state);
625 [ + + ]: 2052 : if (rc)
626 : : {
627 [ + - + - : 6 : SPDK_ERRLOG("QP#%d: bad state updated: %u, maybe hardware issue\n", rqpair->qpair.qid, new_state);
+ - ]
628 : : /*
 629 : :                 * IBV_QPS_UNKNOWN is not defined in libibverbs versions older than 1.1.8.
 630 : :                 * IBV_QPS_UNKNOWN is the enum element immediately after IBV_QPS_ERR, hence IBV_QPS_ERR + 1.
631 : : */
632 : 6 : return IBV_QPS_ERR + 1;
633 : : }
634 : :
635 [ + + ]: 2046 : if (old_state != new_state)
636 : : {
637 [ + + + - : 2046 : spdk_trace_record(TRACE_RDMA_QP_STATE_CHANGE, 0, 0, (uintptr_t)rqpair, new_state);
# # # # #
# # # # #
# # # # ]
638 : 1 : }
639 : 2046 : return new_state;
640 : 3 : }
641 : :
642 : : /*
643 : : * Return data_wrs to pool starting from \b data_wr
644 : : * Request's own response and data WR are excluded
645 : : */
646 : : static void
647 : 6687159 : _nvmf_rdma_request_free_data(struct spdk_nvmf_rdma_request *rdma_req,
648 : : struct ibv_send_wr *data_wr,
649 : : struct spdk_mempool *pool)
650 : : {
651 : 35 : struct spdk_nvmf_rdma_request_data *work_requests[SPDK_NVMF_MAX_SGL_ENTRIES];
652 : 7 : struct spdk_nvmf_rdma_request_data *nvmf_data;
653 : 7 : struct ibv_send_wr *next_send_wr;
654 [ + - ]: 6687159 : uint64_t req_wrid = (uint64_t)&rdma_req->data_wr;
655 : 6687159 : uint32_t num_wrs = 0;
656 : :
657 [ + + + + : 13728492 : while (data_wr && data_wr->wr_id == req_wrid) {
+ - + + ]
658 : 7041333 : nvmf_data = SPDK_CONTAINEROF(data_wr, struct spdk_nvmf_rdma_request_data, wr);
659 [ + + + - : 7041333 : memset(nvmf_data->sgl, 0, sizeof(data_wr->sg_list[0]) * data_wr->num_sge);
+ - + - ]
660 [ + - + - ]: 7041333 : data_wr->num_sge = 0;
661 [ + - + - ]: 7041333 : next_send_wr = data_wr->next;
662 [ + + + - : 7041333 : if (data_wr != &rdma_req->data.wr) {
+ + ]
663 [ + - + - ]: 366752 : data_wr->next = NULL;
664 [ + + # # ]: 366752 : assert(num_wrs < SPDK_NVMF_MAX_SGL_ENTRIES);
665 [ + - + - : 366752 : work_requests[num_wrs] = nvmf_data;
+ - ]
666 : 366752 : num_wrs++;
667 : 1 : }
668 [ + + + + : 7041333 : data_wr = (!next_send_wr || next_send_wr == &rdma_req->rsp.wr) ? NULL : next_send_wr;
+ - + + ]
669 : : }
670 : :
671 [ + + ]: 6687159 : if (num_wrs) {
672 : 83750 : spdk_mempool_put_bulk(pool, (void **) work_requests, num_wrs);
673 : 1 : }
674 : 6687159 : }
675 : :
676 : : static void
677 : 6674956 : nvmf_rdma_request_free_data(struct spdk_nvmf_rdma_request *rdma_req,
678 : : struct spdk_nvmf_rdma_transport *rtransport)
679 : : {
680 [ + - + - ]: 6674956 : rdma_req->num_outstanding_data_wr = 0;
681 : :
682 [ + - + - : 6674956 : _nvmf_rdma_request_free_data(rdma_req, rdma_req->transfer_wr, rtransport->data_wr_pool);
+ - + - ]
683 : :
684 [ + - + - : 6674956 : rdma_req->data.wr.next = NULL;
+ - + - ]
685 [ + - + - : 6674956 : rdma_req->rsp.wr.next = NULL;
+ - + - ]
686 : 6674956 : }
687 : :
688 : : static void
689 : 0 : nvmf_rdma_dump_request(struct spdk_nvmf_rdma_request *req)
690 : : {
691 [ # # # # : 0 : SPDK_ERRLOG("\t\tRequest Data From Pool: %d\n", req->req.data_from_pool);
# # # # ]
692 [ # # # # : 0 : if (req->req.cmd) {
# # # # ]
693 [ # # # # : 0 : SPDK_ERRLOG("\t\tRequest opcode: %d\n", req->req.cmd->nvmf_cmd.opcode);
# # # # #
# # # ]
694 : 0 : }
695 [ # # # # : 0 : if (req->recv) {
# # ]
696 [ # # # # : 0 : SPDK_ERRLOG("\t\tRequest recv wr_id%lu\n", req->recv->wr.wr_id);
# # # # #
# ]
697 : 0 : }
698 : 0 : }
699 : :
700 : : static void
701 : 0 : nvmf_rdma_dump_qpair_contents(struct spdk_nvmf_rdma_qpair *rqpair)
702 : : {
703 : 0 : int i;
704 : :
705 [ # # # # : 0 : SPDK_ERRLOG("Dumping contents of queue pair (QID %d)\n", rqpair->qpair.qid);
# # ]
706 [ # # # # : 0 : for (i = 0; i < rqpair->max_queue_depth; i++) {
# # # # ]
707 [ # # # # : 0 : if (rqpair->resources->reqs[i].state != RDMA_REQUEST_STATE_FREE) {
# # # # #
# # # # #
# # ]
708 [ # # # # : 0 : nvmf_rdma_dump_request(&rqpair->resources->reqs[i]);
# # # # #
# ]
709 : 0 : }
710 : 0 : }
711 : 0 : }
712 : :
713 : : static void
714 : 258 : nvmf_rdma_resources_destroy(struct spdk_nvmf_rdma_resources *resources)
715 : : {
716 [ + - + - ]: 258 : spdk_free(resources->cmds);
717 [ + - + - ]: 258 : spdk_free(resources->cpls);
718 [ + - + - ]: 258 : spdk_free(resources->bufs);
719 [ + - + - ]: 258 : spdk_free(resources->reqs);
720 [ + - + - ]: 258 : spdk_free(resources->recvs);
721 : 258 : free(resources);
722 : 258 : }
723 : :
724 : :
725 : : static struct spdk_nvmf_rdma_resources *
726 : 258 : nvmf_rdma_resources_create(struct spdk_nvmf_rdma_resource_opts *opts)
727 : : {
728 : 1 : struct spdk_nvmf_rdma_resources *resources;
729 : 1 : struct spdk_nvmf_rdma_request *rdma_req;
730 : 1 : struct spdk_nvmf_rdma_recv *rdma_recv;
731 : 258 : struct spdk_rdma_qp *qp = NULL;
732 : 258 : struct spdk_rdma_srq *srq = NULL;
733 : 258 : struct ibv_recv_wr *bad_wr = NULL;
734 : 5 : struct spdk_rdma_memory_translation translation;
735 : 1 : uint32_t i;
736 : 258 : int rc = 0;
737 : :
738 : 258 : resources = calloc(1, sizeof(struct spdk_nvmf_rdma_resources));
739 [ + + ]: 258 : if (!resources) {
740 : 0 : SPDK_ERRLOG("Unable to allocate resources for receive queue.\n");
741 : 0 : return NULL;
742 : : }
743 : :
744 [ + - + - : 258 : resources->reqs = spdk_zmalloc(opts->max_queue_depth * sizeof(*resources->reqs),
+ - + - ]
745 : : 0x1000, NULL, SPDK_ENV_LCORE_ID_ANY, SPDK_MALLOC_DMA);
746 [ + - + - : 258 : resources->recvs = spdk_zmalloc(opts->max_queue_depth * sizeof(*resources->recvs),
+ - + - ]
747 : : 0x1000, NULL, SPDK_ENV_LCORE_ID_ANY, SPDK_MALLOC_DMA);
748 [ + - + - : 258 : resources->cmds = spdk_zmalloc(opts->max_queue_depth * sizeof(*resources->cmds),
+ - + - ]
749 : : 0x1000, NULL, SPDK_ENV_LCORE_ID_ANY, SPDK_MALLOC_DMA);
750 [ + - + - : 258 : resources->cpls = spdk_zmalloc(opts->max_queue_depth * sizeof(*resources->cpls),
+ - + - ]
751 : : 0x1000, NULL, SPDK_ENV_LCORE_ID_ANY, SPDK_MALLOC_DMA);
752 : :
753 [ + - + - : 258 : if (opts->in_capsule_data_size > 0) {
- + ]
754 [ + - + - : 258 : resources->bufs = spdk_zmalloc(opts->max_queue_depth * opts->in_capsule_data_size,
+ - + - +
- + - ]
755 : : 0x1000, NULL, SPDK_ENV_LCORE_ID_ANY,
756 : : SPDK_MALLOC_DMA);
757 : 1 : }
758 : :
759 [ + - + - : 259 : if (!resources->reqs || !resources->recvs || !resources->cmds ||
+ - + - +
- + - + -
+ - + - +
- ]
760 [ + - + - : 258 : !resources->cpls || (opts->in_capsule_data_size && !resources->bufs)) {
+ + + - +
- + - + -
+ - ]
761 : 0 : SPDK_ERRLOG("Unable to allocate sufficient memory for RDMA queue.\n");
762 : 0 : goto cleanup;
763 : : }
764 : :
765 [ + + + + : 258 : SPDK_DEBUGLOG(rdma, "Command Array: %p Length: %lx\n",
+ - # # #
# # # #
# ]
766 : : resources->cmds, opts->max_queue_depth * sizeof(*resources->cmds));
767 [ + + + + : 258 : SPDK_DEBUGLOG(rdma, "Completion Array: %p Length: %lx\n",
+ - # # #
# # # #
# ]
768 : : resources->cpls, opts->max_queue_depth * sizeof(*resources->cpls));
769 [ + - + - : 258 : if (resources->bufs) {
- + ]
770 [ + + + + : 258 : SPDK_DEBUGLOG(rdma, "In Capsule Data Array: %p Length: %x\n",
+ - # # #
# # # # #
# # # # ]
771 : : resources->bufs, opts->max_queue_depth *
772 : : opts->in_capsule_data_size);
773 : 1 : }
774 : :
775 : : /* Initialize queues */
776 [ + - + - : 258 : STAILQ_INIT(&resources->incoming_queue);
+ - + - +
- + - + -
+ - ]
777 [ + - + - : 258 : STAILQ_INIT(&resources->free_queue);
+ - + - +
- + - + -
+ - ]
778 : :
779 [ + + + - : 258 : if (opts->shared) {
+ - + - ]
780 [ + - + - ]: 258 : srq = (struct spdk_rdma_srq *)opts->qp;
781 : 1 : } else {
782 [ # # # # ]: 0 : qp = (struct spdk_rdma_qp *)opts->qp;
783 : : }
784 : :
785 [ + + + - : 1033218 : for (i = 0; i < opts->max_queue_depth; i++) {
+ + ]
786 [ + - + - : 1032960 : rdma_recv = &resources->recvs[i];
+ - ]
787 [ + - + - : 1032960 : rdma_recv->qpair = opts->qpair;
+ - + - ]
788 : :
789 : : /* Set up memory to receive commands */
790 [ + - + - : 1032960 : if (resources->bufs) {
+ - ]
791 [ + - + - : 2065792 : rdma_recv->buf = (void *)((uintptr_t)resources->bufs + (i *
+ - + - ]
792 [ + - + - ]: 1032960 : opts->in_capsule_data_size));
793 : 128 : }
794 : :
795 [ + - + - : 1032960 : rdma_recv->rdma_wr.type = RDMA_WR_TYPE_RECV;
+ - ]
796 : :
797 [ + - + - : 1032960 : rdma_recv->sgl[0].addr = (uintptr_t)&resources->cmds[i];
+ - + - +
- + - + -
+ - ]
798 [ + - + - : 1032960 : rdma_recv->sgl[0].length = sizeof(resources->cmds[i]);
+ - + - +
- ]
799 [ + - + - : 1032960 : rc = spdk_rdma_get_translation(opts->map, &resources->cmds[i], sizeof(resources->cmds[i]),
+ - + - +
- ]
800 : : &translation);
801 [ + + ]: 1032960 : if (rc) {
802 : 0 : goto cleanup;
803 : : }
804 [ + - + - : 1032960 : rdma_recv->sgl[0].lkey = spdk_rdma_memory_translation_get_lkey(&translation);
+ - + - +
- ]
805 [ + - + - : 1032960 : rdma_recv->wr.num_sge = 1;
+ - ]
806 : :
807 [ + - + - : 1032960 : if (rdma_recv->buf) {
- + ]
808 [ + - + - : 1032960 : rdma_recv->sgl[1].addr = (uintptr_t)rdma_recv->buf;
+ - + - +
- + - +
- ]
809 [ + - + - : 1032960 : rdma_recv->sgl[1].length = opts->in_capsule_data_size;
+ - + - +
- + - +
- ]
810 [ + - + - : 1032960 : rc = spdk_rdma_get_translation(opts->map, rdma_recv->buf, opts->in_capsule_data_size, &translation);
+ - + - +
- + - ]
811 [ + + ]: 1032960 : if (rc) {
812 : 0 : goto cleanup;
813 : : }
814 [ + - + - : 1032960 : rdma_recv->sgl[1].lkey = spdk_rdma_memory_translation_get_lkey(&translation);
+ - + - +
- ]
815 [ + - + - : 1032960 : rdma_recv->wr.num_sge++;
+ - ]
816 : 128 : }
817 : :
818 [ + - + - : 1032960 : rdma_recv->wr.wr_id = (uintptr_t)&rdma_recv->rdma_wr;
+ - + - ]
819 [ + - + - : 1032960 : rdma_recv->wr.sg_list = rdma_recv->sgl;
+ - + - ]
820 [ + + ]: 1032960 : if (srq) {
821 [ # # ]: 1032192 : spdk_rdma_srq_queue_recv_wrs(srq, &rdma_recv->wr);
822 : 0 : } else {
823 [ + - ]: 768 : spdk_rdma_qp_queue_recv_wrs(qp, &rdma_recv->wr);
824 : : }
825 : 128 : }
826 : :
827 [ + + + - : 1033218 : for (i = 0; i < opts->max_queue_depth; i++) {
+ + ]
828 [ + - + - : 1032960 : rdma_req = &resources->reqs[i];
+ - ]
829 : :
830 [ + + + - : 1032960 : if (opts->qpair != NULL) {
+ - ]
831 [ + - + - : 768 : rdma_req->req.qpair = &opts->qpair->qpair;
+ - + - +
- + - ]
832 : 128 : } else {
833 [ # # # # : 1032192 : rdma_req->req.qpair = NULL;
# # ]
834 : : }
835 [ + - + - : 1032960 : rdma_req->req.cmd = NULL;
+ - ]
836 [ + - + - : 1032960 : rdma_req->req.iovcnt = 0;
+ - ]
837 [ + - + - : 1032960 : rdma_req->req.stripped_data = NULL;
+ - ]
838 : :
839 : : /* Set up memory to send responses */
840 [ + - + - : 1032960 : rdma_req->req.rsp = &resources->cpls[i];
+ - + - +
- + - ]
841 : :
842 [ + - + - : 1032960 : rdma_req->rsp.sgl[0].addr = (uintptr_t)&resources->cpls[i];
+ - + - +
- + - + -
+ - ]
843 [ + - + - : 1032960 : rdma_req->rsp.sgl[0].length = sizeof(resources->cpls[i]);
+ - + - +
- ]
844 [ + - + - : 1032960 : rc = spdk_rdma_get_translation(opts->map, &resources->cpls[i], sizeof(resources->cpls[i]),
+ - + - +
- ]
845 : : &translation);
846 [ + + ]: 1032960 : if (rc) {
847 : 0 : goto cleanup;
848 : : }
849 [ + - + - : 1032960 : rdma_req->rsp.sgl[0].lkey = spdk_rdma_memory_translation_get_lkey(&translation);
+ - + - +
- ]
850 : :
851 [ + - + - : 1032960 : rdma_req->rsp_wr.type = RDMA_WR_TYPE_SEND;
+ - ]
852 [ + - + - : 1032960 : rdma_req->rsp.wr.wr_id = (uintptr_t)&rdma_req->rsp_wr;
+ - + - +
- ]
853 [ + - + - : 1032960 : rdma_req->rsp.wr.next = NULL;
+ - + - ]
854 [ + - + - : 1032960 : rdma_req->rsp.wr.opcode = IBV_WR_SEND;
+ - + - ]
855 [ + - + - : 1032960 : rdma_req->rsp.wr.send_flags = IBV_SEND_SIGNALED;
+ - + - ]
856 [ + - + - : 1032960 : rdma_req->rsp.wr.sg_list = rdma_req->rsp.sgl;
+ - + - +
- + - ]
857 [ + - + - : 1032960 : rdma_req->rsp.wr.num_sge = SPDK_COUNTOF(rdma_req->rsp.sgl);
+ - + - ]
858 : :
859 : : /* Set up memory for data buffers */
860 [ + - + - : 1032960 : rdma_req->data_wr.type = RDMA_WR_TYPE_DATA;
+ - ]
861 [ + - + - : 1032960 : rdma_req->data.wr.wr_id = (uintptr_t)&rdma_req->data_wr;
+ - + - +
- ]
862 [ + - + - : 1032960 : rdma_req->data.wr.next = NULL;
+ - + - ]
863 [ + - + - : 1032960 : rdma_req->data.wr.send_flags = IBV_SEND_SIGNALED;
+ - + - ]
864 [ + - + - : 1032960 : rdma_req->data.wr.sg_list = rdma_req->data.sgl;
+ - + - +
- + - ]
865 [ + - + - : 1032960 : rdma_req->data.wr.num_sge = SPDK_COUNTOF(rdma_req->data.sgl);
+ - + - ]
866 : :
867 : : /* Initialize request state to FREE */
868 [ + - + - ]: 1032960 : rdma_req->state = RDMA_REQUEST_STATE_FREE;
869 [ + - + - : 1032960 : STAILQ_INSERT_TAIL(&resources->free_queue, rdma_req, state_link);
+ - + - +
- + - + -
+ - + - +
- + - +
- ]
870 : 128 : }
871 : :
872 [ + + ]: 258 : if (srq) {
873 : 252 : rc = spdk_rdma_srq_flush_recv_wrs(srq, &bad_wr);
874 : 0 : } else {
875 : 6 : rc = spdk_rdma_qp_flush_recv_wrs(qp, &bad_wr);
876 : : }
877 : :
878 [ - + ]: 258 : if (rc) {
879 : 0 : goto cleanup;
880 : : }
881 : :
882 : 258 : return resources;
883 : :
884 : 0 : cleanup:
885 : 0 : nvmf_rdma_resources_destroy(resources);
886 : 0 : return NULL;
887 : 1 : }
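      : : /* Usage note: when opts->shared is set, the recvs above are posted to the poller's
      : :  * shared receive queue and one resource set serves every qpair on that poller;
      : :  * otherwise they are posted to the qpair's own receive queue and the resources
      : :  * belong to that single qpair (see nvmf_rdma_qpair_initialize below). */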
888 : :
889 : : static void
890 : 2039 : nvmf_rdma_qpair_clean_ibv_events(struct spdk_nvmf_rdma_qpair *rqpair)
891 : : {
892 : 0 : struct spdk_nvmf_rdma_ibv_event_ctx *ctx, *tctx;
893 [ - + # # : 2039 : STAILQ_FOREACH_SAFE(ctx, &rqpair->ibv_events, link, tctx) {
# # # # #
# # # # #
# # ]
894 [ # # # # ]: 0 : ctx->rqpair = NULL;
895 : : /* Memory allocated for ctx is freed in nvmf_rdma_qpair_process_ibv_event */
 896 [ # # ]: 0 : 		STAILQ_REMOVE(&rqpair->ibv_events, ctx, spdk_nvmf_rdma_ibv_event_ctx, link);
897 : 0 : }
898 : 2039 : }
899 : :
900 : : static void nvmf_rdma_poller_destroy(struct spdk_nvmf_rdma_poller *poller);
901 : :
902 : : static void
903 : 2039 : nvmf_rdma_qpair_destroy(struct spdk_nvmf_rdma_qpair *rqpair)
904 : : {
905 : 0 : struct spdk_nvmf_rdma_recv *rdma_recv, *recv_tmp;
906 : 2039 : struct ibv_recv_wr *bad_recv_wr = NULL;
907 : 0 : int rc;
908 : :
909 [ + + + - : 2039 : spdk_trace_record(TRACE_RDMA_QP_DESTROY, 0, 0, (uintptr_t)rqpair);
# # # # #
# # # # #
# # # # ]
910 : :
911 [ + + # # : 2039 : if (rqpair->qd != 0) {
# # ]
912 [ # # ]: 1 : struct spdk_nvmf_qpair *qpair = &rqpair->qpair;
913 [ # # # # ]: 1 : struct spdk_nvmf_rdma_transport *rtransport = SPDK_CONTAINEROF(qpair->transport,
914 : : struct spdk_nvmf_rdma_transport, transport);
915 : 0 : struct spdk_nvmf_rdma_request *req;
916 : 1 : uint32_t i, max_req_count = 0;
917 : :
918 [ # # # # ]: 1 : SPDK_WARNLOG("Destroying qpair when queue depth is %d\n", rqpair->qd);
919 : :
920 [ - + # # : 1 : if (rqpair->srq == NULL) {
# # ]
921 : 0 : nvmf_rdma_dump_qpair_contents(rqpair);
922 [ # # # # ]: 0 : max_req_count = rqpair->max_queue_depth;
923 [ + - + - : 1 : } else if (rqpair->poller && rqpair->resources) {
# # # # #
# # # ]
924 [ # # # # : 1 : max_req_count = rqpair->poller->max_srq_depth;
# # # # ]
925 : 0 : }
926 : :
927 [ - + - + : 1 : SPDK_DEBUGLOG(rdma, "Release incomplete requests\n");
# # ]
928 [ + + ]: 4097 : for (i = 0; i < max_req_count; i++) {
929 [ # # # # : 4096 : req = &rqpair->resources->reqs[i];
# # # # #
# ]
930 [ + + + + : 4096 : if (req->req.qpair == qpair && req->state != RDMA_REQUEST_STATE_FREE) {
# # # # #
# # # #
# ]
931 : : /* nvmf_rdma_request_process checks qpair ibv and internal state
932 : : * and completes a request */
933 : 1 : nvmf_rdma_request_process(rtransport, req);
934 : 0 : }
935 : 0 : }
936 [ - + # # : 1 : assert(rqpair->qd == 0);
# # # # ]
937 : 0 : }
938 : :
939 [ + - # # : 2039 : if (rqpair->poller) {
# # ]
940 [ # # # # : 2039 : RB_REMOVE(qpairs_tree, &rqpair->poller->qpairs, rqpair);
# # ]
941 : :
942 [ + - + - : 2039 : if (rqpair->srq != NULL && rqpair->resources != NULL) {
# # # # #
# # # ]
943 : : /* Drop all received but unprocessed commands for this queue and return them to SRQ */
944 [ - + # # : 2039 : STAILQ_FOREACH_SAFE(rdma_recv, &rqpair->resources->incoming_queue, link, recv_tmp) {
# # # # #
# # # # #
# # # # #
# ]
945 [ # # # # : 0 : if (rqpair == rdma_recv->qpair) {
# # ]
 946 [ # # ]: 0 : 				STAILQ_REMOVE(&rqpair->resources->incoming_queue, rdma_recv, spdk_nvmf_rdma_recv, link);
947 [ # # # # : 0 : spdk_rdma_srq_queue_recv_wrs(rqpair->srq, &rdma_recv->wr);
# # ]
948 [ # # # # ]: 0 : rc = spdk_rdma_srq_flush_recv_wrs(rqpair->srq, &bad_recv_wr);
949 [ # # ]: 0 : if (rc) {
950 : 0 : SPDK_ERRLOG("Unable to re-post rx descriptor\n");
951 : 0 : }
952 : 0 : }
953 : 0 : }
954 : 0 : }
955 : 0 : }
956 : :
957 [ + - # # : 2039 : if (rqpair->cm_id) {
# # ]
958 [ + - # # : 2039 : if (rqpair->rdma_qp != NULL) {
# # ]
959 [ # # # # ]: 2039 : spdk_rdma_qp_destroy(rqpair->rdma_qp);
960 [ # # # # ]: 2039 : rqpair->rdma_qp = NULL;
961 : 0 : }
962 : :
963 [ + - - + : 2039 : if (rqpair->poller != NULL && rqpair->srq == NULL) {
# # # # #
# # # ]
964 [ # # # # : 0 : rqpair->poller->required_num_wr -= MAX_WR_PER_QP(rqpair->max_queue_depth);
# # # # #
# # # # #
# # # # ]
965 : 0 : }
966 : 0 : }
967 : :
968 [ - + - - : 2039 : if (rqpair->srq == NULL && rqpair->resources != NULL) {
# # # # #
# # # ]
969 [ # # # # ]: 0 : nvmf_rdma_resources_destroy(rqpair->resources);
970 : 0 : }
971 : :
972 : 2039 : nvmf_rdma_qpair_clean_ibv_events(rqpair);
973 : :
974 [ + - # # : 2039 : if (rqpair->destruct_channel) {
# # ]
975 [ # # # # ]: 2039 : spdk_put_io_channel(rqpair->destruct_channel);
976 [ # # # # ]: 2039 : rqpair->destruct_channel = NULL;
977 : 0 : }
978 : :
979 [ + - - + : 2039 : if (rqpair->poller && rqpair->poller->need_destroy && RB_EMPTY(&rqpair->poller->qpairs)) {
- + - - #
# # # # #
# # # # #
# # # # #
# # # # #
# ]
980 [ # # # # ]: 0 : nvmf_rdma_poller_destroy(rqpair->poller);
981 : 0 : }
982 : :
983 : : /* destroy cm_id last so cma device will not be freed before we destroy the cq. */
984 [ + - # # : 2039 : if (rqpair->cm_id) {
# # ]
985 [ # # # # ]: 2039 : rdma_destroy_id(rqpair->cm_id);
986 : 0 : }
987 : :
988 : 2039 : free(rqpair);
989 : 2039 : }
990 : :
991 : : static int
992 : 30 : nvmf_rdma_resize_cq(struct spdk_nvmf_rdma_qpair *rqpair, struct spdk_nvmf_rdma_device *device)
993 : : {
994 : 5 : struct spdk_nvmf_rdma_poller *rpoller;
995 : 5 : int rc, num_cqe, required_num_wr;
996 : :
997 : : /* Enlarge CQ size dynamically */
998 [ + - + - ]: 30 : rpoller = rqpair->poller;
999 [ + - + - : 30 : required_num_wr = rpoller->required_num_wr + MAX_WR_PER_QP(rqpair->max_queue_depth);
+ - + - +
- + - +
- ]
1000 [ + - + - ]: 30 : num_cqe = rpoller->num_cqe;
1001 [ + + ]: 30 : if (num_cqe < required_num_wr) {
1002 [ + - + - : 24 : num_cqe = spdk_max(num_cqe * 2, required_num_wr);
+ - ]
1003 [ + - + - : 24 : num_cqe = spdk_min(num_cqe, device->attr.max_cqe);
+ - - + +
- + - +
- ]
1004 : 4 : }
1005 : :
1006 [ + + + - : 30 : if (rpoller->num_cqe != num_cqe) {
+ + ]
1007 [ + + + - : 24 : if (device->context->device->transport_type == IBV_TRANSPORT_IWARP) {
+ - + - +
- + - +
+ ]
1008 [ + - + - ]: 6 : SPDK_ERRLOG("iWARP doesn't support CQ resize. Current capacity %u, required %u\n"
1009 : : "Using CQ of insufficient size may lead to CQ overrun\n", rpoller->num_cqe, num_cqe);
1010 : 6 : return -1;
1011 : : }
1012 [ + + + - : 18 : if (required_num_wr > device->attr.max_cqe) {
+ - + + ]
1013 [ + - + - : 6 : SPDK_ERRLOG("RDMA CQE requirement (%d) exceeds device max_cqe limitation (%d)\n",
+ - ]
1014 : : required_num_wr, device->attr.max_cqe);
1015 : 6 : return -1;
1016 : : }
1017 : :
1018 [ + + + + : 12 : SPDK_DEBUGLOG(rdma, "Resize RDMA CQ from %d to %d\n", rpoller->num_cqe, num_cqe);
+ - # # #
# ]
1019 [ + - + - ]: 12 : rc = ibv_resize_cq(rpoller->cq, num_cqe);
1020 [ + + ]: 12 : if (rc) {
1021 [ + - + - ]: 6 : SPDK_ERRLOG("RDMA CQ resize failed: errno %d: %s\n", errno, spdk_strerror(errno));
1022 : 6 : return -1;
1023 : : }
1024 : :
1025 [ + - + - ]: 6 : rpoller->num_cqe = num_cqe;
1026 : 1 : }
1027 : :
1028 [ + - + - ]: 12 : rpoller->required_num_wr = required_num_wr;
1029 : 12 : return 0;
1030 : 5 : }
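      : : /* Worked example (illustrative, non-SRQ qpairs only): with the default CQ size of
      : :  * 4096 and a new qpair of depth 128, required_num_wr grows by
      : :  * MAX_WR_PER_QP(128) = 386. Once the running requirement exceeds num_cqe, the CQ
      : :  * is resized to max(2 * num_cqe, required_num_wr), capped at device->attr.max_cqe;
      : :  * iWARP devices cannot resize, so qpair creation fails instead. */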
1031 : :
1032 : : static int
1033 : 2039 : nvmf_rdma_qpair_initialize(struct spdk_nvmf_qpair *qpair)
1034 : : {
1035 : 0 : struct spdk_nvmf_rdma_qpair *rqpair;
1036 : 0 : struct spdk_nvmf_rdma_transport *rtransport;
1037 : 0 : struct spdk_nvmf_transport *transport;
1038 : 0 : struct spdk_nvmf_rdma_resource_opts opts;
1039 : 0 : struct spdk_nvmf_rdma_device *device;
1040 : 2039 : struct spdk_rdma_qp_init_attr qp_init_attr = {};
1041 : :
1042 : 2039 : rqpair = SPDK_CONTAINEROF(qpair, struct spdk_nvmf_rdma_qpair, qpair);
1043 [ # # # # ]: 2039 : device = rqpair->device;
1044 : :
1045 : 2039 : qp_init_attr.qp_context = rqpair;
1046 [ # # # # : 2039 : qp_init_attr.pd = device->pd;
# # ]
1047 [ # # # # : 2039 : qp_init_attr.send_cq = rqpair->poller->cq;
# # # # #
# ]
1048 [ # # # # : 2039 : qp_init_attr.recv_cq = rqpair->poller->cq;
# # # # #
# ]
1049 : :
1050 [ + - # # : 2039 : if (rqpair->srq) {
# # ]
1051 [ # # # # : 2039 : qp_init_attr.srq = rqpair->srq->srq;
# # # # #
# ]
1052 : 0 : } else {
1053 [ # # # # : 0 : qp_init_attr.cap.max_recv_wr = rqpair->max_queue_depth;
# # # # ]
1054 : : }
1055 : :
1056 : : /* SEND, READ, and WRITE operations */
1057 [ # # # # : 2039 : qp_init_attr.cap.max_send_wr = (uint32_t)rqpair->max_queue_depth * 2;
# # # # ]
1058 [ # # # # : 2039 : qp_init_attr.cap.max_send_sge = spdk_min((uint32_t)device->attr.max_sge, NVMF_DEFAULT_TX_SGE);
# # # # #
# # # # #
# # # # ]
1059 [ # # # # : 2039 : qp_init_attr.cap.max_recv_sge = spdk_min((uint32_t)device->attr.max_sge, NVMF_DEFAULT_RX_SGE);
# # # # #
# # # # #
# # # # ]
1060 [ # # # # : 2039 : qp_init_attr.stats = &rqpair->poller->stat.qp_stats;
# # # # #
# ]
1061 : :
1062 [ - + - - : 2039 : if (rqpair->srq == NULL && nvmf_rdma_resize_cq(rqpair, device) < 0) {
# # # # ]
1063 : 0 : SPDK_ERRLOG("Failed to resize the completion queue. Cannot initialize qpair.\n");
1064 : 0 : goto error;
1065 : : }
1066 : :
1067 [ # # # # : 2039 : rqpair->rdma_qp = spdk_rdma_qp_create(rqpair->cm_id, &qp_init_attr);
# # # # ]
1068 [ - + # # : 2039 : if (!rqpair->rdma_qp) {
# # ]
1069 : 0 : goto error;
1070 : : }
1071 : :
1072 [ # # # # : 2039 : rqpair->qp_num = rqpair->rdma_qp->qp->qp_num;
# # # # #
# # # # #
# # ]
1073 : :
1074 [ # # # # : 2039 : rqpair->max_send_depth = spdk_min((uint32_t)(rqpair->max_queue_depth * 2),
# # # # #
# # # # #
# # # # #
# # # # #
# # ]
1075 : : qp_init_attr.cap.max_send_wr);
1076 [ # # # # : 2039 : rqpair->max_send_sge = spdk_min(NVMF_DEFAULT_TX_SGE, qp_init_attr.cap.max_send_sge);
# # # # #
# # # #
# ]
1077 [ # # # # : 2039 : rqpair->max_recv_sge = spdk_min(NVMF_DEFAULT_RX_SGE, qp_init_attr.cap.max_recv_sge);
# # # # #
# # # #
# ]
1078 [ + + + - : 2039 : spdk_trace_record(TRACE_RDMA_QP_CREATE, 0, 0, (uintptr_t)rqpair);
# # # # #
# # # # #
# # # # ]
1079 [ - + - + : 2039 : SPDK_DEBUGLOG(rdma, "New RDMA Connection: %p\n", qpair);
# # ]
1080 : :
1081 [ - + # # : 2039 : if (rqpair->poller->srq == NULL) {
# # # # #
# ]
1082 [ # # # # ]: 0 : rtransport = SPDK_CONTAINEROF(qpair->transport, struct spdk_nvmf_rdma_transport, transport);
1083 [ # # ]: 0 : transport = &rtransport->transport;
1084 : :
1085 [ # # # # : 0 : opts.qp = rqpair->rdma_qp;
# # ]
1086 [ # # # # : 0 : opts.map = device->map;
# # ]
1087 : 0 : opts.qpair = rqpair;
1088 [ # # ]: 0 : opts.shared = false;
1089 [ # # # # : 0 : opts.max_queue_depth = rqpair->max_queue_depth;
# # ]
1090 [ # # # # : 0 : opts.in_capsule_data_size = transport->opts.in_capsule_data_size;
# # # # ]
1091 : :
1092 [ # # # # ]: 0 : rqpair->resources = nvmf_rdma_resources_create(&opts);
1093 : :
1094 [ # # # # : 0 : if (!rqpair->resources) {
# # ]
1095 : 0 : SPDK_ERRLOG("Unable to allocate resources for receive queue.\n");
1096 [ # # # # ]: 0 : rdma_destroy_qp(rqpair->cm_id);
1097 : 0 : goto error;
1098 : : }
1099 : 0 : } else {
1100 [ # # # # : 2039 : rqpair->resources = rqpair->poller->resources;
# # # # #
# # # ]
1101 : : }
1102 : :
1103 [ # # # # ]: 2039 : rqpair->current_recv_depth = 0;
1104 [ # # # # : 2039 : STAILQ_INIT(&rqpair->pending_rdma_read_queue);
# # # # #
# # # # #
# # ]
1105 [ # # # # : 2039 : STAILQ_INIT(&rqpair->pending_rdma_write_queue);
# # # # #
# # # # #
# # ]
1106 [ # # # # : 2039 : STAILQ_INIT(&rqpair->pending_rdma_send_queue);
# # # # #
# # # # #
# # ]
1107 : :
1108 : 2039 : return 0;
1109 : :
1110 : 0 : error:
1111 [ # # # # ]: 0 : rdma_destroy_id(rqpair->cm_id);
1112 [ # # # # ]: 0 : rqpair->cm_id = NULL;
1113 : 0 : return -1;
1114 : 0 : }
1115 : :
 1116 : : /* Append the given recv WR to the resource struct's outstanding recvs list.
 1117 : :  * This function accepts either a single WR or the first WR in a linked list. */
1118 : : static void
1119 : 6673455 : nvmf_rdma_qpair_queue_recv_wrs(struct spdk_nvmf_rdma_qpair *rqpair, struct ibv_recv_wr *first)
1120 : : {
1121 [ + - + - : 6673455 : struct spdk_nvmf_rdma_transport *rtransport = SPDK_CONTAINEROF(rqpair->qpair.transport,
+ - ]
1122 : : struct spdk_nvmf_rdma_transport, transport);
1123 : :
1124 [ + + + - : 6673455 : if (rqpair->srq != NULL) {
+ - ]
1125 [ # # # # ]: 6673419 : spdk_rdma_srq_queue_recv_wrs(rqpair->srq, first);
1126 : 0 : } else {
1127 [ + - + - : 36 : if (spdk_rdma_qp_queue_recv_wrs(rqpair->rdma_qp, first)) {
- + ]
1128 [ + - + - : 36 : STAILQ_INSERT_TAIL(&rqpair->poller->qpairs_pending_recv, rqpair, recv_link);
+ - + - +
- + - + -
+ - + - +
- + - + -
+ - + - +
- + - ]
1129 : 6 : }
1130 : : }
1131 : :
1132 [ + + + + : 6673455 : if (rtransport->rdma_opts.no_wr_batching) {
+ - + - +
- ]
1133 [ # # # # ]: 0 : _poller_submit_recvs(rtransport, rqpair->poller);
1134 : 0 : }
1135 : 6673455 : }
1136 : :
1137 : : static int
1138 : 658865 : request_transfer_in(struct spdk_nvmf_request *req)
1139 : : {
1140 : 4 : struct spdk_nvmf_rdma_request *rdma_req;
1141 : 4 : struct spdk_nvmf_qpair *qpair;
1142 : 4 : struct spdk_nvmf_rdma_qpair *rqpair;
1143 : 4 : struct spdk_nvmf_rdma_transport *rtransport;
1144 : :
1145 [ + - + - ]: 658865 : qpair = req->qpair;
1146 : 658865 : rdma_req = SPDK_CONTAINEROF(req, struct spdk_nvmf_rdma_request, req);
1147 : 658865 : rqpair = SPDK_CONTAINEROF(qpair, struct spdk_nvmf_rdma_qpair, qpair);
1148 [ + - + - : 658865 : rtransport = SPDK_CONTAINEROF(rqpair->qpair.transport,
+ - ]
1149 : : struct spdk_nvmf_rdma_transport, transport);
1150 : :
1151 [ + + + - : 658865 : assert(req->xfer == SPDK_NVME_DATA_HOST_TO_CONTROLLER);
+ - # # ]
1152 [ + + # # ]: 658865 : assert(rdma_req != NULL);
1153 : :
1154 [ + + + - : 658865 : if (spdk_rdma_qp_queue_send_wrs(rqpair->rdma_qp, rdma_req->transfer_wr)) {
+ - + - -
+ ]
1155 [ + - + - : 65797 : STAILQ_INSERT_TAIL(&rqpair->poller->qpairs_pending_send, rqpair, send_link);
+ - + - +
- + - + -
+ - + - +
- + - + -
+ - + - +
- + - ]
1156 : 4 : }
1157 [ + + + + : 658865 : if (rtransport->rdma_opts.no_wr_batching) {
+ - + - +
- ]
1158 [ # # # # ]: 0 : _poller_submit_sends(rtransport, rqpair->poller);
1159 : 0 : }
1160 : :
1161 [ + + + - : 658865 : assert(rqpair->current_read_depth + rdma_req->num_outstanding_data_wr <= rqpair->max_read_depth);
+ - + - +
- + - + -
# # ]
1162 [ + - + - : 658865 : rqpair->current_read_depth += rdma_req->num_outstanding_data_wr;
+ - + - +
- ]
1163 [ + + + - : 658865 : assert(rqpair->current_send_depth + rdma_req->num_outstanding_data_wr <= rqpair->max_send_depth);
+ - + - +
- + - + -
# # ]
1164 [ + - + - : 658865 : rqpair->current_send_depth += rdma_req->num_outstanding_data_wr;
+ - + - ]
1165 : 658865 : return 0;
1166 : 4 : }
1167 : :
1168 : : static inline void
1169 : 12203 : nvmf_rdma_request_reset_transfer_in(struct spdk_nvmf_rdma_request *rdma_req,
1170 : : struct spdk_nvmf_rdma_transport *rtransport)
1171 : : {
1172 : : /* Put completed WRs back to pool and move transfer_wr pointer */
1173 [ # # # # : 12203 : _nvmf_rdma_request_free_data(rdma_req, rdma_req->transfer_wr, rtransport->data_wr_pool);
# # # # ]
1174 [ # # # # : 12203 : rdma_req->transfer_wr = rdma_req->remaining_tranfer_in_wrs;
# # # # ]
1175 [ # # # # ]: 12203 : rdma_req->remaining_tranfer_in_wrs = NULL;
1176 [ # # # # : 12203 : rdma_req->num_outstanding_data_wr = rdma_req->num_remaining_data_wr;
# # # # ]
1177 [ # # # # ]: 12203 : rdma_req->num_remaining_data_wr = 0;
1178 : 12203 : }
1179 : :
1180 : : static inline int
1181 : 12203 : request_prepare_transfer_in_part(struct spdk_nvmf_request *req, uint32_t num_reads_available)
1182 : : {
1183 : 0 : struct spdk_nvmf_rdma_request *rdma_req;
1184 : 0 : struct ibv_send_wr *wr;
1185 : 0 : uint32_t i;
1186 : :
1187 : 12203 : rdma_req = SPDK_CONTAINEROF(req, struct spdk_nvmf_rdma_request, req);
1188 : :
1189 [ - + # # : 12203 : assert(req->xfer == SPDK_NVME_DATA_HOST_TO_CONTROLLER);
# # # # ]
1190 [ - + # # ]: 12203 : assert(rdma_req != NULL);
1191 [ - + # # ]: 12203 : assert(num_reads_available > 0);
1192 [ - + # # : 12203 : assert(rdma_req->num_outstanding_data_wr > num_reads_available);
# # # # ]
1193 [ # # # # ]: 12203 : wr = rdma_req->transfer_wr;
1194 : :
1195 [ + + ]: 33425 : for (i = 0; i < num_reads_available - 1; i++) {
1196 [ # # # # ]: 21222 : wr = wr->next;
1197 : 0 : }
1198 : :
1199 [ # # # # : 12203 : rdma_req->remaining_tranfer_in_wrs = wr->next;
# # # # ]
1200 [ # # # # : 12203 : rdma_req->num_remaining_data_wr = rdma_req->num_outstanding_data_wr - num_reads_available;
# # # # ]
1201 [ # # # # ]: 12203 : rdma_req->num_outstanding_data_wr = num_reads_available;
1202 : : /* Break chain of WRs to send only part. Once this portion completes, we continue sending RDMA_READs */
1203 [ # # # # ]: 12203 : wr->next = NULL;
1204 : :
1205 : 12203 : return 0;
1206 : 0 : }
1207 : :
1208 : : static int
1209 : 6673455 : request_transfer_out(struct spdk_nvmf_request *req, int *data_posted)
1210 : : {
1211 : 6673455 : int num_outstanding_data_wr = 0;
1212 : 6 : struct spdk_nvmf_rdma_request *rdma_req;
1213 : 6 : struct spdk_nvmf_qpair *qpair;
1214 : 6 : struct spdk_nvmf_rdma_qpair *rqpair;
1215 : 6 : struct spdk_nvme_cpl *rsp;
1216 : 6673455 : struct ibv_send_wr *first = NULL;
1217 : 6 : struct spdk_nvmf_rdma_transport *rtransport;
1218 : :
1219 [ + - ]: 6673455 : *data_posted = 0;
1220 [ + - + - ]: 6673455 : qpair = req->qpair;
1221 [ + - + - : 6673455 : rsp = &req->rsp->nvme_cpl;
+ - ]
1222 : 6673455 : rdma_req = SPDK_CONTAINEROF(req, struct spdk_nvmf_rdma_request, req);
1223 : 6673455 : rqpair = SPDK_CONTAINEROF(qpair, struct spdk_nvmf_rdma_qpair, qpair);
1224 [ + - + - : 6673455 : rtransport = SPDK_CONTAINEROF(rqpair->qpair.transport,
+ - ]
1225 : : struct spdk_nvmf_rdma_transport, transport);
1226 : :
1227 : : /* Advance our sq_head pointer */
1228 [ + + + - : 6673455 : if (qpair->sq_head == qpair->sq_head_max) {
+ - + - +
- ]
1229 [ + - + - ]: 55053 : qpair->sq_head = 0;
1230 : 6 : } else {
1231 [ # # ]: 6618402 : qpair->sq_head++;
1232 : : }
1233 [ + - + - : 6673455 : rsp->sqhd = qpair->sq_head;
+ - + - ]
1234 : :
1235 : : /* queue the capsule for the recv buffer */
1236 [ + + + - : 6673455 : assert(rdma_req->recv != NULL);
+ - # # ]
1237 : :
1238 [ + - + - : 6673455 : nvmf_rdma_qpair_queue_recv_wrs(rqpair, &rdma_req->recv->wr);
+ - ]
1239 : :
1240 [ + - + - ]: 6673455 : rdma_req->recv = NULL;
1241 [ + + + - : 6673455 : assert(rqpair->current_recv_depth > 0);
+ - # # ]
1242 [ + - ]: 6673455 : rqpair->current_recv_depth--;
1243 : :
1244 : : /* Build the response which consists of optional
1245 : : * RDMA WRITEs to transfer data, plus an RDMA SEND
1246 : : * containing the response.
1247 : : */
1248 [ + - + - ]: 6673455 : first = &rdma_req->rsp.wr;
1249 : :
1250 [ + + + - : 6673455 : if (rsp->status.sc != SPDK_NVME_SC_SUCCESS) {
+ - + + ]
1251 : : /* On failure, data was not read from the controller. So clear the
1252 : : * number of outstanding data WRs to zero.
1253 : : */
1254 [ + - + - ]: 1318651 : rdma_req->num_outstanding_data_wr = 0;
1255 [ + + + - : 5354805 : } else if (req->xfer == SPDK_NVME_DATA_CONTROLLER_TO_HOST) {
+ + ]
1256 [ + - + - ]: 3334353 : first = rdma_req->transfer_wr;
1257 [ + - ]: 3334353 : *data_posted = 1;
1258 [ + - + - ]: 3334353 : num_outstanding_data_wr = rdma_req->num_outstanding_data_wr;
1259 : 1 : }
1260 [ + + + - : 6673455 : if (spdk_rdma_qp_queue_send_wrs(rqpair->rdma_qp, first)) {
- + ]
1261 [ + - + - : 1081801 : STAILQ_INSERT_TAIL(&rqpair->poller->qpairs_pending_send, rqpair, send_link);
+ - + - +
- + - + -
+ - + - +
- + - + -
+ - + - +
- + - ]
1262 : 6 : }
1263 [ + + + + : 6673455 : if (rtransport->rdma_opts.no_wr_batching) {
+ - + - +
- ]
1264 [ # # # # ]: 0 : _poller_submit_sends(rtransport, rqpair->poller);
1265 : 0 : }
1266 : :
1267 : : /* +1 for the rsp wr */
1268 [ + + + - : 6673455 : assert(rqpair->current_send_depth + num_outstanding_data_wr + 1 <= rqpair->max_send_depth);
+ - + - +
- # # ]
1269 [ + - + - : 6673455 : rqpair->current_send_depth += num_outstanding_data_wr + 1;
+ - ]
1270 : :
1271 : 6673455 : return 0;
1272 : 6 : }
1273 : :
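/*
 * A minimal sketch (hypothetical helper, not SPDK code) of the bookkeeping that
 * request_transfer_out() performs above: the submission queue head wraps at
 * sq_head_max, and every response consumes one SEND WR plus, for
 * controller-to-host transfers, the RDMA WRITE WRs that carry the data, so the
 * total has to fit under the negotiated max_send_depth (hence the
 * "+1 for the rsp wr" assert).
 */
#include <stdint.h>

static int
toy_account_transfer_out(uint16_t *sq_head, uint16_t sq_head_max,
			 uint16_t *current_send_depth, uint16_t max_send_depth,
			 uint16_t num_data_wr)
{
	/* Advance the SQ head reported back to the host, wrapping at the max. */
	*sq_head = (*sq_head == sq_head_max) ? 0 : (uint16_t)(*sq_head + 1);

	if (*current_send_depth + num_data_wr + 1 > max_send_depth) {
		return -1;	/* would overflow the send queue; the caller must wait */
	}
	*current_send_depth += num_data_wr + 1;	/* +1 for the response SEND */
	return 0;
}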
1274 : : static int
1275 : 2039 : nvmf_rdma_event_accept(struct rdma_cm_id *id, struct spdk_nvmf_rdma_qpair *rqpair)
1276 : : {
1277 : 0 : struct spdk_nvmf_rdma_accept_private_data accept_data;
1278 : 2039 : struct rdma_conn_param ctrlr_event_data = {};
1279 : 0 : int rc;
1280 : :
1281 : 2039 : accept_data.recfmt = 0;
1282 [ # # # # ]: 2039 : accept_data.crqsize = rqpair->max_queue_depth;
1283 : :
1284 : 2039 : ctrlr_event_data.private_data = &accept_data;
1285 [ # # ]: 2039 : ctrlr_event_data.private_data_len = sizeof(accept_data);
1286 [ + - # # : 2039 : if (id->ps == RDMA_PS_TCP) {
# # ]
1287 : 2039 : ctrlr_event_data.responder_resources = 0; /* We accept 0 reads from the host */
1288 [ # # # # : 2039 : ctrlr_event_data.initiator_depth = rqpair->max_read_depth;
# # ]
1289 : 0 : }
1290 : :
1291 : : /* Configure infinite retries for the initiator side qpair.
1292 : : * We need to pass this value to the initiator to prevent the
1293 : : * initiator side NIC from completing SEND requests back to the
1294 : : * initiator with status rnr_retry_count_exceeded. */
1295 : 2039 : ctrlr_event_data.rnr_retry_count = 0x7;
1296 : :
1297 : : /* When the qpair is created without using the rdma cm API, additional
1298 : : * information must be provided to the initiator in the connection response:
1299 : : * whether the qpair is using an SRQ, and its qp_num.
1300 : : * The fields below are ignored by rdma cm if the qpair has been
1301 : : * created using the rdma cm API. */
1302 [ # # # # : 2039 : ctrlr_event_data.srq = rqpair->srq ? 1 : 0;
# # ]
1303 [ # # # # : 2039 : ctrlr_event_data.qp_num = rqpair->qp_num;
# # ]
1304 : :
1305 [ # # # # ]: 2039 : rc = spdk_rdma_qp_accept(rqpair->rdma_qp, &ctrlr_event_data);
1306 [ - + ]: 2039 : if (rc) {
1307 [ # # ]: 0 : SPDK_ERRLOG("Error %d on spdk_rdma_qp_accept\n", errno);
1308 : 0 : } else {
1309 [ - + - + : 2039 : SPDK_DEBUGLOG(rdma, "Sent back the accept\n");
# # ]
1310 : : }
1311 : :
1312 : 2039 : return rc;
1313 : 0 : }
1314 : :
1315 : : static void
1316 : 0 : nvmf_rdma_event_reject(struct rdma_cm_id *id, enum spdk_nvmf_rdma_transport_error error)
1317 : : {
1318 : 0 : struct spdk_nvmf_rdma_reject_private_data rej_data;
1319 : :
1320 : 0 : rej_data.recfmt = 0;
1321 : 0 : rej_data.sts = error;
1322 : :
1323 : 0 : rdma_reject(id, &rej_data, sizeof(rej_data));
1324 : 0 : }
1325 : :
1326 : : static int
1327 : 2039 : nvmf_rdma_connect(struct spdk_nvmf_transport *transport, struct rdma_cm_event *event)
1328 : : {
1329 : 0 : struct spdk_nvmf_rdma_transport *rtransport;
1330 : 2039 : struct spdk_nvmf_rdma_qpair *rqpair = NULL;
1331 : 0 : struct spdk_nvmf_rdma_port *port;
1332 : 2039 : struct rdma_conn_param *rdma_param = NULL;
1333 : 2039 : const struct spdk_nvmf_rdma_request_private_data *private_data = NULL;
1334 : 0 : uint16_t max_queue_depth;
1335 : 0 : uint16_t max_read_depth;
1336 : :
1337 : 2039 : rtransport = SPDK_CONTAINEROF(transport, struct spdk_nvmf_rdma_transport, transport);
1338 : :
1339 [ - + # # : 2039 : assert(event->id != NULL); /* Impossible. Can't even reject the connection. */
# # # # ]
1340 [ - + # # : 2039 : assert(event->id->verbs != NULL); /* Impossible. No way to handle this. */
# # # # #
# # # ]
1341 : :
1342 [ # # # # ]: 2039 : rdma_param = &event->param.conn;
1343 [ + - # # : 2039 : if (rdma_param->private_data == NULL ||
# # # # ]
1344 [ - + # # ]: 2039 : rdma_param->private_data_len < sizeof(struct spdk_nvmf_rdma_request_private_data)) {
1345 : 0 : SPDK_ERRLOG("connect request: no private data provided\n");
1346 [ # # # # ]: 0 : nvmf_rdma_event_reject(event->id, SPDK_NVMF_RDMA_ERROR_INVALID_PRIVATE_DATA_LENGTH);
1347 : 0 : return -1;
1348 : : }
1349 : :
1350 [ # # # # ]: 2039 : private_data = rdma_param->private_data;
1351 [ - + # # : 2039 : if (private_data->recfmt != 0) {
# # ]
1352 : 0 : SPDK_ERRLOG("Received RDMA private data with RECFMT != 0\n");
1353 [ # # # # ]: 0 : nvmf_rdma_event_reject(event->id, SPDK_NVMF_RDMA_ERROR_INVALID_RECFMT);
1354 : 0 : return -1;
1355 : : }
1356 : :
1357 [ - + - + : 2039 : SPDK_DEBUGLOG(rdma, "Connect Recv on fabric intf name %s, dev_name %s\n",
# # # # #
# # # # #
# # # # #
# # # # #
# # # # #
# # # #
# ]
1358 : : event->id->verbs->device->name, event->id->verbs->device->dev_name);
1359 : :
1360 [ # # # # : 2039 : port = event->listen_id->context;
# # # # ]
1361 [ - + - + : 2039 : SPDK_DEBUGLOG(rdma, "Listen Id was %p with verbs %p. ListenAddr: %p\n",
# # # # #
# # # # #
# # # # ]
1362 : : event->listen_id, event->listen_id->verbs, port);
1363 : :
1364 : : /* Figure out the supported queue depth. This is a multi-step process
1365 : : * that takes into account hardware maximums, host provided values,
1366 : : * and our target's internal memory limits */
1367 : :
1368 [ - + - + : 2039 : SPDK_DEBUGLOG(rdma, "Calculating Queue Depth\n");
# # ]
1369 : :
1370 : : /* Start with the maximum queue depth allowed by the target */
1371 [ # # # # : 2039 : max_queue_depth = rtransport->transport.opts.max_queue_depth;
# # # # ]
1372 [ # # # # : 2039 : max_read_depth = rtransport->transport.opts.max_queue_depth;
# # # # ]
1373 [ - + - + : 2039 : SPDK_DEBUGLOG(rdma, "Target Max Queue Depth: %d\n",
# # # # #
# # # #
# ]
1374 : : rtransport->transport.opts.max_queue_depth);
1375 : :
1376 : : /* Next check the local NIC's hardware limitations */
1377 [ - + - + : 2039 : SPDK_DEBUGLOG(rdma,
# # # # #
# # # # #
# # # # #
# # # # #
# # ]
1378 : : "Local NIC Max Send/Recv Queue Depth: %d Max Read/Write Queue Depth: %d\n",
1379 : : port->device->attr.max_qp_wr, port->device->attr.max_qp_rd_atom);
1380 [ # # # # : 2039 : max_queue_depth = spdk_min(max_queue_depth, port->device->attr.max_qp_wr);
# # # # #
# # # # #
# # # # #
# # # ]
1381 [ # # # # : 2039 : max_read_depth = spdk_min(max_read_depth, port->device->attr.max_qp_init_rd_atom);
# # # # #
# # # # #
# # # # #
# # # ]
1382 : :
1383 : : /* Next check the remote NIC's hardware limitations */
1384 [ - + - + : 2039 : SPDK_DEBUGLOG(rdma,
# # # # #
# # # #
# ]
1385 : : "Host (Initiator) NIC Max Incoming RDMA R/W operations: %d Max Outgoing RDMA R/W operations: %d\n",
1386 : : rdma_param->initiator_depth, rdma_param->responder_resources);
1387 : : /* from man3 rdma_get_cm_event
1388 : : * responder_resources - Specifies the number of responder resources that is requested by the recipient.
1389 : : * The responder_resources field must match the initiator depth specified by the remote node when running
1390 : : * the rdma_connect and rdma_accept functions. */
1391 [ - + # # : 2039 : if (rdma_param->responder_resources != 0) {
# # ]
1392 [ # # # # : 0 : if (private_data->qid) {
# # ]
1393 [ # # # # : 0 : SPDK_DEBUGLOG(rdma, "Host (Initiator) is not allowed to use RDMA operations,"
# # # # #
# ]
1394 : : " responder_resources must be 0 but set to %u\n",
1395 : : rdma_param->responder_resources);
1396 : 0 : } else {
1397 [ # # # # ]: 0 : SPDK_WARNLOG("Host (Initiator) is not allowed to use RDMA operations,"
1398 : : " responder_resources must be 0 but set to %u\n",
1399 : : rdma_param->responder_resources);
1400 : : }
1401 : 0 : }
1402 : : /* from man3 rdma_get_cm_event
1403 : : * initiator_depth - Specifies the maximum number of outstanding RDMA read operations that the recipient holds.
1404 : : * The initiator_depth field must match the responder resources specified by the remote node when running
1405 : : * the rdma_connect and rdma_accept functions. */
1406 [ - + # # : 2039 : if (rdma_param->initiator_depth == 0) {
# # ]
1407 : 0 : SPDK_ERRLOG("Host (Initiator) doesn't support RDMA_READ or atomic operations\n");
1408 [ # # # # ]: 0 : nvmf_rdma_event_reject(event->id, SPDK_NVMF_RDMA_ERROR_INVALID_IRD);
1409 : 0 : return -1;
1410 : : }
1411 [ # # # # : 2039 : max_read_depth = spdk_min(max_read_depth, rdma_param->initiator_depth);
# # # # #
# ]
1412 : :
1413 [ - + - + : 2039 : SPDK_DEBUGLOG(rdma, "Host Receive Queue Size: %d\n", private_data->hrqsize);
# # # # #
# ]
1414 [ - + - + : 2039 : SPDK_DEBUGLOG(rdma, "Host Send Queue Size: %d\n", private_data->hsqsize);
# # # # #
# ]
1415 [ # # # # : 2039 : max_queue_depth = spdk_min(max_queue_depth, private_data->hrqsize);
# # # # #
# ]
1416 [ + + # # : 2039 : max_queue_depth = spdk_min(max_queue_depth, private_data->hsqsize + 1);
# # # # #
# # # #
# ]
1417 : :
1418 [ - + - + : 2039 : SPDK_DEBUGLOG(rdma, "Final Negotiated Queue Depth: %d R/W Depth: %d\n",
# # ]
1419 : : max_queue_depth, max_read_depth);
1420 : :
1421 : 2039 : rqpair = calloc(1, sizeof(struct spdk_nvmf_rdma_qpair));
1422 [ - + ]: 2039 : if (rqpair == NULL) {
1423 : 0 : SPDK_ERRLOG("Could not allocate new connection.\n");
1424 [ # # # # ]: 0 : nvmf_rdma_event_reject(event->id, SPDK_NVMF_RDMA_ERROR_NO_RESOURCES);
1425 : 0 : return -1;
1426 : : }
1427 : :
1428 [ # # # # : 2039 : rqpair->device = port->device;
# # # # ]
1429 [ # # # # ]: 2039 : rqpair->max_queue_depth = max_queue_depth;
1430 [ # # # # ]: 2039 : rqpair->max_read_depth = max_read_depth;
1431 [ # # # # : 2039 : rqpair->cm_id = event->id;
# # # # ]
1432 [ # # # # : 2039 : rqpair->listen_id = event->listen_id;
# # # # ]
1433 [ # # # # : 2039 : rqpair->qpair.transport = transport;
# # ]
1434 [ # # # # : 2039 : STAILQ_INIT(&rqpair->ibv_events);
# # # # #
# # # # #
# # ]
1435 : : /* Use qid from the private data to determine the qpair type;
1436 : : * qid will be set to the appropriate value when the controller is created. */
1437 [ # # # # : 2039 : rqpair->qpair.qid = private_data->qid;
# # # # #
# ]
1438 : :
1439 [ # # # # : 2039 : event->id->context = &rqpair->qpair;
# # # # #
# ]
1440 : :
1441 [ # # # # : 2039 : spdk_nvmf_tgt_new_qpair(transport->tgt, &rqpair->qpair);
# # ]
1442 : :
1443 : 2039 : return 0;
1444 : 0 : }
1445 : :
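/*
 * A compact sketch (hypothetical function, made-up example values) of the
 * queue-depth negotiation above. With a target limit of 128, a local NIC
 * max_qp_wr of 4096, and a host advertising hrqsize = 128 and hsqsize = 64,
 * the negotiated depth is min(128, 4096, 128, 64 + 1) = 65.
 */
#include <stdint.h>

static uint16_t
toy_negotiate_queue_depth(uint16_t target_max, uint32_t nic_max_qp_wr,
			  uint16_t hrqsize, uint16_t hsqsize)
{
	uint32_t depth = target_max;

	if (depth > nic_max_qp_wr) {
		depth = nic_max_qp_wr;			/* local NIC limit */
	}
	if (depth > hrqsize) {
		depth = hrqsize;			/* host receive queue size */
	}
	if (depth > (uint32_t)hsqsize + 1) {
		depth = (uint32_t)hsqsize + 1;		/* host send queue size + 1 */
	}
	return (uint16_t)depth;
}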
1446 : : static inline void
1447 : 5982046 : nvmf_rdma_setup_wr(struct ibv_send_wr *wr, struct ibv_send_wr *next,
1448 : : enum spdk_nvme_data_transfer xfer)
1449 : : {
1450 [ + + ]: 5982046 : if (xfer == SPDK_NVME_DATA_CONTROLLER_TO_HOST) {
1451 [ + - + - ]: 4355753 : wr->opcode = IBV_WR_RDMA_WRITE;
1452 [ + - + - ]: 4355753 : wr->send_flags = 0;
1453 [ + - + - ]: 4355753 : wr->next = next;
1454 [ + - ]: 1626317 : } else if (xfer == SPDK_NVME_DATA_HOST_TO_CONTROLLER) {
1455 [ + - + - ]: 1626293 : wr->opcode = IBV_WR_RDMA_READ;
1456 [ + - + - ]: 1626293 : wr->send_flags = IBV_SEND_SIGNALED;
1457 [ + - + - ]: 1626293 : wr->next = NULL;
1458 : 4 : } else {
1459 [ # # ]: 0 : assert(0);
1460 : : }
1461 : 5982046 : }
1462 : :
1463 : : static int
1464 : 78086 : nvmf_request_alloc_wrs(struct spdk_nvmf_rdma_transport *rtransport,
1465 : : struct spdk_nvmf_rdma_request *rdma_req,
1466 : : uint32_t num_sgl_descriptors)
1467 : : {
1468 : 30 : struct spdk_nvmf_rdma_request_data *work_requests[SPDK_NVMF_MAX_SGL_ENTRIES];
1469 : 6 : struct spdk_nvmf_rdma_request_data *current_data_wr;
1470 : 6 : uint32_t i;
1471 : :
1472 [ - + ]: 78086 : if (num_sgl_descriptors > SPDK_NVMF_MAX_SGL_ENTRIES) {
1473 : 0 : SPDK_ERRLOG("Requested too much entries (%u), the limit is %u\n",
1474 : : num_sgl_descriptors, SPDK_NVMF_MAX_SGL_ENTRIES);
1475 : 0 : return -EINVAL;
1476 : : }
1477 : :
1478 [ + + + - : 78086 : if (spdk_mempool_get_bulk(rtransport->data_wr_pool, (void **)work_requests, num_sgl_descriptors)) {
+ - ]
1479 : 0 : return -ENOMEM;
1480 : : }
1481 : :
1482 [ + - ]: 78086 : current_data_wr = &rdma_req->data;
1483 : :
1484 [ + + ]: 444868 : for (i = 0; i < num_sgl_descriptors; i++) {
1485 [ + - + - : 366782 : nvmf_rdma_setup_wr(¤t_data_wr->wr, &work_requests[i]->wr, rdma_req->req.xfer);
+ - + - +
- + - + -
+ - ]
1486 [ + - + - : 366782 : current_data_wr->wr.next = &work_requests[i]->wr;
+ - + - +
- + - +
- ]
1487 [ + - + - : 366782 : current_data_wr = work_requests[i];
+ - ]
1488 [ + - + - : 366782 : current_data_wr->wr.sg_list = current_data_wr->sgl;
+ - + - ]
1489 [ + - + - : 366782 : current_data_wr->wr.wr_id = rdma_req->data.wr.wr_id;
+ - + - +
- + - +
- ]
1490 : 6 : }
1491 : :
1492 [ + - + - : 78086 : nvmf_rdma_setup_wr(¤t_data_wr->wr, &rdma_req->rsp.wr, rdma_req->req.xfer);
+ - + - +
- + - ]
1493 : :
1494 : 78086 : return 0;
1495 : 6 : }
1496 : :
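/*
 * Illustrative note on the chain that nvmf_request_alloc_wrs() builds above:
 * with num_sgl_descriptors = 3 the send chain becomes
 * data.wr -> extra[0].wr -> extra[1].wr -> extra[2].wr, and the final
 * nvmf_rdma_setup_wr() call either appends rsp.wr (CONTROLLER_TO_HOST: RDMA
 * WRITEs followed by the SEND) or terminates the chain with NULL
 * (HOST_TO_CONTROLLER: signaled RDMA READs, the response is posted separately
 * once the reads complete).
 */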
1497 : : static inline void
1498 : 5537178 : nvmf_rdma_setup_request(struct spdk_nvmf_rdma_request *rdma_req)
1499 : : {
1500 [ + - + - ]: 5537178 : struct ibv_send_wr *wr = &rdma_req->data.wr;
1501 [ + - + - : 5537178 : struct spdk_nvme_sgl_descriptor *sgl = &rdma_req->req.cmd->nvme_cmd.dptr.sgl1;
+ - + - +
- + - ]
1502 : :
1503 [ + - + - : 5537178 : wr->wr.rdma.rkey = sgl->keyed.key;
+ - + - +
- + - +
- ]
1504 [ + - + - : 5537178 : wr->wr.rdma.remote_addr = sgl->address;
+ - + - +
- + - ]
1505 [ + - + - : 5537178 : nvmf_rdma_setup_wr(wr, &rdma_req->rsp.wr, rdma_req->req.xfer);
+ - + - +
- ]
1506 : 5537178 : }
1507 : :
1508 : : static inline void
1509 : 6 : nvmf_rdma_update_remote_addr(struct spdk_nvmf_rdma_request *rdma_req, uint32_t num_wrs)
1510 : : {
1511 [ + - + - ]: 6 : struct ibv_send_wr *wr = &rdma_req->data.wr;
1512 [ + - + - : 6 : struct spdk_nvme_sgl_descriptor *sgl = &rdma_req->req.cmd->nvme_cmd.dptr.sgl1;
+ - + - +
- + - ]
1513 : 1 : uint32_t i;
1514 : 1 : int j;
1515 : 6 : uint64_t remote_addr_offset = 0;
1516 : :
1517 [ + + ]: 18 : for (i = 0; i < num_wrs; ++i) {
1518 [ + - + - : 12 : wr->wr.rdma.rkey = sgl->keyed.key;
+ - + - +
- + - +
- ]
1519 [ + - + - : 12 : wr->wr.rdma.remote_addr = sgl->address + remote_addr_offset;
+ - + - +
- + - ]
1520 [ + + + - : 114 : for (j = 0; j < wr->num_sge; ++j) {
+ + + - ]
1521 [ + - + - : 102 : remote_addr_offset += wr->sg_list[j].length;
+ - + - +
- ]
1522 : 17 : }
1523 [ + - + - ]: 12 : wr = wr->next;
1524 : 2 : }
1525 : 6 : }
1526 : :
1527 : : static int
1528 : 4535998 : nvmf_rdma_fill_wr_sgl(struct spdk_nvmf_rdma_poll_group *rgroup,
1529 : : struct spdk_nvmf_rdma_device *device,
1530 : : struct spdk_nvmf_rdma_request *rdma_req,
1531 : : struct ibv_send_wr *wr,
1532 : : uint32_t total_length)
1533 : : {
1534 : 75 : struct spdk_rdma_memory_translation mem_translation;
1535 : 15 : struct ibv_sge *sg_ele;
1536 : 15 : struct iovec *iov;
1537 : 15 : uint32_t lkey, remaining;
1538 : 15 : int rc;
1539 : :
1540 [ + - + - ]: 4535998 : wr->num_sge = 0;
1541 : :
1542 [ + + + - : 21930295 : while (total_length && wr->num_sge < SPDK_NVMF_MAX_SGL_ENTRIES) {
+ - + + ]
1543 [ + - + - : 17394297 : iov = &rdma_req->req.iov[rdma_req->iovpos];
+ - + - +
- + - ]
1544 [ + - + - : 17394297 : rc = spdk_rdma_get_translation(device->map, iov->iov_base, iov->iov_len, &mem_translation);
+ - + - +
- + - ]
1545 [ + + ]: 17394297 : if (spdk_unlikely(rc)) {
1546 : 0 : return rc;
1547 : : }
1548 : :
1549 : 17394297 : lkey = spdk_rdma_memory_translation_get_lkey(&mem_translation);
1550 [ + - + - : 17394297 : sg_ele = &wr->sg_list[wr->num_sge];
+ - + - +
- ]
1551 [ + - + - : 17394297 : remaining = spdk_min((uint32_t)iov->iov_len - rdma_req->offset, total_length);
+ - + - +
+ + - + -
+ - + - ]
1552 : :
1553 [ + - + - ]: 17394297 : sg_ele->lkey = lkey;
1554 [ + - + - : 17394297 : sg_ele->addr = (uintptr_t)iov->iov_base + rdma_req->offset;
+ - + - +
- + - ]
1555 [ + - + - ]: 17394297 : sg_ele->length = remaining;
1556 [ + + + + : 17394297 : SPDK_DEBUGLOG(rdma, "sge[%d] %p addr 0x%"PRIx64", len %u\n", wr->num_sge, sg_ele, sg_ele->addr,
+ - # # #
# # # # #
# # # # ]
1557 : : sg_ele->length);
1558 [ + - + - : 17394297 : rdma_req->offset += sg_ele->length;
+ - + - ]
1559 [ + - + - ]: 17394297 : total_length -= sg_ele->length;
1560 [ + - + - ]: 17394297 : wr->num_sge++;
1561 : :
1562 [ + + + - : 17394297 : if (rdma_req->offset == iov->iov_len) {
+ - + - +
+ ]
1563 [ + - + - ]: 17304413 : rdma_req->offset = 0;
1564 [ + - ]: 17304413 : rdma_req->iovpos++;
1565 : 57 : }
1566 : : }
1567 : :
1568 [ - + ]: 4535998 : if (total_length) {
1569 : 0 : SPDK_ERRLOG("Not enough SG entries to hold data buffer\n");
1570 : 0 : return -EINVAL;
1571 : : }
1572 : :
1573 : 4535998 : return 0;
1574 : 15 : }
1575 : :
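/*
 * Worked example for the SGE fill loop above (illustrative numbers only): a
 * 20 KiB transfer spread over three 8 KiB io_unit buffers produces one SGE per
 * iovec - 8 KiB, 8 KiB and 4 KiB - so wr->num_sge ends up as 3, iovpos
 * advances past each fully consumed iovec, and total_length reaching zero is
 * what ends the loop; a non-zero remainder means the data did not fit into
 * SPDK_NVMF_MAX_SGL_ENTRIES and the request fails with -EINVAL.
 */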
1576 : : static int
1577 : 60 : nvmf_rdma_fill_wr_sgl_with_dif(struct spdk_nvmf_rdma_poll_group *rgroup,
1578 : : struct spdk_nvmf_rdma_device *device,
1579 : : struct spdk_nvmf_rdma_request *rdma_req,
1580 : : struct ibv_send_wr *wr,
1581 : : uint32_t total_length,
1582 : : uint32_t num_extra_wrs)
1583 : : {
1584 : 50 : struct spdk_rdma_memory_translation mem_translation;
1585 [ + - + - : 60 : struct spdk_dif_ctx *dif_ctx = &rdma_req->req.dif.dif_ctx;
+ - ]
1586 : 10 : struct ibv_sge *sg_ele;
1587 : 10 : struct iovec *iov;
1588 : 10 : struct iovec *rdma_iov;
1589 : 10 : uint32_t lkey, remaining;
1590 : 10 : uint32_t remaining_data_block, data_block_size, md_size;
1591 : 10 : uint32_t sge_len;
1592 : 10 : int rc;
1593 : :
1594 [ + - + - : 60 : data_block_size = dif_ctx->block_size - dif_ctx->md_size;
+ - + - ]
1595 : :
1596 [ + + + - : 60 : if (spdk_likely(!rdma_req->req.stripped_data)) {
+ - + + ]
1597 [ + - + - ]: 30 : rdma_iov = rdma_req->req.iov;
1598 : 30 : remaining_data_block = data_block_size;
1599 [ + - + - ]: 30 : md_size = dif_ctx->md_size;
1600 : 5 : } else {
1601 [ + - + - : 30 : rdma_iov = rdma_req->req.stripped_data->iov;
+ - + - ]
1602 [ + + + - : 30 : total_length = total_length / dif_ctx->block_size * data_block_size;
+ - ]
1603 : 30 : remaining_data_block = total_length;
1604 : 30 : md_size = 0;
1605 : : }
1606 : :
1607 [ + - + - ]: 60 : wr->num_sge = 0;
1608 : :
1609 [ + + + + : 165 : while (total_length && (num_extra_wrs || wr->num_sge < SPDK_NVMF_MAX_SGL_ENTRIES)) {
+ - + - +
+ ]
1610 [ + - + - : 90 : iov = rdma_iov + rdma_req->iovpos;
+ - ]
1611 [ + - + - : 90 : rc = spdk_rdma_get_translation(device->map, iov->iov_base, iov->iov_len, &mem_translation);
+ - + - +
- + - ]
1612 [ + + ]: 90 : if (spdk_unlikely(rc)) {
1613 : 0 : return rc;
1614 : : }
1615 : :
1616 : 90 : lkey = spdk_rdma_memory_translation_get_lkey(&mem_translation);
1617 [ + - + - : 90 : sg_ele = &wr->sg_list[wr->num_sge];
+ - + - +
- ]
1618 [ + - + - : 90 : remaining = spdk_min((uint32_t)iov->iov_len - rdma_req->offset, total_length);
+ - + - +
+ + - + -
+ - + - ]
1619 : :
1620 [ + + ]: 318 : while (remaining) {
1621 [ + + + - : 228 : if (wr->num_sge >= SPDK_NVMF_MAX_SGL_ENTRIES) {
+ + ]
1622 [ + - + - : 6 : if (num_extra_wrs > 0 && wr->next) {
+ - + - ]
1623 [ + - + - ]: 6 : wr = wr->next;
1624 [ + - + - ]: 6 : wr->num_sge = 0;
1625 [ + - + - : 6 : sg_ele = &wr->sg_list[wr->num_sge];
+ - + - +
- ]
1626 : 6 : num_extra_wrs--;
1627 : 1 : } else {
1628 : 0 : break;
1629 : : }
1630 : 1 : }
1631 [ + - + - ]: 228 : sg_ele->lkey = lkey;
1632 [ + - + - : 228 : sg_ele->addr = (uintptr_t)((char *)iov->iov_base + rdma_req->offset);
+ - + - +
- + - +
- ]
1633 [ + + ]: 228 : sge_len = spdk_min(remaining, remaining_data_block);
1634 [ + - + - ]: 228 : sg_ele->length = sge_len;
1635 [ + + + + : 228 : SPDK_DEBUGLOG(rdma, "sge[%d] %p addr 0x%"PRIx64", len %u\n", wr->num_sge, sg_ele,
+ - # # #
# # # # #
# # # # ]
1636 : : sg_ele->addr, sg_ele->length);
1637 : 228 : remaining -= sge_len;
1638 : 228 : remaining_data_block -= sge_len;
1639 [ + - + - ]: 228 : rdma_req->offset += sge_len;
1640 : 228 : total_length -= sge_len;
1641 : :
1642 [ + - ]: 228 : sg_ele++;
1643 [ + - + - ]: 228 : wr->num_sge++;
1644 : :
1645 [ + + ]: 228 : if (remaining_data_block == 0) {
1646 : : /* skip metadata */
1647 [ + - + - ]: 204 : rdma_req->offset += md_size;
1648 : 204 : total_length -= md_size;
1649 : : /* Metadata that does not fit into this IO buffer will be included in the next IO buffer */
1650 [ + + ]: 204 : remaining -= spdk_min(remaining, md_size);
1651 : 204 : remaining_data_block = data_block_size;
1652 : 34 : }
1653 : :
1654 [ + + ]: 228 : if (remaining == 0) {
1655 : : /* By subtracting the size of the last IOV from the offset, we ensure that we skip
1656 : : the remaining metadata bits at the beginning of the next buffer */
1657 [ + - + - : 90 : rdma_req->offset -= spdk_min(iov->iov_len, rdma_req->offset);
+ - + - +
+ + - + -
+ - + - +
- + - ]
1658 [ + - ]: 90 : rdma_req->iovpos++;
1659 : 15 : }
1660 : : }
1661 : : }
1662 : :
1663 [ - + ]: 60 : if (total_length) {
1664 : 0 : SPDK_ERRLOG("Not enough SG entries to hold data buffer\n");
1665 : 0 : return -EINVAL;
1666 : : }
1667 : :
1668 : 60 : return 0;
1669 : 10 : }
1670 : :
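/*
 * Worked example for the DIF-aware fill above (illustrative numbers only):
 * with a 520-byte protected block (dif_ctx->block_size = 520, md_size = 8, so
 * data_block_size = 512) and no stripped_data, each block contributes one
 * 512-byte SGE and the offset is then advanced past the 8 metadata bytes, so
 * an 8-block transfer (total_length = 8 * 520 = 4160) yields 8 SGEs while the
 * interleaved metadata regions are skipped on the wire.
 */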
1671 : : static inline uint32_t
1672 : 48 : nvmf_rdma_calc_num_wrs(uint32_t length, uint32_t io_unit_size, uint32_t block_size)
1673 : : {
1674 : : /* estimate the number of SG entries and WRs needed to process the request */
1675 : 48 : uint32_t num_sge = 0;
1676 : 8 : uint32_t i;
1677 [ + + ]: 48 : uint32_t num_buffers = SPDK_CEIL_DIV(length, io_unit_size);
1678 : :
1679 [ + + + + ]: 138 : for (i = 0; i < num_buffers && length > 0; i++) {
1680 [ + + ]: 90 : uint32_t buffer_len = spdk_min(length, io_unit_size);
1681 [ + + ]: 90 : uint32_t num_sge_in_block = SPDK_CEIL_DIV(buffer_len, block_size);
1682 : :
1683 [ + + ]: 90 : if (num_sge_in_block * block_size > buffer_len) {
1684 : 66 : ++num_sge_in_block;
1685 : 11 : }
1686 : 90 : num_sge += num_sge_in_block;
1687 : 90 : length -= buffer_len;
1688 : 15 : }
1689 [ + - ]: 48 : return SPDK_CEIL_DIV(num_sge, SPDK_NVMF_MAX_SGL_ENTRIES);
1690 : 8 : }
1691 : :
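/*
 * Worked example for nvmf_rdma_calc_num_wrs() (made-up values): for
 * length = 12288, io_unit_size = 4096 and block_size = 520 there are 3
 * buffers; each needs ceil(4096 / 520) = 8 SGEs plus one extra because
 * 8 * 520 = 4160 > 4096 (a block straddles the buffer boundary), i.e. 9 SGEs
 * per buffer and 27 in total. Assuming SPDK_NVMF_MAX_SGL_ENTRIES is 16, that
 * rounds up to ceil(27 / 16) = 2 WRs.
 */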
1692 : : static int
1693 : 5537178 : nvmf_rdma_request_fill_iovs(struct spdk_nvmf_rdma_transport *rtransport,
1694 : : struct spdk_nvmf_rdma_device *device,
1695 : : struct spdk_nvmf_rdma_request *rdma_req)
1696 : : {
1697 : 16 : struct spdk_nvmf_rdma_qpair *rqpair;
1698 : 16 : struct spdk_nvmf_rdma_poll_group *rgroup;
1699 [ + - ]: 5537178 : struct spdk_nvmf_request *req = &rdma_req->req;
1700 [ + - + - ]: 5537178 : struct ibv_send_wr *wr = &rdma_req->data.wr;
1701 : 16 : int rc;
1702 : 5537178 : uint32_t num_wrs = 1;
1703 : 16 : uint32_t length;
1704 : :
1705 [ + - + - ]: 5537178 : rqpair = SPDK_CONTAINEROF(req->qpair, struct spdk_nvmf_rdma_qpair, qpair);
1706 [ + - + - : 5537178 : rgroup = rqpair->poller->group;
+ - + - ]
1707 : :
1708 : : /* rdma wr specifics */
1709 : 5537178 : nvmf_rdma_setup_request(rdma_req);
1710 : :
1711 [ + - + - ]: 5537178 : length = req->length;
1712 [ + + + - : 5537178 : if (spdk_unlikely(req->dif_enabled)) {
+ - + + ]
1713 [ + - + - : 48 : req->dif.orig_length = length;
+ - ]
1714 [ + - + - ]: 48 : length = spdk_dif_get_length_with_md(length, &req->dif.dif_ctx);
1715 [ + - + - : 48 : req->dif.elba_length = length;
+ - ]
1716 : 8 : }
1717 : :
1718 [ + - + - ]: 5537178 : rc = spdk_nvmf_request_get_buffers(req, &rgroup->group, &rtransport->transport,
1719 : 16 : length);
1720 [ + + ]: 5537178 : if (rc != 0) {
1721 : 1445976 : return rc;
1722 : : }
1723 : :
1724 [ + + + - : 4091202 : assert(req->iovcnt <= rqpair->max_send_sge);
+ - + - +
- # # ]
1725 : :
1726 : : /* When dif_insert_or_strip is true and the I/O data length is greater than one block,
1727 : : * the stripped buffers are obtained for DIF stripping. */
1728 [ + + + - : 4091202 : if (spdk_unlikely(req->dif_enabled && (req->xfer == SPDK_NVME_DATA_CONTROLLER_TO_HOST)
+ + + + +
- + - - +
+ - + - +
- + - + -
+ - + - +
+ ]
1729 : : && (req->dif.elba_length > req->dif.dif_ctx.block_size))) {
1730 [ + - ]: 49 : rc = nvmf_request_get_stripped_buffers(req, &rgroup->group,
1731 [ + - + - : 7 : &rtransport->transport, req->dif.orig_length);
+ - + - ]
1732 [ + + ]: 42 : if (rc != 0) {
1733 [ + + + + : 24 : SPDK_INFOLOG(rdma, "Failed to get stripped buffers (%d), falling back to req.iov.\n", rc);
+ - ]
1734 : 4 : }
1735 : 7 : }
1736 : :
1737 [ + - + - ]: 4091202 : rdma_req->iovpos = 0;
1738 : :
1739 [ + + + - : 4091202 : if (spdk_unlikely(req->dif_enabled)) {
+ - + + ]
1740 [ + - + - : 56 : num_wrs = nvmf_rdma_calc_num_wrs(length, rtransport->transport.opts.io_unit_size,
+ - + - ]
1741 [ + - + - : 8 : req->dif.dif_ctx.block_size);
+ - + - ]
1742 [ + + ]: 48 : if (num_wrs > 1) {
1743 : 6 : rc = nvmf_request_alloc_wrs(rtransport, rdma_req, num_wrs - 1);
1744 [ - + ]: 6 : if (rc != 0) {
1745 : 0 : goto err_exit;
1746 : : }
1747 : 1 : }
1748 : :
1749 : 48 : rc = nvmf_rdma_fill_wr_sgl_with_dif(rgroup, device, rdma_req, wr, length, num_wrs - 1);
1750 [ - + ]: 48 : if (spdk_unlikely(rc != 0)) {
1751 : 0 : goto err_exit;
1752 : : }
1753 : :
1754 [ + + ]: 48 : if (num_wrs > 1) {
1755 : 6 : nvmf_rdma_update_remote_addr(rdma_req, num_wrs);
1756 : 1 : }
1757 : 8 : } else {
1758 : 4091154 : rc = nvmf_rdma_fill_wr_sgl(rgroup, device, rdma_req, wr, length);
1759 [ - + ]: 4091154 : if (spdk_unlikely(rc != 0)) {
1760 : 0 : goto err_exit;
1761 : : }
1762 : : }
1763 : :
1764 : : /* set the number of outstanding data WRs for this request. */
1765 [ + - + - ]: 4091202 : rdma_req->num_outstanding_data_wr = num_wrs;
1766 : :
1767 : 4091202 : return rc;
1768 : :
1769 : 0 : err_exit:
1770 [ # # # # ]: 0 : spdk_nvmf_request_free_buffers(req, &rgroup->group, &rtransport->transport);
1771 : 0 : nvmf_rdma_request_free_data(rdma_req, rtransport);
1772 [ # # # # ]: 0 : req->iovcnt = 0;
1773 : 0 : return rc;
1774 : 16 : }
1775 : :
1776 : : static int
1777 : 78080 : nvmf_rdma_request_fill_iovs_multi_sgl(struct spdk_nvmf_rdma_transport *rtransport,
1778 : : struct spdk_nvmf_rdma_device *device,
1779 : : struct spdk_nvmf_rdma_request *rdma_req)
1780 : : {
1781 : 5 : struct spdk_nvmf_rdma_qpair *rqpair;
1782 : 5 : struct spdk_nvmf_rdma_poll_group *rgroup;
1783 : 5 : struct ibv_send_wr *current_wr;
1784 [ + - ]: 78080 : struct spdk_nvmf_request *req = &rdma_req->req;
1785 : 5 : struct spdk_nvme_sgl_descriptor *inline_segment, *desc;
1786 : 5 : uint32_t num_sgl_descriptors;
1787 : 78080 : uint32_t lengths[SPDK_NVMF_MAX_SGL_ENTRIES], total_length = 0;
1788 : 5 : uint32_t i;
1789 : 5 : int rc;
1790 : :
1791 [ + - + - : 78080 : rqpair = SPDK_CONTAINEROF(rdma_req->req.qpair, struct spdk_nvmf_rdma_qpair, qpair);
+ - ]
1792 [ + - + - : 78080 : rgroup = rqpair->poller->group;
+ - + - ]
1793 : :
1794 [ + - + - : 78080 : inline_segment = &req->cmd->nvme_cmd.dptr.sgl1;
+ - + - +
- ]
1795 [ + + + - : 78080 : assert(inline_segment->generic.type == SPDK_NVME_SGL_TYPE_LAST_SEGMENT);
+ - + - #
# ]
1796 [ + + + - : 78080 : assert(inline_segment->unkeyed.subtype == SPDK_NVME_SGL_SUBTYPE_OFFSET);
+ - + - #
# ]
1797 : :
1798 [ + - + - : 78080 : num_sgl_descriptors = inline_segment->unkeyed.length / sizeof(struct spdk_nvme_sgl_descriptor);
+ - + - +
- ]
1799 [ + + # # ]: 78080 : assert(num_sgl_descriptors <= SPDK_NVMF_MAX_SGL_ENTRIES);
1800 : :
1801 [ + - + - : 78080 : desc = (struct spdk_nvme_sgl_descriptor *)rdma_req->recv->buf + inline_segment->address;
+ - + - +
- + - +
- ]
1802 [ + + ]: 522936 : for (i = 0; i < num_sgl_descriptors; i++) {
1803 [ + + + - : 444856 : if (spdk_likely(!req->dif_enabled)) {
+ - + + ]
1804 [ + - + - : 444844 : lengths[i] = desc->keyed.length;
+ - + - +
- + - ]
1805 : 8 : } else {
1806 [ + - + - : 12 : req->dif.orig_length += desc->keyed.length;
+ - + - +
- + - ]
1807 [ + - + - : 12 : lengths[i] = spdk_dif_get_length_with_md(desc->keyed.length, &req->dif.dif_ctx);
+ - + - +
- + - + -
+ - ]
1808 [ + - + - : 12 : req->dif.elba_length += lengths[i];
+ - + - +
- + - ]
1809 : : }
1810 [ + - + - : 444856 : total_length += lengths[i];
+ - ]
1811 [ + - ]: 444856 : desc++;
1812 : 10 : }
1813 : :
1814 [ + + + - : 78080 : if (total_length > rtransport->transport.opts.max_io_size) {
+ - + - -
+ ]
1815 [ # # # # : 0 : SPDK_ERRLOG("Multi SGL length 0x%x exceeds max io size 0x%x\n",
# # # # ]
1816 : : total_length, rtransport->transport.opts.max_io_size);
1817 [ # # # # : 0 : req->rsp->nvme_cpl.status.sc = SPDK_NVME_SC_DATA_SGL_LENGTH_INVALID;
# # # # #
# # # ]
1818 : 0 : return -EINVAL;
1819 : : }
1820 : :
1821 [ - + ]: 78080 : if (nvmf_request_alloc_wrs(rtransport, rdma_req, num_sgl_descriptors - 1) != 0) {
1822 : 0 : return -ENOMEM;
1823 : : }
1824 : :
1825 [ + - + - ]: 78080 : rc = spdk_nvmf_request_get_buffers(req, &rgroup->group, &rtransport->transport, total_length);
1826 [ - + ]: 78080 : if (rc != 0) {
1827 : 0 : nvmf_rdma_request_free_data(rdma_req, rtransport);
1828 : 0 : return rc;
1829 : : }
1830 : :
1831 : : /* When dif_insert_or_strip is true and the I/O data length is greater than one block,
1832 : : * the stripped buffers are obtained for DIF stripping. */
1833 [ + + + - : 78080 : if (spdk_unlikely(req->dif_enabled && (req->xfer == SPDK_NVME_DATA_CONTROLLER_TO_HOST)
+ + + + +
- + - - +
+ - + - +
- + - + -
+ - + - +
+ ]
1834 : : && (req->dif.elba_length > req->dif.dif_ctx.block_size))) {
1835 [ + - ]: 7 : rc = nvmf_request_get_stripped_buffers(req, &rgroup->group,
1836 [ + - + - : 1 : &rtransport->transport, req->dif.orig_length);
+ - + - ]
1837 [ - + ]: 6 : if (rc != 0) {
1838 [ # # # # : 0 : SPDK_INFOLOG(rdma, "Failed to get stripped buffers (%d), falling back to req.iov.\n", rc);
# # ]
1839 : 0 : }
1840 : 1 : }
1841 : :
1842 : : /* The first WR must always be the embedded data WR. This is how we unwind them later. */
1843 [ + - + - ]: 78080 : current_wr = &rdma_req->data.wr;
1844 [ + + # # ]: 78080 : assert(current_wr != NULL);
1845 : :
1846 [ + - + - ]: 78080 : req->length = 0;
1847 [ + - + - ]: 78080 : rdma_req->iovpos = 0;
1848 [ + - + - : 78080 : desc = (struct spdk_nvme_sgl_descriptor *)rdma_req->recv->buf + inline_segment->address;
+ - + - +
- + - +
- ]
1849 [ + + ]: 522936 : for (i = 0; i < num_sgl_descriptors; i++) {
1850 : : /* The descriptors must be keyed data block descriptors with an address, not an offset. */
1851 [ + - + + : 444856 : if (spdk_unlikely(desc->generic.type != SPDK_NVME_SGL_TYPE_KEYED_DATA_BLOCK ||
+ - - + +
- + - + -
- + ]
1852 : : desc->keyed.subtype != SPDK_NVME_SGL_SUBTYPE_ADDRESS)) {
1853 : 0 : rc = -EINVAL;
1854 : 0 : goto err_exit;
1855 : : }
1856 : :
1857 [ + + + - : 444856 : if (spdk_likely(!req->dif_enabled)) {
+ - + + ]
1858 [ + - + - : 444844 : rc = nvmf_rdma_fill_wr_sgl(rgroup, device, rdma_req, current_wr, lengths[i]);
+ - ]
1859 : 8 : } else {
1860 : 14 : rc = nvmf_rdma_fill_wr_sgl_with_dif(rgroup, device, rdma_req, current_wr,
1861 [ + - + - : 10 : lengths[i], 0);
+ - ]
1862 : : }
1863 [ - + ]: 444856 : if (rc != 0) {
1864 : 0 : rc = -ENOMEM;
1865 : 0 : goto err_exit;
1866 : : }
1867 : :
1868 [ + - + - : 444856 : req->length += desc->keyed.length;
+ - + - +
- ]
1869 [ + - + - : 444856 : current_wr->wr.rdma.rkey = desc->keyed.key;
+ - + - +
- + - +
- ]
1870 [ + - + - : 444856 : current_wr->wr.rdma.remote_addr = desc->address;
+ - + - +
- + - ]
1871 [ + - + - ]: 444856 : current_wr = current_wr->next;
1872 [ + - ]: 444856 : desc++;
1873 : 10 : }
1874 : :
1875 : : #ifdef SPDK_CONFIG_RDMA_SEND_WITH_INVAL
1876 : : /* Go back to the last descriptor in the list. */
1877 [ + - ]: 78080 : desc--;
1878 [ + + + - : 78080 : if ((device->attr.device_cap_flags & IBV_DEVICE_MEM_MGT_EXTENSIONS) != 0) {
+ - + - ]
1879 [ - + # # : 78050 : if (desc->keyed.subtype == SPDK_NVME_SGL_SUBTYPE_INVALIDATE_KEY) {
# # # # ]
1880 [ # # # # : 0 : rdma_req->rsp.wr.opcode = IBV_WR_SEND_WITH_INV;
# # # # ]
1881 [ # # # # : 0 : rdma_req->rsp.wr.imm_data = desc->keyed.key;
# # # # #
# # # # #
# # ]
1882 : 0 : }
1883 : 0 : }
1884 : : #endif
1885 : :
1886 [ + - + - ]: 78080 : rdma_req->num_outstanding_data_wr = num_sgl_descriptors;
1887 : :
1888 : 78080 : return 0;
1889 : :
1890 : 0 : err_exit:
1891 [ # # # # ]: 0 : spdk_nvmf_request_free_buffers(req, &rgroup->group, &rtransport->transport);
1892 : 0 : nvmf_rdma_request_free_data(rdma_req, rtransport);
1893 : 0 : return rc;
1894 : 5 : }
1895 : :
1896 : : static int
1897 : 6963923 : nvmf_rdma_request_parse_sgl(struct spdk_nvmf_rdma_transport *rtransport,
1898 : : struct spdk_nvmf_rdma_device *device,
1899 : : struct spdk_nvmf_rdma_request *rdma_req)
1900 : : {
1901 [ + - ]: 6963923 : struct spdk_nvmf_request *req = &rdma_req->req;
1902 : 25 : struct spdk_nvme_cpl *rsp;
1903 : 25 : struct spdk_nvme_sgl_descriptor *sgl;
1904 : 25 : int rc;
1905 : 25 : uint32_t length;
1906 : :
1907 [ + - + - : 6963923 : rsp = &req->rsp->nvme_cpl;
+ - ]
1908 [ + - + - : 6963923 : sgl = &req->cmd->nvme_cmd.dptr.sgl1;
+ - + - +
- ]
1909 : :
1910 [ + + + - : 6963923 : if (sgl->generic.type == SPDK_NVME_SGL_TYPE_KEYED_DATA_BLOCK &&
+ - + + #
# ]
1911 [ + + + - : 5537184 : (sgl->keyed.subtype == SPDK_NVME_SGL_SUBTYPE_ADDRESS ||
+ - - + ]
1912 [ + - # # : 2312761 : sgl->keyed.subtype == SPDK_NVME_SGL_SUBTYPE_INVALIDATE_KEY)) {
# # ]
1913 : :
1914 [ + - + - : 5537184 : length = sgl->keyed.length;
+ - ]
1915 [ + + + - : 5537184 : if (length > rtransport->transport.opts.max_io_size) {
+ - + - +
+ ]
1916 [ + - + - : 6 : SPDK_ERRLOG("SGL length 0x%x exceeds max io size 0x%x\n",
+ - + - ]
1917 : : length, rtransport->transport.opts.max_io_size);
1918 [ + - + - : 6 : rsp->status.sc = SPDK_NVME_SC_DATA_SGL_LENGTH_INVALID;
+ - ]
1919 : 6 : return -1;
1920 : : }
1921 : : #ifdef SPDK_CONFIG_RDMA_SEND_WITH_INVAL
1922 [ + + + - : 5537178 : if ((device->attr.device_cap_flags & IBV_DEVICE_MEM_MGT_EXTENSIONS) != 0) {
+ - + - ]
1923 [ + + # # : 5537082 : if (sgl->keyed.subtype == SPDK_NVME_SGL_SUBTYPE_INVALIDATE_KEY) {
# # # # ]
1924 [ # # # # : 2312761 : rdma_req->rsp.wr.opcode = IBV_WR_SEND_WITH_INV;
# # # # ]
1925 [ # # # # : 2312761 : rdma_req->rsp.wr.imm_data = sgl->keyed.key;
# # # # #
# # # # #
# # ]
1926 : 0 : }
1927 : 0 : }
1928 : : #endif
1929 : :
1930 : : /* fill request length and populate iovs */
1931 [ + - + - ]: 5537178 : req->length = length;
1932 : :
1933 : 5537178 : rc = nvmf_rdma_request_fill_iovs(rtransport, device, rdma_req);
1934 [ + + ]: 5537178 : if (spdk_unlikely(rc < 0)) {
1935 [ - + ]: 1445976 : if (rc == -EINVAL) {
1936 : 0 : SPDK_ERRLOG("SGL length exceeds the max I/O size\n");
1937 [ # # # # : 0 : rsp->status.sc = SPDK_NVME_SC_DATA_SGL_LENGTH_INVALID;
# # ]
1938 : 0 : return -1;
1939 : : }
1940 : : /* No available buffers. Queue this request up. */
1941 [ + + + + : 1445976 : SPDK_DEBUGLOG(rdma, "No available large data buffers. Queueing request %p\n", rdma_req);
+ - ]
1942 : 1445976 : return 0;
1943 : : }
1944 : :
1945 [ + + + + : 4091202 : SPDK_DEBUGLOG(rdma, "Request %p took %d buffer/s from central pool\n", rdma_req,
+ - # # #
# ]
1946 : : req->iovcnt);
1947 : :
1948 : 4091202 : return 0;
1949 [ + + + - : 1426742 : } else if (sgl->generic.type == SPDK_NVME_SGL_TYPE_DATA_BLOCK &&
+ - + + -
+ ]
1950 [ + - + - : 1348659 : sgl->unkeyed.subtype == SPDK_NVME_SGL_SUBTYPE_OFFSET) {
+ - ]
1951 [ + - + - ]: 1348659 : uint64_t offset = sgl->address;
1952 [ + - + - : 1348659 : uint32_t max_len = rtransport->transport.opts.in_capsule_data_size;
+ - + - ]
1953 : :
1954 [ + + + + : 1348659 : SPDK_DEBUGLOG(nvmf, "In-capsule data: offset 0x%" PRIx64 ", length 0x%x\n",
+ - # # #
# # # #
# ]
1955 : : offset, sgl->unkeyed.length);
1956 : :
1957 [ - + ]: 1348659 : if (offset > max_len) {
1958 : 0 : SPDK_ERRLOG("In-capsule offset 0x%" PRIx64 " exceeds capsule length 0x%x\n",
1959 : : offset, max_len);
1960 [ # # # # : 0 : rsp->status.sc = SPDK_NVME_SC_INVALID_SGL_OFFSET;
# # ]
1961 : 0 : return -1;
1962 : : }
1963 : 1348659 : max_len -= (uint32_t)offset;
1964 : :
1965 [ + + + - : 1348659 : if (sgl->unkeyed.length > max_len) {
+ - + - +
+ ]
1966 [ + - + - : 12 : SPDK_ERRLOG("In-capsule data length 0x%x exceeds capsule length 0x%x\n",
+ - + - ]
1967 : : sgl->unkeyed.length, max_len);
1968 [ + - + - : 12 : rsp->status.sc = SPDK_NVME_SC_DATA_SGL_LENGTH_INVALID;
+ - ]
1969 : 12 : return -1;
1970 : : }
1971 : :
1972 [ + - + - ]: 1348647 : rdma_req->num_outstanding_data_wr = 0;
1973 [ + - + - : 1348647 : req->data_from_pool = false;
+ - ]
1974 [ + - + - : 1348647 : req->length = sgl->unkeyed.length;
+ - + - +
- + - ]
1975 : :
1976 [ + - + - : 1348647 : req->iov[0].iov_base = rdma_req->recv->buf + offset;
+ - + - +
- + - + -
+ - + - +
- ]
1977 [ + - + - : 1348647 : req->iov[0].iov_len = req->length;
+ - + - +
- + - +
- ]
1978 [ + - + - ]: 1348647 : req->iovcnt = 1;
1979 : :
1980 : 1348647 : return 0;
1981 [ + - + - : 78088 : } else if (sgl->generic.type == SPDK_NVME_SGL_TYPE_LAST_SEGMENT &&
+ - + - -
+ ]
1982 [ + - + - : 78080 : sgl->unkeyed.subtype == SPDK_NVME_SGL_SUBTYPE_OFFSET) {
+ - ]
1983 : :
1984 : 78080 : rc = nvmf_rdma_request_fill_iovs_multi_sgl(rtransport, device, rdma_req);
1985 [ - + ]: 78080 : if (rc == -ENOMEM) {
1986 [ # # # # : 0 : SPDK_DEBUGLOG(rdma, "No available large data buffers. Queueing request %p\n", rdma_req);
# # ]
1987 : 0 : return 0;
1988 [ - + ]: 78080 : } else if (rc == -EINVAL) {
1989 : 0 : SPDK_ERRLOG("Multi SGL element request length exceeds the max I/O size\n");
1990 [ # # # # : 0 : rsp->status.sc = SPDK_NVME_SC_DATA_SGL_LENGTH_INVALID;
# # ]
1991 : 0 : return -1;
1992 : : }
1993 : :
1994 [ + + + + : 78080 : SPDK_DEBUGLOG(rdma, "Request %p took %d buffer/s from central pool\n", rdma_req,
+ - # # #
# ]
1995 : : req->iovcnt);
1996 : :
1997 : 78080 : return 0;
1998 : : }
1999 : :
2000 [ # # # # : 0 : SPDK_ERRLOG("Invalid NVMf I/O Command SGL: Type 0x%x, Subtype 0x%x\n",
# # # # #
# # # ]
2001 : : sgl->generic.type, sgl->generic.subtype);
2002 [ # # # # : 0 : rsp->status.sc = SPDK_NVME_SC_SGL_DESCRIPTOR_TYPE_INVALID;
# # ]
2003 : 0 : return -1;
2004 : 25 : }
2005 : :
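/*
 * Summary of the SGL dispatch in nvmf_rdma_request_parse_sgl() above
 * (illustrative numbers only): a keyed data block descriptor maps to RDMA
 * READ/WRITE of a host buffer, a plain data block with the offset subtype is
 * in-capsule data (e.g. with in_capsule_data_size = 4096 and offset = 256 at
 * most 3840 bytes are accepted, served directly out of the receive buffer),
 * and a last-segment descriptor points at a list of keyed descriptors handled
 * by the multi-SGL path.
 */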
2006 : : static void
2007 : 6674950 : _nvmf_rdma_request_free(struct spdk_nvmf_rdma_request *rdma_req,
2008 : : struct spdk_nvmf_rdma_transport *rtransport)
2009 : : {
2010 : 6 : struct spdk_nvmf_rdma_qpair *rqpair;
2011 : 6 : struct spdk_nvmf_rdma_poll_group *rgroup;
2012 : :
2013 [ + - + - : 6674950 : rqpair = SPDK_CONTAINEROF(rdma_req->req.qpair, struct spdk_nvmf_rdma_qpair, qpair);
+ - ]
2014 [ + + + - : 6674950 : if (rdma_req->req.data_from_pool) {
+ - + - +
+ ]
2015 [ + - + - : 4169192 : rgroup = rqpair->poller->group;
+ - + - ]
2016 : :
2017 [ + - + - : 4169192 : spdk_nvmf_request_free_buffers(&rdma_req->req, &rgroup->group, &rtransport->transport);
+ - ]
2018 : 5 : }
2019 [ + + + - : 6674950 : if (rdma_req->req.stripped_data) {
+ - - + ]
2020 [ # # ]: 0 : nvmf_request_free_stripped_buffers(&rdma_req->req,
2021 [ # # # # : 0 : &rqpair->poller->group->group,
# # # # #
# ]
2022 [ # # ]: 0 : &rtransport->transport);
2023 : 0 : }
2024 : 6674950 : nvmf_rdma_request_free_data(rdma_req, rtransport);
2025 [ + - + - : 6674950 : rdma_req->req.length = 0;
+ - ]
2026 [ + - + - : 6674950 : rdma_req->req.iovcnt = 0;
+ - ]
2027 [ + - + - ]: 6674950 : rdma_req->offset = 0;
2028 [ + - + - : 6674950 : rdma_req->req.dif_enabled = false;
+ - + - ]
2029 [ + - + - ]: 6674950 : rdma_req->fused_failed = false;
2030 [ + - + - ]: 6674950 : rdma_req->transfer_wr = NULL;
2031 [ + + + - : 6674950 : if (rdma_req->fused_pair) {
- + ]
2032 : : /* This req was part of a valid fused pair, but failed before it got to
2033 : : * READY_TO_EXECUTE state. This means we need to fail the other request
2034 : : * in the pair, because it is no longer part of a valid pair. If the pair
2035 : : * already reached READY_TO_EXECUTE state, we need to kick it.
2036 : : */
2037 [ # # # # : 0 : rdma_req->fused_pair->fused_failed = true;
# # # # ]
2038 [ # # # # : 0 : if (rdma_req->fused_pair->state == RDMA_REQUEST_STATE_READY_TO_EXECUTE) {
# # # # #
# ]
2039 [ # # # # ]: 0 : nvmf_rdma_request_process(rtransport, rdma_req->fused_pair);
2040 : 0 : }
2041 [ # # # # ]: 0 : rdma_req->fused_pair = NULL;
2042 : 0 : }
2043 [ + + + - : 6674950 : memset(&rdma_req->req.dif, 0, sizeof(rdma_req->req.dif));
+ - ]
2044 [ + - ]: 6674950 : rqpair->qd--;
2045 : :
2046 [ + + + - : 6674950 : STAILQ_INSERT_HEAD(&rqpair->resources->free_queue, rdma_req, state_link);
+ - + - +
- + - + -
+ - + + +
- + - + -
+ - + - +
- + - + -
+ - + - +
- + - ]
2047 [ + - + - ]: 6674950 : rdma_req->state = RDMA_REQUEST_STATE_FREE;
2048 : 6674950 : }
2049 : :
2050 : : static void
2051 : 6674575 : nvmf_rdma_check_fused_ordering(struct spdk_nvmf_rdma_transport *rtransport,
2052 : : struct spdk_nvmf_rdma_qpair *rqpair,
2053 : : struct spdk_nvmf_rdma_request *rdma_req)
2054 : : {
2055 : 6 : enum spdk_nvme_cmd_fuse last, next;
2056 : :
2057 [ + + + - : 6674575 : last = rqpair->fused_first ? rqpair->fused_first->req.cmd->nvme_cmd.fuse : SPDK_NVME_CMD_FUSE_NONE;
- + # # #
# # # # #
# # # # #
# ]
2058 [ + - + - : 6674575 : next = rdma_req->req.cmd->nvme_cmd.fuse;
+ - + - +
- ]
2059 : :
2060 [ + + # # ]: 6674575 : assert(last != SPDK_NVME_CMD_FUSE_SECOND);
2061 : :
2062 [ + + + + ]: 6674575 : if (spdk_likely(last == SPDK_NVME_CMD_FUSE_NONE && next == SPDK_NVME_CMD_FUSE_NONE)) {
2063 : 6672517 : return;
2064 : : }
2065 : :
2066 [ + + ]: 2058 : if (last == SPDK_NVME_CMD_FUSE_FIRST) {
2067 [ + - ]: 1029 : if (next == SPDK_NVME_CMD_FUSE_SECOND) {
2068 : : /* This is a valid pair of fused commands. Point them at each other
2069 : : * so they can be submitted consecutively once ready to be executed.
2070 : : */
2071 [ # # # # : 1029 : rqpair->fused_first->fused_pair = rdma_req;
# # # # ]
2072 [ # # # # : 1029 : rdma_req->fused_pair = rqpair->fused_first;
# # # # ]
2073 [ # # # # ]: 1029 : rqpair->fused_first = NULL;
2074 : 1029 : return;
2075 : : } else {
2076 : : /* Mark the last req as failed since it wasn't followed by a SECOND. */
2077 [ # # # # : 0 : rqpair->fused_first->fused_failed = true;
# # # # ]
2078 : :
2079 : : /* If the last req is in READY_TO_EXECUTE state, then call
2080 : : * nvmf_rdma_request_process(), otherwise nothing else will kick it.
2081 : : */
2082 [ # # # # : 0 : if (rqpair->fused_first->state == RDMA_REQUEST_STATE_READY_TO_EXECUTE) {
# # # # #
# ]
2083 [ # # # # ]: 0 : nvmf_rdma_request_process(rtransport, rqpair->fused_first);
2084 : 0 : }
2085 : :
2086 [ # # # # ]: 0 : rqpair->fused_first = NULL;
2087 : : }
2088 : 0 : }
2089 : :
2090 [ + - ]: 1029 : if (next == SPDK_NVME_CMD_FUSE_FIRST) {
2091 : : /* Set rqpair->fused_first here so that we know to check that the next request
2092 : : * is a SECOND (and to fail this one if it isn't).
2093 : : */
2094 [ # # # # ]: 1029 : rqpair->fused_first = rdma_req;
2095 [ # # ]: 0 : } else if (next == SPDK_NVME_CMD_FUSE_SECOND) {
2096 : : /* Mark this req as failed since it is a SECOND and the last one was not a FIRST. */
2097 [ # # # # ]: 0 : rdma_req->fused_failed = true;
2098 : 0 : }
2099 [ - + ]: 6 : }
2100 : :
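/*
 * A minimal, self-contained sketch (toy types, not the SPDK structures) of the
 * FUSED ordering rule enforced by nvmf_rdma_check_fused_ordering() above: a
 * FIRST command is only valid when the very next command is a SECOND, and a
 * SECOND is only valid immediately after a FIRST; any other sequence marks the
 * offending request(s) as fused_failed.
 */
#include <stdbool.h>
#include <stddef.h>

enum toy_fuse { TOY_FUSE_NONE, TOY_FUSE_FIRST, TOY_FUSE_SECOND };

struct toy_req {
	enum toy_fuse fuse;
	bool fused_failed;
	struct toy_req *fused_pair;
};

static void
toy_check_fused_ordering(struct toy_req **fused_first, struct toy_req *req)
{
	if (*fused_first != NULL) {
		if (req->fuse == TOY_FUSE_SECOND) {
			/* Valid pair: point the two requests at each other. */
			(*fused_first)->fused_pair = req;
			req->fused_pair = *fused_first;
		} else {
			/* FIRST was not followed by a SECOND: fail it. */
			(*fused_first)->fused_failed = true;
		}
		*fused_first = NULL;
	}

	if (req->fuse == TOY_FUSE_FIRST) {
		*fused_first = req;		/* remember it; a SECOND must follow */
	} else if (req->fuse == TOY_FUSE_SECOND && req->fused_pair == NULL) {
		req->fused_failed = true;	/* SECOND without a preceding FIRST */
	}
}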
2101 : : bool
2102 : 24802606 : nvmf_rdma_request_process(struct spdk_nvmf_rdma_transport *rtransport,
2103 : : struct spdk_nvmf_rdma_request *rdma_req)
2104 : : {
2105 : 23 : struct spdk_nvmf_rdma_qpair *rqpair;
2106 : 23 : struct spdk_nvmf_rdma_device *device;
2107 : 23 : struct spdk_nvmf_rdma_poll_group *rgroup;
2108 [ + - + - : 24802606 : struct spdk_nvme_cpl *rsp = &rdma_req->req.rsp->nvme_cpl;
+ - + - ]
2109 : 23 : int rc;
2110 : 23 : struct spdk_nvmf_rdma_recv *rdma_recv;
2111 : 23 : enum spdk_nvmf_rdma_request_state prev_state;
2112 : 24802606 : bool progress = false;
2113 : 115 : int data_posted;
2114 : 23 : uint32_t num_blocks, num_rdma_reads_available, qdepth;
2115 : :
2116 [ + - + - : 24802606 : rqpair = SPDK_CONTAINEROF(rdma_req->req.qpair, struct spdk_nvmf_rdma_qpair, qpair);
+ - ]
2117 [ + - + - ]: 24802606 : device = rqpair->device;
2118 [ + - + - : 24802606 : rgroup = rqpair->poller->group;
+ - + - ]
2119 : :
2120 [ + + + - : 24802606 : assert(rdma_req->state != RDMA_REQUEST_STATE_FREE);
+ - # # ]
2121 : :
2122 : : /* If the queue pair is in an error state, force the request to the completed state
2123 : : * to release resources. */
2124 [ + + + + : 24802606 : if (rqpair->ibv_state == IBV_QPS_ERR || rqpair->qpair.state != SPDK_NVMF_QPAIR_ACTIVE) {
+ - + - +
- + - -
+ ]
2125 [ - - - - : 920 : switch (rdma_req->state) {
+ # # #
# ]
2126 : 0 : case RDMA_REQUEST_STATE_NEED_BUFFER:
2127 [ # # # # : 0 : STAILQ_REMOVE(&rgroup->group.pending_buf_queue, &rdma_req->req, spdk_nvmf_request, buf_link);
# # # # #
# # # # #
# # # # #
# # # # #
# # # # #
# # # # #
# # # # #
# # # # #
# # # # #
# # # # #
# # # # #
# # # # #
# # # # #
# # # # #
# # # # #
# # # # #
# # # # #
# # # # #
# # # # #
# # # # #
# # ]
2128 : 0 : break;
2129 : 0 : case RDMA_REQUEST_STATE_DATA_TRANSFER_TO_CONTROLLER_PENDING:
2130 [ # # # # : 0 : STAILQ_REMOVE(&rqpair->pending_rdma_read_queue, rdma_req, spdk_nvmf_rdma_request, state_link);
# # # # #
# # # # #
# # # # #
# # # # #
# # # # #
# # # # #
# # # # #
# # # # #
# # # # #
# # # # #
# # # # #
# # # # #
# # # # #
# # # # #
# # # # #
# # # # #
# # # # ]
2131 : 0 : break;
2132 : 0 : case RDMA_REQUEST_STATE_DATA_TRANSFER_TO_HOST_PENDING:
2133 [ # # # # : 0 : STAILQ_REMOVE(&rqpair->pending_rdma_write_queue, rdma_req, spdk_nvmf_rdma_request, state_link);
# # # # #
# # # # #
# # # # #
# # # # #
# # # # #
# # # # #
# # # # #
# # # # #
# # # # #
# # # # #
# # # # #
# # # # #
# # # # #
# # # # #
# # # # #
# # # # #
# # # # ]
2134 : 0 : break;
2135 : 0 : case RDMA_REQUEST_STATE_READY_TO_COMPLETE_PENDING:
2136 [ # # # # : 0 : STAILQ_REMOVE(&rqpair->pending_rdma_send_queue, rdma_req, spdk_nvmf_rdma_request, state_link);
# # # # #
# # # # #
# # # # #
# # # # #
# # # # #
# # # # #
# # # # #
# # # # #
# # # # #
# # # # #
# # # # #
# # # # #
# # # # #
# # # # #
# # # # #
# # # # #
# # # # ]
2137 : 0 : break;
2138 : 920 : default:
2139 : 920 : break;
2140 : : }
2141 [ # # # # ]: 920 : rdma_req->state = RDMA_REQUEST_STATE_COMPLETED;
2142 : 0 : }
2143 : :
2144 : : /* The loop here is to allow for several back-to-back state changes. */
2145 : 23 : do {
2146 [ + - + - ]: 71021828 : prev_state = rdma_req->state;
2147 : :
2148 [ + + + + : 71021828 : SPDK_DEBUGLOG(rdma, "Request %p entering state %d\n", rdma_req, prev_state);
+ - ]
2149 : :
2150 [ + + + + : 71021828 : switch (rdma_req->state) {
+ + + + +
+ + + + +
+ + + + +
- ]
2151 : 6674056 : case RDMA_REQUEST_STATE_FREE:
2152 : : /* Some external code must kick a request into RDMA_REQUEST_STATE_NEW
2153 : : * to escape this state. */
2154 : 6674062 : break;
2155 : 6674569 : case RDMA_REQUEST_STATE_NEW:
2156 [ + + + - : 6674575 : spdk_trace_record(TRACE_RDMA_REQUEST_STATE_NEW, 0, 0,
# # # # #
# # # # #
# # # # ]
2157 : : (uintptr_t)rdma_req, (uintptr_t)rqpair);
2158 [ + - + - ]: 6674575 : rdma_recv = rdma_req->recv;
2159 : :
2160 : : /* The first element of the SGL is the NVMe command */
2161 [ + - + - : 6674575 : rdma_req->req.cmd = (union nvmf_h2c_msg *)rdma_recv->sgl[0].addr;
+ - + - +
- + - + -
+ - ]
2162 [ + + + - : 6674575 : memset(rdma_req->req.rsp, 0, sizeof(*rdma_req->req.rsp));
+ - + - ]
2163 [ + - + - : 6674575 : rdma_req->transfer_wr = &rdma_req->data.wr;
+ - + - ]
2164 : :
2165 [ + - + + : 6674575 : if (rqpair->ibv_state == IBV_QPS_ERR || rqpair->qpair.state != SPDK_NVMF_QPAIR_ACTIVE) {
+ - + - +
- + - -
+ ]
2166 [ # # # # ]: 0 : rdma_req->state = RDMA_REQUEST_STATE_COMPLETED;
2167 : 0 : break;
2168 : : }
2169 : :
2170 [ + + + - : 6674575 : if (spdk_unlikely(spdk_nvmf_request_get_dif_ctx(&rdma_req->req, &rdma_req->req.dif.dif_ctx))) {
+ - + - +
- ]
2171 [ # # # # : 0 : rdma_req->req.dif_enabled = true;
# # # # ]
2172 : 0 : }
2173 : :
2174 : 6674575 : nvmf_rdma_check_fused_ordering(rtransport, rqpair, rdma_req);
2175 : :
2176 : : #ifdef SPDK_CONFIG_RDMA_SEND_WITH_INVAL
2177 [ + - + - : 6674575 : rdma_req->rsp.wr.opcode = IBV_WR_SEND;
+ - + - ]
2178 [ + - + - : 6674575 : rdma_req->rsp.wr.imm_data = 0;
+ - + - +
- ]
2179 : : #endif
2180 : :
2181 : : /* The next state transition depends on the data transfer needs of this request. */
2182 [ + - + - : 6674575 : rdma_req->req.xfer = spdk_nvmf_req_get_xfer(&rdma_req->req);
+ - + - ]
2183 : :
2184 [ + + + - : 6674575 : if (spdk_unlikely(rdma_req->req.xfer == SPDK_NVME_DATA_BIDIRECTIONAL)) {
+ - + + ]
2185 [ + - + - : 6 : rsp->status.sct = SPDK_NVME_SCT_GENERIC;
+ - ]
2186 [ + - + - : 6 : rsp->status.sc = SPDK_NVME_SC_INVALID_OPCODE;
+ - ]
2187 [ + - + - : 6 : STAILQ_INSERT_TAIL(&rqpair->pending_rdma_send_queue, rdma_req, state_link);
+ - + - +
- + - + -
+ - + - +
- + - +
- ]
2188 [ + - + - ]: 6 : rdma_req->state = RDMA_REQUEST_STATE_READY_TO_COMPLETE_PENDING;
2189 [ + + + + : 6 : SPDK_DEBUGLOG(rdma, "Request %p: invalid xfer type (BIDIRECTIONAL)\n", rdma_req);
+ - ]
2190 : 6 : break;
2191 : : }
2192 : :
2193 : : /* If no data to transfer, ready to execute. */
2194 [ + + + - : 6674569 : if (rdma_req->req.xfer == SPDK_NVME_DATA_NONE) {
+ - + - ]
2195 [ # # # # ]: 1156736 : rdma_req->state = RDMA_REQUEST_STATE_READY_TO_EXECUTE;
2196 : 1156736 : break;
2197 : : }
2198 : :
2199 [ + - + - ]: 5517833 : rdma_req->state = RDMA_REQUEST_STATE_NEED_BUFFER;
2200 [ + - + - : 5517833 : STAILQ_INSERT_TAIL(&rgroup->group.pending_buf_queue, &rdma_req->req, buf_link);
+ - + - +
- + - + -
+ - + - +
- + - + -
+ - + - +
- + - +
- ]
2201 : 5517833 : break;
2202 : 7550578 : case RDMA_REQUEST_STATE_NEED_BUFFER:
2203 [ + + + - : 7550583 : spdk_trace_record(TRACE_RDMA_REQUEST_STATE_NEED_BUFFER, 0, 0,
# # # # #
# # # # #
# # # # ]
2204 : : (uintptr_t)rdma_req, (uintptr_t)rqpair);
2205 : :
2206 [ + + + - : 7550583 : assert(rdma_req->req.xfer != SPDK_NVME_DATA_NONE);
+ - - + #
# ]
2207 : :
2208 [ + + + - : 7550583 : if (&rdma_req->req != STAILQ_FIRST(&rgroup->group.pending_buf_queue)) {
+ - + - +
- - + ]
2209 : : /* This request needs to wait in line to obtain a buffer */
2210 : 586780 : break;
2211 : : }
2212 : :
2213 : : /* Try to get a data buffer */
2214 : 6963803 : rc = nvmf_rdma_request_parse_sgl(rtransport, device, rdma_req);
2215 [ + + ]: 6963803 : if (rc < 0) {
2216 [ # # # # : 0 : STAILQ_REMOVE_HEAD(&rgroup->group.pending_buf_queue, buf_link);
# # # # #
# # # # #
# # # # #
# # # # #
# # # # #
# # # # #
# # # # ]
2217 [ # # # # : 0 : STAILQ_INSERT_TAIL(&rqpair->pending_rdma_send_queue, rdma_req, state_link);
# # # # #
# # # # #
# # # # #
# # # #
# ]
2218 [ # # # # ]: 0 : rdma_req->state = RDMA_REQUEST_STATE_READY_TO_COMPLETE_PENDING;
2219 : 0 : break;
2220 : : }
2221 : :
2222 [ + + + - : 6963803 : if (rdma_req->req.iovcnt == 0) {
+ - - + ]
2223 : : /* No buffers available. */
2224 [ # # # # ]: 1445970 : rgroup->stat.pending_data_buffer++;
2225 : 1445970 : break;
2226 : : }
2227 : :
2228 [ + + + - : 5517833 : STAILQ_REMOVE_HEAD(&rgroup->group.pending_buf_queue, buf_link);
+ - + - +
- + - + -
+ - + - +
- + - - +
+ - + - +
- + - + -
+ - + - ]
2229 : :
2230 : : /* If data is transferring from host to controller and the data didn't
2231 : : * arrive using in-capsule data, we need to do a transfer from the host.
2232 : : */
2233 [ + + + + : 5517837 : if (rdma_req->req.xfer == SPDK_NVME_DATA_HOST_TO_CONTROLLER &&
+ - + + -
+ ]
2234 [ + - + - : 4 : rdma_req->req.data_from_pool) {
+ - + - ]
2235 [ + - + - : 646662 : STAILQ_INSERT_TAIL(&rqpair->pending_rdma_read_queue, rdma_req, state_link);
+ - + - +
- + - + -
+ - + - +
- + - +
- ]
2236 [ + - + - ]: 646662 : rdma_req->state = RDMA_REQUEST_STATE_DATA_TRANSFER_TO_CONTROLLER_PENDING;
2237 : 646662 : break;
2238 : : }
2239 : :
2240 [ + - + - ]: 4871171 : rdma_req->state = RDMA_REQUEST_STATE_READY_TO_EXECUTE;
2241 : 4871171 : break;
2242 : 2400200 : case RDMA_REQUEST_STATE_DATA_TRANSFER_TO_CONTROLLER_PENDING:
2243 [ + + + - : 2400204 : spdk_trace_record(TRACE_RDMA_REQUEST_STATE_DATA_TRANSFER_TO_CONTROLLER_PENDING, 0, 0,
# # # # #
# # # # #
# # # # ]
2244 : : (uintptr_t)rdma_req, (uintptr_t)rqpair);
2245 : :
2246 [ + + + - : 2400204 : if (rdma_req != STAILQ_FIRST(&rqpair->pending_rdma_read_queue)) {
+ - - + ]
2247 : : /* This request needs to wait in line to perform RDMA */
2248 : 378179 : break;
2249 : : }
2250 [ + + + - : 2022025 : assert(rqpair->max_send_depth >= rqpair->current_send_depth);
+ - + - -
+ # # ]
2251 [ + - + - : 2022025 : qdepth = rqpair->max_send_depth - rqpair->current_send_depth;
+ - + - ]
2252 [ + + + - : 2022025 : assert(rqpair->max_read_depth >= rqpair->current_read_depth);
+ - + - -
+ # # ]
2253 [ + - + - : 2022025 : num_rdma_reads_available = rqpair->max_read_depth - rqpair->current_read_depth;
+ - + - ]
2254 [ + + + - : 2022029 : if (rdma_req->num_outstanding_data_wr > qdepth ||
+ - - + ]
2255 [ + + + - ]: 2021591 : rdma_req->num_outstanding_data_wr > num_rdma_reads_available) {
2256 [ + + + + ]: 1375363 : if (num_rdma_reads_available && qdepth) {
2257 : : /* Send as much as we can */
2258 [ # # # # ]: 12203 : request_prepare_transfer_in_part(&rdma_req->req, spdk_min(num_rdma_reads_available, qdepth));
2259 : 0 : } else {
2260 : : /* We can only have so many WRs outstanding. we have to wait until some finish. */
2261 [ # # # # : 1363160 : rqpair->poller->stat.pending_rdma_read++;
# # # # ]
2262 : 1363160 : break;
2263 : : }
2264 : 0 : }
2265 : :
2266 : : /* We have already verified that this request is the head of the queue. */
2267 [ + + + - : 658865 : if (rdma_req->num_remaining_data_wr == 0) {
- + ]
2268 [ + + + - : 646662 : STAILQ_REMOVE_HEAD(&rqpair->pending_rdma_read_queue, state_link);
+ - + - +
- + - + -
+ - + - -
+ + - + -
+ - + - +
- ]
2269 : 4 : }
2270 : :
2271 [ + - ]: 658865 : rc = request_transfer_in(&rdma_req->req);
2272 [ + + ]: 658865 : if (!rc) {
2273 [ + - + - ]: 658865 : rdma_req->state = RDMA_REQUEST_STATE_TRANSFERRING_HOST_TO_CONTROLLER;
2274 : 4 : } else {
2275 [ # # # # : 0 : rsp->status.sc = SPDK_NVME_SC_INTERNAL_DEVICE_ERROR;
# # ]
2276 [ # # # # : 0 : STAILQ_INSERT_TAIL(&rqpair->pending_rdma_send_queue, rdma_req, state_link);
# # # # #
# # # # #
# # # # #
# # # #
# ]
2277 [ # # # # ]: 0 : rdma_req->state = RDMA_REQUEST_STATE_READY_TO_COMPLETE_PENDING;
2278 : : }
2279 : 658865 : break;
2280 : 986754 : case RDMA_REQUEST_STATE_TRANSFERRING_HOST_TO_CONTROLLER:
2281 [ + + + - : 986758 : spdk_trace_record(TRACE_RDMA_REQUEST_STATE_TRANSFERRING_HOST_TO_CONTROLLER, 0, 0,
# # # # #
# # # # #
# # # # ]
2282 : : (uintptr_t)rdma_req, (uintptr_t)rqpair);
2283 : : /* Some external code must kick a request into RDMA_REQUEST_STATE_READY_TO_EXECUTE
2284 : : * to escape this state. */
2285 : 986758 : break;
2286 : 6674552 : case RDMA_REQUEST_STATE_READY_TO_EXECUTE:
2287 [ + + + - : 6674557 : spdk_trace_record(TRACE_RDMA_REQUEST_STATE_READY_TO_EXECUTE, 0, 0,
# # # # #
# # # # #
# # # # ]
2288 : : (uintptr_t)rdma_req, (uintptr_t)rqpair);
2289 : :
2290 [ + + + - : 6674557 : if (spdk_unlikely(rdma_req->req.dif_enabled)) {
+ - + - +
- ]
2291 [ # # # # : 0 : if (rdma_req->req.xfer == SPDK_NVME_DATA_HOST_TO_CONTROLLER) {
# # # # ]
2292 : : /* generate DIF for write operation */
2293 [ # # # # : 0 : num_blocks = SPDK_CEIL_DIV(rdma_req->req.dif.elba_length, rdma_req->req.dif.dif_ctx.block_size);
# # # # #
# # # # #
# # # # #
# # # # #
# # # # #
# ]
2294 [ # # # # ]: 0 : assert(num_blocks > 0);
2295 : :
2296 [ # # # # : 0 : rc = spdk_dif_generate(rdma_req->req.iov, rdma_req->req.iovcnt,
# # # # #
# ]
2297 [ # # # # : 0 : num_blocks, &rdma_req->req.dif.dif_ctx);
# # ]
2298 [ # # ]: 0 : if (rc != 0) {
2299 : 0 : SPDK_ERRLOG("DIF generation failed\n");
2300 [ # # # # ]: 0 : rdma_req->state = RDMA_REQUEST_STATE_COMPLETED;
2301 [ # # ]: 0 : spdk_nvmf_qpair_disconnect(&rqpair->qpair, NULL, NULL);
2302 : 0 : break;
2303 : : }
2304 : 0 : }
2305 : :
2306 [ # # # # : 0 : assert(rdma_req->req.dif.elba_length >= rdma_req->req.length);
# # # # #
# # # # #
# # # # ]
2307 : : /* set extended length before IO operation */
2308 [ # # # # : 0 : rdma_req->req.length = rdma_req->req.dif.elba_length;
# # # # #
# # # #
# ]
2309 : 0 : }
2310 : :
2311 [ + + + - : 6674557 : if (rdma_req->req.cmd->nvme_cmd.fuse != SPDK_NVME_CMD_FUSE_NONE) {
+ - + - +
- + - ]
2312 [ - + - + : 2058 : if (rdma_req->fused_failed) {
# # # # ]
2313 : : /* This request failed FUSED semantics. Fail it immediately, without
2314 : : * even sending it to the target layer.
2315 : : */
2316 [ # # # # : 0 : rsp->status.sct = SPDK_NVME_SCT_GENERIC;
# # ]
2317 [ # # # # : 0 : rsp->status.sc = SPDK_NVME_SC_ABORTED_MISSING_FUSED;
# # ]
2318 [ # # # # : 0 : STAILQ_INSERT_TAIL(&rqpair->pending_rdma_send_queue, rdma_req, state_link);
# # # # #
# # # # #
# # # # #
# # # #
# ]
2319 [ # # # # ]: 0 : rdma_req->state = RDMA_REQUEST_STATE_READY_TO_COMPLETE_PENDING;
2320 : 0 : break;
2321 : : }
2322 : :
2323 [ + + # # : 2058 : if (rdma_req->fused_pair == NULL ||
# # # # ]
2324 [ + - # # : 1029 : rdma_req->fused_pair->state != RDMA_REQUEST_STATE_READY_TO_EXECUTE) {
# # # # ]
2325 : : /* This request is ready to execute, but either we don't know yet whether it's
2326 : : * valid (i.e. this is a FIRST but we haven't received the next
2327 : : * request yet), or the other request of this fused pair isn't ready to
2328 : : * execute. So break here; this request will get processed later, either
2329 : : * when the other request is ready or when we find that this request isn't valid.
2330 : : */
2331 : 0 : break;
2332 : : }
2333 : 0 : }
2334 : :
2335 : : /* If we get to this point, and this request is a fused command, we know that
2336 : : * it is part of a valid sequence (FIRST followed by a SECOND) and that both
2337 : : * requests are READY_TO_EXECUTE. So call spdk_nvmf_request_exec() on both this
2338 : : * request and the other request of the fused pair, in the correct order.
2339 : : * Also clear the ->fused_pair pointers on both requests, since after this point
2340 : : * we no longer need to maintain the relationship between these two requests.
2341 : : */
2342 [ + + + - : 6673528 : if (rdma_req->req.cmd->nvme_cmd.fuse == SPDK_NVME_CMD_FUSE_SECOND) {
+ - + - +
- + - ]
2343 [ - + # # : 1029 : assert(rdma_req->fused_pair != NULL);
# # # # ]
2344 [ - + # # : 1029 : assert(rdma_req->fused_pair->fused_pair != NULL);
# # # # #
# # # ]
2345 [ # # # # : 1029 : rdma_req->fused_pair->state = RDMA_REQUEST_STATE_EXECUTING;
# # # # ]
2346 [ # # # # : 1029 : spdk_nvmf_request_exec(&rdma_req->fused_pair->req);
# # ]
2347 [ # # # # : 1029 : rdma_req->fused_pair->fused_pair = NULL;
# # # # ]
2348 [ # # # # ]: 1029 : rdma_req->fused_pair = NULL;
2349 : 0 : }
2350 [ + - + - ]: 6673528 : rdma_req->state = RDMA_REQUEST_STATE_EXECUTING;
2351 [ + - ]: 6673528 : spdk_nvmf_request_exec(&rdma_req->req);
2352 [ + + + - : 6673528 : if (rdma_req->req.cmd->nvme_cmd.fuse == SPDK_NVME_CMD_FUSE_FIRST) {
+ - + - +
- + - ]
2353 [ # # # # : 0 : assert(rdma_req->fused_pair != NULL);
# # # # ]
2354 [ # # # # : 0 : assert(rdma_req->fused_pair->fused_pair != NULL);
# # # # #
# # # ]
2355 [ # # # # : 0 : rdma_req->fused_pair->state = RDMA_REQUEST_STATE_EXECUTING;
# # # # ]
2356 [ # # # # : 0 : spdk_nvmf_request_exec(&rdma_req->fused_pair->req);
# # ]
2357 [ # # # # : 0 : rdma_req->fused_pair->fused_pair = NULL;
# # # # ]
2358 [ # # # # ]: 0 : rdma_req->fused_pair = NULL;
2359 : 0 : }
2360 : 6673528 : break;
2361 : 5370877 : case RDMA_REQUEST_STATE_EXECUTING:
2362 [ + + + - : 5370882 : spdk_trace_record(TRACE_RDMA_REQUEST_STATE_EXECUTING, 0, 0,
# # # # #
# # # # #
# # # # ]
2363 : : (uintptr_t)rdma_req, (uintptr_t)rqpair);
2364 : : /* Some external code must kick a request into RDMA_REQUEST_STATE_EXECUTED
2365 : : * to escape this state. */
2366 : 5370882 : break;
2367 : 6673444 : case RDMA_REQUEST_STATE_EXECUTED:
2368 [ + + + - : 6673449 : spdk_trace_record(TRACE_RDMA_REQUEST_STATE_EXECUTED, 0, 0,
# # # # #
# # # # #
# # # # ]
2369 : : (uintptr_t)rdma_req, (uintptr_t)rqpair);
2370 [ + + + - : 6673454 : if (rsp->status.sc == SPDK_NVME_SC_SUCCESS &&
+ - + - +
+ ]
2371 [ + + + - : 5354804 : rdma_req->req.xfer == SPDK_NVME_DATA_CONTROLLER_TO_HOST) {
+ - ]
2372 [ + - + - : 3334353 : STAILQ_INSERT_TAIL(&rqpair->pending_rdma_write_queue, rdma_req, state_link);
+ - + - +
- + - + -
+ - + - +
- + - +
- ]
2373 [ + - + - ]: 3334353 : rdma_req->state = RDMA_REQUEST_STATE_DATA_TRANSFER_TO_HOST_PENDING;
2374 : 1 : } else {
2375 [ + - + - : 3339096 : STAILQ_INSERT_TAIL(&rqpair->pending_rdma_send_queue, rdma_req, state_link);
+ - + - +
- + - + -
+ - + - +
- + - +
- ]
2376 [ + - + - ]: 3339096 : rdma_req->state = RDMA_REQUEST_STATE_READY_TO_COMPLETE_PENDING;
2377 : : }
2378 [ + + + - : 6673449 : if (spdk_unlikely(rdma_req->req.dif_enabled)) {
+ - + - -
+ ]
2379 : : /* restore the original length */
2380 [ # # # # : 0 : rdma_req->req.length = rdma_req->req.dif.orig_length;
# # # # #
# # # #
# ]
2381 : :
2382 [ # # # # : 0 : if (rdma_req->req.xfer == SPDK_NVME_DATA_CONTROLLER_TO_HOST) {
# # # # ]
2383 : 0 : struct spdk_dif_error error_blk;
2384 : :
2385 [ # # # # : 0 : num_blocks = SPDK_CEIL_DIV(rdma_req->req.dif.elba_length, rdma_req->req.dif.dif_ctx.block_size);
# # # # #
# # # # #
# # # # #
# # # # #
# # # # #
# ]
2386 [ # # # # : 0 : if (!rdma_req->req.stripped_data) {
# # # # ]
2387 [ # # # # : 0 : rc = spdk_dif_verify(rdma_req->req.iov, rdma_req->req.iovcnt, num_blocks,
# # # # #
# ]
2388 [ # # # # : 0 : &rdma_req->req.dif.dif_ctx, &error_blk);
# # ]
2389 : 0 : } else {
2390 [ # # # # : 0 : rc = spdk_dif_verify_copy(rdma_req->req.stripped_data->iov,
# # # # ]
2391 [ # # # # : 0 : rdma_req->req.stripped_data->iovcnt,
# # # # #
# ]
2392 [ # # # # : 0 : rdma_req->req.iov, rdma_req->req.iovcnt, num_blocks,
# # # # #
# ]
2393 [ # # # # : 0 : &rdma_req->req.dif.dif_ctx, &error_blk);
# # ]
2394 : : }
2395 [ # # ]: 0 : if (rc) {
2396 [ # # # # : 0 : struct spdk_nvme_cpl *rsp = &rdma_req->req.rsp->nvme_cpl;
# # # # ]
2397 : :
2398 [ # # ]: 0 : SPDK_ERRLOG("DIF error detected. type=%d, offset=%" PRIu32 "\n", error_blk.err_type,
2399 : : error_blk.err_offset);
2400 [ # # # # : 0 : rsp->status.sct = SPDK_NVME_SCT_MEDIA_ERROR;
# # ]
2401 [ # # # # : 0 : rsp->status.sc = nvmf_rdma_dif_error_to_compl_status(error_blk.err_type);
# # ]
2402 [ # # # # : 0 : STAILQ_REMOVE(&rqpair->pending_rdma_write_queue, rdma_req, spdk_nvmf_rdma_request, state_link);
# # # # #
# # # # #
# # # # #
# # # # #
# # # # #
# # # # #
# # # # #
# # # # #
# # # # #
# # # # #
# # # # #
# # # # #
# # # # #
# # # # #
# # # # #
# # # # #
# # # # ]
2403 [ # # # # : 0 : STAILQ_INSERT_TAIL(&rqpair->pending_rdma_send_queue, rdma_req, state_link);
# # # # #
# # # # #
# # # # #
# # # #
# ]
2404 [ # # # # ]: 0 : rdma_req->state = RDMA_REQUEST_STATE_READY_TO_COMPLETE_PENDING;
2405 : 0 : }
2406 : 0 : }
2407 : 0 : }
2408 : 6673449 : break;
2409 : 3353918 : case RDMA_REQUEST_STATE_DATA_TRANSFER_TO_HOST_PENDING:
2410 [ + + + - : 3353919 : spdk_trace_record(TRACE_RDMA_REQUEST_STATE_DATA_TRANSFER_TO_HOST_PENDING, 0, 0,
# # # # #
# # # # #
# # # # ]
2411 : : (uintptr_t)rdma_req, (uintptr_t)rqpair);
2412 : :
2413 [ + + - + : 3353919 : if (rdma_req != STAILQ_FIRST(&rqpair->pending_rdma_write_queue)) {
- + - + ]
2414 : : /* This request needs to wait in line to perform RDMA */
2415 : 5929 : break;
2416 : : }
2417 [ + - + - : 3347991 : if ((rqpair->current_send_depth + rdma_req->num_outstanding_data_wr + 1) >
+ - + - -
+ ]
2418 [ + + + - ]: 3347990 : rqpair->max_send_depth) {
2419 : : /* We can only have so many WRs outstanding. We have to wait until some finish.
2420 : : * +1 since each request has an additional WR for the response. */
2421 [ # # # # : 13637 : rqpair->poller->stat.pending_rdma_write++;
# # # # ]
2422 : 13637 : break;
2423 : : }
2424 : :
2425 : : /* We have already verified that this request is the head of the queue. */
2426 [ + + + - : 3334353 : STAILQ_REMOVE_HEAD(&rqpair->pending_rdma_write_queue, state_link);
+ - + - +
- + - + -
+ - + - -
+ + - + -
+ - + - +
- ]
2427 : :
2428 : : /* The data transfer will be kicked off from
2429 : : * RDMA_REQUEST_STATE_READY_TO_COMPLETE state.
2430 : : * We verified that the data + response fit into the send queue, so we can go to the next state directly.
2431 : : */
2432 [ + - + - ]: 3334353 : rdma_req->state = RDMA_REQUEST_STATE_READY_TO_COMPLETE;
2433 : 3334353 : break;
2434 : 3339214 : case RDMA_REQUEST_STATE_READY_TO_COMPLETE_PENDING:
2435 [ + + + - : 3339221 : spdk_trace_record(TRACE_RDMA_REQUEST_STATE_READY_TO_COMPLETE_PENDING, 0, 0,
# # # # #
# # # # #
# # # # ]
2436 : : (uintptr_t)rdma_req, (uintptr_t)rqpair);
2437 : :
2438 [ + + + - : 3339221 : if (rdma_req != STAILQ_FIRST(&rqpair->pending_rdma_send_queue)) {
+ - - + ]
2439 : : /* This request needs to wait in line to send the completion */
2440 : 19 : break;
2441 : : }
2442 : :
2443 [ + + + - : 3339202 : assert(rqpair->current_send_depth <= rqpair->max_send_depth);
+ - + - -
+ # # ]
2444 [ + + + - : 3339202 : if (rqpair->current_send_depth == rqpair->max_send_depth) {
+ - + - +
+ ]
2445 : : /* We can only have so many WRs outstanding. We have to wait until some finish. */
2446 [ + - + - : 100 : rqpair->poller->stat.pending_rdma_send++;
+ - + - ]
2447 : 100 : break;
2448 : : }
2449 : :
2450 : : /* We have already verified that this request is the head of the queue. */
2451 [ + + + - : 3339102 : STAILQ_REMOVE_HEAD(&rqpair->pending_rdma_send_queue, state_link);
+ - + - +
- + - + -
+ - + - -
+ + - + -
+ - + - +
- ]
2452 : :
2453 : : /* The response sending will be kicked off from
2454 : : * RDMA_REQUEST_STATE_READY_TO_COMPLETE state.
2455 : : */
2456 [ + - + - ]: 3339102 : rdma_req->state = RDMA_REQUEST_STATE_READY_TO_COMPLETE;
2457 : 3339102 : break;
2458 : 6673449 : case RDMA_REQUEST_STATE_READY_TO_COMPLETE:
2459 [ + + + - : 6673455 : spdk_trace_record(TRACE_RDMA_REQUEST_STATE_READY_TO_COMPLETE, 0, 0,
# # # # #
# # # # #
# # # # ]
2460 : : (uintptr_t)rdma_req, (uintptr_t)rqpair);
2461 [ + - ]: 6673455 : rc = request_transfer_out(&rdma_req->req, &data_posted);
2462 [ - + # # ]: 6673455 : assert(rc == 0); /* No good way to handle this currently */
2463 [ - + ]: 6673455 : if (rc) {
2464 [ # # # # ]: 0 : rdma_req->state = RDMA_REQUEST_STATE_COMPLETED;
2465 : 0 : } else {
2466 [ + + - + ]: 6673455 : rdma_req->state = data_posted ? RDMA_REQUEST_STATE_TRANSFERRING_CONTROLLER_TO_HOST :
2467 : : RDMA_REQUEST_STATE_COMPLETING;
2468 : : }
2469 : 6673455 : break;
2470 : 3337338 : case RDMA_REQUEST_STATE_TRANSFERRING_CONTROLLER_TO_HOST:
2471 [ + + + - : 3337339 : spdk_trace_record(TRACE_RDMA_REQUEST_STATE_TRANSFERRING_CONTROLLER_TO_HOST, 0, 0,
# # # # #
# # # # #
# # # # ]
2472 : : (uintptr_t)rdma_req, (uintptr_t)rqpair);
2473 : : /* Some external code must kick a request into RDMA_REQUEST_STATE_COMPLETED
2474 : : * to escape this state. */
2475 : 3337339 : break;
2476 : 4638757 : case RDMA_REQUEST_STATE_COMPLETING:
2477 [ + + + - : 4638762 : spdk_trace_record(TRACE_RDMA_REQUEST_STATE_COMPLETING, 0, 0,
# # # # #
# # # # #
# # # # ]
2478 : : (uintptr_t)rdma_req, (uintptr_t)rqpair);
2479 : : /* Some external code must kick a request into RDMA_REQUEST_STATE_COMPLETED
2480 : : * to escape this state. */
2481 : 4638762 : break;
2482 : 6674056 : case RDMA_REQUEST_STATE_COMPLETED:
2483 [ + + + - : 6674062 : spdk_trace_record(TRACE_RDMA_REQUEST_STATE_COMPLETED, 0, 0,
# # # # #
# # # # #
# # # # ]
2484 : : (uintptr_t)rdma_req, (uintptr_t)rqpair);
2485 : :
2486 [ - + - + : 6674062 : rqpair->poller->stat.request_latency += spdk_get_ticks() - rdma_req->receive_tsc;
- + - + -
+ - + -
+ ]
2487 : 6674062 : _nvmf_rdma_request_free(rdma_req, rtransport);
2488 : 6674062 : break;
2489 : 0 : case RDMA_REQUEST_NUM_STATES:
2490 : : default:
2491 [ # # ]: 0 : assert(0);
2492 : : break;
2493 : : }
2494 : :
2495 [ + + + - : 71021828 : if (rdma_req->state != prev_state) {
+ + ]
2496 : 46219222 : progress = true;
2497 : 43 : }
2498 [ + + + - : 71021828 : } while (rdma_req->state != prev_state);
+ + ]
2499 : :
2500 [ + - ]: 24802606 : return progress;
2501 : 23 : }
2502 : :
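/*
 * Editorial note (not part of the covered source): summary of the transitions
 * in the request state machine above that are driven from outside the
 * do/while loop, each marked inline with "Some external code must kick a
 * request ... to escape this state.":
 *
 *   TRANSFERRING_HOST_TO_CONTROLLER  -> READY_TO_EXECUTE
 *   EXECUTING                        -> EXECUTED
 *   TRANSFERRING_CONTROLLER_TO_HOST  -> COMPLETED
 *   COMPLETING                       -> COMPLETED
 *
 * The usual triggers are, respectively, RDMA read completion, target/bdev
 * completion, and RDMA write/send completion; that attribution is an
 * interpretation and is not stated in this listing.
 */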
2503 : : /* Public API callbacks begin here */
2504 : :
2505 : : #define SPDK_NVMF_RDMA_DEFAULT_MAX_QUEUE_DEPTH 128
2506 : : #define SPDK_NVMF_RDMA_DEFAULT_AQ_DEPTH 128
2507 : : #define SPDK_NVMF_RDMA_DEFAULT_SRQ_DEPTH 4096
2508 : : #define SPDK_NVMF_RDMA_DEFAULT_MAX_QPAIRS_PER_CTRLR 128
2509 : : #define SPDK_NVMF_RDMA_DEFAULT_IN_CAPSULE_DATA_SIZE 4096
2510 : : #define SPDK_NVMF_RDMA_DEFAULT_MAX_IO_SIZE 131072
2511 : : #define SPDK_NVMF_RDMA_MIN_IO_BUFFER_SIZE (SPDK_NVMF_RDMA_DEFAULT_MAX_IO_SIZE / SPDK_NVMF_MAX_SGL_ENTRIES)
2512 : : #define SPDK_NVMF_RDMA_DEFAULT_NUM_SHARED_BUFFERS 4095
2513 : : #define SPDK_NVMF_RDMA_DEFAULT_BUFFER_CACHE_SIZE UINT32_MAX
2514 : : #define SPDK_NVMF_RDMA_DEFAULT_NO_SRQ false
2515 : : #define SPDK_NVMF_RDMA_DIF_INSERT_OR_STRIP false
2516 : : #define SPDK_NVMF_RDMA_ACCEPTOR_BACKLOG 100
2517 : : #define SPDK_NVMF_RDMA_DEFAULT_ABORT_TIMEOUT_SEC 1
2518 : : #define SPDK_NVMF_RDMA_DEFAULT_NO_WR_BATCHING false
2519 : :
2520 : : static void
2521 : 45 : nvmf_rdma_opts_init(struct spdk_nvmf_transport_opts *opts)
2522 : : {
2523 [ + - + - ]: 45 : opts->max_queue_depth = SPDK_NVMF_RDMA_DEFAULT_MAX_QUEUE_DEPTH;
2524 [ + - + - ]: 45 : opts->max_qpairs_per_ctrlr = SPDK_NVMF_RDMA_DEFAULT_MAX_QPAIRS_PER_CTRLR;
2525 [ + - + - ]: 45 : opts->in_capsule_data_size = SPDK_NVMF_RDMA_DEFAULT_IN_CAPSULE_DATA_SIZE;
2526 [ + - + - ]: 45 : opts->max_io_size = SPDK_NVMF_RDMA_DEFAULT_MAX_IO_SIZE;
2527 [ + - + - ]: 45 : opts->io_unit_size = SPDK_NVMF_RDMA_MIN_IO_BUFFER_SIZE;
2528 [ + - + - ]: 45 : opts->max_aq_depth = SPDK_NVMF_RDMA_DEFAULT_AQ_DEPTH;
2529 [ + - + - ]: 45 : opts->num_shared_buffers = SPDK_NVMF_RDMA_DEFAULT_NUM_SHARED_BUFFERS;
2530 [ + - + - ]: 45 : opts->buf_cache_size = SPDK_NVMF_RDMA_DEFAULT_BUFFER_CACHE_SIZE;
2531 [ + - + - ]: 45 : opts->dif_insert_or_strip = SPDK_NVMF_RDMA_DIF_INSERT_OR_STRIP;
2532 [ + - + - ]: 45 : opts->abort_timeout_sec = SPDK_NVMF_RDMA_DEFAULT_ABORT_TIMEOUT_SEC;
2533 [ + - + - ]: 45 : opts->transport_specific = NULL;
2534 : 45 : }
2535 : :
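/*
 * Illustrative sketch (not part of the covered source): starting from the
 * defaults populated by nvmf_rdma_opts_init() above and tightening a couple
 * of fields before creating the transport. It assumes the public helpers
 * spdk_nvmf_transport_opts_init() and spdk_nvmf_transport_create() with
 * roughly these signatures; exact prototypes vary between SPDK releases.
 */
static struct spdk_nvmf_transport *
example_create_rdma_transport(void)
{
	struct spdk_nvmf_transport_opts opts = {};

	if (!spdk_nvmf_transport_opts_init("RDMA", &opts, sizeof(opts))) {
		return NULL;
	}

	opts.max_queue_depth = 64;		/* below the 128 default above */
	opts.in_capsule_data_size = 8192;	/* larger in-capsule buffers */

	return spdk_nvmf_transport_create("RDMA", &opts);
}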
2536 : : static int nvmf_rdma_destroy(struct spdk_nvmf_transport *transport,
2537 : : spdk_nvmf_transport_destroy_done_cb cb_fn, void *cb_arg);
2538 : :
2539 : : static inline bool
2540 : 2154 : nvmf_rdma_is_rxe_device(struct spdk_nvmf_rdma_device *device)
2541 : : {
2542 [ + - # # : 4308 : return device->attr.vendor_id == SPDK_RDMA_RXE_VENDOR_ID_OLD ||
# # # # ]
2543 [ - + # # : 2154 : device->attr.vendor_id == SPDK_RDMA_RXE_VENDOR_ID_NEW;
# # ]
2544 : : }
2545 : :
2546 : : static int nvmf_rdma_accept(void *ctx);
2547 : : static bool nvmf_rdma_retry_listen_port(struct spdk_nvmf_rdma_transport *rtransport);
2548 : : static void destroy_ib_device(struct spdk_nvmf_rdma_transport *rtransport,
2549 : : struct spdk_nvmf_rdma_device *device);
2550 : :
2551 : : static int
2552 : 78 : create_ib_device(struct spdk_nvmf_rdma_transport *rtransport, struct ibv_context *context,
2553 : : struct spdk_nvmf_rdma_device **new_device)
2554 : : {
2555 : 0 : struct spdk_nvmf_rdma_device *device;
2556 : 78 : int flag = 0;
2557 : 78 : int rc = 0;
2558 : :
2559 : 78 : device = calloc(1, sizeof(*device));
2560 [ - + ]: 78 : if (!device) {
2561 : 0 : SPDK_ERRLOG("Unable to allocate memory for RDMA devices.\n");
2562 : 0 : return -ENOMEM;
2563 : : }
2564 [ # # # # ]: 78 : device->context = context;
2565 [ # # # # : 78 : rc = ibv_query_device(device->context, &device->attr);
# # ]
2566 [ - + ]: 78 : if (rc < 0) {
2567 : 0 : SPDK_ERRLOG("Failed to query RDMA device attributes.\n");
2568 : 0 : free(device);
2569 : 0 : return rc;
2570 : : }
2571 : :
2572 : : #ifdef SPDK_CONFIG_RDMA_SEND_WITH_INVAL
2573 [ - + # # : 78 : if ((device->attr.device_cap_flags & IBV_DEVICE_MEM_MGT_EXTENSIONS) == 0) {
# # # # ]
2574 : 0 : SPDK_WARNLOG("The libibverbs on this system supports SEND_WITH_INVALIDATE,");
2575 [ # # # # : 0 : SPDK_WARNLOG("but the device with vendor ID %u does not.\n", device->attr.vendor_id);
# # ]
2576 : 0 : }
2577 : :
2578 : : /**
2579 : : * The vendor ID is assigned by the IEEE and an ID of 0 implies Soft-RoCE.
2580 : : * The Soft-RoCE RXE driver does not currently support send with invalidate,
2581 : : * but incorrectly reports that it does. There are changes making their way
2582 : : * through the kernel now that will enable this feature. When they are merged,
2583 : : * we can conditionally enable this feature.
2584 : : *
2585 : : * TODO: enable this for versions of the kernel rxe driver that support it.
2586 : : */
2587 [ - + ]: 78 : if (nvmf_rdma_is_rxe_device(device)) {
2588 [ # # # # : 0 : device->attr.device_cap_flags &= ~(IBV_DEVICE_MEM_MGT_EXTENSIONS);
# # ]
2589 : 0 : }
2590 : : #endif
2591 : :
2592 : : /* Set up the device context async event fd as non-blocking. */
2593 [ # # # # : 78 : flag = fcntl(device->context->async_fd, F_GETFL);
# # # # ]
2594 [ # # # # : 78 : rc = fcntl(device->context->async_fd, F_SETFL, flag | O_NONBLOCK);
# # # # ]
2595 [ - + ]: 78 : if (rc < 0) {
2596 : 0 : SPDK_ERRLOG("Failed to set context async fd to NONBLOCK.\n");
2597 : 0 : free(device);
2598 : 0 : return rc;
2599 : : }
2600 : :
2601 [ # # # # : 78 : TAILQ_INSERT_TAIL(&rtransport->devices, device, link);
# # # # #
# # # # #
# # # # #
# # # # #
# # # # #
# # # # #
# # ]
2602 [ - + - + : 78 : SPDK_DEBUGLOG(rdma, "New device %p is added to RDMA transport\n", device);
# # ]
2603 : :
2604 [ - + ]: 78 : if (g_nvmf_hooks.get_ibv_pd) {
2605 [ # # # # : 0 : device->pd = g_nvmf_hooks.get_ibv_pd(NULL, device->context);
# # # # #
# # # ]
2606 : 0 : } else {
2607 [ # # # # : 78 : device->pd = ibv_alloc_pd(device->context);
# # # # ]
2608 : : }
2609 : :
2610 [ - + # # : 78 : if (!device->pd) {
# # ]
2611 : 0 : SPDK_ERRLOG("Unable to allocate protection domain.\n");
2612 : 0 : destroy_ib_device(rtransport, device);
2613 : 0 : return -ENOMEM;
2614 : : }
2615 : :
2616 [ - + # # : 78 : assert(device->map == NULL);
# # # # ]
2617 : :
2618 [ # # # # : 78 : device->map = spdk_rdma_create_mem_map(device->pd, &g_nvmf_hooks, SPDK_RDMA_MEMORY_MAP_ROLE_TARGET);
# # # # ]
2619 [ - + # # : 78 : if (!device->map) {
# # ]
2620 : 0 : SPDK_ERRLOG("Unable to allocate memory map for listen address\n");
2621 : 0 : destroy_ib_device(rtransport, device);
2622 : 0 : return -ENOMEM;
2623 : : }
2624 : :
2625 [ - + # # : 78 : assert(device->map != NULL);
# # # # ]
2626 [ - + # # : 78 : assert(device->pd != NULL);
# # # # ]
2627 : :
2628 [ + - ]: 78 : if (new_device) {
2629 [ # # ]: 78 : *new_device = device;
2630 : 0 : }
2631 [ # # # # ]: 78 : SPDK_NOTICELOG("Create IB device %s(%p/%p) succeeded.\n", ibv_get_device_name(context->device),
2632 : : device, context);
2633 : :
2634 : 78 : return 0;
2635 : 0 : }
2636 : :
2637 : : static void
2638 : 39 : free_poll_fds(struct spdk_nvmf_rdma_transport *rtransport)
2639 : : {
2640 [ + - # # : 39 : if (rtransport->poll_fds) {
# # ]
2641 [ # # # # ]: 39 : free(rtransport->poll_fds);
2642 [ # # # # ]: 39 : rtransport->poll_fds = NULL;
2643 : 0 : }
2644 [ # # # # ]: 39 : rtransport->npoll_fds = 0;
2645 : 39 : }
2646 : :
2647 : : static int
2648 : 39 : generate_poll_fds(struct spdk_nvmf_rdma_transport *rtransport)
2649 : : {
2650 : : /* Set up poll descriptor array to monitor events from RDMA and IB
2651 : : * in a single poll syscall
2652 : : */
2653 : 39 : int device_count = 0;
2654 : 39 : int i = 0;
2655 : 0 : struct spdk_nvmf_rdma_device *device, *tmp;
2656 : :
2657 [ + + # # : 117 : TAILQ_FOREACH_SAFE(device, &rtransport->devices, link, tmp) {
# # # # #
# # # # #
# # ]
2658 [ # # ]: 78 : device_count++;
2659 : 0 : }
2660 : :
2661 [ # # # # : 39 : rtransport->npoll_fds = device_count + 1;
# # ]
2662 : :
2663 [ # # # # : 39 : rtransport->poll_fds = calloc(rtransport->npoll_fds, sizeof(struct pollfd));
# # # # ]
2664 [ - + # # : 39 : if (rtransport->poll_fds == NULL) {
# # ]
2665 : 0 : SPDK_ERRLOG("poll_fds allocation failed\n");
2666 : 0 : return -ENOMEM;
2667 : : }
2668 : :
2669 [ # # # # : 39 : rtransport->poll_fds[i].fd = rtransport->event_channel->fd;
# # # # #
# # # # #
# # # # ]
2670 [ # # # # : 39 : rtransport->poll_fds[i++].events = POLLIN;
# # # # #
# # # ]
2671 : :
2672 [ + + # # : 117 : TAILQ_FOREACH_SAFE(device, &rtransport->devices, link, tmp) {
# # # # #
# # # # #
# # ]
2673 [ # # # # : 78 : rtransport->poll_fds[i].fd = device->context->async_fd;
# # # # #
# # # # #
# # # # ]
2674 [ # # # # : 78 : rtransport->poll_fds[i++].events = POLLIN;
# # # # #
# # # ]
2675 : 0 : }
2676 : :
2677 : 39 : return 0;
2678 : 0 : }
2679 : :
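/*
 * Illustrative sketch (not part of the covered source): how a poll descriptor
 * array built by generate_poll_fds() above is typically consumed. Slot 0 is
 * the rdma_cm event channel and the remaining slots are the per-device async
 * fds, so one poll() call watches both kinds of events. The helpers
 * handle_cm_event()/handle_ib_async_event() are hypothetical placeholders for
 * the real dispatch code.
 */
static void
example_poll_fds_once(struct spdk_nvmf_rdma_transport *rtransport)
{
	int i, nfds;

	/* Non-blocking poll over the event channel fd and every device async fd. */
	nfds = poll(rtransport->poll_fds, rtransport->npoll_fds, 0);
	if (nfds <= 0) {
		return;
	}

	if (rtransport->poll_fds[0].revents & POLLIN) {
		handle_cm_event(rtransport);			/* hypothetical helper */
	}
	for (i = 1; i < rtransport->npoll_fds; i++) {
		if (rtransport->poll_fds[i].revents & POLLIN) {
			handle_ib_async_event(rtransport, i);	/* hypothetical helper */
		}
	}
}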
2680 : : static struct spdk_nvmf_transport *
2681 : 39 : nvmf_rdma_create(struct spdk_nvmf_transport_opts *opts)
2682 : : {
2683 : 0 : int rc;
2684 : 0 : struct spdk_nvmf_rdma_transport *rtransport;
2685 : 0 : struct spdk_nvmf_rdma_device *device;
2686 : 0 : struct ibv_context **contexts;
2687 : 0 : uint32_t i;
2688 : 0 : int flag;
2689 : 0 : uint32_t sge_count;
2690 : 0 : uint32_t min_shared_buffers;
2691 : 0 : uint32_t min_in_capsule_data_size;
2692 : 39 : int max_device_sge = SPDK_NVMF_MAX_SGL_ENTRIES;
2693 : :
2694 : 39 : rtransport = calloc(1, sizeof(*rtransport));
2695 [ - + ]: 39 : if (!rtransport) {
2696 : 0 : return NULL;
2697 : : }
2698 : :
2699 [ # # # # : 39 : TAILQ_INIT(&rtransport->devices);
# # # # #
# # # # #
# # ]
2700 [ # # # # : 39 : TAILQ_INIT(&rtransport->ports);
# # # # #
# # # # #
# # ]
2701 [ # # # # : 39 : TAILQ_INIT(&rtransport->poll_groups);
# # # # #
# # # # #
# # ]
2702 [ # # # # : 39 : TAILQ_INIT(&rtransport->retry_ports);
# # # # #
# # # # #
# # ]
2703 : :
2704 [ # # # # : 39 : rtransport->transport.ops = &spdk_nvmf_transport_rdma;
# # ]
2705 [ # # # # : 39 : rtransport->rdma_opts.num_cqe = DEFAULT_NVMF_RDMA_CQ_SIZE;
# # ]
2706 [ # # # # : 39 : rtransport->rdma_opts.max_srq_depth = SPDK_NVMF_RDMA_DEFAULT_SRQ_DEPTH;
# # ]
2707 [ # # # # : 39 : rtransport->rdma_opts.no_srq = SPDK_NVMF_RDMA_DEFAULT_NO_SRQ;
# # ]
2708 [ # # # # : 39 : rtransport->rdma_opts.acceptor_backlog = SPDK_NVMF_RDMA_ACCEPTOR_BACKLOG;
# # ]
2709 [ # # # # : 39 : rtransport->rdma_opts.no_wr_batching = SPDK_NVMF_RDMA_DEFAULT_NO_WR_BATCHING;
# # ]
2710 [ + - - + : 78 : if (opts->transport_specific != NULL &&
# # # # ]
2711 [ # # # # ]: 39 : spdk_json_decode_object_relaxed(opts->transport_specific, rdma_transport_opts_decoder,
2712 : : SPDK_COUNTOF(rdma_transport_opts_decoder),
2713 [ # # ]: 39 : &rtransport->rdma_opts)) {
2714 : 0 : SPDK_ERRLOG("spdk_json_decode_object_relaxed failed\n");
2715 [ # # ]: 0 : nvmf_rdma_destroy(&rtransport->transport, NULL, NULL);
2716 : 0 : return NULL;
2717 : : }
2718 : :
2719 [ - + - + : 39 : SPDK_INFOLOG(rdma, "*** RDMA Transport Init ***\n"
- - - - #
# # # # #
# # # # #
# # # # #
# # # # #
# # # # #
# # # # #
# # # # #
# # # # #
# # # # #
# # # # #
# # # # #
# # # # #
# # # #
# ]
2720 : : " Transport opts: max_ioq_depth=%d, max_io_size=%d,\n"
2721 : : " max_io_qpairs_per_ctrlr=%d, io_unit_size=%d,\n"
2722 : : " in_capsule_data_size=%d, max_aq_depth=%d,\n"
2723 : : " num_shared_buffers=%d, num_cqe=%d, max_srq_depth=%d, no_srq=%d,"
2724 : : " acceptor_backlog=%d, no_wr_batching=%d abort_timeout_sec=%d\n",
2725 : : opts->max_queue_depth,
2726 : : opts->max_io_size,
2727 : : opts->max_qpairs_per_ctrlr - 1,
2728 : : opts->io_unit_size,
2729 : : opts->in_capsule_data_size,
2730 : : opts->max_aq_depth,
2731 : : opts->num_shared_buffers,
2732 : : rtransport->rdma_opts.num_cqe,
2733 : : rtransport->rdma_opts.max_srq_depth,
2734 : : rtransport->rdma_opts.no_srq,
2735 : : rtransport->rdma_opts.acceptor_backlog,
2736 : : rtransport->rdma_opts.no_wr_batching,
2737 : : opts->abort_timeout_sec);
2738 : :
2739 : : /* I/O unit size cannot be larger than max I/O size */
2740 [ - + # # : 39 : if (opts->io_unit_size > opts->max_io_size) {
# # # # #
# ]
2741 [ # # # # : 0 : opts->io_unit_size = opts->max_io_size;
# # # # ]
2742 : 0 : }
2743 : :
2744 [ - + # # : 39 : if (rtransport->rdma_opts.acceptor_backlog <= 0) {
# # # # ]
2745 : 0 : SPDK_ERRLOG("The acceptor backlog cannot be less than 1, setting to the default value of (%d).\n",
2746 : : SPDK_NVMF_RDMA_ACCEPTOR_BACKLOG);
2747 [ # # # # : 0 : rtransport->rdma_opts.acceptor_backlog = SPDK_NVMF_RDMA_ACCEPTOR_BACKLOG;
# # ]
2748 : 0 : }
2749 : :
2750 [ - + # # : 39 : if (opts->num_shared_buffers < (SPDK_NVMF_MAX_SGL_ENTRIES * 2)) {
# # ]
2751 [ # # # # ]: 0 : SPDK_ERRLOG("The number of shared data buffers (%d) is less than "
2752 : : "the minimum number required to guarantee that forward progress can be made (%d)\n",
2753 : : opts->num_shared_buffers, (SPDK_NVMF_MAX_SGL_ENTRIES * 2));
2754 [ # # ]: 0 : nvmf_rdma_destroy(&rtransport->transport, NULL, NULL);
2755 : 0 : return NULL;
2756 : : }
2757 : :
2758 : : /* If buf_cache_size == UINT32_MAX, we will dynamically pick a cache size later that we know will fit. */
2759 [ - + # # : 39 : if (opts->buf_cache_size < UINT32_MAX) {
# # ]
2760 [ # # # # ]: 0 : min_shared_buffers = spdk_env_get_core_count() * opts->buf_cache_size;
2761 [ # # # # : 0 : if (min_shared_buffers > opts->num_shared_buffers) {
# # ]
2762 [ # # # # ]: 0 : SPDK_ERRLOG("There are not enough buffers to satisfy "
2763 : : "per-poll group caches for each thread. (%" PRIu32 ") "
2764 : : "supplied, (%" PRIu32 ") required\n", opts->num_shared_buffers, min_shared_buffers);
2765 : 0 : SPDK_ERRLOG("Please specify a larger number of shared buffers\n");
2766 [ # # ]: 0 : nvmf_rdma_destroy(&rtransport->transport, NULL, NULL);
2767 : 0 : return NULL;
2768 : : }
2769 : 0 : }
2770 : :
2771 [ - + # # : 39 : sge_count = opts->max_io_size / opts->io_unit_size;
# # # # #
# ]
2772 [ - + ]: 39 : if (sge_count > NVMF_DEFAULT_TX_SGE) {
2773 [ # # # # ]: 0 : SPDK_ERRLOG("Unsupported IO Unit size specified, %d bytes\n", opts->io_unit_size);
2774 [ # # ]: 0 : nvmf_rdma_destroy(&rtransport->transport, NULL, NULL);
2775 : 0 : return NULL;
2776 : : }
2777 : :
2778 : 39 : min_in_capsule_data_size = sizeof(struct spdk_nvme_sgl_descriptor) * SPDK_NVMF_MAX_SGL_ENTRIES;
2779 [ + + # # : 39 : if (opts->in_capsule_data_size < min_in_capsule_data_size) {
# # ]
2780 : 4 : SPDK_WARNLOG("In-capsule data size is set to %u, the minimum size required to support msdbd=16\n",
2781 : : min_in_capsule_data_size);
2782 [ # # # # ]: 4 : opts->in_capsule_data_size = min_in_capsule_data_size;
2783 : 0 : }
2784 : :
2785 [ # # # # ]: 39 : rtransport->event_channel = rdma_create_event_channel();
2786 [ - + # # : 39 : if (rtransport->event_channel == NULL) {
# # ]
2787 [ # # ]: 0 : SPDK_ERRLOG("rdma_create_event_channel() failed, %s\n", spdk_strerror(errno));
2788 [ # # ]: 0 : nvmf_rdma_destroy(&rtransport->transport, NULL, NULL);
2789 : 0 : return NULL;
2790 : : }
2791 : :
2792 [ # # # # : 39 : flag = fcntl(rtransport->event_channel->fd, F_GETFL);
# # # # ]
2793 [ - + # # : 39 : if (fcntl(rtransport->event_channel->fd, F_SETFL, flag | O_NONBLOCK) < 0) {
# # # # #
# ]
2794 [ # # # # : 0 : SPDK_ERRLOG("fcntl can't set nonblocking mode for socket, fd: %d (%s)\n",
# # # # #
# ]
2795 : : rtransport->event_channel->fd, spdk_strerror(errno));
2796 [ # # ]: 0 : nvmf_rdma_destroy(&rtransport->transport, NULL, NULL);
2797 : 0 : return NULL;
2798 : : }
2799 : :
2800 [ # # # # ]: 39 : rtransport->data_wr_pool = spdk_mempool_create("spdk_nvmf_rdma_wr_data",
2801 [ # # # # : 39 : opts->max_queue_depth * SPDK_NVMF_MAX_SGL_ENTRIES,
# # ]
2802 : : sizeof(struct spdk_nvmf_rdma_request_data),
2803 : : SPDK_MEMPOOL_DEFAULT_CACHE_SIZE,
2804 : : SPDK_ENV_SOCKET_ID_ANY);
2805 [ - + # # : 39 : if (!rtransport->data_wr_pool) {
# # ]
2806 [ # # ]: 0 : if (spdk_mempool_lookup("spdk_nvmf_rdma_wr_data") != NULL) {
2807 : 0 : SPDK_ERRLOG("Unable to allocate work request pool for poll group: already exists\n");
2808 : 0 : SPDK_ERRLOG("Probably running in multiprocess environment, which is "
2809 : : "unsupported by the nvmf library\n");
2810 : 0 : } else {
2811 : 0 : SPDK_ERRLOG("Unable to allocate work request pool for poll group\n");
2812 : : }
2813 [ # # ]: 0 : nvmf_rdma_destroy(&rtransport->transport, NULL, NULL);
2814 : 0 : return NULL;
2815 : : }
2816 : :
2817 : 39 : contexts = rdma_get_devices(NULL);
2818 [ - + ]: 39 : if (contexts == NULL) {
2819 [ # # # # ]: 0 : SPDK_ERRLOG("rdma_get_devices() failed: %s (%d)\n", spdk_strerror(errno), errno);
2820 [ # # ]: 0 : nvmf_rdma_destroy(&rtransport->transport, NULL, NULL);
2821 : 0 : return NULL;
2822 : : }
2823 : :
2824 : 39 : i = 0;
2825 : 39 : rc = 0;
2826 [ + + # # : 117 : while (contexts[i] != NULL) {
# # ]
2827 [ # # # # ]: 78 : rc = create_ib_device(rtransport, contexts[i], &device);
2828 [ - + ]: 78 : if (rc < 0) {
2829 : 0 : break;
2830 : : }
2831 : 78 : i++;
2832 [ # # # # : 78 : max_device_sge = spdk_min(max_device_sge, device->attr.max_sge);
# # # # #
# # # #
# ]
2833 [ # # # # ]: 78 : device->is_ready = true;
2834 : : }
2835 : 39 : rdma_free_devices(contexts);
2836 : :
2837 [ - + # # : 39 : if (opts->io_unit_size * max_device_sge < opts->max_io_size) {
# # # # #
# ]
2838 : : /* divide and round up. */
2839 [ # # # # : 0 : opts->io_unit_size = (opts->max_io_size + max_device_sge - 1) / max_device_sge;
# # # # #
# ]
2840 : :
2841 : : /* round up to the nearest 4k. */
2842 [ # # # # : 0 : opts->io_unit_size = (opts->io_unit_size + NVMF_DATA_BUFFER_ALIGNMENT - 1) & ~NVMF_DATA_BUFFER_MASK;
# # # # #
# # # ]
2843 : :
2844 [ # # # # : 0 : opts->io_unit_size = spdk_max(opts->io_unit_size, SPDK_NVMF_RDMA_MIN_IO_BUFFER_SIZE);
# # # # #
# # # #
# ]
2845 [ # # # # ]: 0 : SPDK_NOTICELOG("Adjusting the io unit size to fit the device's maximum I/O size. New I/O unit size %u\n",
2846 : : opts->io_unit_size);
2847 : 0 : }
2848 : :
2849 [ - + ]: 39 : if (rc < 0) {
2850 [ # # ]: 0 : nvmf_rdma_destroy(&rtransport->transport, NULL, NULL);
2851 : 0 : return NULL;
2852 : : }
2853 : :
2854 : 39 : rc = generate_poll_fds(rtransport);
2855 [ - + ]: 39 : if (rc < 0) {
2856 [ # # ]: 0 : nvmf_rdma_destroy(&rtransport->transport, NULL, NULL);
2857 : 0 : return NULL;
2858 : : }
2859 : :
2860 [ # # # # : 39 : rtransport->accept_poller = SPDK_POLLER_REGISTER(nvmf_rdma_accept, &rtransport->transport,
# # # # #
# ]
2861 : : opts->acceptor_poll_rate);
2862 [ - + # # : 39 : if (!rtransport->accept_poller) {
# # ]
2863 [ # # ]: 0 : nvmf_rdma_destroy(&rtransport->transport, NULL, NULL);
2864 : 0 : return NULL;
2865 : : }
2866 : :
2867 [ # # ]: 39 : return &rtransport->transport;
2868 : 0 : }
2869 : :
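/*
 * Worked example (editorial, not part of the covered source) for the
 * io_unit_size adjustment in nvmf_rdma_create() above. With the default
 * max_io_size of 131072 and a hypothetical device reporting max_sge = 8:
 *
 *   io_unit_size = (131072 + 8 - 1) / 8            = 16384   (divide, round up)
 *   io_unit_size = round up to the nearest 4k      = 16384   (already aligned)
 *   io_unit_size = max(16384, MIN_IO_BUFFER_SIZE)  = 16384
 *
 * so a full-size I/O still fits in the device's max_sge buffers. With the
 * defaults alone (max_device_sge = SPDK_NVMF_MAX_SGL_ENTRIES) the condition
 * io_unit_size * max_device_sge < max_io_size is false and no adjustment
 * happens.
 */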
2870 : : static void
2871 : 78 : destroy_ib_device(struct spdk_nvmf_rdma_transport *rtransport,
2872 : : struct spdk_nvmf_rdma_device *device)
2873 : : {
2874 [ + + # # : 78 : TAILQ_REMOVE(&rtransport->devices, device, link);
# # # # #
# # # # #
# # # # #
# # # # #
# # # # #
# # # # #
# # # # #
# # # # #
# # # # #
# # # ]
2875 [ # # ]: 78 : spdk_rdma_free_mem_map(&device->map);
2876 [ + - # # : 78 : if (device->pd) {
# # ]
2877 [ + - ]: 78 : if (!g_nvmf_hooks.get_ibv_pd) {
2878 [ # # # # ]: 78 : ibv_dealloc_pd(device->pd);
2879 : 0 : }
2880 : 0 : }
2881 [ - + - + : 78 : SPDK_DEBUGLOG(rdma, "IB device [%p] is destroyed.\n", device);
# # ]
2882 : 78 : free(device);
2883 : 78 : }
2884 : :
2885 : : static void
2886 : 44 : nvmf_rdma_dump_opts(struct spdk_nvmf_transport *transport, struct spdk_json_write_ctx *w)
2887 : : {
2888 : 0 : struct spdk_nvmf_rdma_transport *rtransport;
2889 [ - + # # ]: 44 : assert(w != NULL);
2890 : :
2891 : 44 : rtransport = SPDK_CONTAINEROF(transport, struct spdk_nvmf_rdma_transport, transport);
2892 [ # # # # : 44 : spdk_json_write_named_uint32(w, "max_srq_depth", rtransport->rdma_opts.max_srq_depth);
# # ]
2893 [ - + # # : 44 : spdk_json_write_named_bool(w, "no_srq", rtransport->rdma_opts.no_srq);
# # # # ]
2894 [ - + - + : 44 : if (rtransport->rdma_opts.no_srq == true) {
# # # # #
# ]
2895 [ # # # # : 0 : spdk_json_write_named_int32(w, "num_cqe", rtransport->rdma_opts.num_cqe);
# # ]
2896 : 0 : }
2897 [ # # # # : 44 : spdk_json_write_named_int32(w, "acceptor_backlog", rtransport->rdma_opts.acceptor_backlog);
# # ]
2898 [ - + # # : 44 : spdk_json_write_named_bool(w, "no_wr_batching", rtransport->rdma_opts.no_wr_batching);
# # # # ]
2899 : 44 : }
2900 : :
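/*
 * Illustrative output (editorial, not part of the covered source): with the
 * defaults above, nvmf_rdma_dump_opts() emits transport-specific JSON along
 * these lines; "num_cqe" is written only when no_srq is true.
 *
 *   "max_srq_depth": 4096,
 *   "no_srq": false,
 *   "acceptor_backlog": 100,
 *   "no_wr_batching": false
 */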
2901 : : static int
2902 : 39 : nvmf_rdma_destroy(struct spdk_nvmf_transport *transport,
2903 : : spdk_nvmf_transport_destroy_done_cb cb_fn, void *cb_arg)
2904 : : {
2905 : 0 : struct spdk_nvmf_rdma_transport *rtransport;
2906 : 0 : struct spdk_nvmf_rdma_port *port, *port_tmp;
2907 : 0 : struct spdk_nvmf_rdma_device *device, *device_tmp;
2908 : :
2909 : 39 : rtransport = SPDK_CONTAINEROF(transport, struct spdk_nvmf_rdma_transport, transport);
2910 : :
2911 [ - + # # : 39 : TAILQ_FOREACH_SAFE(port, &rtransport->retry_ports, link, port_tmp) {
# # # # #
# # # # #
# # ]
2912 [ # # # # : 0 : TAILQ_REMOVE(&rtransport->retry_ports, port, link);
# # # # #
# # # # #
# # # # #
# # # # #
# # # # #
# # # # #
# # # # #
# # # # #
# # # # #
# # # ]
2913 : 0 : free(port);
2914 : 0 : }
2915 : :
2916 [ - + # # : 39 : TAILQ_FOREACH_SAFE(port, &rtransport->ports, link, port_tmp) {
# # # # #
# # # # #
# # ]
2917 [ # # # # : 0 : TAILQ_REMOVE(&rtransport->ports, port, link);
# # # # #
# # # # #
# # # # #
# # # # #
# # # # #
# # # # #
# # # # #
# # # # #
# # # # #
# # # ]
2918 [ # # # # ]: 0 : rdma_destroy_id(port->id);
2919 : 0 : free(port);
2920 : 0 : }
2921 : :
2922 : 39 : free_poll_fds(rtransport);
2923 : :
2924 [ + - # # : 39 : if (rtransport->event_channel != NULL) {
# # ]
2925 [ # # # # ]: 39 : rdma_destroy_event_channel(rtransport->event_channel);
2926 : 0 : }
2927 : :
2928 [ + + # # : 117 : TAILQ_FOREACH_SAFE(device, &rtransport->devices, link, device_tmp) {
# # # # #
# # # # #
# # ]
2929 : 78 : destroy_ib_device(rtransport, device);
2930 : 0 : }
2931 : :
2932 [ + - # # : 39 : if (rtransport->data_wr_pool != NULL) {
# # ]
2933 [ # # # # : 39 : if (spdk_mempool_count(rtransport->data_wr_pool) !=
# # ]
2934 [ - + # # : 39 : (transport->opts.max_queue_depth * SPDK_NVMF_MAX_SGL_ENTRIES)) {
# # # # ]
2935 [ # # # # : 0 : SPDK_ERRLOG("transport wr pool count is %zu but should be %u\n",
# # # # #
# # # ]
2936 : : spdk_mempool_count(rtransport->data_wr_pool),
2937 : : transport->opts.max_queue_depth * SPDK_NVMF_MAX_SGL_ENTRIES);
2938 : 0 : }
2939 : 0 : }
2940 : :
2941 [ # # # # ]: 39 : spdk_mempool_free(rtransport->data_wr_pool);
2942 : :
2943 [ # # ]: 39 : spdk_poller_unregister(&rtransport->accept_poller);
2944 : 39 : free(rtransport);
2945 : :
2946 [ + - ]: 39 : if (cb_fn) {
2947 [ # # # # ]: 39 : cb_fn(cb_arg);
2948 : 0 : }
2949 : 39 : return 0;
2950 : 0 : }
2951 : :
2952 : : static int nvmf_rdma_trid_from_cm_id(struct rdma_cm_id *id,
2953 : : struct spdk_nvme_transport_id *trid,
2954 : : bool peer);
2955 : :
2956 : : static bool nvmf_rdma_rescan_devices(struct spdk_nvmf_rdma_transport *rtransport);
2957 : :
2958 : : static int
2959 : 58 : nvmf_rdma_listen(struct spdk_nvmf_transport *transport, const struct spdk_nvme_transport_id *trid,
2960 : : struct spdk_nvmf_listen_opts *listen_opts)
2961 : : {
2962 : 0 : struct spdk_nvmf_rdma_transport *rtransport;
2963 : 0 : struct spdk_nvmf_rdma_device *device;
2964 : 0 : struct spdk_nvmf_rdma_port *port, *tmp_port;
2965 : 0 : struct addrinfo *res;
2966 : 0 : struct addrinfo hints;
2967 : 0 : int family;
2968 : 0 : int rc;
2969 : 0 : long int port_val;
2970 : 58 : bool is_retry = false;
2971 : :
2972 [ - + # # : 58 : if (!strlen(trid->trsvcid)) {
# # ]
2973 : 0 : SPDK_ERRLOG("Service id is required\n");
2974 : 0 : return -EINVAL;
2975 : : }
2976 : :
2977 : 58 : rtransport = SPDK_CONTAINEROF(transport, struct spdk_nvmf_rdma_transport, transport);
2978 [ - + # # : 58 : assert(rtransport->event_channel != NULL);
# # # # ]
2979 : :
2980 : 58 : port = calloc(1, sizeof(*port));
2981 [ - + ]: 58 : if (!port) {
2982 : 0 : SPDK_ERRLOG("Port allocation failed\n");
2983 : 0 : return -ENOMEM;
2984 : : }
2985 : :
2986 [ # # # # ]: 58 : port->trid = trid;
2987 : :
2988 [ + - - # : 58 : switch (trid->adrfam) {
# # # ]
2989 : 58 : case SPDK_NVMF_ADRFAM_IPV4:
2990 : 58 : family = AF_INET;
2991 : 58 : break;
2992 : 0 : case SPDK_NVMF_ADRFAM_IPV6:
2993 : 0 : family = AF_INET6;
2994 : 0 : break;
2995 : 0 : default:
2996 [ # # # # ]: 0 : SPDK_ERRLOG("Unhandled ADRFAM %d\n", trid->adrfam);
2997 : 0 : free(port);
2998 : 0 : return -EINVAL;
2999 : : }
3000 : :
3001 [ - + ]: 58 : memset(&hints, 0, sizeof(hints));
3002 [ # # ]: 58 : hints.ai_family = family;
3003 : 58 : hints.ai_flags = AI_NUMERICSERV;
3004 [ # # ]: 58 : hints.ai_socktype = SOCK_STREAM;
3005 [ # # ]: 58 : hints.ai_protocol = 0;
3006 : :
3007 : : /* Range check the trsvcid. Fail in 3 cases:
3008 : : * < 0: means that spdk_strtol hit an error
3009 : : * 0: this results in an ephemeral port, which we don't want
3010 : : * > 65535: port too high
3011 : : */
3012 [ # # ]: 58 : port_val = spdk_strtol(trid->trsvcid, 10);
3013 [ + - - + ]: 58 : if (port_val <= 0 || port_val > 65535) {
3014 [ # # ]: 0 : SPDK_ERRLOG("invalid trsvcid %s\n", trid->trsvcid);
3015 : 0 : free(port);
3016 : 0 : return -EINVAL;
3017 : : }
3018 : :
3019 [ # # # # ]: 58 : rc = getaddrinfo(trid->traddr, trid->trsvcid, &hints, &res);
3020 [ - + ]: 58 : if (rc) {
3021 : 0 : SPDK_ERRLOG("getaddrinfo failed: %s (%d)\n", gai_strerror(rc), rc);
3022 : 0 : free(port);
3023 [ # # ]: 0 : return -(abs(rc));
3024 : : }
3025 : :
3026 [ # # # # : 58 : rc = rdma_create_id(rtransport->event_channel, &port->id, port, RDMA_PS_TCP);
# # ]
3027 [ - + ]: 58 : if (rc < 0) {
3028 : 0 : SPDK_ERRLOG("rdma_create_id() failed\n");
3029 : 0 : freeaddrinfo(res);
3030 : 0 : free(port);
3031 : 0 : return rc;
3032 : : }
3033 : :
3034 [ # # # # : 58 : rc = rdma_bind_addr(port->id, res->ai_addr);
# # # # ]
3035 : 58 : freeaddrinfo(res);
3036 : :
3037 [ - + ]: 58 : if (rc < 0) {
3038 [ # # # # : 0 : TAILQ_FOREACH(tmp_port, &rtransport->retry_ports, link) {
# # # # #
# # # #
# ]
3039 [ # # # # : 0 : if (spdk_nvme_transport_id_compare(tmp_port->trid, trid) == 0) {
# # ]
3040 : 0 : is_retry = true;
3041 : 0 : break;
3042 : : }
3043 : 0 : }
3044 [ # # # # ]: 0 : if (!is_retry) {
3045 : 0 : SPDK_ERRLOG("rdma_bind_addr() failed\n");
3046 : 0 : }
3047 [ # # # # ]: 0 : rdma_destroy_id(port->id);
3048 : 0 : free(port);
3049 : 0 : return rc;
3050 : : }
3051 : :
3052 [ - + # # : 58 : if (!port->id->verbs) {
# # # # #
# ]
3053 : 0 : SPDK_ERRLOG("ibv_context is null\n");
3054 [ # # # # ]: 0 : rdma_destroy_id(port->id);
3055 : 0 : free(port);
3056 : 0 : return -1;
3057 : : }
3058 : :
3059 [ # # # # : 58 : rc = rdma_listen(port->id, rtransport->rdma_opts.acceptor_backlog);
# # # # #
# ]
3060 [ - + ]: 58 : if (rc < 0) {
3061 : 0 : SPDK_ERRLOG("rdma_listen() failed\n");
3062 [ # # # # ]: 0 : rdma_destroy_id(port->id);
3063 : 0 : free(port);
3064 : 0 : return rc;
3065 : : }
3066 : :
3067 [ + - # # : 59 : TAILQ_FOREACH(device, &rtransport->devices, link) {
# # # # #
# # # #
# ]
3068 [ + + - + : 59 : if (device->context == port->id->verbs && device->is_ready) {
+ - # # #
# # # # #
# # # # #
# # # ]
3069 [ # # # # ]: 58 : port->device = device;
3070 : 58 : break;
3071 : : }
3072 : 0 : }
3073 [ - + # # : 58 : if (!port->device) {
# # ]
3074 [ # # # # : 0 : SPDK_ERRLOG("Accepted a connection with verbs %p, but unable to find a corresponding device.\n",
# # # # ]
3075 : : port->id->verbs);
3076 [ # # # # ]: 0 : rdma_destroy_id(port->id);
3077 : 0 : free(port);
3078 : 0 : nvmf_rdma_rescan_devices(rtransport);
3079 : 0 : return -EINVAL;
3080 : : }
3081 : :
3082 [ # # # # ]: 58 : SPDK_NOTICELOG("*** NVMe/RDMA Target Listening on %s port %s ***\n",
3083 : : trid->traddr, trid->trsvcid);
3084 : :
3085 [ # # # # : 58 : TAILQ_INSERT_TAIL(&rtransport->ports, port, link);
# # # # #
# # # # #
# # # # #
# # # # #
# # # # #
# # # # #
# # ]
3086 : 58 : return 0;
3087 : 0 : }
3088 : :
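/*
 * Illustrative sketch (not part of the covered source): building the
 * transport ID that eventually reaches nvmf_rdma_listen() above. The
 * adrfam/traddr/trsvcid fields are the ones that function validates. The
 * spdk_nvmf_listen_opts_init()/spdk_nvmf_tgt_listen_ext() calls are
 * assumptions about the public target API and may differ between releases.
 */
static int
example_listen_rdma(struct spdk_nvmf_tgt *tgt)
{
	struct spdk_nvme_transport_id trid = {};
	struct spdk_nvmf_listen_opts lopts = {};

	spdk_nvme_trid_populate_transport(&trid, SPDK_NVME_TRANSPORT_RDMA);
	trid.adrfam = SPDK_NVMF_ADRFAM_IPV4;
	snprintf(trid.traddr, sizeof(trid.traddr), "192.168.1.10");
	snprintf(trid.trsvcid, sizeof(trid.trsvcid), "4420");	/* must be 1-65535 */

	spdk_nvmf_listen_opts_init(&lopts, sizeof(lopts));	/* assumed helper */
	return spdk_nvmf_tgt_listen_ext(tgt, &trid, &lopts);	/* assumed API */
}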
3089 : : static void
3090 : 58 : nvmf_rdma_stop_listen_ex(struct spdk_nvmf_transport *transport,
3091 : : const struct spdk_nvme_transport_id *trid, bool need_retry)
3092 : : {
3093 : 0 : struct spdk_nvmf_rdma_transport *rtransport;
3094 : 0 : struct spdk_nvmf_rdma_port *port, *tmp;
3095 : :
3096 : 58 : rtransport = SPDK_CONTAINEROF(transport, struct spdk_nvmf_rdma_transport, transport);
3097 : :
3098 [ + - # # ]: 58 : if (!need_retry) {
3099 [ - + # # : 58 : TAILQ_FOREACH_SAFE(port, &rtransport->retry_ports, link, tmp) {
# # # # #
# # # # #
# # ]
3100 [ # # # # : 0 : if (spdk_nvme_transport_id_compare(port->trid, trid) == 0) {
# # ]
3101 [ # # # # : 0 : TAILQ_REMOVE(&rtransport->retry_ports, port, link);
# # # # #
# # # # #
# # # # #
# # # # #
# # # # #
# # # # #
# # # # #
# # # # #
# # # # #
# # # ]
3102 : 0 : free(port);
3103 : 0 : }
3104 : 0 : }
3105 : 0 : }
3106 : :
3107 [ + - # # : 64 : TAILQ_FOREACH_SAFE(port, &rtransport->ports, link, tmp) {
# # # # #
# # # # #
# # ]
3108 [ + + # # : 64 : if (spdk_nvme_transport_id_compare(port->trid, trid) == 0) {
# # ]
3109 [ - + - + : 58 : SPDK_DEBUGLOG(rdma, "Port %s:%s removed. need retry: %d\n",
# # # # #
# # # # #
# # # # #
# ]
3110 : : port->trid->traddr, port->trid->trsvcid, need_retry);
3111 [ + + # # : 58 : TAILQ_REMOVE(&rtransport->ports, port, link);
# # # # #
# # # # #
# # # # #
# # # # #
# # # # #
# # # # #
# # # # #
# # # # #
# # # # #
# # # ]
3112 [ # # # # ]: 58 : rdma_destroy_id(port->id);
3113 [ # # # # ]: 58 : port->id = NULL;
3114 [ # # # # ]: 58 : port->device = NULL;
3115 [ - + # # ]: 58 : if (need_retry) {
3116 [ # # # # : 0 : TAILQ_INSERT_TAIL(&rtransport->retry_ports, port, link);
# # # # #
# # # # #
# # # # #
# # # # #
# # # # #
# # # # #
# # ]
3117 : 0 : } else {
3118 : 58 : free(port);
3119 : : }
3120 : 58 : break;
3121 : : }
3122 : 0 : }
3123 : 58 : }
3124 : :
3125 : : static void
3126 : 58 : nvmf_rdma_stop_listen(struct spdk_nvmf_transport *transport,
3127 : : const struct spdk_nvme_transport_id *trid)
3128 : : {
3129 : 58 : nvmf_rdma_stop_listen_ex(transport, trid, false);
3130 : 58 : }
3131 : :
3132 : : static void _nvmf_rdma_register_poller_in_group(void *c);
3133 : : static void _nvmf_rdma_remove_poller_in_group(void *c);
3134 : :
3135 : : static bool
3136 : 0 : nvmf_rdma_all_pollers_management_done(void *c)
3137 : : {
3138 : 0 : struct poller_manage_ctx *ctx = c;
3139 : 0 : int counter;
3140 : :
3141 [ # # # # ]: 0 : counter = __atomic_sub_fetch(ctx->inflight_op_counter, 1, __ATOMIC_SEQ_CST);
3142 [ # # # # : 0 : SPDK_DEBUGLOG(rdma, "nvmf_rdma_all_pollers_management_done called. counter: %d, poller: %p\n",
# # # # #
# ]
3143 : : counter, ctx->rpoller);
3144 : :
3145 [ # # ]: 0 : if (counter == 0) {
3146 [ # # # # ]: 0 : free((void *)ctx->inflight_op_counter);
3147 : 0 : }
3148 : 0 : free(ctx);
3149 : :
3150 : 0 : return counter == 0;
3151 : 0 : }
3152 : :
3153 : : static int
3154 : 0 : nvmf_rdma_manage_poller(struct spdk_nvmf_rdma_transport *rtransport,
3155 : : struct spdk_nvmf_rdma_device *device, bool *has_inflight, bool is_add)
3156 : : {
3157 : 0 : struct spdk_nvmf_rdma_poll_group *rgroup;
3158 : 0 : struct spdk_nvmf_rdma_poller *rpoller;
3159 : 0 : struct spdk_nvmf_poll_group *poll_group;
3160 : 0 : struct poller_manage_ctx *ctx;
3161 : 0 : bool found;
3162 : 0 : int *inflight_counter;
3163 : 0 : spdk_msg_fn do_fn;
3164 : :
3165 [ # # ]: 0 : *has_inflight = false;
3166 [ # # ]: 0 : do_fn = is_add ? _nvmf_rdma_register_poller_in_group : _nvmf_rdma_remove_poller_in_group;
3167 : 0 : inflight_counter = calloc(1, sizeof(int));
3168 [ # # ]: 0 : if (!inflight_counter) {
3169 : 0 : SPDK_ERRLOG("Failed to allocate inflight counter when removing pollers\n");
3170 : 0 : return -ENOMEM;
3171 : : }
3172 : :
3173 [ # # # # : 0 : TAILQ_FOREACH(rgroup, &rtransport->poll_groups, link) {
# # # # #
# # # #
# ]
3174 [ # # ]: 0 : (*inflight_counter)++;
3175 : 0 : }
3176 : :
3177 [ # # # # : 0 : TAILQ_FOREACH(rgroup, &rtransport->poll_groups, link) {
# # # # #
# # # #
# ]
3178 : 0 : found = false;
3179 [ # # # # : 0 : TAILQ_FOREACH(rpoller, &rgroup->pollers, link) {
# # # # #
# # # #
# ]
3180 [ # # # # : 0 : if (rpoller->device == device) {
# # ]
3181 : 0 : found = true;
3182 : 0 : break;
3183 : : }
3184 : 0 : }
3185 [ # # # # : 0 : if (found == is_add) {
# # ]
3186 : 0 : __atomic_fetch_sub(inflight_counter, 1, __ATOMIC_SEQ_CST);
3187 : 0 : continue;
3188 : : }
3189 : :
3190 : 0 : ctx = calloc(1, sizeof(struct poller_manage_ctx));
3191 [ # # ]: 0 : if (!ctx) {
3192 : 0 : SPDK_ERRLOG("Failed to allocate poller_manage_ctx when removing pollers\n");
3193 [ # # # # : 0 : if (!*has_inflight) {
# # ]
3194 : 0 : free(inflight_counter);
3195 : 0 : }
3196 : 0 : return -ENOMEM;
3197 : : }
3198 : :
3199 [ # # # # ]: 0 : ctx->rtransport = rtransport;
3200 [ # # # # ]: 0 : ctx->rgroup = rgroup;
3201 [ # # # # ]: 0 : ctx->rpoller = rpoller;
3202 [ # # # # ]: 0 : ctx->device = device;
3203 [ # # # # ]: 0 : ctx->thread = spdk_get_thread();
3204 [ # # # # ]: 0 : ctx->inflight_op_counter = inflight_counter;
3205 [ # # ]: 0 : *has_inflight = true;
3206 : :
3207 [ # # # # : 0 : poll_group = rgroup->group.group;
# # ]
3208 [ # # # # : 0 : if (poll_group->thread != spdk_get_thread()) {
# # ]
3209 [ # # # # ]: 0 : spdk_thread_send_msg(poll_group->thread, do_fn, ctx);
3210 : 0 : } else {
3211 [ # # # # ]: 0 : do_fn(ctx);
3212 : : }
3213 : 0 : }
3214 : :
3215 [ # # # # : 0 : if (!*has_inflight) {
# # ]
3216 : 0 : free(inflight_counter);
3217 : 0 : }
3218 : :
3219 : 0 : return 0;
3220 : 0 : }
3221 : :
3222 : : static void nvmf_rdma_handle_device_removal(struct spdk_nvmf_rdma_transport *rtransport,
3223 : : struct spdk_nvmf_rdma_device *device);
3224 : :
3225 : : static struct spdk_nvmf_rdma_device *
3226 : 0 : nvmf_rdma_find_ib_device(struct spdk_nvmf_rdma_transport *rtransport,
3227 : : struct ibv_context *context)
3228 : : {
3229 : 0 : struct spdk_nvmf_rdma_device *device, *tmp_device;
3230 : :
3231 [ # # # # : 0 : TAILQ_FOREACH_SAFE(device, &rtransport->devices, link, tmp_device) {
# # # # #
# # # # #
# # ]
3232 [ # # # # : 0 : if (device->need_destroy) {
# # # # ]
3233 : 0 : continue;
3234 : : }
3235 : :
3236 [ # # # # : 0 : if (strcmp(device->context->device->dev_name, context->device->dev_name) == 0) {
# # # # #
# # # # #
# # # # #
# # # ]
3237 : 0 : return device;
3238 : : }
3239 : 0 : }
3240 : :
3241 : 0 : return NULL;
3242 : 0 : }
3243 : :
3244 : : static bool
3245 : 0 : nvmf_rdma_check_devices_context(struct spdk_nvmf_rdma_transport *rtransport,
3246 : : struct ibv_context *context)
3247 : : {
3248 : 0 : struct spdk_nvmf_rdma_device *old_device, *new_device;
3249 : 0 : int rc = 0;
3250 : 0 : bool has_inflight;
3251 : :
3252 : 0 : old_device = nvmf_rdma_find_ib_device(rtransport, context);
3253 : :
3254 [ # # ]: 0 : if (old_device) {
3255 [ # # # # : 0 : if (old_device->context != context && !old_device->need_destroy && old_device->is_ready) {
# # # # #
# # # # #
# # # # #
# # # ]
3256 : : /* The old context may not have been cleaned up by the time we rescan. Exactly one
3257 : : * context is valid for a device, so this context must be invalid; just remove it. */
3258 [ # # # # ]: 0 : SPDK_WARNLOG("Device %p has an invalid context %p\n", old_device, old_device->context);
3259 [ # # # # ]: 0 : old_device->need_destroy = true;
3260 : 0 : nvmf_rdma_handle_device_removal(rtransport, old_device);
3261 : 0 : }
3262 : 0 : return false;
3263 : : }
3264 : :
3265 : 0 : rc = create_ib_device(rtransport, context, &new_device);
3266 : : /* TODO: update transport opts. */
3267 [ # # ]: 0 : if (rc < 0) {
3268 [ # # # # ]: 0 : SPDK_ERRLOG("Failed to create ib device for context: %s(%p)\n",
3269 : : ibv_get_device_name(context->device), context);
3270 : 0 : return false;
3271 : : }
3272 : :
3273 : 0 : rc = nvmf_rdma_manage_poller(rtransport, new_device, &has_inflight, true);
3274 [ # # ]: 0 : if (rc < 0) {
3275 [ # # # # ]: 0 : SPDK_ERRLOG("Failed to add poller for device context: %s(%p)\n",
3276 : : ibv_get_device_name(context->device), context);
3277 : 0 : return false;
3278 : : }
3279 : :
3280 [ # # # # ]: 0 : if (has_inflight) {
3281 [ # # # # ]: 0 : new_device->is_ready = true;
3282 : 0 : }
3283 : :
3284 : 0 : return true;
3285 : 0 : }
3286 : :
3287 : : static bool
3288 : 0 : nvmf_rdma_rescan_devices(struct spdk_nvmf_rdma_transport *rtransport)
3289 : : {
3290 : 0 : struct spdk_nvmf_rdma_device *device;
3291 : 0 : struct ibv_device **ibv_device_list = NULL;
3292 : 0 : struct ibv_context **contexts = NULL;
3293 : 0 : int i = 0;
3294 : 0 : int num_dev = 0;
3295 : 0 : bool new_create = false, has_new_device = false;
3296 : 0 : struct ibv_context *tmp_verbs = NULL;
3297 : :
3298 : : /* Do not rescan while any device is being destroyed, or its context may be freed while
3299 : : * regenerating the poll fds.
3300 : : */
3301 [ # # # # : 0 : TAILQ_FOREACH(device, &rtransport->devices, link) {
# # # # #
# # # #
# ]
3302 [ # # # # : 0 : if (device->need_destroy) {
# # # # ]
3303 : 0 : return false;
3304 : : }
3305 : 0 : }
3306 : :
3307 : 0 : ibv_device_list = ibv_get_device_list(&num_dev);
3308 : :
3309 : : /* There is a bug in librdmacm: if verbs initialization failed in rdma_get_devices, the
3310 : : * verbs context is marked as dead and is never initialized again. So we need to make sure
3311 : : * the verbs context is available before we call rdma_get_devices. */
3312 [ # # ]: 0 : if (num_dev >= 0) {
3313 [ # # # # ]: 0 : for (i = 0; i < num_dev; i++) {
3314 [ # # # # ]: 0 : tmp_verbs = ibv_open_device(ibv_device_list[i]);
3315 [ # # ]: 0 : if (!tmp_verbs) {
3316 [ # # # # : 0 : SPDK_WARNLOG("Failed to init ibv device %p, err %d. Skip rescan.\n", ibv_device_list[i], errno);
# # ]
3317 : 0 : break;
3318 : : }
3319 [ # # ]: 0 : if (nvmf_rdma_find_ib_device(rtransport, tmp_verbs) == NULL) {
3320 [ # # # # : 0 : SPDK_DEBUGLOG(rdma, "Find new verbs init ibv device %p(%s).\n", ibv_device_list[i],
# # # # #
# # # # #
# # ]
3321 : : tmp_verbs->device->dev_name);
3322 : 0 : has_new_device = true;
3323 : 0 : }
3324 : 0 : ibv_close_device(tmp_verbs);
3325 : 0 : }
3326 : 0 : ibv_free_device_list(ibv_device_list);
3327 [ # # # # : 0 : if (!tmp_verbs || !has_new_device) {
# # ]
3328 : 0 : return false;
3329 : : }
3330 : 0 : }
3331 : :
3332 : 0 : contexts = rdma_get_devices(NULL);
3333 : :
3334 [ # # # # : 0 : for (i = 0; contexts && contexts[i] != NULL; i++) {
# # # # #
# ]
3335 [ # # # # : 0 : new_create |= nvmf_rdma_check_devices_context(rtransport, contexts[i]);
# # ]
3336 : 0 : }
3337 : :
3338 [ # # # # ]: 0 : if (new_create) {
3339 : 0 : free_poll_fds(rtransport);
3340 : 0 : generate_poll_fds(rtransport);
3341 : 0 : }
3342 : :
3343 [ # # ]: 0 : if (contexts) {
3344 : 0 : rdma_free_devices(contexts);
3345 : 0 : }
3346 : :
3347 [ # # ]: 0 : return new_create;
3348 : 0 : }
3349 : :
3350 : : static bool
3351 : 89270 : nvmf_rdma_retry_listen_port(struct spdk_nvmf_rdma_transport *rtransport)
3352 : : {
3353 : 0 : struct spdk_nvmf_rdma_port *port, *tmp_port;
3354 : 89270 : int rc = 0;
3355 : 89270 : bool new_create = false;
3356 : :
3357 [ + - # # : 89270 : if (TAILQ_EMPTY(&rtransport->retry_ports)) {
# # # # ]
3358 : 89270 : return false;
3359 : : }
3360 : :
3361 : 0 : new_create = nvmf_rdma_rescan_devices(rtransport);
3362 : :
3363 [ # # # # : 0 : TAILQ_FOREACH_SAFE(port, &rtransport->retry_ports, link, tmp_port) {
# # # # #
# # # # #
# # ]
3364 [ # # # # : 0 : rc = nvmf_rdma_listen(&rtransport->transport, port->trid, NULL);
# # ]
3365 : :
3366 [ # # # # : 0 : TAILQ_REMOVE(&rtransport->retry_ports, port, link);
# # # # #
# # # # #
# # # # #
# # # # #
# # # # #
# # # # #
# # # # #
# # # # #
# # # # #
# # # ]
3367 [ # # ]: 0 : if (rc) {
3368 [ # # # # ]: 0 : if (new_create) {
3369 [ # # # # : 0 : SPDK_ERRLOG("Found new IB device but port %s:%s still fails to listen (%d).\n",
# # # # #
# # # ]
3370 : : port->trid->traddr, port->trid->trsvcid, rc);
3371 : 0 : }
3372 [ # # # # : 0 : TAILQ_INSERT_TAIL(&rtransport->retry_ports, port, link);
# # # # #
# # # # #
# # # # #
# # # # #
# # # # #
# # # # #
# # ]
3373 : 0 : break;
3374 : : } else {
3375 [ # # # # : 0 : SPDK_NOTICELOG("Port %s:%s came back\n", port->trid->traddr, port->trid->trsvcid);
# # # # #
# # # ]
3376 : 0 : free(port);
3377 : : }
3378 : 0 : }
3379 : :
3380 : 0 : return true;
3381 : 0 : }
3382 : :
3383 : : static void
3384 : 14161161 : nvmf_rdma_qpair_process_pending(struct spdk_nvmf_rdma_transport *rtransport,
3385 : : struct spdk_nvmf_rdma_qpair *rqpair, bool drain)
3386 : : {
3387 : 0 : struct spdk_nvmf_request *req, *tmp;
3388 : 0 : struct spdk_nvmf_rdma_request *rdma_req, *req_tmp;
3389 : 0 : struct spdk_nvmf_rdma_resources *resources;
3390 : :
3391 : : /* First process requests which are waiting for a response to be sent. */
3392 [ + + # # : 14161244 : STAILQ_FOREACH_SAFE(rdma_req, &rqpair->pending_rdma_send_queue, state_link, req_tmp) {
# # # # #
# # # # #
# # ]
3393 [ + + + - : 107 : if (nvmf_rdma_request_process(rtransport, rdma_req) == false && drain == false) {
# # ]
3394 : 24 : break;
3395 : : }
3396 : 0 : }
3397 : :
3398 : : /* We process I/O in the data transfer pending queue at the highest priority. */
3399 [ + + # # : 14540078 : STAILQ_FOREACH_SAFE(rdma_req, &rqpair->pending_rdma_read_queue, state_link, req_tmp) {
# # # # #
# # # # #
# # ]
3400 [ + + + - : 2069232 : if (nvmf_rdma_request_process(rtransport, rdma_req) == false && drain == false) {
# # ]
3401 : 1690315 : break;
3402 : : }
3403 : 0 : }
3404 : :
3405 : : /* Then RDMA writes, since reads have stronger restrictions than writes. */
3406 [ + + # # : 14167543 : STAILQ_FOREACH_SAFE(rdma_req, &rqpair->pending_rdma_write_queue, state_link, req_tmp) {
# # # # #
# # # # #
# # ]
3407 [ + + + - : 19566 : if (nvmf_rdma_request_process(rtransport, rdma_req) == false && drain == false) {
# # ]
3408 : 13184 : break;
3409 : : }
3410 : 0 : }
3411 : :
3412 : : /* Then we handle requests waiting on memory buffers. */
3413 [ + + # # : 14747957 : STAILQ_FOREACH_SAFE(req, &rqpair->poller->group->group.pending_buf_queue, buf_link, tmp) {
# # # # #
# # # # #
# # # # #
# # # # #
# # ]
3414 : 2032750 : rdma_req = SPDK_CONTAINEROF(req, struct spdk_nvmf_rdma_request, req);
3415 [ + + + - : 2032750 : if (nvmf_rdma_request_process(rtransport, rdma_req) == false && drain == false) {
# # ]
3416 : 1445954 : break;
3417 : : }
3418 : 0 : }
3419 : :
3420 [ # # # # ]: 14161161 : resources = rqpair->resources;
3421 [ + - + + : 20836075 : while (!STAILQ_EMPTY(&resources->free_queue) && !STAILQ_EMPTY(&resources->incoming_queue)) {
# # # # #
# # # # #
# # ]
3422 [ # # # # : 6674914 : rdma_req = STAILQ_FIRST(&resources->free_queue);
# # ]
3423 [ - + # # : 6674914 : STAILQ_REMOVE_HEAD(&resources->free_queue, state_link);
# # # # #
# # # # #
# # # # #
# # # # #
# # # # #
# ]
3424 [ # # # # : 6674914 : rdma_req->recv = STAILQ_FIRST(&resources->incoming_queue);
# # # # #
# ]
3425 [ + - # # : 6674914 : STAILQ_REMOVE_HEAD(&resources->incoming_queue, link);
# # # # #
# # # # #
# # # # #
# # # # #
# # # # #
# ]
3426 : :
3427 [ + - # # : 6674914 : if (rqpair->srq != NULL) {
# # ]
3428 [ # # # # : 6674914 : rdma_req->req.qpair = &rdma_req->recv->qpair->qpair;
# # # # #
# # # # #
# # ]
3429 [ # # # # : 6674914 : rdma_req->recv->qpair->qd++;
# # # # #
# ]
3430 : 0 : } else {
3431 [ # # ]: 0 : rqpair->qd++;
3432 : : }
3433 : :
3434 [ # # # # : 6674914 : rdma_req->receive_tsc = rdma_req->recv->receive_tsc;
# # # # #
# # # ]
3435 [ # # # # ]: 6674914 : rdma_req->state = RDMA_REQUEST_STATE_NEW;
3436 [ - + ]: 6674914 : if (nvmf_rdma_request_process(rtransport, rdma_req) == false) {
3437 : 0 : break;
3438 : : }
3439 : : }
3440 [ - + - - : 14161161 : if (!STAILQ_EMPTY(&resources->incoming_queue) && STAILQ_EMPTY(&resources->free_queue)) {
# # # # #
# # # # #
# # ]
3441 [ # # # # : 0 : rqpair->poller->stat.pending_free_request++;
# # # # ]
3442 : 0 : }
3443 : 14161161 : }
3444 : :
3445 : : static inline bool
3446 : 2076 : nvmf_rdma_can_ignore_last_wqe_reached(struct spdk_nvmf_rdma_device *device)
3447 : : {
3448 : : /* The iWARP transport and the SoftRoCE driver don't support the LAST_WQE_REACHED ibv async event. */
3449 [ + - ]: 4152 : return nvmf_rdma_is_rxe_device(device) ||
3450 [ - + # # : 2076 : device->context->device->transport_type == IBV_TRANSPORT_IWARP;
# # # # #
# # # ]
3451 : : }
3452 : :
3453 : : static void
3454 : 4778 : nvmf_rdma_destroy_drained_qpair(struct spdk_nvmf_rdma_qpair *rqpair)
3455 : : {
3456 [ # # # # : 4778 : struct spdk_nvmf_rdma_transport *rtransport = SPDK_CONTAINEROF(rqpair->qpair.transport,
# # ]
3457 : : struct spdk_nvmf_rdma_transport, transport);
3458 : :
3459 : 4778 : nvmf_rdma_qpair_process_pending(rtransport, rqpair, true);
3460 : :
3461 : : /* nvmf_rdma_close_qpair is not called */
3462 [ - + + + : 4778 : if (!rqpair->to_close) {
# # # # ]
3463 : 53 : return;
3464 : : }
3465 : :
3466 : : /* The device is already destroyed, so we should force-destroy this qpair. */
3467 [ + - - + : 4725 : if (rqpair->poller && rqpair->poller->need_destroy) {
- + # # #
# # # # #
# # # # ]
3468 : 0 : nvmf_rdma_qpair_destroy(rqpair);
3469 : 0 : return;
3470 : : }
3471 : :
3472 : : /* In the non-SRQ path, we will reach rqpair->max_queue_depth. In the SRQ path, we will get the last_wqe event. */
3473 [ + + # # : 4725 : if (rqpair->current_send_depth != 0) {
# # ]
3474 : 610 : return;
3475 : : }
3476 : :
3477 [ - + - - : 4115 : if (rqpair->srq == NULL && rqpair->current_recv_depth != rqpair->max_queue_depth) {
# # # # #
# # # # #
# # ]
3478 : 0 : return;
3479 : : }
3480 : :
3481 [ + - - + : 4115 : if (rqpair->srq != NULL && rqpair->last_wqe_reached == false &&
+ + # # #
# # # # #
# # ]
3482 [ + - # # ]: 2076 : !nvmf_rdma_can_ignore_last_wqe_reached(rqpair->device)) {
3483 : 2076 : return;
3484 : : }
3485 : :
3486 [ - + # # : 2039 : assert(rqpair->qpair.state == SPDK_NVMF_QPAIR_ERROR);
# # # # #
# ]
3487 : :
3488 : 2039 : nvmf_rdma_qpair_destroy(rqpair);
3489 [ # # ]: 0 : }
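: : /* Condensed restatement of the drain checks above: the qpair is destroyed only once
: :  * nvmf_rdma_close_qpair() has set to_close, all sends have completed
: :  * (current_send_depth == 0), and the receives are fully accounted for - either
: :  * current_recv_depth is back at max_queue_depth (non-SRQ) or LAST_WQE_REACHED has
: :  * been observed (SRQ), unless the device cannot deliver that event at all. */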
3490 : :
3491 : : static int
3492 : 2010 : nvmf_rdma_disconnect(struct rdma_cm_event *evt, bool *event_acked)
3493 : : {
3494 : 0 : struct spdk_nvmf_qpair *qpair;
3495 : 0 : struct spdk_nvmf_rdma_qpair *rqpair;
3496 : :
3497 [ - + # # : 2010 : if (evt->id == NULL) {
# # ]
3498 : 0 : SPDK_ERRLOG("disconnect request: missing cm_id\n");
3499 : 0 : return -1;
3500 : : }
3501 : :
3502 [ # # # # : 2010 : qpair = evt->id->context;
# # # # ]
3503 [ - + ]: 2010 : if (qpair == NULL) {
3504 : 0 : SPDK_ERRLOG("disconnect request: no active connection\n");
3505 : 0 : return -1;
3506 : : }
3507 : :
3508 : 2010 : rdma_ack_cm_event(evt);
3509 [ # # ]: 2010 : *event_acked = true;
3510 : :
3511 : 2010 : rqpair = SPDK_CONTAINEROF(qpair, struct spdk_nvmf_rdma_qpair, qpair);
3512 : :
3513 [ + + + - : 2010 : spdk_trace_record(TRACE_RDMA_QP_DISCONNECT, 0, 0, (uintptr_t)rqpair);
# # # # #
# # # # #
# # # # ]
3514 : :
3515 [ # # ]: 2010 : spdk_nvmf_qpair_disconnect(&rqpair->qpair, NULL, NULL);
3516 : :
3517 : 2010 : return 0;
3518 : 0 : }
3519 : :
3520 : : #ifdef DEBUG
3521 : : static const char *CM_EVENT_STR[] = {
3522 : : "RDMA_CM_EVENT_ADDR_RESOLVED",
3523 : : "RDMA_CM_EVENT_ADDR_ERROR",
3524 : : "RDMA_CM_EVENT_ROUTE_RESOLVED",
3525 : : "RDMA_CM_EVENT_ROUTE_ERROR",
3526 : : "RDMA_CM_EVENT_CONNECT_REQUEST",
3527 : : "RDMA_CM_EVENT_CONNECT_RESPONSE",
3528 : : "RDMA_CM_EVENT_CONNECT_ERROR",
3529 : : "RDMA_CM_EVENT_UNREACHABLE",
3530 : : "RDMA_CM_EVENT_REJECTED",
3531 : : "RDMA_CM_EVENT_ESTABLISHED",
3532 : : "RDMA_CM_EVENT_DISCONNECTED",
3533 : : "RDMA_CM_EVENT_DEVICE_REMOVAL",
3534 : : "RDMA_CM_EVENT_MULTICAST_JOIN",
3535 : : "RDMA_CM_EVENT_MULTICAST_ERROR",
3536 : : "RDMA_CM_EVENT_ADDR_CHANGE",
3537 : : "RDMA_CM_EVENT_TIMEWAIT_EXIT"
3538 : : };
3539 : : #endif /* DEBUG */
3540 : :
3541 : : static void
3542 : 0 : nvmf_rdma_disconnect_qpairs_on_port(struct spdk_nvmf_rdma_transport *rtransport,
3543 : : struct spdk_nvmf_rdma_port *port)
3544 : : {
3545 : 0 : struct spdk_nvmf_rdma_poll_group *rgroup;
3546 : 0 : struct spdk_nvmf_rdma_poller *rpoller;
3547 : 0 : struct spdk_nvmf_rdma_qpair *rqpair;
3548 : :
3549 [ # # # # : 0 : TAILQ_FOREACH(rgroup, &rtransport->poll_groups, link) {
# # # # #
# # # #
# ]
3550 [ # # # # : 0 : TAILQ_FOREACH(rpoller, &rgroup->pollers, link) {
# # # # #
# # # #
# ]
3551 [ # # # # ]: 0 : RB_FOREACH(rqpair, qpairs_tree, &rpoller->qpairs) {
3552 [ # # # # : 0 : if (rqpair->listen_id == port->id) {
# # # # #
# ]
3553 [ # # ]: 0 : spdk_nvmf_qpair_disconnect(&rqpair->qpair, NULL, NULL);
3554 : 0 : }
3555 : 0 : }
3556 : 0 : }
3557 : 0 : }
3558 : 0 : }
3559 : :
3560 : : static bool
3561 : 0 : nvmf_rdma_handle_cm_event_addr_change(struct spdk_nvmf_transport *transport,
3562 : : struct rdma_cm_event *event)
3563 : : {
3564 : 0 : const struct spdk_nvme_transport_id *trid;
3565 : 0 : struct spdk_nvmf_rdma_port *port;
3566 : 0 : struct spdk_nvmf_rdma_transport *rtransport;
3567 : 0 : bool event_acked = false;
3568 : :
3569 : 0 : rtransport = SPDK_CONTAINEROF(transport, struct spdk_nvmf_rdma_transport, transport);
3570 [ # # # # : 0 : TAILQ_FOREACH(port, &rtransport->ports, link) {
# # # # #
# # # #
# ]
3571 [ # # # # : 0 : if (port->id == event->id) {
# # # # #
# ]
3572 [ # # # # : 0 : SPDK_ERRLOG("ADDR_CHANGE: IP %s:%s migrated\n", port->trid->traddr, port->trid->trsvcid);
# # # # #
# # # ]
3573 : 0 : rdma_ack_cm_event(event);
3574 : 0 : event_acked = true;
3575 [ # # # # ]: 0 : trid = port->trid;
3576 : 0 : break;
3577 : : }
3578 : 0 : }
3579 : :
3580 [ # # # # ]: 0 : if (event_acked) {
3581 : 0 : nvmf_rdma_disconnect_qpairs_on_port(rtransport, port);
3582 : :
3583 : 0 : nvmf_rdma_stop_listen(transport, trid);
3584 : 0 : nvmf_rdma_listen(transport, trid, NULL);
3585 : 0 : }
3586 : :
3587 [ # # ]: 0 : return event_acked;
3588 : 0 : }
3589 : :
3590 : : static void
3591 : 0 : nvmf_rdma_handle_device_removal(struct spdk_nvmf_rdma_transport *rtransport,
3592 : : struct spdk_nvmf_rdma_device *device)
3593 : : {
3594 : 0 : struct spdk_nvmf_rdma_port *port, *port_tmp;
3595 : 0 : int rc;
3596 : 0 : bool has_inflight;
3597 : :
3598 : 0 : rc = nvmf_rdma_manage_poller(rtransport, device, &has_inflight, false);
3599 [ # # ]: 0 : if (rc) {
3600 : 0 : SPDK_ERRLOG("Failed to handle device removal, rc %d\n", rc);
3601 : 0 : return;
3602 : : }
3603 : :
3604 [ # # # # ]: 0 : if (!has_inflight) {
3605 : : /* no pollers, destroy the device */
3606 [ # # # # ]: 0 : device->ready_to_destroy = true;
3607 : 0 : spdk_thread_send_msg(spdk_get_thread(), _nvmf_rdma_remove_destroyed_device, rtransport);
3608 : 0 : }
3609 : :
3610 [ # # # # : 0 : TAILQ_FOREACH_SAFE(port, &rtransport->ports, link, port_tmp) {
# # # # #
# # # # #
# # ]
3611 [ # # # # : 0 : if (port->device == device) {
# # ]
3612 [ # # # # : 0 : SPDK_NOTICELOG("Port %s:%s on device %s is being removed.\n",
# # # # #
# # # # #
# # # # #
# # # #
# ]
3613 : : port->trid->traddr,
3614 : : port->trid->trsvcid,
3615 : : ibv_get_device_name(port->device->context->device));
3616 : :
3617 : : /* Keep the NVMF listener and destroy only the RDMA transport's
3618 : : * structures. When the device comes back we can retry listening
3619 : : * and the application's workflow will not be interrupted.
3620 : : */
3621 [ # # # # : 0 : nvmf_rdma_stop_listen_ex(&rtransport->transport, port->trid, true);
# # ]
3622 : 0 : }
3623 : 0 : }
3624 [ # # ]: 0 : }
3625 : :
3626 : : static void
3627 : 0 : nvmf_rdma_handle_cm_event_port_removal(struct spdk_nvmf_transport *transport,
3628 : : struct rdma_cm_event *event)
3629 : : {
3630 : 0 : struct spdk_nvmf_rdma_port *port, *tmp_port;
3631 : 0 : struct spdk_nvmf_rdma_transport *rtransport;
3632 : :
3633 [ # # # # : 0 : port = event->id->context;
# # # # ]
3634 : 0 : rtransport = SPDK_CONTAINEROF(transport, struct spdk_nvmf_rdma_transport, transport);
3635 : :
3636 : 0 : rdma_ack_cm_event(event);
3637 : :
3638 : : /* If device removal happens while a controller qpair is disconnecting, it's possible that we
3639 : : * receive a DEVICE_REMOVAL event for the qpair while id->qp is simply NULL. So make sure that
3640 : : * we are really handling a port event here.
3641 : : */
3642 [ # # # # : 0 : TAILQ_FOREACH(tmp_port, &rtransport->ports, link) {
# # # # #
# # # #
# ]
3643 [ # # # # : 0 : if (port == tmp_port && port->device && !port->device->need_destroy) {
# # # # #
# # # # #
# # # # #
# ]
3644 [ # # # # : 0 : port->device->need_destroy = true;
# # # # ]
3645 [ # # # # ]: 0 : nvmf_rdma_handle_device_removal(rtransport, port->device);
3646 : 0 : }
3647 : 0 : }
3648 : 0 : }
3649 : :
3650 : : static void
3651 : 4596 : nvmf_process_cm_event(struct spdk_nvmf_transport *transport)
3652 : : {
3653 : 0 : struct spdk_nvmf_rdma_transport *rtransport;
3654 : 0 : struct rdma_cm_event *event;
3655 : 0 : int rc;
3656 : 0 : bool event_acked;
3657 : :
3658 : 4596 : rtransport = SPDK_CONTAINEROF(transport, struct spdk_nvmf_rdma_transport, transport);
3659 : :
3660 [ - + # # : 4596 : if (rtransport->event_channel == NULL) {
# # ]
3661 : 0 : return;
3662 : : }
3663 : :
3664 : 0 : while (1) {
3665 : 10684 : event_acked = false;
3666 [ # # # # ]: 10684 : rc = rdma_get_cm_event(rtransport->event_channel, &event);
3667 [ + + ]: 10684 : if (rc) {
3668 [ - + - - : 4596 : if (errno != EAGAIN && errno != EWOULDBLOCK) {
# # # # ]
3669 [ # # ]: 0 : SPDK_ERRLOG("Acceptor Event Error: %s\n", spdk_strerror(errno));
3670 : 0 : }
3671 : 4596 : break;
3672 : : }
3673 : :
3674 [ - + - + : 6088 : SPDK_DEBUGLOG(rdma, "Acceptor Event: %s\n", CM_EVENT_STR[event->event]);
# # # # #
# # # # #
# # ]
3675 : :
3676 [ + + + - : 6088 : spdk_trace_record(TRACE_RDMA_CM_ASYNC_EVENT, 0, 0, 0, event->event);
# # # # #
# # # # #
# # # # #
# # # ]
3677 : :
3678 [ - + - - : 6088 : switch (event->event) {
- + + - -
- - - # #
# # ]
3679 : 0 : case RDMA_CM_EVENT_ADDR_RESOLVED:
3680 : : case RDMA_CM_EVENT_ADDR_ERROR:
3681 : : case RDMA_CM_EVENT_ROUTE_RESOLVED:
3682 : : case RDMA_CM_EVENT_ROUTE_ERROR:
3683 : : /* No action required. The target never attempts to resolve routes. */
3684 : 0 : break;
3685 : 2039 : case RDMA_CM_EVENT_CONNECT_REQUEST:
3686 : 2039 : rc = nvmf_rdma_connect(transport, event);
3687 [ - + ]: 2039 : if (rc < 0) {
3688 : 0 : SPDK_ERRLOG("Unable to process connect event. rc: %d\n", rc);
3689 : 0 : break;
3690 : : }
3691 : 2039 : break;
3692 : 0 : case RDMA_CM_EVENT_CONNECT_RESPONSE:
3693 : : /* The target never initiates a new connection. So this will not occur. */
3694 : 0 : break;
3695 : 0 : case RDMA_CM_EVENT_CONNECT_ERROR:
3696 : : /* Can this happen? The docs say it can, but not sure what causes it. */
3697 : 0 : break;
3698 : 0 : case RDMA_CM_EVENT_UNREACHABLE:
3699 : : case RDMA_CM_EVENT_REJECTED:
3700 : : /* These only occur on the client side. */
3701 : 0 : break;
3702 : 2039 : case RDMA_CM_EVENT_ESTABLISHED:
3703 : : /* TODO: Should we be waiting for this event anywhere? */
3704 : 2039 : break;
3705 : 2010 : case RDMA_CM_EVENT_DISCONNECTED:
3706 : 2010 : rc = nvmf_rdma_disconnect(event, &event_acked);
3707 [ - + ]: 2010 : if (rc < 0) {
3708 : 0 : SPDK_ERRLOG("Unable to process disconnect event. rc: %d\n", rc);
3709 : 0 : break;
3710 : : }
3711 : 2010 : break;
3712 : 0 : case RDMA_CM_EVENT_DEVICE_REMOVAL:
3713 : : /* In case of device removal, the kernel IB layer triggers IBV_EVENT_DEVICE_FATAL,
3714 : : * which in turn triggers RDMA_CM_EVENT_DEVICE_REMOVAL on all cma_ids.
3715 : : * Once these events are delivered to SPDK, we must release all IB resources and
3716 : : * must not attempt to call any ibv_query/modify/create functions. We may only call
3717 : : * ibv_destroy* functions to release the user-space memory allocated by IB. All kernel
3718 : : * resources have already been cleaned up. */
3719 [ # # # # : 0 : if (event->id->qp) {
# # # # #
# ]
3720 : : /* If rdma_cm event has a valid `qp` pointer then the event refers to the
3721 : : * corresponding qpair. Otherwise the event refers to a listening device. */
3722 : 0 : rc = nvmf_rdma_disconnect(event, &event_acked);
3723 [ # # ]: 0 : if (rc < 0) {
3724 : 0 : SPDK_ERRLOG("Unable to process disconnect event. rc: %d\n", rc);
3725 : 0 : break;
3726 : : }
3727 : 0 : } else {
3728 : 0 : nvmf_rdma_handle_cm_event_port_removal(transport, event);
3729 : 0 : event_acked = true;
3730 : : }
3731 : 0 : break;
3732 : 0 : case RDMA_CM_EVENT_MULTICAST_JOIN:
3733 : : case RDMA_CM_EVENT_MULTICAST_ERROR:
3734 : : /* Multicast is not used */
3735 : 0 : break;
3736 : 0 : case RDMA_CM_EVENT_ADDR_CHANGE:
3737 : 0 : event_acked = nvmf_rdma_handle_cm_event_addr_change(transport, event);
3738 : 0 : break;
3739 : 0 : case RDMA_CM_EVENT_TIMEWAIT_EXIT:
3740 : : /* For now, do nothing. The target never re-uses queue pairs. */
3741 : 0 : break;
3742 : 0 : default:
3743 [ # # # # ]: 0 : SPDK_ERRLOG("Unexpected Acceptor Event [%d]\n", event->event);
3744 : 0 : break;
3745 : : }
3746 [ - + + + ]: 6088 : if (!event_acked) {
3747 : 4078 : rdma_ack_cm_event(event);
3748 : 0 : }
3749 : : }
3750 [ # # ]: 0 : }
3751 : :
3752 : : static void
3753 : 2039 : nvmf_rdma_handle_last_wqe_reached(struct spdk_nvmf_rdma_qpair *rqpair)
3754 : : {
3755 [ # # # # ]: 2039 : rqpair->last_wqe_reached = true;
3756 : 2039 : nvmf_rdma_destroy_drained_qpair(rqpair);
3757 : 2039 : }
3758 : :
3759 : : static void
3760 : 2039 : nvmf_rdma_qpair_process_ibv_event(void *ctx)
3761 : : {
3762 : 2039 : struct spdk_nvmf_rdma_ibv_event_ctx *event_ctx = ctx;
3763 : :
3764 [ + - # # : 2039 : if (event_ctx->rqpair) {
# # ]
3765 [ + - + - : 2039 : STAILQ_REMOVE(&event_ctx->rqpair->ibv_events, event_ctx, spdk_nvmf_rdma_ibv_event_ctx, link);
- - - - #
# # # # #
# # # # #
# # # # #
# # # # #
# # # # #
# # # # #
# # # # #
# # # # #
# # # # #
# # # # #
# # # # #
# # # # #
# # # # #
# # # # #
# # # # #
# # # # #
# # # # #
# # # # #
# # # # #
# # # # #
# # # # #
# # ]
3766 [ + - # # : 2039 : if (event_ctx->cb_fn) {
# # ]
3767 [ # # # # : 2039 : event_ctx->cb_fn(event_ctx->rqpair);
# # # # #
# # # ]
3768 : 0 : }
3769 : 0 : }
3770 : 2039 : free(event_ctx);
3771 : 2039 : }
3772 : :
3773 : : static int
3774 : 2039 : nvmf_rdma_send_qpair_async_event(struct spdk_nvmf_rdma_qpair *rqpair,
3775 : : spdk_nvmf_rdma_qpair_ibv_event fn)
3776 : : {
3777 : 0 : struct spdk_nvmf_rdma_ibv_event_ctx *ctx;
3778 : 2039 : struct spdk_thread *thr = NULL;
3779 : 0 : int rc;
3780 : :
3781 [ - + # # : 2039 : if (rqpair->qpair.group) {
# # # # ]
3782 [ # # # # : 0 : thr = rqpair->qpair.group->thread;
# # # # #
# ]
3783 [ + - # # : 2039 : } else if (rqpair->destruct_channel) {
# # ]
3784 [ # # # # ]: 2039 : thr = spdk_io_channel_get_thread(rqpair->destruct_channel);
3785 : 0 : }
3786 : :
3787 [ - + ]: 2039 : if (!thr) {
3788 [ # # # # : 0 : SPDK_DEBUGLOG(rdma, "rqpair %p has no thread\n", rqpair);
# # ]
3789 : 0 : return -EINVAL;
3790 : : }
3791 : :
3792 : 2039 : ctx = calloc(1, sizeof(*ctx));
3793 [ - + ]: 2039 : if (!ctx) {
3794 : 0 : return -ENOMEM;
3795 : : }
3796 : :
3797 [ # # # # ]: 2039 : ctx->rqpair = rqpair;
3798 [ # # # # ]: 2039 : ctx->cb_fn = fn;
3799 [ # # # # : 2039 : STAILQ_INSERT_TAIL(&rqpair->ibv_events, ctx, link);
# # # # #
# # # # #
# # # # #
# # # #
# ]
3800 : :
3801 : 2039 : rc = spdk_thread_send_msg(thr, nvmf_rdma_qpair_process_ibv_event, ctx);
3802 [ - + ]: 2039 : if (rc) {
3803 [ # # # # : 0 : STAILQ_REMOVE(&rqpair->ibv_events, ctx, spdk_nvmf_rdma_ibv_event_ctx, link);
# # # # #
# # # # #
# # # # #
# # # # #
# # # # #
# # # # #
# # # # #
# # # # #
# # # # #
# # # # #
# # # # #
# # # # #
# # # # #
# # # # #
# # # # #
# # # # #
# # # # ]
3804 : 0 : free(ctx);
3805 : 0 : }
3806 : :
3807 : 2039 : return rc;
3808 : 0 : }
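: : /* Hand-off pattern used above: the ibv async event is forwarded to the thread that owns
: :  * the qpair (its poll group thread, or the destruct channel's thread once the qpair has
: :  * left a group). The context is linked to rqpair->ibv_events, and the callback tolerates
: :  * a NULL rqpair, so an event whose qpair disappears before the message is processed is
: :  * simply dropped; the callback always frees the context on the target thread. */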
3809 : :
3810 : : static int
3811 : 3067 : nvmf_process_ib_event(struct spdk_nvmf_rdma_device *device)
3812 : : {
3813 : 0 : int rc;
3814 : 3067 : struct spdk_nvmf_rdma_qpair *rqpair = NULL;
3815 : 0 : struct ibv_async_event event;
3816 : :
3817 [ # # # # ]: 3067 : rc = ibv_get_async_event(device->context, &event);
3818 : :
3819 [ + + ]: 3067 : if (rc) {
3820 : : /* In non-blocking mode -1 means there are no events available */
3821 : 1028 : return rc;
3822 : : }
3823 : :
3824 [ + - - # : 2039 : switch (event.event_type) {
# # ]
3825 : 2039 : case IBV_EVENT_QP_FATAL:
3826 : : case IBV_EVENT_QP_LAST_WQE_REACHED:
3827 : : case IBV_EVENT_SQ_DRAINED:
3828 : : case IBV_EVENT_QP_REQ_ERR:
3829 : : case IBV_EVENT_QP_ACCESS_ERR:
3830 : : case IBV_EVENT_COMM_EST:
3831 : : case IBV_EVENT_PATH_MIG:
3832 : : case IBV_EVENT_PATH_MIG_ERR:
3833 [ # # # # ]: 2039 : rqpair = event.element.qp->qp_context;
3834 [ - + ]: 2039 : if (!rqpair) {
3835 : : /* Any QP event for NVMe-RDMA initiator may be returned. */
3836 [ # # ]: 0 : SPDK_NOTICELOG("Async QP event for unknown QP: %s\n",
3837 : : ibv_event_type_str(event.event_type));
3838 : 0 : break;
3839 : : }
3840 : :
3841 [ - + - - : 2039 : switch (event.event_type) {
- # # ]
3842 : 0 : case IBV_EVENT_QP_FATAL:
3843 : 0 : SPDK_ERRLOG("Fatal event received for rqpair %p\n", rqpair);
3844 [ # # # # : 0 : spdk_trace_record(TRACE_RDMA_IBV_ASYNC_EVENT, 0, 0,
# # # # #
# # # # #
# # # # #
# ]
3845 : : (uintptr_t)rqpair, event.event_type);
3846 : 0 : nvmf_rdma_update_ibv_state(rqpair);
3847 [ # # ]: 0 : spdk_nvmf_qpair_disconnect(&rqpair->qpair, NULL, NULL);
3848 : 0 : break;
3849 : 2039 : case IBV_EVENT_QP_LAST_WQE_REACHED:
3850 : : /* This event only occurs for shared receive queues. */
3851 [ - + - + : 2039 : SPDK_DEBUGLOG(rdma, "Last WQE reached event received for rqpair %p\n", rqpair);
# # ]
3852 : 2039 : rc = nvmf_rdma_send_qpair_async_event(rqpair, nvmf_rdma_handle_last_wqe_reached);
3853 [ - + ]: 2039 : if (rc) {
3854 : 0 : SPDK_WARNLOG("Failed to send LAST_WQE_REACHED event. rqpair %p, err %d\n", rqpair, rc);
3855 [ # # # # ]: 0 : rqpair->last_wqe_reached = true;
3856 : 0 : }
3857 : 2039 : break;
3858 : 0 : case IBV_EVENT_SQ_DRAINED:
3859 : : /* This event occurs frequently in both error and non-error states.
3860 : : * Check if the qpair is in an error state before sending a message. */
3861 [ # # # # : 0 : SPDK_DEBUGLOG(rdma, "Last sq drained event received for rqpair %p\n", rqpair);
# # ]
3862 [ # # # # : 0 : spdk_trace_record(TRACE_RDMA_IBV_ASYNC_EVENT, 0, 0,
# # # # #
# # # # #
# # # # #
# ]
3863 : : (uintptr_t)rqpair, event.event_type);
3864 [ # # ]: 0 : if (nvmf_rdma_update_ibv_state(rqpair) == IBV_QPS_ERR) {
3865 [ # # ]: 0 : spdk_nvmf_qpair_disconnect(&rqpair->qpair, NULL, NULL);
3866 : 0 : }
3867 : 0 : break;
3868 : 0 : case IBV_EVENT_QP_REQ_ERR:
3869 : : case IBV_EVENT_QP_ACCESS_ERR:
3870 : : case IBV_EVENT_COMM_EST:
3871 : : case IBV_EVENT_PATH_MIG:
3872 : : case IBV_EVENT_PATH_MIG_ERR:
3873 [ # # ]: 0 : SPDK_NOTICELOG("Async QP event: %s\n",
3874 : : ibv_event_type_str(event.event_type));
3875 [ # # # # : 0 : spdk_trace_record(TRACE_RDMA_IBV_ASYNC_EVENT, 0, 0,
# # # # #
# # # # #
# # # # #
# ]
3876 : : (uintptr_t)rqpair, event.event_type);
3877 : 0 : nvmf_rdma_update_ibv_state(rqpair);
3878 : 0 : break;
3879 : 0 : default:
3880 : 0 : break;
3881 : : }
3882 : 2039 : break;
3883 : 0 : case IBV_EVENT_DEVICE_FATAL:
3884 [ # # # # : 0 : SPDK_ERRLOG("Device Fatal event[%s] received on %s. device: %p\n",
# # # # #
# ]
3885 : : ibv_event_type_str(event.event_type), ibv_get_device_name(device->context->device), device);
3886 [ # # # # ]: 0 : device->need_destroy = true;
3887 : 0 : break;
3888 : 0 : case IBV_EVENT_CQ_ERR:
3889 : : case IBV_EVENT_PORT_ACTIVE:
3890 : : case IBV_EVENT_PORT_ERR:
3891 : : case IBV_EVENT_LID_CHANGE:
3892 : : case IBV_EVENT_PKEY_CHANGE:
3893 : : case IBV_EVENT_SM_CHANGE:
3894 : : case IBV_EVENT_SRQ_ERR:
3895 : : case IBV_EVENT_SRQ_LIMIT_REACHED:
3896 : : case IBV_EVENT_CLIENT_REREGISTER:
3897 : 0 : case IBV_EVENT_GID_CHANGE:
3898 : : default:
3899 [ # # ]: 0 : SPDK_NOTICELOG("Async event: %s\n",
3900 : : ibv_event_type_str(event.event_type));
3901 [ # # # # : 0 : spdk_trace_record(TRACE_RDMA_IBV_ASYNC_EVENT, 0, 0, 0, event.event_type);
# # # # #
# # # # #
# # # # #
# ]
3902 : 0 : break;
3903 : : }
3904 : 2039 : ibv_ack_async_event(&event);
3905 : :
3906 : 2039 : return 0;
3907 : 0 : }
3908 : :
3909 : : static void
3910 : 1028 : nvmf_process_ib_events(struct spdk_nvmf_rdma_device *device, uint32_t max_events)
3911 : : {
3912 : 1028 : int rc = 0;
3913 : 1028 : uint32_t i = 0;
3914 : :
3915 [ + - ]: 3067 : for (i = 0; i < max_events; i++) {
3916 : 3067 : rc = nvmf_process_ib_event(device);
3917 [ + + ]: 3067 : if (rc) {
3918 : 1028 : break;
3919 : : }
3920 : 0 : }
3921 : :
3922 [ - + - + : 1028 : SPDK_DEBUGLOG(rdma, "Device %s: %u events processed\n", device->context->device->name, i);
# # # # #
# # # # #
# # ]
3923 : 1028 : }
3924 : :
3925 : : static int
3926 : 89270 : nvmf_rdma_accept(void *ctx)
3927 : : {
3928 : 89270 : int nfds, i = 0;
3929 : 89270 : struct spdk_nvmf_transport *transport = ctx;
3930 : 0 : struct spdk_nvmf_rdma_transport *rtransport;
3931 : 0 : struct spdk_nvmf_rdma_device *device, *tmp;
3932 : 0 : uint32_t count;
3933 : 0 : short revents;
3934 : 0 : bool do_retry;
3935 : :
3936 : 89270 : rtransport = SPDK_CONTAINEROF(transport, struct spdk_nvmf_rdma_transport, transport);
3937 : 89270 : do_retry = nvmf_rdma_retry_listen_port(rtransport);
3938 : :
3939 [ # # # # : 89270 : count = nfds = poll(rtransport->poll_fds, rtransport->npoll_fds, 0);
# # # # ]
3940 : :
3941 [ + + ]: 89270 : if (nfds <= 0) {
3942 [ # # ]: 84219 : return do_retry ? SPDK_POLLER_BUSY : SPDK_POLLER_IDLE;
3943 : : }
3944 : :
3945 : : /* The first poll descriptor is RDMA CM event */
3946 [ + + # # : 5051 : if (rtransport->poll_fds[i++].revents & POLLIN) {
# # # # #
# # # #
# ]
3947 : 4596 : nvmf_process_cm_event(transport);
3948 [ # # ]: 4596 : nfds--;
3949 : 0 : }
3950 : :
3951 [ + + ]: 5051 : if (nfds == 0) {
3952 : 4023 : return SPDK_POLLER_BUSY;
3953 : : }
3954 : :
3955 : : /* Second and subsequent poll descriptors are IB async events */
3956 [ + + # # : 3084 : TAILQ_FOREACH_SAFE(device, &rtransport->devices, link, tmp) {
# # # # #
# # # # #
# # ]
3957 [ # # # # : 2056 : revents = rtransport->poll_fds[i++].revents;
# # # # #
# # # ]
3958 [ + + ]: 2056 : if (revents & POLLIN) {
3959 [ - + + - : 1028 : if (spdk_likely(!device->need_destroy)) {
# # # # ]
3960 : 1028 : nvmf_process_ib_events(device, 32);
3961 [ - + - + : 1028 : if (spdk_unlikely(device->need_destroy)) {
# # # # ]
3962 : 0 : nvmf_rdma_handle_device_removal(rtransport, device);
3963 : 0 : }
3964 : 0 : }
3965 [ # # ]: 1028 : nfds--;
3966 [ + - - + ]: 1028 : } else if (revents & POLLNVAL || revents & POLLHUP) {
3967 : 0 : SPDK_ERRLOG("Receive unknown revent %x on device %p\n", (int)revents, device);
3968 [ # # ]: 0 : nfds--;
3969 : 0 : }
3970 : 0 : }
3971 : : /* Check that all flagged fds have been served. */
3972 [ - + # # ]: 1028 : assert(nfds == 0);
3973 : :
3974 : 1028 : return count > 0 ? SPDK_POLLER_BUSY : SPDK_POLLER_IDLE;
3975 : 0 : }
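: : /* Layout of rtransport->poll_fds assumed by the loop above: index 0 is the RDMA CM event
: :  * channel fd, and each subsequent entry is the async-event fd of one IB device, in the
: :  * same order as rtransport->devices. The poller reports BUSY when it serviced any fd or
: :  * when a deferred listen retry is pending, and IDLE otherwise. */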
3976 : :
3977 : : static void
3978 : 325 : nvmf_rdma_cdata_init(struct spdk_nvmf_transport *transport, struct spdk_nvmf_subsystem *subsystem,
3979 : : struct spdk_nvmf_ctrlr_data *cdata)
3980 : : {
3981 [ # # # # : 325 : cdata->nvmf_specific.msdbd = NVMF_DEFAULT_MSDBD;
# # ]
3982 : :
3983 : : /* Disable in-capsule data transfer for the RDMA controller when dif_insert_or_strip is enabled,
3984 : : since in-capsule data only works with NVMe drives that support the SGL memory layout. */
3985 [ - + - + : 325 : if (transport->opts.dif_insert_or_strip) {
# # # # #
# ]
3986 [ # # # # : 0 : cdata->nvmf_specific.ioccsz = sizeof(struct spdk_nvme_cmd) / 16;
# # ]
3987 : 0 : }
3988 : :
3989 [ - + # # : 325 : if (cdata->nvmf_specific.ioccsz > ((sizeof(struct spdk_nvme_cmd) + 0x1000) / 16)) {
# # # # ]
3990 : 0 : SPDK_WARNLOG("RDMA is configured to support up to 16 SGL entries while in capsule"
3991 : : " data is greater than 4KiB.\n");
3992 : 0 : SPDK_WARNLOG("When used in conjunction with the NVMe-oF initiator from the Linux "
3993 : : "kernel between versions 5.4 and 5.12 data corruption may occur for "
3994 : : "writes that are not a multiple of 4KiB in size.\n");
3995 : 0 : }
3996 : 325 : }
3997 : :
3998 : : static void
3999 : 98 : nvmf_rdma_discover(struct spdk_nvmf_transport *transport,
4000 : : struct spdk_nvme_transport_id *trid,
4001 : : struct spdk_nvmf_discovery_log_page_entry *entry)
4002 : : {
4003 [ # # # # ]: 98 : entry->trtype = SPDK_NVMF_TRTYPE_RDMA;
4004 [ # # # # : 98 : entry->adrfam = trid->adrfam;
# # # # ]
4005 [ # # # # ]: 98 : entry->treq.secure_channel = SPDK_NVMF_TREQ_SECURE_CHANNEL_NOT_REQUIRED;
4006 : :
4007 [ # # # # ]: 98 : spdk_strcpy_pad(entry->trsvcid, trid->trsvcid, sizeof(entry->trsvcid), ' ');
4008 [ # # # # ]: 98 : spdk_strcpy_pad(entry->traddr, trid->traddr, sizeof(entry->traddr), ' ');
4009 : :
4010 [ # # # # : 98 : entry->tsas.rdma.rdma_qptype = SPDK_NVMF_RDMA_QPTYPE_RELIABLE_CONNECTED;
# # # # ]
4011 [ # # # # : 98 : entry->tsas.rdma.rdma_prtype = SPDK_NVMF_RDMA_PRTYPE_NONE;
# # # # ]
4012 [ # # # # : 98 : entry->tsas.rdma.rdma_cms = SPDK_NVMF_RDMA_CMS_RDMA_CM;
# # # # ]
4013 : 98 : }
4014 : :
4015 : : static int
4016 : 252 : nvmf_rdma_poller_create(struct spdk_nvmf_rdma_transport *rtransport,
4017 : : struct spdk_nvmf_rdma_poll_group *rgroup, struct spdk_nvmf_rdma_device *device,
4018 : : struct spdk_nvmf_rdma_poller **out_poller)
4019 : : {
4020 : 0 : struct spdk_nvmf_rdma_poller *poller;
4021 : 0 : struct spdk_rdma_srq_init_attr srq_init_attr;
4022 : 0 : struct spdk_nvmf_rdma_resource_opts opts;
4023 : 0 : int num_cqe;
4024 : :
4025 : 252 : poller = calloc(1, sizeof(*poller));
4026 [ - + ]: 252 : if (!poller) {
4027 : 0 : SPDK_ERRLOG("Unable to allocate memory for new RDMA poller\n");
4028 : 0 : return -1;
4029 : : }
4030 : :
4031 [ # # # # ]: 252 : poller->device = device;
4032 [ # # # # ]: 252 : poller->group = rgroup;
4033 [ # # ]: 252 : *out_poller = poller;
4034 : :
4035 [ # # # # : 252 : RB_INIT(&poller->qpairs);
# # ]
4036 [ # # # # : 252 : STAILQ_INIT(&poller->qpairs_pending_send);
# # # # #
# # # # #
# # ]
4037 [ # # # # : 252 : STAILQ_INIT(&poller->qpairs_pending_recv);
# # # # #
# # # # #
# # ]
4038 : :
4039 [ # # # # : 252 : TAILQ_INSERT_TAIL(&rgroup->pollers, poller, link);
# # # # #
# # # # #
# # # # #
# # # # #
# # # # #
# # # # #
# # ]
4040 [ - + - + : 252 : SPDK_DEBUGLOG(rdma, "Create poller %p on device %p in poll group %p.\n", poller, device, rgroup);
# # ]
4041 [ - + + - : 252 : if (rtransport->rdma_opts.no_srq == false && device->num_srq < device->attr.max_srq) {
+ - # # #
# # # # #
# # # # #
# # # ]
4042 [ - + # # : 252 : if ((int)rtransport->rdma_opts.max_srq_depth > device->attr.max_srq_wr) {
# # # # #
# # # #
# ]
4043 [ # # # # : 0 : SPDK_WARNLOG("Requested SRQ depth %u, max supported by dev %s is %d\n",
# # # # #
# # # # #
# # # # #
# # # ]
4044 : : rtransport->rdma_opts.max_srq_depth, device->context->device->name, device->attr.max_srq_wr);
4045 : 0 : }
4046 [ # # # # : 252 : poller->max_srq_depth = spdk_min((int)rtransport->rdma_opts.max_srq_depth, device->attr.max_srq_wr);
# # # # #
# # # # #
# # # # #
# # # # #
# # # # #
# ]
4047 : :
4048 [ # # # # ]: 252 : device->num_srq++;
4049 [ # # ]: 252 : memset(&srq_init_attr, 0, sizeof(srq_init_attr));
4050 [ # # # # ]: 252 : srq_init_attr.pd = device->pd;
4051 [ # # # # : 252 : srq_init_attr.stats = &poller->stat.qp_stats.recv;
# # # # ]
4052 [ # # # # : 252 : srq_init_attr.srq_init_attr.attr.max_wr = poller->max_srq_depth;
# # # # #
# ]
4053 [ # # # # : 252 : srq_init_attr.srq_init_attr.attr.max_sge = spdk_min(device->attr.max_sge, NVMF_DEFAULT_RX_SGE);
# # # # #
# # # # #
# # # # #
# ]
4054 [ # # # # ]: 252 : poller->srq = spdk_rdma_srq_create(&srq_init_attr);
4055 [ - + # # : 252 : if (!poller->srq) {
# # ]
4056 [ # # ]: 0 : SPDK_ERRLOG("Unable to create shared receive queue, errno %d\n", errno);
4057 : 0 : return -1;
4058 : : }
4059 : :
4060 [ # # # # : 252 : opts.qp = poller->srq;
# # ]
4061 [ # # # # : 252 : opts.map = device->map;
# # ]
4062 : 252 : opts.qpair = NULL;
4063 [ # # ]: 252 : opts.shared = true;
4064 [ # # # # : 252 : opts.max_queue_depth = poller->max_srq_depth;
# # ]
4065 [ # # # # : 252 : opts.in_capsule_data_size = rtransport->transport.opts.in_capsule_data_size;
# # # # #
# ]
4066 : :
4067 [ # # # # ]: 252 : poller->resources = nvmf_rdma_resources_create(&opts);
4068 [ - + # # : 252 : if (!poller->resources) {
# # ]
4069 : 0 : SPDK_ERRLOG("Unable to allocate resources for shared receive queue.\n");
4070 : 0 : return -1;
4071 : : }
4072 : 0 : }
4073 : :
4074 : : /*
4075 : : * When using an SRQ, we can limit the completion queue size at startup.
4076 : : * The following formula represents the calculation:
4077 : : * num_cqe = num_recv + num_data_wr + num_send_wr,
4078 : : * where num_recv = num_data_wr = num_send_wr = poller->max_srq_depth.
4079 : : */
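: : /* For example, with a max_srq_depth of 4096 this evaluates to 3 * 4096 = 12288 CQ entries;
: :  * without an SRQ, the configured rdma_opts.num_cqe is used instead (see below). */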
4080 [ + - # # : 252 : if (poller->srq) {
# # ]
4081 [ # # # # : 252 : num_cqe = poller->max_srq_depth * 3;
# # ]
4082 : 0 : } else {
4083 [ # # # # : 0 : num_cqe = rtransport->rdma_opts.num_cqe;
# # ]
4084 : : }
4085 : :
4086 [ # # # # : 252 : poller->cq = ibv_create_cq(device->context, num_cqe, poller, NULL, 0);
# # # # ]
4087 [ - + # # : 252 : if (!poller->cq) {
# # ]
4088 : 0 : SPDK_ERRLOG("Unable to create completion queue\n");
4089 : 0 : return -1;
4090 : : }
4091 [ # # # # ]: 252 : poller->num_cqe = num_cqe;
4092 : 252 : return 0;
4093 : 0 : }
4094 : :
4095 : : static void
4096 : 0 : _nvmf_rdma_register_poller_in_group(void *c)
4097 : : {
4098 : 0 : struct spdk_nvmf_rdma_poller *poller;
4099 : 0 : struct poller_manage_ctx *ctx = c;
4100 : 0 : struct spdk_nvmf_rdma_device *device;
4101 : 0 : int rc;
4102 : :
4103 [ # # # # : 0 : rc = nvmf_rdma_poller_create(ctx->rtransport, ctx->rgroup, ctx->device, &poller);
# # # # #
# # # ]
4104 [ # # # # ]: 0 : if (rc < 0 && poller) {
4105 : 0 : nvmf_rdma_poller_destroy(poller);
4106 : 0 : }
4107 : :
4108 [ # # # # ]: 0 : device = ctx->device;
4109 [ # # ]: 0 : if (nvmf_rdma_all_pollers_management_done(ctx)) {
4110 [ # # # # ]: 0 : device->is_ready = true;
4111 : 0 : }
4112 : 0 : }
4113 : :
4114 : : static void nvmf_rdma_poll_group_destroy(struct spdk_nvmf_transport_poll_group *group);
4115 : :
4116 : : static struct spdk_nvmf_transport_poll_group *
4117 : 156 : nvmf_rdma_poll_group_create(struct spdk_nvmf_transport *transport,
4118 : : struct spdk_nvmf_poll_group *group)
4119 : : {
4120 : 5 : struct spdk_nvmf_rdma_transport *rtransport;
4121 : 5 : struct spdk_nvmf_rdma_poll_group *rgroup;
4122 : 25 : struct spdk_nvmf_rdma_poller *poller;
4123 : 5 : struct spdk_nvmf_rdma_device *device;
4124 : 5 : int rc;
4125 : :
4126 : 156 : rtransport = SPDK_CONTAINEROF(transport, struct spdk_nvmf_rdma_transport, transport);
4127 : :
4128 : 156 : rgroup = calloc(1, sizeof(*rgroup));
4129 [ + + ]: 156 : if (!rgroup) {
4130 : 0 : return NULL;
4131 : : }
4132 : :
4133 [ + - + - : 156 : TAILQ_INIT(&rgroup->pollers);
+ - + - +
- + - + -
+ - ]
4134 : :
4135 [ + + + - : 408 : TAILQ_FOREACH(device, &rtransport->devices, link) {
+ - - + #
# # # #
# ]
4136 : 252 : rc = nvmf_rdma_poller_create(rtransport, rgroup, device, &poller);
4137 [ - + ]: 252 : if (rc < 0) {
4138 [ # # ]: 0 : nvmf_rdma_poll_group_destroy(&rgroup->group);
4139 : 0 : return NULL;
4140 : : }
4141 : 0 : }
4142 : :
4143 [ + - + - : 156 : TAILQ_INSERT_TAIL(&rtransport->poll_groups, rgroup, link);
+ - + - +
- + - + -
+ - + - +
- + - + -
+ - + - +
- + - + -
+ - ]
4144 [ + + + - : 156 : if (rtransport->conn_sched.next_admin_pg == NULL) {
+ - + + ]
4145 [ - + - + : 45 : rtransport->conn_sched.next_admin_pg = rgroup;
- + ]
4146 [ - + - + : 45 : rtransport->conn_sched.next_io_pg = rgroup;
- + ]
4147 : 1 : }
4148 : :
4149 [ + - ]: 156 : return &rgroup->group;
4150 : 5 : }
4151 : :
4152 : : static uint32_t
4153 : 6042 : nvmf_poll_group_get_io_qpair_count(struct spdk_nvmf_poll_group *pg)
4154 : : {
4155 : 12 : uint32_t count;
4156 : :
4157 : : /* Just assume that unassociated qpairs will eventually become IO
4158 : : * qpairs. This is close enough for the use cases of this
4159 : : * function.
4160 : : */
4161 [ + + + - ]: 6042 : pthread_mutex_lock(&pg->mutex);
4162 [ + - + - : 6042 : count = pg->stat.current_io_qpairs + pg->current_unassociated_qpairs;
+ - + - +
- ]
4163 [ + + + - ]: 6042 : pthread_mutex_unlock(&pg->mutex);
4164 : :
4165 : 6054 : return count;
4166 : 12 : }
4167 : :
4168 : : static struct spdk_nvmf_transport_poll_group *
4169 : 2123 : nvmf_rdma_get_optimal_poll_group(struct spdk_nvmf_qpair *qpair)
4170 : : {
4171 : 14 : struct spdk_nvmf_rdma_transport *rtransport;
4172 : 14 : struct spdk_nvmf_rdma_poll_group **pg;
4173 : 14 : struct spdk_nvmf_transport_poll_group *result;
4174 : 14 : uint32_t count;
4175 : :
4176 [ + - + - ]: 2123 : rtransport = SPDK_CONTAINEROF(qpair->transport, struct spdk_nvmf_rdma_transport, transport);
4177 : :
4178 [ + + + - : 2123 : if (TAILQ_EMPTY(&rtransport->poll_groups)) {
+ - + + ]
4179 : 12 : return NULL;
4180 : : }
4181 : :
4182 [ + + + - : 2111 : if (qpair->qid == 0) {
+ + ]
4183 [ + - + - ]: 363 : pg = &rtransport->conn_sched.next_admin_pg;
4184 : 6 : } else {
4185 : 6 : struct spdk_nvmf_rdma_poll_group *pg_min, *pg_start, *pg_current;
4186 : 6 : uint32_t min_value;
4187 : :
4188 [ + - + - ]: 1748 : pg = &rtransport->conn_sched.next_io_pg;
4189 [ + - ]: 1748 : pg_min = *pg;
4190 [ + - ]: 1748 : pg_start = *pg;
4191 [ + - ]: 1748 : pg_current = *pg;
4192 [ + - + - : 1748 : min_value = nvmf_poll_group_get_io_qpair_count(pg_current->group.group);
+ - ]
4193 : :
4194 : 6 : while (1) {
4195 [ + - + - : 4294 : count = nvmf_poll_group_get_io_qpair_count(pg_current->group.group);
+ - ]
4196 : :
4197 [ + + ]: 4294 : if (count < min_value) {
4198 : 0 : min_value = count;
4199 : 0 : pg_min = pg_current;
4200 : 0 : }
4201 : :
4202 [ + - + - : 4294 : pg_current = TAILQ_NEXT(pg_current, link);
+ - ]
4203 [ + + ]: 4294 : if (pg_current == NULL) {
4204 [ + - + - : 1142 : pg_current = TAILQ_FIRST(&rtransport->poll_groups);
+ - ]
4205 : 2 : }
4206 : :
4207 [ + + + + ]: 4294 : if (pg_current == pg_start || min_value == 0) {
4208 : 6 : break;
4209 : : }
4210 : : }
4211 [ + - ]: 1748 : *pg = pg_min;
4212 : 6 : }
4213 : :
4214 [ + + + - : 2111 : assert(*pg != NULL);
# # ]
4215 : :
4216 [ + - + - ]: 2111 : result = &(*pg)->group;
4217 : :
4218 [ + - + - : 2111 : *pg = TAILQ_NEXT(*pg, link);
+ - + - +
- ]
4219 [ + + + + ]: 2111 : if (*pg == NULL) {
4220 [ + - + - : 572 : *pg = TAILQ_FIRST(&rtransport->poll_groups);
+ - + - ]
4221 : 4 : }
4222 : :
4223 : 2111 : return result;
4224 : 14 : }
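: : /* Selection policy implemented above: admin queues (qid == 0) are handed out round-robin via
: :  * conn_sched.next_admin_pg. IO queues start from conn_sched.next_io_pg and walk every poll
: :  * group once, choosing the group with the fewest IO (plus not-yet-associated) qpairs; the walk
: :  * stops early if a completely empty group is found, and the cursor then advances so subsequent
: :  * connections spread across groups. */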
4225 : :
4226 : : static void
4227 : 252 : nvmf_rdma_poller_destroy(struct spdk_nvmf_rdma_poller *poller)
4228 : : {
4229 : 0 : struct spdk_nvmf_rdma_qpair *qpair, *tmp_qpair;
4230 : 0 : int rc;
4231 : :
4232 [ + + # # : 252 : TAILQ_REMOVE(&poller->group->pollers, poller, link);
# # # # #
# # # # #
# # # # #
# # # # #
# # # # #
# # # # #
# # # # #
# # # # #
# # # # #
# # # # #
# # ]
4233 [ - + - - : 252 : RB_FOREACH_SAFE(qpair, qpairs_tree, &poller->qpairs, tmp_qpair) {
# # ]
4234 : 0 : nvmf_rdma_qpair_destroy(qpair);
4235 : 0 : }
4236 : :
4237 [ + - # # : 252 : if (poller->srq) {
# # ]
4238 [ + - # # : 252 : if (poller->resources) {
# # ]
4239 [ # # # # ]: 252 : nvmf_rdma_resources_destroy(poller->resources);
4240 : 0 : }
4241 [ # # # # ]: 252 : spdk_rdma_srq_destroy(poller->srq);
4242 [ - + - + : 252 : SPDK_DEBUGLOG(rdma, "Destroyed RDMA shared queue %p\n", poller->srq);
# # # # #
# ]
4243 : 0 : }
4244 : :
4245 [ + - # # : 252 : if (poller->cq) {
# # ]
4246 [ # # # # ]: 252 : rc = ibv_destroy_cq(poller->cq);
4247 [ - + ]: 252 : if (rc != 0) {
4248 [ # # ]: 0 : SPDK_ERRLOG("Destroy cq return %d, error: %s\n", rc, strerror(errno));
4249 : 0 : }
4250 : 0 : }
4251 : :
4252 [ - + # # : 252 : if (poller->destroy_cb) {
# # ]
4253 [ # # # # : 0 : poller->destroy_cb(poller->destroy_cb_ctx);
# # # # #
# # # ]
4254 [ # # # # ]: 0 : poller->destroy_cb = NULL;
4255 : 0 : }
4256 : :
4257 : 252 : free(poller);
4258 : 252 : }
4259 : :
4260 : : static void
4261 : 156 : nvmf_rdma_poll_group_destroy(struct spdk_nvmf_transport_poll_group *group)
4262 : : {
4263 : 5 : struct spdk_nvmf_rdma_poll_group *rgroup, *next_rgroup;
4264 : 5 : struct spdk_nvmf_rdma_poller *poller, *tmp;
4265 : 5 : struct spdk_nvmf_rdma_transport *rtransport;
4266 : :
4267 : 156 : rgroup = SPDK_CONTAINEROF(group, struct spdk_nvmf_rdma_poll_group, group);
4268 [ + + ]: 156 : if (!rgroup) {
4269 : 0 : return;
4270 : : }
4271 : :
4272 [ + + + - : 408 : TAILQ_FOREACH_SAFE(poller, &rgroup->pollers, link, tmp) {
+ - + - #
# # # # #
- + ]
4273 : 252 : nvmf_rdma_poller_destroy(poller);
4274 : 0 : }
4275 : :
4276 [ + + + - : 156 : if (rgroup->group.transport == NULL) {
+ - + - ]
4277 : : /* Transport can be NULL when nvmf_rdma_poll_group_create()
4278 : : * calls this function directly in a failure path. */
4279 : 0 : free(rgroup);
4280 : 0 : return;
4281 : : }
4282 : :
4283 [ + - + - : 156 : rtransport = SPDK_CONTAINEROF(rgroup->group.transport, struct spdk_nvmf_rdma_transport, transport);
+ - ]
4284 : :
4285 [ + - + - : 156 : next_rgroup = TAILQ_NEXT(rgroup, link);
+ - ]
4286 [ + + + - : 156 : TAILQ_REMOVE(&rtransport->poll_groups, rgroup, link);
+ - + + +
- + - + -
+ - + - +
- + - + -
+ - + - +
- + - + -
+ - + - +
- + - + -
+ - + - +
- + - ]
4287 [ + + ]: 156 : if (next_rgroup == NULL) {
4288 [ + - + - : 63 : next_rgroup = TAILQ_FIRST(&rtransport->poll_groups);
+ - ]
4289 : 1 : }
4290 [ + + + - : 156 : if (rtransport->conn_sched.next_admin_pg == rgroup) {
+ - - + ]
4291 [ + - + - : 109 : rtransport->conn_sched.next_admin_pg = next_rgroup;
+ - ]
4292 : 5 : }
4293 [ + + + - : 156 : if (rtransport->conn_sched.next_io_pg == rgroup) {
+ - - + ]
4294 [ + - + - : 104 : rtransport->conn_sched.next_io_pg = next_rgroup;
+ - ]
4295 : 5 : }
4296 : :
4297 : 156 : free(rgroup);
4298 [ - + ]: 5 : }
4299 : :
4300 : : static void
4301 : 0 : nvmf_rdma_qpair_reject_connection(struct spdk_nvmf_rdma_qpair *rqpair)
4302 : : {
4303 [ # # # # : 0 : if (rqpair->cm_id != NULL) {
# # ]
4304 [ # # # # ]: 0 : nvmf_rdma_event_reject(rqpair->cm_id, SPDK_NVMF_RDMA_ERROR_NO_RESOURCES);
4305 : 0 : }
4306 : 0 : }
4307 : :
4308 : : static int
4309 : 2039 : nvmf_rdma_poll_group_add(struct spdk_nvmf_transport_poll_group *group,
4310 : : struct spdk_nvmf_qpair *qpair)
4311 : : {
4312 : 0 : struct spdk_nvmf_rdma_poll_group *rgroup;
4313 : 0 : struct spdk_nvmf_rdma_qpair *rqpair;
4314 : 0 : struct spdk_nvmf_rdma_device *device;
4315 : 0 : struct spdk_nvmf_rdma_poller *poller;
4316 : 0 : int rc;
4317 : :
4318 : 2039 : rgroup = SPDK_CONTAINEROF(group, struct spdk_nvmf_rdma_poll_group, group);
4319 : 2039 : rqpair = SPDK_CONTAINEROF(qpair, struct spdk_nvmf_rdma_qpair, qpair);
4320 : :
4321 [ # # # # ]: 2039 : device = rqpair->device;
4322 : :
4323 [ + - # # : 2040 : TAILQ_FOREACH(poller, &rgroup->pollers, link) {
# # # # #
# # # #
# ]
4324 [ + + # # : 2040 : if (poller->device == device) {
# # ]
4325 : 2039 : break;
4326 : : }
4327 : 0 : }
4328 : :
4329 [ - + ]: 2039 : if (!poller) {
4330 : 0 : SPDK_ERRLOG("No poller found for device.\n");
4331 : 0 : return -1;
4332 : : }
4333 : :
4334 [ - + - + : 2039 : if (poller->need_destroy) {
# # # # ]
4335 : 0 : SPDK_ERRLOG("Poller is destroying.\n");
4336 : 0 : return -1;
4337 : : }
4338 : :
4339 [ # # # # ]: 2039 : rqpair->poller = poller;
4340 [ # # # # : 2039 : rqpair->srq = rqpair->poller->srq;
# # # # #
# # # ]
4341 : :
4342 : 2039 : rc = nvmf_rdma_qpair_initialize(qpair);
4343 [ - + ]: 2039 : if (rc < 0) {
4344 : 0 : SPDK_ERRLOG("Failed to initialize nvmf_rdma_qpair with qpair=%p\n", qpair);
4345 [ # # # # ]: 0 : rqpair->poller = NULL;
4346 [ # # # # ]: 0 : rqpair->srq = NULL;
4347 : 0 : return -1;
4348 : : }
4349 : :
4350 [ # # ]: 2039 : RB_INSERT(qpairs_tree, &poller->qpairs, rqpair);
4351 : :
4352 [ # # # # ]: 2039 : rc = nvmf_rdma_event_accept(rqpair->cm_id, rqpair);
4353 [ - + ]: 2039 : if (rc) {
4354 : : /* Try to reject, but we probably can't */
4355 : 0 : nvmf_rdma_qpair_reject_connection(rqpair);
4356 : 0 : return -1;
4357 : : }
4358 : :
4359 : 2039 : nvmf_rdma_update_ibv_state(rqpair);
4360 : :
4361 : 2039 : return 0;
4362 : 0 : }
4363 : :
4364 : : static int
4365 : 2039 : nvmf_rdma_poll_group_remove(struct spdk_nvmf_transport_poll_group *group,
4366 : : struct spdk_nvmf_qpair *qpair)
4367 : : {
4368 : 0 : struct spdk_nvmf_rdma_qpair *rqpair;
4369 : :
4370 : 2039 : rqpair = SPDK_CONTAINEROF(qpair, struct spdk_nvmf_rdma_qpair, qpair);
4371 [ - + # # : 2039 : assert(group->transport->tgt != NULL);
# # # # #
# # # ]
4372 : :
4373 [ # # # # : 2039 : rqpair->destruct_channel = spdk_get_io_channel(group->transport->tgt);
# # # # #
# # # ]
4374 : :
4375 [ - + # # : 2039 : if (!rqpair->destruct_channel) {
# # ]
4376 : 0 : SPDK_WARNLOG("failed to get io_channel, qpair %p\n", qpair);
4377 : 0 : return 0;
4378 : : }
4379 : :
4380 : : /* Sanity check that we get io_channel on the correct thread */
4381 [ + - # # : 2039 : if (qpair->group) {
# # ]
4382 [ - + # # : 2039 : assert(qpair->group->thread == spdk_io_channel_get_thread(rqpair->destruct_channel));
# # # # #
# # # # #
# # ]
4383 : 0 : }
4384 : :
4385 : 2039 : return 0;
4386 : 0 : }
4387 : :
4388 : : static int
4389 : 888 : nvmf_rdma_request_free(struct spdk_nvmf_request *req)
4390 : : {
4391 : 888 : struct spdk_nvmf_rdma_request *rdma_req = SPDK_CONTAINEROF(req, struct spdk_nvmf_rdma_request, req);
4392 [ # # # # : 888 : struct spdk_nvmf_rdma_transport *rtransport = SPDK_CONTAINEROF(req->qpair->transport,
# # # # ]
4393 : : struct spdk_nvmf_rdma_transport, transport);
4394 [ # # # # : 888 : struct spdk_nvmf_rdma_qpair *rqpair = SPDK_CONTAINEROF(rdma_req->req.qpair,
# # ]
4395 : : struct spdk_nvmf_rdma_qpair, qpair);
4396 : :
4397 : : /*
4398 : : * AER requests are freed when a qpair is destroyed. The recv corresponding to that request
4399 : : * needs to be returned to the shared receive queue or the poll group will eventually be
4400 : : * starved of RECV structures.
4401 : : */
4402 [ + - + - : 888 : if (rqpair->srq && rdma_req->recv) {
# # # # #
# # # ]
4403 : 0 : int rc;
4404 : 0 : struct ibv_recv_wr *bad_recv_wr;
4405 : :
4406 [ # # # # : 888 : spdk_rdma_srq_queue_recv_wrs(rqpair->srq, &rdma_req->recv->wr);
# # # # #
# ]
4407 [ # # # # ]: 888 : rc = spdk_rdma_srq_flush_recv_wrs(rqpair->srq, &bad_recv_wr);
4408 [ - + ]: 888 : if (rc) {
4409 : 0 : SPDK_ERRLOG("Unable to re-post rx descriptor\n");
4410 : 0 : }
4411 : 0 : }
4412 : :
4413 : 888 : _nvmf_rdma_request_free(rdma_req, rtransport);
4414 : 888 : return 0;
4415 : 0 : }
4416 : :
4417 : : static int
4418 : 6673639 : nvmf_rdma_request_complete(struct spdk_nvmf_request *req)
4419 : : {
4420 [ # # # # : 6673639 : struct spdk_nvmf_rdma_transport *rtransport = SPDK_CONTAINEROF(req->qpair->transport,
# # # # ]
4421 : : struct spdk_nvmf_rdma_transport, transport);
4422 : 6673639 : struct spdk_nvmf_rdma_request *rdma_req = SPDK_CONTAINEROF(req,
4423 : : struct spdk_nvmf_rdma_request, req);
4424 [ # # # # : 6673639 : struct spdk_nvmf_rdma_qpair *rqpair = SPDK_CONTAINEROF(rdma_req->req.qpair,
# # ]
4425 : : struct spdk_nvmf_rdma_qpair, qpair);
4426 : :
4427 [ + - # # : 6673639 : if (rqpair->ibv_state != IBV_QPS_ERR) {
# # ]
4428 : : /* The connection is alive, so process the request as normal */
4429 [ # # # # ]: 6673639 : rdma_req->state = RDMA_REQUEST_STATE_EXECUTED;
4430 : 0 : } else {
4431 : : /* The connection is dead. Move the request directly to the completed state. */
4432 [ # # # # ]: 0 : rdma_req->state = RDMA_REQUEST_STATE_COMPLETED;
4433 : : }
4434 : :
4435 : 6673639 : nvmf_rdma_request_process(rtransport, rdma_req);
4436 : :
4437 : 6673639 : return 0;
4438 : 0 : }
4439 : :
4440 : : static void
4441 : 2039 : nvmf_rdma_close_qpair(struct spdk_nvmf_qpair *qpair,
4442 : : spdk_nvmf_transport_qpair_fini_cb cb_fn, void *cb_arg)
4443 : : {
4444 : 2039 : struct spdk_nvmf_rdma_qpair *rqpair = SPDK_CONTAINEROF(qpair, struct spdk_nvmf_rdma_qpair, qpair);
4445 : :
4446 [ # # # # ]: 2039 : rqpair->to_close = true;
4447 : :
4448 : : /* This happens only when the qpair is disconnected before
4449 : : * it is added to the poll group. Since there is no poll group,
4450 : : * the RDMA qp has not been initialized yet and the RDMA CM
4451 : : * event has not yet been acknowledged, so we need to reject it.
4452 : : */
4453 [ - + # # : 2039 : if (rqpair->qpair.state == SPDK_NVMF_QPAIR_UNINITIALIZED) {
# # # # ]
4454 : 0 : nvmf_rdma_qpair_reject_connection(rqpair);
4455 : 0 : nvmf_rdma_qpair_destroy(rqpair);
4456 : 0 : return;
4457 : : }
4458 : :
4459 [ + - # # : 2039 : if (rqpair->rdma_qp) {
# # ]
4460 [ # # # # ]: 2039 : spdk_rdma_qp_disconnect(rqpair->rdma_qp);
4461 : 0 : }
4462 : :
4463 : 2039 : nvmf_rdma_destroy_drained_qpair(rqpair);
4464 : :
4465 [ + - ]: 2039 : if (cb_fn) {
4466 [ # # # # ]: 2039 : cb_fn(cb_arg);
4467 : 0 : }
4468 [ # # ]: 0 : }
4469 : :
4470 : : static struct spdk_nvmf_rdma_qpair *
4471 : 6674914 : get_rdma_qpair_from_wc(struct spdk_nvmf_rdma_poller *rpoller, struct ibv_wc *wc)
4472 : : {
4473 : 0 : struct spdk_nvmf_rdma_qpair find;
4474 : :
4475 [ # # # # : 6674914 : find.qp_num = wc->qp_num;
# # ]
4476 : :
4477 [ # # ]: 6674914 : return RB_FIND(qpairs_tree, &rpoller->qpairs, &find);
4478 : 0 : }
4479 : :
4480 : : #ifdef DEBUG
4481 : : static int
4482 : 6673419 : nvmf_rdma_req_is_completing(struct spdk_nvmf_rdma_request *rdma_req)
4483 : : {
4484 [ + + # # : 10012491 : return rdma_req->state == RDMA_REQUEST_STATE_TRANSFERRING_CONTROLLER_TO_HOST ||
# # ]
4485 [ + - # # ]: 3339072 : rdma_req->state == RDMA_REQUEST_STATE_COMPLETING;
4486 : : }
4487 : : #endif
4488 : :
4489 : : static void
4490 : 0 : _poller_reset_failed_recvs(struct spdk_nvmf_rdma_poller *rpoller, struct ibv_recv_wr *bad_recv_wr,
4491 : : int rc)
4492 : : {
4493 : 0 : struct spdk_nvmf_rdma_recv *rdma_recv;
4494 : 0 : struct spdk_nvmf_rdma_wr *bad_rdma_wr;
4495 : :
4496 [ # # ]: 0 : SPDK_ERRLOG("Failed to post a recv for the poller %p with errno %d\n", rpoller, -rc);
4497 [ # # ]: 0 : while (bad_recv_wr != NULL) {
4498 [ # # # # ]: 0 : bad_rdma_wr = (struct spdk_nvmf_rdma_wr *)bad_recv_wr->wr_id;
4499 : 0 : rdma_recv = SPDK_CONTAINEROF(bad_rdma_wr, struct spdk_nvmf_rdma_recv, rdma_wr);
4500 : :
4501 [ # # # # : 0 : rdma_recv->qpair->current_recv_depth++;
# # ]
4502 [ # # # # ]: 0 : bad_recv_wr = bad_recv_wr->next;
4503 [ # # # # : 0 : SPDK_ERRLOG("Failed to post a recv for the qpair %p with errno %d\n", rdma_recv->qpair, -rc);
# # ]
4504 [ # # # # : 0 : spdk_nvmf_qpair_disconnect(&rdma_recv->qpair->qpair, NULL, NULL);
# # ]
4505 : : }
4506 : 0 : }
4507 : :
4508 : : static void
4509 : 0 : _qp_reset_failed_recvs(struct spdk_nvmf_rdma_qpair *rqpair, struct ibv_recv_wr *bad_recv_wr, int rc)
4510 : : {
4511 [ # # ]: 0 : SPDK_ERRLOG("Failed to post a recv for the qpair %p with errno %d\n", rqpair, -rc);
4512 [ # # ]: 0 : while (bad_recv_wr != NULL) {
4513 [ # # # # ]: 0 : bad_recv_wr = bad_recv_wr->next;
4514 [ # # ]: 0 : rqpair->current_recv_depth++;
4515 : : }
4516 [ # # ]: 0 : spdk_nvmf_qpair_disconnect(&rqpair->qpair, NULL, NULL);
4517 : 0 : }
4518 : :
4519 : : static void
4520 : 702666120 : _poller_submit_recvs(struct spdk_nvmf_rdma_transport *rtransport,
4521 : : struct spdk_nvmf_rdma_poller *rpoller)
4522 : : {
4523 : 0 : struct spdk_nvmf_rdma_qpair *rqpair;
4524 : 0 : struct ibv_recv_wr *bad_recv_wr;
4525 : 0 : int rc;
4526 : :
4527 [ + - # # : 702666120 : if (rpoller->srq) {
# # ]
4528 [ # # # # ]: 702666120 : rc = spdk_rdma_srq_flush_recv_wrs(rpoller->srq, &bad_recv_wr);
4529 [ - + ]: 702666120 : if (rc) {
4530 : 0 : _poller_reset_failed_recvs(rpoller, bad_recv_wr, rc);
4531 : 0 : }
4532 : 0 : } else {
4533 [ # # # # : 0 : while (!STAILQ_EMPTY(&rpoller->qpairs_pending_recv)) {
# # # # ]
4534 [ # # # # : 0 : rqpair = STAILQ_FIRST(&rpoller->qpairs_pending_recv);
# # ]
4535 [ # # # # ]: 0 : rc = spdk_rdma_qp_flush_recv_wrs(rqpair->rdma_qp, &bad_recv_wr);
4536 [ # # ]: 0 : if (rc) {
4537 : 0 : _qp_reset_failed_recvs(rqpair, bad_recv_wr, rc);
4538 : 0 : }
4539 [ # # # # : 0 : STAILQ_REMOVE_HEAD(&rpoller->qpairs_pending_recv, recv_link);
# # # # #
# # # # #
# # # # #
# # # # #
# # # # #
# ]
4540 : : }
4541 : : }
4542 : 702666120 : }
4543 : :
4544 : : static void
4545 : 0 : _qp_reset_failed_sends(struct spdk_nvmf_rdma_transport *rtransport,
4546 : : struct spdk_nvmf_rdma_qpair *rqpair, struct ibv_send_wr *bad_wr, int rc)
4547 : : {
4548 : 0 : struct spdk_nvmf_rdma_wr *bad_rdma_wr;
4549 : 0 : struct spdk_nvmf_rdma_request *prev_rdma_req = NULL, *cur_rdma_req = NULL;
4550 : :
4551 [ # # ]: 0 : SPDK_ERRLOG("Failed to post a send for the qpair %p with errno %d\n", rqpair, -rc);
4552 [ # # # # : 0 : for (; bad_wr != NULL; bad_wr = bad_wr->next) {
# # ]
4553 [ # # # # ]: 0 : bad_rdma_wr = (struct spdk_nvmf_rdma_wr *)bad_wr->wr_id;
4554 [ # # # # : 0 : assert(rqpair->current_send_depth > 0);
# # # # ]
4555 [ # # ]: 0 : rqpair->current_send_depth--;
4556 [ # # # # : 0 : switch (bad_rdma_wr->type) {
# # # ]
4557 : 0 : case RDMA_WR_TYPE_DATA:
4558 : 0 : cur_rdma_req = SPDK_CONTAINEROF(bad_rdma_wr, struct spdk_nvmf_rdma_request, data_wr);
4559 [ # # # # : 0 : if (bad_wr->opcode == IBV_WR_RDMA_READ) {
# # ]
4560 [ # # # # : 0 : assert(rqpair->current_read_depth > 0);
# # # # ]
4561 [ # # ]: 0 : rqpair->current_read_depth--;
4562 : 0 : }
4563 : 0 : break;
4564 : 0 : case RDMA_WR_TYPE_SEND:
4565 : 0 : cur_rdma_req = SPDK_CONTAINEROF(bad_rdma_wr, struct spdk_nvmf_rdma_request, rsp_wr);
4566 : 0 : break;
4567 : 0 : default:
4568 : 0 : SPDK_ERRLOG("Found a RECV in the list of pending SEND requests for qpair %p\n", rqpair);
4569 : 0 : prev_rdma_req = cur_rdma_req;
4570 : 0 : continue;
4571 : : }
4572 : :
4573 [ # # ]: 0 : if (prev_rdma_req == cur_rdma_req) {
4574 : : /* This request was handled by an earlier WR, i.e. we were performing an NVMe read. */
4575 : : /* We only have to check against prev_wr since each request's WRs are contiguous in this list. */
4576 : 0 : continue;
4577 : : }
4578 : :
4579 [ # # # # : 0 : switch (cur_rdma_req->state) {
# # # ]
4580 : 0 : case RDMA_REQUEST_STATE_TRANSFERRING_HOST_TO_CONTROLLER:
4581 [ # # # # : 0 : cur_rdma_req->req.rsp->nvme_cpl.status.sc = SPDK_NVME_SC_INTERNAL_DEVICE_ERROR;
# # # # #
# # # #
# ]
4582 [ # # # # : 0 : STAILQ_INSERT_TAIL(&rqpair->pending_rdma_send_queue, cur_rdma_req, state_link);
# # # # #
# # # # #
# # # # #
# # # #
# ]
4583 [ # # # # ]: 0 : cur_rdma_req->state = RDMA_REQUEST_STATE_READY_TO_COMPLETE_PENDING;
4584 : 0 : break;
4585 : 0 : case RDMA_REQUEST_STATE_TRANSFERRING_CONTROLLER_TO_HOST:
4586 : : case RDMA_REQUEST_STATE_COMPLETING:
4587 [ # # # # ]: 0 : cur_rdma_req->state = RDMA_REQUEST_STATE_COMPLETED;
4588 : 0 : break;
4589 : 0 : default:
4590 [ # # # # ]: 0 : SPDK_ERRLOG("Found a request in a bad state %d when draining pending SEND requests for qpair %p\n",
4591 : : cur_rdma_req->state, rqpair);
4592 : 0 : continue;
4593 : : }
4594 : :
4595 : 0 : nvmf_rdma_request_process(rtransport, cur_rdma_req);
4596 : 0 : prev_rdma_req = cur_rdma_req;
4597 : 0 : }
4598 : :
4599 [ # # # # : 0 : if (rqpair->qpair.state == SPDK_NVMF_QPAIR_ACTIVE) {
# # # # ]
4600 : : /* Disconnect the connection. */
4601 [ # # ]: 0 : spdk_nvmf_qpair_disconnect(&rqpair->qpair, NULL, NULL);
4602 : 0 : }
4603 : :
4604 : 0 : }
4605 : :
4606 : : static void
4607 : 702666120 : _poller_submit_sends(struct spdk_nvmf_rdma_transport *rtransport,
4608 : : struct spdk_nvmf_rdma_poller *rpoller)
4609 : : {
4610 : 0 : struct spdk_nvmf_rdma_qpair *rqpair;
4611 : 702666120 : struct ibv_send_wr *bad_wr = NULL;
4612 : 0 : int rc;
4613 : :
4614 [ + + # # : 1406479778 : while (!STAILQ_EMPTY(&rpoller->qpairs_pending_send)) {
# # # # ]
4615 [ # # # # : 1147538 : rqpair = STAILQ_FIRST(&rpoller->qpairs_pending_send);
# # ]
4616 [ # # # # ]: 1147538 : rc = spdk_rdma_qp_flush_send_wrs(rqpair->rdma_qp, &bad_wr);
4617 : :
4618 : : /* bad_wr always points to the first WR that failed. */
4619 [ - + ]: 1147538 : if (rc) {
4620 : 0 : _qp_reset_failed_sends(rtransport, rqpair, bad_wr, rc);
4621 : 0 : }
4622 [ + + # # : 1147538 : STAILQ_REMOVE_HEAD(&rpoller->qpairs_pending_send, send_link);
# # # # #
# # # # #
# # # # #
# # # # #
# # # # #
# ]
4623 : : }
4624 : 702666120 : }
4625 : :
4626 : : static const char *
4627 : 0 : nvmf_rdma_wr_type_str(enum spdk_nvmf_rdma_wr_type wr_type)
4628 : : {
4629 [ # # # # ]: 0 : switch (wr_type) {
4630 : 0 : case RDMA_WR_TYPE_RECV:
4631 : 0 : return "RECV";
4632 : 0 : case RDMA_WR_TYPE_SEND:
4633 : 0 : return "SEND";
4634 : 0 : case RDMA_WR_TYPE_DATA:
4635 : 0 : return "DATA";
4636 : 0 : default:
4637 : 0 : SPDK_ERRLOG("Unknown WR type %d\n", wr_type);
4638 [ # # ]: 0 : SPDK_UNREACHABLE();
4639 : : }
4640 : 0 : }
4641 : :
4642 : : static inline void
4643 : 1 : nvmf_rdma_log_wc_status(struct spdk_nvmf_rdma_qpair *rqpair, struct ibv_wc *wc)
4644 : : {
4645 [ # # # # : 1 : enum spdk_nvmf_rdma_wr_type wr_type = ((struct spdk_nvmf_rdma_wr *)wc->wr_id)->type;
# # # # ]
4646 : :
4647 [ + - # # : 1 : if (wc->status == IBV_WC_WR_FLUSH_ERR) {
# # ]
4648 : : /* If the qpair is in the ERR state, we will receive completions with IBV_WC_WR_FLUSH_ERR status
4649 : : * for all posted but not yet completed Work Requests. Don't log an error in that case. */
4650 [ - + - + : 1 : SPDK_DEBUGLOG(rdma,
# # # # #
# # # # #
# # # # #
# # # # #
# # # # #
# # # # #
# # ]
4651 : : "Error on CQ %p, (qp state %d ibv_state %d) request 0x%lu, type %s, status: (%d): %s\n",
4652 : : rqpair->poller->cq, rqpair->qpair.state, rqpair->ibv_state, wc->wr_id,
4653 : : nvmf_rdma_wr_type_str(wr_type), wc->status, ibv_wc_status_str(wc->status));
4654 : 0 : } else {
4655 [ # # # # : 0 : SPDK_ERRLOG("Error on CQ %p, (qp state %d ibv_state %d) request 0x%lu, type %s, status: (%d): %s\n",
# # # # #
# # # # #
# # # # #
# # # # #
# # # # #
# ]
4656 : : rqpair->poller->cq, rqpair->qpair.state, rqpair->ibv_state, wc->wr_id,
4657 : : nvmf_rdma_wr_type_str(wr_type), wc->status, ibv_wc_status_str(wc->status));
4658 : : }
4659 : 1 : }
4660 : :
4661 : : static int
4662 : 702666121 : nvmf_rdma_poller_poll(struct spdk_nvmf_rdma_transport *rtransport,
4663 : : struct spdk_nvmf_rdma_poller *rpoller)
4664 : : {
4665 : 0 : struct ibv_wc wc[32];
4666 : 0 : struct spdk_nvmf_rdma_wr *rdma_wr;
4667 : 0 : struct spdk_nvmf_rdma_request *rdma_req;
4668 : 0 : struct spdk_nvmf_rdma_recv *rdma_recv;
4669 : 0 : struct spdk_nvmf_rdma_qpair *rqpair, *tmp_rqpair;
4670 : 0 : int reaped, i;
4671 : 702666121 : int count = 0;
4672 : 0 : int rc;
4673 : 702666121 : bool error = false;
4674 : 702666121 : uint64_t poll_tsc = spdk_get_ticks();
4675 : :
4676 [ - + - + : 702666121 : if (spdk_unlikely(rpoller->need_destroy)) {
# # # # ]
4677 : : /* If a qpair is closed before the poller is destroyed, nvmf_rdma_destroy_drained_qpair may not
4678 : : * be called because we can no longer poll anything from the cq. So we call it here to force the
4679 : : * destruction of the qpair once to_close has turned true.
4680 : : */
4681 [ # # # # : 0 : RB_FOREACH_SAFE(rqpair, qpairs_tree, &rpoller->qpairs, tmp_rqpair) {
# # ]
4682 : 0 : nvmf_rdma_destroy_drained_qpair(rqpair);
4683 : 0 : }
4684 : 0 : return 0;
4685 : : }
4686 : :
4687 : : /* Poll for completing operations. */
4688 [ # # # # ]: 702666121 : reaped = ibv_poll_cq(rpoller->cq, 32, wc);
4689 [ - + ]: 702666121 : if (reaped < 0) {
4690 [ # # # # ]: 0 : SPDK_ERRLOG("Error polling CQ! (%d): %s\n",
4691 : : errno, spdk_strerror(errno));
4692 : 0 : return -1;
4693 [ + + ]: 702666121 : } else if (reaped == 0) {
4694 [ # # # # ]: 701143494 : rpoller->stat.idle_polls++;
4695 : 0 : }
4696 : :
4697 [ # # # # ]: 702666121 : rpoller->stat.polls++;
4698 [ # # # # : 702666121 : rpoller->stat.completions += reaped;
# # ]
4699 : :
4700 [ + + # # ]: 716822505 : for (i = 0; i < reaped; i++) {
4701 : :
4702 [ # # # # : 14156384 : rdma_wr = (struct spdk_nvmf_rdma_wr *)wc[i].wr_id;
# # # # ]
4703 : :
4704 [ + + + - : 14156384 : switch (rdma_wr->type) {
# # # # ]
4705 : 6673419 : case RDMA_WR_TYPE_SEND:
4706 : 6673419 : rdma_req = SPDK_CONTAINEROF(rdma_wr, struct spdk_nvmf_rdma_request, rsp_wr);
4707 [ # # # # : 6673419 : rqpair = SPDK_CONTAINEROF(rdma_req->req.qpair, struct spdk_nvmf_rdma_qpair, qpair);
# # ]
4708 : :
4709 [ + - # # : 6673419 : if (!wc[i].status) {
# # # # #
# ]
4710 [ # # ]: 6673419 : count++;
4711 [ - + # # : 6673419 : assert(wc[i].opcode == IBV_WC_SEND);
# # # # #
# # # ]
4712 [ - + # # ]: 6673419 : assert(nvmf_rdma_req_is_completing(rdma_req));
4713 : 0 : }
4714 : :
4715 [ # # # # ]: 6673419 : rdma_req->state = RDMA_REQUEST_STATE_COMPLETED;
4716 : : /* RDMA_WRITE operation completed. +1 since it was chained with rsp WR */
4717 [ - + # # : 6673419 : assert(rqpair->current_send_depth >= (uint32_t)rdma_req->num_outstanding_data_wr + 1);
# # # # #
# # # ]
4718 [ # # # # : 6673419 : rqpair->current_send_depth -= rdma_req->num_outstanding_data_wr + 1;
# # # # #
# ]
4719 [ # # # # ]: 6673419 : rdma_req->num_outstanding_data_wr = 0;
4720 : :
4721 : 6673419 : nvmf_rdma_request_process(rtransport, rdma_req);
4722 : 6673419 : break;
4723 : 6674914 : case RDMA_WR_TYPE_RECV:
4724 : : /* rdma_recv->qpair will be invalid if using an SRQ. In that case we have to get the qpair from the wc. */
4725 : 6674914 : rdma_recv = SPDK_CONTAINEROF(rdma_wr, struct spdk_nvmf_rdma_recv, rdma_wr);
4726 [ + - # # : 6674914 : if (rpoller->srq != NULL) {
# # ]
4727 [ # # # # : 6674914 : rdma_recv->qpair = get_rdma_qpair_from_wc(rpoller, &wc[i]);
# # # # ]
4728 : : /* It is possible that there are still some completions for a destroyed QP
4729 : : * associated with the SRQ. We just ignore these late completions and re-post
4730 : : * the receive WRs back to the SRQ.
4731 : : */
4732 [ - + # # : 6674914 : if (spdk_unlikely(NULL == rdma_recv->qpair)) {
# # ]
4733 : 0 : struct ibv_recv_wr *bad_wr;
4734 : :
4735 [ # # # # : 0 : rdma_recv->wr.next = NULL;
# # ]
4736 [ # # # # : 0 : spdk_rdma_srq_queue_recv_wrs(rpoller->srq, &rdma_recv->wr);
# # ]
4737 [ # # # # ]: 0 : rc = spdk_rdma_srq_flush_recv_wrs(rpoller->srq, &bad_wr);
4738 [ # # ]: 0 : if (rc) {
4739 : 0 : SPDK_ERRLOG("Failed to re-post recv WR to SRQ, err %d\n", rc);
4740 : 0 : }
4741 : 0 : continue;
4742 : 0 : }
4743 : 0 : }
4744 [ # # # # ]: 6674914 : rqpair = rdma_recv->qpair;
4745 : :
4746 [ - + # # ]: 6674914 : assert(rqpair != NULL);
4747 [ + - # # : 6674914 : if (!wc[i].status) {
# # # # #
# ]
4748 [ - + # # : 6674914 : assert(wc[i].opcode == IBV_WC_RECV);
# # # # #
# # # ]
4749 [ - + # # : 6674914 : if (rqpair->current_recv_depth >= rqpair->max_queue_depth) {
# # # # #
# ]
4750 [ # # ]: 0 : spdk_nvmf_qpair_disconnect(&rqpair->qpair, NULL, NULL);
4751 : 0 : break;
4752 : : }
4753 : 0 : }
4754 : :
4755 [ # # # # : 6674914 : rdma_recv->wr.next = NULL;
# # ]
4756 [ # # ]: 6674914 : rqpair->current_recv_depth++;
4757 [ # # # # ]: 6674914 : rdma_recv->receive_tsc = poll_tsc;
4758 [ # # # # ]: 6674914 : rpoller->stat.requests++;
4759 [ + - # # ... ]: 6674914 : STAILQ_INSERT_HEAD(&rqpair->resources->incoming_queue, rdma_recv, link);
4760 : 6674914 : break;
4761 : 808051 : case RDMA_WR_TYPE_DATA:
4762 : 808051 : rdma_req = SPDK_CONTAINEROF(rdma_wr, struct spdk_nvmf_rdma_request, data_wr);
4763 [ # # # # : 808051 : rqpair = SPDK_CONTAINEROF(rdma_req->req.qpair, struct spdk_nvmf_rdma_qpair, qpair);
# # ]
4764 : :
4765 [ - + # # : 808051 : assert(rdma_req->num_outstanding_data_wr > 0);
# # # # ]
4766 : :
4767 [ # # ]: 808051 : rqpair->current_send_depth--;
4768 [ # # ]: 808051 : rdma_req->num_outstanding_data_wr--;
4769 [ + + # # : 808051 : if (!wc[i].status) {
# # # # #
# ]
4770 [ - + # # : 808050 : assert(wc[i].opcode == IBV_WC_RDMA_READ);
# # # # #
# # # ]
4771 [ # # ]: 808050 : rqpair->current_read_depth--;
4772 : : /* wait for all outstanding reads associated with the same rdma_req to complete before proceeding. */
4773 [ + + # # : 808050 : if (rdma_req->num_outstanding_data_wr == 0) {
# # ]
4774 [ + + # # : 658840 : if (rdma_req->num_remaining_data_wr) {
# # ]
4775 : : /* Only part of the RDMA_READ operations was submitted; process the rest */
4776 : 12203 : nvmf_rdma_request_reset_transfer_in(rdma_req, rtransport);
4777 [ # # # # ]: 12203 : rdma_req->state = RDMA_REQUEST_STATE_DATA_TRANSFER_TO_CONTROLLER_PENDING;
4778 : 12203 : nvmf_rdma_request_process(rtransport, rdma_req);
4779 : 12203 : break;
4780 : : }
4781 [ # # # # ]: 646637 : rdma_req->state = RDMA_REQUEST_STATE_READY_TO_EXECUTE;
4782 : 646637 : nvmf_rdma_request_process(rtransport, rdma_req);
4783 : 0 : }
4784 : 0 : } else {
4785 : : /* If the data transfer fails, still force the queue into the error state.
4786 : : * If we were performing an RDMA_READ, we need to force the request into a
4787 : : * completed state since it wasn't linked to a send. However, in the RDMA_WRITE
4788 : : * case, we should wait for the SEND to complete. */
4789 [ + - # # : 1 : if (rdma_req->data.wr.opcode == IBV_WR_RDMA_READ) {
# # # # #
# ]
4790 [ # # ]: 1 : rqpair->current_read_depth--;
4791 [ + - # # : 1 : if (rdma_req->num_outstanding_data_wr == 0) {
# # ]
4792 [ # # # # ]: 1 : rdma_req->state = RDMA_REQUEST_STATE_COMPLETED;
4793 : 0 : }
4794 : 0 : }
4795 : : }
4796 : 795848 : break;
4797 : 0 : default:
4798 [ # # # # : 0 : SPDK_ERRLOG("Received an unknown opcode on the CQ: %d\n", wc[i].opcode);
# # # # ]
4799 : 0 : continue;
4800 : : }
4801 : :
4802 : : /* Handle error conditions */
4803 [ + + # # : 14156384 : if (wc[i].status) {
# # # # #
# ]
4804 : 1 : nvmf_rdma_update_ibv_state(rqpair);
4805 [ # # # # ]: 1 : nvmf_rdma_log_wc_status(rqpair, &wc[i]);
4806 : :
4807 : 1 : error = true;
4808 : :
4809 [ - + # # : 1 : if (rqpair->qpair.state == SPDK_NVMF_QPAIR_ACTIVE) {
# # # # ]
4810 : : /* Disconnect the connection. */
4811 [ # # ]: 0 : spdk_nvmf_qpair_disconnect(&rqpair->qpair, NULL, NULL);
4812 : 0 : } else {
4813 : 1 : nvmf_rdma_destroy_drained_qpair(rqpair);
4814 : : }
4815 : 1 : continue;
4816 : : }
4817 : :
4818 : 14156383 : nvmf_rdma_qpair_process_pending(rtransport, rqpair, false);
4819 : :
4820 [ + + # # : 14156383 : if (rqpair->qpair.state != SPDK_NVMF_QPAIR_ACTIVE) {
# # # # ]
4821 : 699 : nvmf_rdma_destroy_drained_qpair(rqpair);
4822 : 0 : }
4823 : 0 : }
4824 : :
4825 [ + + # # ]: 702666121 : if (error == true) {
4826 : 1 : return -1;
4827 : : }
4828 : :
4829 : : /* submit outstanding work requests. */
4830 : 702666120 : _poller_submit_recvs(rtransport, rpoller);
4831 : 702666120 : _poller_submit_sends(rtransport, rpoller);
4832 : :
4833 : 702666120 : return count;
4834 : 0 : }
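The poller above is, at its core, the standard ibverbs completion-draining loop: reap a batch of work completions, recover the owning context from wr_id, branch on the completion status and opcode, and only afterwards replenish outstanding receive and send work requests. The standalone sketch below (not SPDK code; struct my_wr_ctx, my_dispatch_send() and my_dispatch_recv() are hypothetical placeholders) shows that bare pattern using only ibverbs calls that the poller itself also relies on.

#include <infiniband/verbs.h>
#include <stdint.h>
#include <stdio.h>

#define MY_CQ_BATCH 32

/* Hypothetical per-WR context recovered from wc.wr_id. */
struct my_wr_ctx {
    int is_send;
};

static void my_dispatch_send(struct my_wr_ctx *ctx) { (void)ctx; /* release send-queue credits */ }
static void my_dispatch_recv(struct my_wr_ctx *ctx) { (void)ctx; /* hand the new capsule to the target */ }

/* Drain one batch of completions from a CQ; returns completions reaped or -1 on CQ failure. */
int my_poll_cq_once(struct ibv_cq *cq)
{
    struct ibv_wc wc[MY_CQ_BATCH];
    int n, i;

    n = ibv_poll_cq(cq, MY_CQ_BATCH, wc);    /* 0 simply means an idle poll */
    if (n < 0) {
        return -1;                           /* CQ error: the caller tears the poller down */
    }

    for (i = 0; i < n; i++) {
        struct my_wr_ctx *ctx = (struct my_wr_ctx *)(uintptr_t)wc[i].wr_id;

        if (wc[i].status != IBV_WC_SUCCESS) {
            fprintf(stderr, "WC error: %s\n", ibv_wc_status_str(wc[i].status));
            continue;                        /* error path: disconnect and drain the qpair */
        }
        if (ctx->is_send) {
            my_dispatch_send(ctx);           /* SEND / RDMA_READ / RDMA_WRITE finished */
        } else {
            my_dispatch_recv(ctx);           /* a RECV completed with a new command */
        }
    }
    return n;
}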
4835 : :
4836 : : static void
4837 : 0 : _nvmf_rdma_remove_destroyed_device(void *c)
4838 : : {
4839 : 0 : struct spdk_nvmf_rdma_transport *rtransport = c;
4840 : 0 : struct spdk_nvmf_rdma_device *device, *device_tmp;
4841 : 0 : int rc;
4842 : :
4843 [ # # # # : 0 : TAILQ_FOREACH_SAFE(device, &rtransport->devices, link, device_tmp) {
# # # # #
# # # # #
# # ]
4844 [ # # # # : 0 : if (device->ready_to_destroy) {
# # # # ]
4845 : 0 : destroy_ib_device(rtransport, device);
4846 : 0 : }
4847 : 0 : }
4848 : :
4849 : 0 : free_poll_fds(rtransport);
4850 : 0 : rc = generate_poll_fds(rtransport);
4851 : : /* cannot handle fd allocation error here */
4852 [ # # ]: 0 : if (rc != 0) {
4853 : 0 : SPDK_ERRLOG("Failed to generate poll fds after removing ib device.\n");
4854 : 0 : }
4855 : 0 : }
4856 : :
4857 : : static void
4858 : 0 : _nvmf_rdma_remove_poller_in_group_cb(void *c)
4859 : : {
4860 : 0 : struct poller_manage_ctx *ctx = c;
4861 [ # # # # ]: 0 : struct spdk_nvmf_rdma_transport *rtransport = ctx->rtransport;
4862 [ # # # # ]: 0 : struct spdk_nvmf_rdma_device *device = ctx->device;
4863 [ # # # # ]: 0 : struct spdk_thread *thread = ctx->thread;
4864 : :
4865 [ # # ]: 0 : if (nvmf_rdma_all_pollers_management_done(c)) {
4866 : : /* destroy the device when the last poller is destroyed */
4867 [ # # # # ]: 0 : device->ready_to_destroy = true;
4868 : 0 : spdk_thread_send_msg(thread, _nvmf_rdma_remove_destroyed_device, rtransport);
4869 : 0 : }
4870 : 0 : }
4871 : :
4872 : : static void
4873 : 0 : _nvmf_rdma_remove_poller_in_group(void *c)
4874 : : {
4875 : 0 : struct poller_manage_ctx *ctx = c;
4876 : :
4877 [ # # # # : 0 : ctx->rpoller->need_destroy = true;
# # # # ]
4878 [ # # # # : 0 : ctx->rpoller->destroy_cb_ctx = ctx;
# # # # ]
4879 [ # # # # : 0 : ctx->rpoller->destroy_cb = _nvmf_rdma_remove_poller_in_group_cb;
# # # # ]
4880 : :
4881 : : /* qp will be disconnected after receiving a RDMA_CM_EVENT_DEVICE_REMOVAL event. */
4882 [ # # # # : 0 : if (RB_EMPTY(&ctx->rpoller->qpairs)) {
# # # # #
# # # ]
4883 [ # # # # ]: 0 : nvmf_rdma_poller_destroy(ctx->rpoller);
4884 : 0 : }
4885 : 0 : }
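_nvmf_rdma_remove_poller_in_group above follows a deferred-destroy pattern: the poller is destroyed immediately only when it has no qpairs left; otherwise it is flagged with need_destroy and the path that drains the last qpair performs the destruction and fires the callback. A generic sketch of that pattern, with hypothetical my_* types standing in for the poller and its qpair tree:

#include <stdbool.h>
#include <stdlib.h>

struct my_poller_obj {
    bool need_destroy;
    unsigned int active_qpairs;           /* stands in for the RB-tree of qpairs */
    void (*destroy_cb)(void *ctx);
    void *destroy_cb_ctx;
};

static void my_poller_destroy(struct my_poller_obj *p)
{
    if (p->destroy_cb) {
        p->destroy_cb(p->destroy_cb_ctx); /* notify the manager, as the callback above does */
    }
    free(p);
}

void my_poller_request_destroy(struct my_poller_obj *p,
                               void (*cb)(void *), void *ctx)
{
    p->need_destroy = true;
    p->destroy_cb = cb;
    p->destroy_cb_ctx = ctx;

    if (p->active_qpairs == 0) {
        my_poller_destroy(p);             /* nothing left to drain: destroy right away */
    }
    /* otherwise whoever removes the last qpair is expected to call my_poller_destroy() */
}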
4886 : :
4887 : : static int
4888 : 351333061 : nvmf_rdma_poll_group_poll(struct spdk_nvmf_transport_poll_group *group)
4889 : : {
4890 : 0 : struct spdk_nvmf_rdma_transport *rtransport;
4891 : 0 : struct spdk_nvmf_rdma_poll_group *rgroup;
4892 : 0 : struct spdk_nvmf_rdma_poller *rpoller, *tmp;
4893 : 0 : int count, rc;
4894 : :
4895 [ # # # # ]: 351333061 : rtransport = SPDK_CONTAINEROF(group->transport, struct spdk_nvmf_rdma_transport, transport);
4896 : 351333061 : rgroup = SPDK_CONTAINEROF(group, struct spdk_nvmf_rdma_poll_group, group);
4897 : :
4898 : 351333061 : count = 0;
4899 [ + + # # : 1053999181 : TAILQ_FOREACH_SAFE(rpoller, &rgroup->pollers, link, tmp) {
# # # # #
# # # # #
# # ]
4900 : 702666121 : rc = nvmf_rdma_poller_poll(rtransport, rpoller);
4901 [ + + ]: 702666121 : if (rc < 0) {
4902 : 1 : return rc;
4903 : : }
4904 [ # # ]: 702666120 : count += rc;
4905 : 0 : }
4906 : :
4907 : 351333060 : return count;
4908 : 0 : }
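nvmf_rdma_poll_group_poll simply aggregates its pollers: each returns either the number of completions it reaped or a negative error, and the first error aborts the pass. A minimal, portable sketch of that aggregation with hypothetical my_* types and a stub poll function; the open-coded loop is what SPDK's TAILQ_FOREACH_SAFE expands to, so an element may remove itself while being visited.

#include <sys/queue.h>
#include <stddef.h>

struct my_poller {
    TAILQ_ENTRY(my_poller) link;
};
TAILQ_HEAD(my_poller_list, my_poller);

/* Stub standing in for nvmf_rdma_poller_poll(): completions reaped, or < 0 on error. */
static int my_poller_poll(struct my_poller *p)
{
    (void)p;
    return 0;
}

int my_group_poll(struct my_poller_list *pollers)
{
    struct my_poller *p, *tmp;
    int count = 0, rc;

    /* Grab the next element before polling, so a poller may unlink itself safely. */
    for (p = TAILQ_FIRST(pollers); p != NULL; p = tmp) {
        tmp = TAILQ_NEXT(p, link);
        rc = my_poller_poll(p);
        if (rc < 0) {
            return rc;        /* the first failure aborts the whole pass */
        }
        count += rc;          /* otherwise accumulate completions reaped */
    }
    return count;
}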
4909 : :
4910 : : static int
4911 : 2393 : nvmf_rdma_trid_from_cm_id(struct rdma_cm_id *id,
4912 : : struct spdk_nvme_transport_id *trid,
4913 : : bool peer)
4914 : : {
4915 : 0 : struct sockaddr *saddr;
4916 : 0 : uint16_t port;
4917 : :
4918 : 2393 : spdk_nvme_trid_populate_transport(trid, SPDK_NVME_TRANSPORT_RDMA);
4919 : :
4920 [ - + # # ]: 2393 : if (peer) {
4921 : 0 : saddr = rdma_get_peer_addr(id);
4922 : 0 : } else {
4923 : 2393 : saddr = rdma_get_local_addr(id);
4924 : : }
4925 [ + - - # : 2393 : switch (saddr->sa_family) {
# # # ]
4926 : 2393 : case AF_INET: {
4927 : 2393 : struct sockaddr_in *saddr_in = (struct sockaddr_in *)saddr;
4928 : :
4929 [ # # # # ]: 2393 : trid->adrfam = SPDK_NVMF_ADRFAM_IPV4;
4930 [ # # ]: 2393 : inet_ntop(AF_INET, &saddr_in->sin_addr,
4931 [ # # ]: 2393 : trid->traddr, sizeof(trid->traddr));
4932 [ - + # # ]: 2393 : if (peer) {
4933 : 0 : port = ntohs(rdma_get_dst_port(id));
4934 : 0 : } else {
4935 : 2393 : port = ntohs(rdma_get_src_port(id));
4936 : : }
4937 [ # # ]: 2393 : snprintf(trid->trsvcid, sizeof(trid->trsvcid), "%u", port);
4938 : 2393 : break;
4939 : 0 : }
4940 : 0 : case AF_INET6: {
4941 : 0 : struct sockaddr_in6 *saddr_in = (struct sockaddr_in6 *)saddr;
4942 [ # # # # ]: 0 : trid->adrfam = SPDK_NVMF_ADRFAM_IPV6;
4943 [ # # ]: 0 : inet_ntop(AF_INET6, &saddr_in->sin6_addr,
4944 [ # # ]: 0 : trid->traddr, sizeof(trid->traddr));
4945 [ # # # # ]: 0 : if (peer) {
4946 : 0 : port = ntohs(rdma_get_dst_port(id));
4947 : 0 : } else {
4948 : 0 : port = ntohs(rdma_get_src_port(id));
4949 : : }
4950 [ # # ]: 0 : snprintf(trid->trsvcid, sizeof(trid->trsvcid), "%u", port);
4951 : 0 : break;
4952 : 0 : }
4953 : 0 : default:
4954 : 0 : return -1;
4955 : :
4956 : : }
4957 : :
4958 : 2393 : return 0;
4959 : 0 : }
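The address handling above reduces to a switch on sa_family followed by inet_ntop() and a port conversion. Below is a standalone sketch of the same conversion (a hypothetical helper, not part of SPDK); it reads the port directly from the sockaddr rather than via rdma_get_src_port()/rdma_get_dst_port(), which return the same value in network byte order.

#include <arpa/inet.h>
#include <netinet/in.h>
#include <stdio.h>
#include <sys/socket.h>

int my_sockaddr_to_str(const struct sockaddr *sa,
                       char *addr, size_t addr_len,
                       char *svc, size_t svc_len)
{
    switch (sa->sa_family) {
    case AF_INET: {
        const struct sockaddr_in *sin = (const struct sockaddr_in *)sa;

        inet_ntop(AF_INET, &sin->sin_addr, addr, addr_len);
        snprintf(svc, svc_len, "%u", ntohs(sin->sin_port));
        return 0;
    }
    case AF_INET6: {
        const struct sockaddr_in6 *sin6 = (const struct sockaddr_in6 *)sa;

        inet_ntop(AF_INET6, &sin6->sin6_addr, addr, addr_len);
        snprintf(svc, svc_len, "%u", ntohs(sin6->sin6_port));
        return 0;
    }
    default:
        return -1;    /* unsupported address family, as in the function above */
    }
}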
4960 : :
4961 : : static int
4962 : 0 : nvmf_rdma_qpair_get_peer_trid(struct spdk_nvmf_qpair *qpair,
4963 : : struct spdk_nvme_transport_id *trid)
4964 : : {
4965 : 0 : struct spdk_nvmf_rdma_qpair *rqpair;
4966 : :
4967 : 0 : rqpair = SPDK_CONTAINEROF(qpair, struct spdk_nvmf_rdma_qpair, qpair);
4968 : :
4969 [ # # # # ]: 0 : return nvmf_rdma_trid_from_cm_id(rqpair->cm_id, trid, true);
4970 : 0 : }
4971 : :
4972 : : static int
4973 : 0 : nvmf_rdma_qpair_get_local_trid(struct spdk_nvmf_qpair *qpair,
4974 : : struct spdk_nvme_transport_id *trid)
4975 : : {
4976 : 0 : struct spdk_nvmf_rdma_qpair *rqpair;
4977 : :
4978 : 0 : rqpair = SPDK_CONTAINEROF(qpair, struct spdk_nvmf_rdma_qpair, qpair);
4979 : :
4980 [ # # # # ]: 0 : return nvmf_rdma_trid_from_cm_id(rqpair->cm_id, trid, false);
4981 : 0 : }
4982 : :
4983 : : static int
4984 : 2393 : nvmf_rdma_qpair_get_listen_trid(struct spdk_nvmf_qpair *qpair,
4985 : : struct spdk_nvme_transport_id *trid)
4986 : : {
4987 : 0 : struct spdk_nvmf_rdma_qpair *rqpair;
4988 : :
4989 : 2393 : rqpair = SPDK_CONTAINEROF(qpair, struct spdk_nvmf_rdma_qpair, qpair);
4990 : :
4991 [ # # # # ]: 2393 : return nvmf_rdma_trid_from_cm_id(rqpair->listen_id, trid, false);
4992 : 0 : }
4993 : :
4994 : : void
4995 : 0 : spdk_nvmf_rdma_init_hooks(struct spdk_nvme_rdma_hooks *hooks)
4996 : : {
4997 : 0 : g_nvmf_hooks = *hooks;
4998 : 0 : }
4999 : :
5000 : : static void
5001 : 0 : nvmf_rdma_request_set_abort_status(struct spdk_nvmf_request *req,
5002 : : struct spdk_nvmf_rdma_request *rdma_req_to_abort,
5003 : : struct spdk_nvmf_rdma_qpair *rqpair)
5004 : : {
5005 [ # # # # ... ]: 0 : rdma_req_to_abort->req.rsp->nvme_cpl.status.sct = SPDK_NVME_SCT_GENERIC;
5006 [ # # # # ... ]: 0 : rdma_req_to_abort->req.rsp->nvme_cpl.status.sc = SPDK_NVME_SC_ABORTED_BY_REQUEST;
5007 : :
5008 [ # # # # ... ]: 0 : STAILQ_INSERT_TAIL(&rqpair->pending_rdma_send_queue, rdma_req_to_abort, state_link);
5009 [ # # # # ]: 0 : rdma_req_to_abort->state = RDMA_REQUEST_STATE_READY_TO_COMPLETE_PENDING;
5010 : :
5011 [ # # # # ... ]: 0 : req->rsp->nvme_cpl.cdw0 &= ~1U; /* Command was successfully aborted. */
5012 : 0 : }
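The statement cdw0 &= ~1U above encodes the NVMe Abort command convention: bit 0 of completion Dword 0 cleared to 0 reports that the target command was aborted, while 1 reports that it was not. A tiny illustrative helper pair (hypothetical names) making both outcomes explicit:

#include <stdint.h>

static inline uint32_t my_abort_cpl_mark_aborted(uint32_t cdw0)
{
    return cdw0 & ~1U;    /* clear bit 0: the target command was successfully aborted */
}

static inline uint32_t my_abort_cpl_mark_not_aborted(uint32_t cdw0)
{
    return cdw0 | 1U;     /* set bit 0: the target command could not be aborted */
}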
5013 : :
5014 : : static int
5015 : 9714 : _nvmf_rdma_qpair_abort_request(void *ctx)
5016 : : {
5017 : 9714 : struct spdk_nvmf_request *req = ctx;
5018 [ # # # # ]: 9714 : struct spdk_nvmf_rdma_request *rdma_req_to_abort = SPDK_CONTAINEROF(
5019 : : req->req_to_abort, struct spdk_nvmf_rdma_request, req);
5020 [ # # # # : 9714 : struct spdk_nvmf_rdma_qpair *rqpair = SPDK_CONTAINEROF(req->req_to_abort->qpair,
# # # # ]
5021 : : struct spdk_nvmf_rdma_qpair, qpair);
5022 : 0 : int rc;
5023 : :
5024 [ # # ]: 9714 : spdk_poller_unregister(&req->poller);
5025 : :
5026 [ + - - - : 9714 : switch (rdma_req_to_abort->state) {
- - - # #
# # ]
5027 : 9714 : case RDMA_REQUEST_STATE_EXECUTING:
5028 : 9714 : rc = nvmf_ctrlr_abort_request(req);
5029 [ + - ]: 9714 : if (rc == SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS) {
5030 : 9714 : return SPDK_POLLER_BUSY;
5031 : : }
5032 : 0 : break;
5033 : :
5034 : 0 : case RDMA_REQUEST_STATE_NEED_BUFFER:
5035 [ # # # # : 0 : STAILQ_REMOVE(&rqpair->poller->group->group.pending_buf_queue,
# # # # #
# # # # #
# # # # #
# # # # #
# # # # #
# # # # #
# # # # #
# # # # #
# # # # #
# # # # #
# # # # #
# # # # #
# # # # #
# # # # #
# # # # #
# # # # #
# # # # #
# # # # #
# # # # #
# # # # #
# # # # #
# # # # #
# # # # #
# # # # #
# # # # #
# # # # #
# # # # #
# # # # #
# # # # #
# # # # #
# # # # #
# # # ]
5036 : : &rdma_req_to_abort->req, spdk_nvmf_request, buf_link);
5037 : :
5038 : 0 : nvmf_rdma_request_set_abort_status(req, rdma_req_to_abort, rqpair);
5039 : 0 : break;
5040 : :
5041 : 0 : case RDMA_REQUEST_STATE_DATA_TRANSFER_TO_CONTROLLER_PENDING:
5042 [ # # # # ... ]: 0 : STAILQ_REMOVE(&rqpair->pending_rdma_read_queue, rdma_req_to_abort,
5043 : : spdk_nvmf_rdma_request, state_link);
5044 : :
5045 : 0 : nvmf_rdma_request_set_abort_status(req, rdma_req_to_abort, rqpair);
5046 : 0 : break;
5047 : :
5048 : 0 : case RDMA_REQUEST_STATE_DATA_TRANSFER_TO_HOST_PENDING:
5049 [ # # # # ... ]: 0 : STAILQ_REMOVE(&rqpair->pending_rdma_write_queue, rdma_req_to_abort,
5050 : : spdk_nvmf_rdma_request, state_link);
5051 : :
5052 : 0 : nvmf_rdma_request_set_abort_status(req, rdma_req_to_abort, rqpair);
5053 : 0 : break;
5054 : :
5055 : 0 : case RDMA_REQUEST_STATE_READY_TO_COMPLETE_PENDING:
5056 : : /* Remove req from the list here so that the common abort-status helper can be reused */
5057 [ # # # # ... ]: 0 : STAILQ_REMOVE(&rqpair->pending_rdma_send_queue, rdma_req_to_abort,
5058 : : spdk_nvmf_rdma_request, state_link);
5059 : :
5060 : 0 : nvmf_rdma_request_set_abort_status(req, rdma_req_to_abort, rqpair);
5061 : 0 : break;
5062 : :
5063 : 0 : case RDMA_REQUEST_STATE_TRANSFERRING_HOST_TO_CONTROLLER:
5064 [ # # # # : 0 : if (spdk_get_ticks() < req->timeout_tsc) {
# # ]
5065 [ # # # # ]: 0 : req->poller = SPDK_POLLER_REGISTER(_nvmf_rdma_qpair_abort_request, req, 0);
5066 : 0 : return SPDK_POLLER_BUSY;
5067 : : }
5068 : 0 : break;
5069 : :
5070 : 0 : default:
5071 : 0 : break;
5072 : : }
5073 : :
5074 : 0 : spdk_nvmf_request_complete(req);
5075 : 0 : return SPDK_POLLER_BUSY;
5076 : 0 : }
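The RDMA_REQUEST_STATE_TRANSFERRING_HOST_TO_CONTROLLER branch above cannot abort a request while its data transfer is in flight, so it re-arms itself with SPDK_POLLER_REGISTER until the transfer finishes or req->timeout_tsc passes. A minimal sketch of that re-arming idiom, assuming SPDK's public poller and tick APIs and a hypothetical my_retry_ctx with a done() predicate:

#include "spdk/stdinc.h"
#include "spdk/env.h"
#include "spdk/thread.h"

struct my_retry_ctx {
    struct spdk_poller *poller;
    uint64_t deadline_tsc;        /* e.g. spdk_get_ticks() + timeout_sec * spdk_get_ticks_hz() */
    bool (*done)(void *arg);      /* hypothetical completion predicate */
    void *arg;
};

static int my_retry_poll(void *cb_arg)
{
    struct my_retry_ctx *ctx = cb_arg;

    spdk_poller_unregister(&ctx->poller);

    if (!ctx->done(ctx->arg) && spdk_get_ticks() < ctx->deadline_tsc) {
        /* Not finished and the deadline has not passed: check again on a later poll. */
        ctx->poller = SPDK_POLLER_REGISTER(my_retry_poll, ctx, 0);
    }

    return SPDK_POLLER_BUSY;
}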
5077 : :
5078 : : static void
5079 : 9774 : nvmf_rdma_qpair_abort_request(struct spdk_nvmf_qpair *qpair,
5080 : : struct spdk_nvmf_request *req)
5081 : : {
5082 : 0 : struct spdk_nvmf_rdma_qpair *rqpair;
5083 : 0 : struct spdk_nvmf_rdma_transport *rtransport;
5084 : 0 : struct spdk_nvmf_transport *transport;
5085 : 0 : uint16_t cid;
5086 : 0 : uint32_t i, max_req_count;
5087 : 9774 : struct spdk_nvmf_rdma_request *rdma_req_to_abort = NULL, *rdma_req;
5088 : :
5089 : 9774 : rqpair = SPDK_CONTAINEROF(qpair, struct spdk_nvmf_rdma_qpair, qpair);
5090 [ # # # # ]: 9774 : rtransport = SPDK_CONTAINEROF(qpair->transport, struct spdk_nvmf_rdma_transport, transport);
5091 [ # # ]: 9774 : transport = &rtransport->transport;
5092 : :
5093 [ # # # # : 9774 : cid = req->cmd->nvme_cmd.cdw10_bits.abort.cid;
# # # # #
# # # #
# ]
5094 [ - + # # : 9774 : max_req_count = rqpair->srq == NULL ? rqpair->max_queue_depth : rqpair->poller->max_srq_depth;
# # # # #
# # # # #
# # # # ]
5095 : :
5096 [ + + ]: 594631 : for (i = 0; i < max_req_count; i++) {
5097 [ # # # # : 594571 : rdma_req = &rqpair->resources->reqs[i];
# # # # #
# ]
5098 : : /* When SRQ == NULL, the rqpair owns its requests and req.qpair always points to this qpair.
5099 : : * When SRQ != NULL, all rqpairs share a common request pool and the qpair pointer is assigned
5100 : : * when we start to process a request. So in both cases all requests that are not in the FREE state have a valid qpair pointer. */
5101 [ + + + + : 594571 : if (rdma_req->state != RDMA_REQUEST_STATE_FREE && rdma_req->req.cmd->nvme_cmd.cid == cid &&
# # # # #
# # # # #
# # # # #
# # # ]
5102 [ + - # # : 9714 : rdma_req->req.qpair == qpair) {
# # ]
5103 : 9714 : rdma_req_to_abort = rdma_req;
5104 : 9714 : break;
5105 : : }
5106 : 0 : }
5107 : :
5108 [ + + ]: 9774 : if (rdma_req_to_abort == NULL) {
5109 : 60 : spdk_nvmf_request_complete(req);
5110 : 60 : return;
5111 : : }
5112 : :
5113 [ # # # # : 9714 : req->req_to_abort = &rdma_req_to_abort->req;
# # ]
5114 [ # # # # ]: 19428 : req->timeout_tsc = spdk_get_ticks() +
5115 [ # # # # : 9714 : transport->opts.abort_timeout_sec * spdk_get_ticks_hz();
# # ]
5116 [ # # # # ]: 9714 : req->poller = NULL;
5117 : :
5118 : 9714 : _nvmf_rdma_qpair_abort_request(req);
5119 [ # # ]: 0 : }
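The function above locates the victim command with a linear scan of the request pool, matching on the CID carried by the Abort command and, because SRQ-backed pools are shared between qpairs, on the qpair pointer as well; the abort deadline is then simply the current tick count plus abort_timeout_sec converted to ticks. A standalone sketch of the lookup with hypothetical types:

#include <stddef.h>
#include <stdint.h>

struct my_req {
    int in_use;           /* stands in for "state != RDMA_REQUEST_STATE_FREE" */
    uint16_t cid;
    const void *qpair;
};

struct my_req *my_find_req_by_cid(struct my_req *reqs, uint32_t max_req_count,
                                  uint16_t cid, const void *qpair)
{
    uint32_t i;

    for (i = 0; i < max_req_count; i++) {
        /* Requests not in use may hold a stale qpair pointer, so check in_use first. */
        if (reqs[i].in_use && reqs[i].cid == cid && reqs[i].qpair == qpair) {
            return &reqs[i];
        }
    }
    return NULL;    /* no match: the command already completed or never existed */
}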
5120 : :
5121 : : static void
5122 : 8 : nvmf_rdma_poll_group_dump_stat(struct spdk_nvmf_transport_poll_group *group,
5123 : : struct spdk_json_write_ctx *w)
5124 : : {
5125 : 0 : struct spdk_nvmf_rdma_poll_group *rgroup;
5126 : 0 : struct spdk_nvmf_rdma_poller *rpoller;
5127 : :
5128 [ - + # # ]: 8 : assert(w != NULL);
5129 : :
5130 : 8 : rgroup = SPDK_CONTAINEROF(group, struct spdk_nvmf_rdma_poll_group, group);
5131 : :
5132 [ # # # # : 8 : spdk_json_write_named_uint64(w, "pending_data_buffer", rgroup->stat.pending_data_buffer);
# # ]
5133 : :
5134 : 8 : spdk_json_write_named_array_begin(w, "devices");
5135 : :
5136 [ + + # # : 24 : TAILQ_FOREACH(rpoller, &rgroup->pollers, link) {
# # # # #
# # # #
# ]
5137 : 16 : spdk_json_write_object_begin(w);
5138 : 16 : spdk_json_write_named_string(w, "name",
5139 [ # # # # : 16 : ibv_get_device_name(rpoller->device->context->device));
# # # # #
# # # ]
5140 : 16 : spdk_json_write_named_uint64(w, "polls",
5141 [ # # # # : 0 : rpoller->stat.polls);
# # ]
5142 : 16 : spdk_json_write_named_uint64(w, "idle_polls",
5143 [ # # # # : 0 : rpoller->stat.idle_polls);
# # ]
5144 : 16 : spdk_json_write_named_uint64(w, "completions",
5145 [ # # # # : 0 : rpoller->stat.completions);
# # ]
5146 : 16 : spdk_json_write_named_uint64(w, "requests",
5147 [ # # # # : 0 : rpoller->stat.requests);
# # ]
5148 : 16 : spdk_json_write_named_uint64(w, "request_latency",
5149 [ # # # # : 0 : rpoller->stat.request_latency);
# # ]
5150 : 16 : spdk_json_write_named_uint64(w, "pending_free_request",
5151 [ # # # # : 0 : rpoller->stat.pending_free_request);
# # ]
5152 : 16 : spdk_json_write_named_uint64(w, "pending_rdma_read",
5153 [ # # # # : 0 : rpoller->stat.pending_rdma_read);
# # ]
5154 : 16 : spdk_json_write_named_uint64(w, "pending_rdma_write",
5155 [ # # # # : 0 : rpoller->stat.pending_rdma_write);
# # ]
5156 : 16 : spdk_json_write_named_uint64(w, "pending_rdma_send",
5157 [ # # # # : 0 : rpoller->stat.pending_rdma_send);
# # ]
5158 : 16 : spdk_json_write_named_uint64(w, "total_send_wrs",
5159 [ # # # # : 0 : rpoller->stat.qp_stats.send.num_submitted_wrs);
# # # # #
# ]
5160 : 16 : spdk_json_write_named_uint64(w, "send_doorbell_updates",
5161 [ # # # # : 0 : rpoller->stat.qp_stats.send.doorbell_updates);
# # # # #
# ]
5162 : 16 : spdk_json_write_named_uint64(w, "total_recv_wrs",
5163 [ # # # # : 0 : rpoller->stat.qp_stats.recv.num_submitted_wrs);
# # # # #
# ]
5164 : 16 : spdk_json_write_named_uint64(w, "recv_doorbell_updates",
5165 [ # # # # : 0 : rpoller->stat.qp_stats.recv.doorbell_updates);
# # # # #
# ]
5166 : 16 : spdk_json_write_object_end(w);
5167 : 0 : }
5168 : :
5169 : 8 : spdk_json_write_array_end(w);
5170 : 8 : }
5171 : :
5172 : : const struct spdk_nvmf_transport_ops spdk_nvmf_transport_rdma = {
5173 : : .name = "RDMA",
5174 : : .type = SPDK_NVME_TRANSPORT_RDMA,
5175 : : .opts_init = nvmf_rdma_opts_init,
5176 : : .create = nvmf_rdma_create,
5177 : : .dump_opts = nvmf_rdma_dump_opts,
5178 : : .destroy = nvmf_rdma_destroy,
5179 : :
5180 : : .listen = nvmf_rdma_listen,
5181 : : .stop_listen = nvmf_rdma_stop_listen,
5182 : : .cdata_init = nvmf_rdma_cdata_init,
5183 : :
5184 : : .listener_discover = nvmf_rdma_discover,
5185 : :
5186 : : .poll_group_create = nvmf_rdma_poll_group_create,
5187 : : .get_optimal_poll_group = nvmf_rdma_get_optimal_poll_group,
5188 : : .poll_group_destroy = nvmf_rdma_poll_group_destroy,
5189 : : .poll_group_add = nvmf_rdma_poll_group_add,
5190 : : .poll_group_remove = nvmf_rdma_poll_group_remove,
5191 : : .poll_group_poll = nvmf_rdma_poll_group_poll,
5192 : :
5193 : : .req_free = nvmf_rdma_request_free,
5194 : : .req_complete = nvmf_rdma_request_complete,
5195 : :
5196 : : .qpair_fini = nvmf_rdma_close_qpair,
5197 : : .qpair_get_peer_trid = nvmf_rdma_qpair_get_peer_trid,
5198 : : .qpair_get_local_trid = nvmf_rdma_qpair_get_local_trid,
5199 : : .qpair_get_listen_trid = nvmf_rdma_qpair_get_listen_trid,
5200 : : .qpair_abort_request = nvmf_rdma_qpair_abort_request,
5201 : :
5202 : : .poll_group_dump_stat = nvmf_rdma_poll_group_dump_stat,
5203 : : };
5204 : :
5205 : 783 : SPDK_NVMF_TRANSPORT_REGISTER(rdma, &spdk_nvmf_transport_rdma);
5206 : 783 : SPDK_LOG_REGISTER_COMPONENT(rdma)
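SPDK_NVMF_TRANSPORT_REGISTER above ties the ops table into the transport registry under the name rdma. As a hedged sketch only (a hypothetical "mytr" transport with a stub create callback, and with most of the mandatory callbacks omitted), an out-of-tree transport would plug into the same mechanism roughly like this:

#include "spdk/nvmf_transport.h"

/* Stub: a real transport allocates its context and applies opts here. */
static struct spdk_nvmf_transport *
my_transport_create(struct spdk_nvmf_transport_opts *opts)
{
    (void)opts;
    return NULL;
}

static const struct spdk_nvmf_transport_ops my_transport_ops = {
    .name = "MYTR",                        /* name used when creating the transport */
    .type = SPDK_NVME_TRANSPORT_CUSTOM,    /* out-of-tree transports use the custom type */
    .create = my_transport_create,
    /* .destroy, .listen, poll-group, qpair and request callbacks are omitted in this
     * sketch; a functional transport must provide the full set shown in the table above. */
};

SPDK_NVMF_TRANSPORT_REGISTER(mytr, &my_transport_ops);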