Line data Source code
1 : /* SPDX-License-Identifier: BSD-3-Clause
2 : * Copyright (C) 2018 Intel Corporation. All rights reserved.
3 : * Copyright (c) 2020 Mellanox Technologies LTD. All rights reserved.
4 : * Copyright (c) 2021-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
5 : */
6 :
7 : /*
8 : * NVMe/TCP transport
9 : */
10 :
11 : #include "nvme_internal.h"
12 :
13 : #include "spdk/endian.h"
14 : #include "spdk/likely.h"
15 : #include "spdk/string.h"
16 : #include "spdk/stdinc.h"
17 : #include "spdk/crc32.h"
18 : #include "spdk/assert.h"
19 : #include "spdk/trace.h"
20 : #include "spdk/util.h"
21 : #include "spdk/nvmf.h"
22 : #include "spdk/dma.h"
23 :
24 : #include "spdk_internal/nvme_tcp.h"
25 : #include "spdk_internal/trace_defs.h"
26 :
27 : #define NVME_TCP_RW_BUFFER_SIZE 131072
28 :
/* For async connect workloads, allow more time since we are more likely
 * to be processing lots of ICREQs at once.
 */
32 : #define ICREQ_TIMEOUT_SYNC 2 /* in seconds */
33 : #define ICREQ_TIMEOUT_ASYNC 10 /* in seconds */
34 :
35 : #define NVME_TCP_HPDA_DEFAULT 0
36 : #define NVME_TCP_MAX_R2T_DEFAULT 1
37 : #define NVME_TCP_PDU_H2C_MIN_DATA_SIZE 4096
38 :
39 : /*
40 : * Maximum value of transport_ack_timeout used by TCP controller
41 : */
42 : #define NVME_TCP_CTRLR_MAX_TRANSPORT_ACK_TIMEOUT 31
43 :
44 :
/* NVMe TCP transport extensions for spdk_nvme_ctrlr */
struct nvme_tcp_ctrlr {
	/* Generic controller object. nvme_tcp_ctrlr() recovers the enclosing
	 * nvme_tcp_ctrlr from a pointer to this member. */
	struct spdk_nvme_ctrlr			ctrlr;
	/* NOTE(review): the PSK/TLS fields below are not referenced in this part of
	 * the file; presumably they hold the TLS pre-shared key material used when
	 * connecting - confirm against the connect path. */
	char					psk_identity[NVMF_PSK_IDENTITY_LEN];
	uint8_t					psk[SPDK_TLS_PSK_MAX_LEN];
	int					psk_size;
	char					*tls_cipher_suite;
};
53 :
/* NVMe TCP transport extensions for spdk_nvme_transport_poll_group */
struct nvme_tcp_poll_group {
	/* Generic transport poll group. nvme_tcp_poll_group() recovers the enclosing
	 * structure from a pointer to this member. */
	struct spdk_nvme_transport_poll_group	group;
	struct spdk_sock_group			*sock_group;
	uint32_t				completions_per_qpair;
	int64_t					num_completions;

	/* Qpairs that must be polled regardless of network activity. A qpair is
	 * appended by pdu_write_done() and removed when it is disconnected. */
	TAILQ_HEAD(, nvme_tcp_qpair)		needs_poll;
	struct spdk_nvme_tcp_stat		stats;
};
63 :
/* NVMe TCP qpair extensions for spdk_nvme_qpair */
struct nvme_tcp_qpair {
	/* Generic qpair object. nvme_tcp_qpair() recovers the enclosing
	 * nvme_tcp_qpair from a pointer to this member. */
	struct spdk_nvme_qpair			qpair;
	/* Socket carrying this qpair; closed and reset to NULL on disconnect */
	struct spdk_sock			*sock;

	/* Unused entries of tcp_reqs; handed out by nvme_tcp_req_get() and
	 * returned by nvme_tcp_req_put() */
	TAILQ_HEAD(, nvme_tcp_req)		free_reqs;
	/* Expected to be empty once the qpair is disconnected synchronously or deleted.
	 * NOTE(review): populated outside this part of the file. */
	TAILQ_HEAD(, nvme_tcp_req)		outstanding_reqs;

	/* PDUs handed to the socket whose write completion has not fired yet */
	TAILQ_HEAD(, nvme_tcp_pdu)		send_queue;
	/* Points at the last element of the send_pdus allocation */
	struct nvme_tcp_pdu			*recv_pdu;
	struct nvme_tcp_pdu			*send_pdu; /* only for error pdu and init pdu */
	/* Array of num_entries + 2 PDUs: one per tcp_req followed by send_pdu and recv_pdu */
	struct nvme_tcp_pdu			*send_pdus; /* Used by tcp_reqs */
	enum nvme_tcp_pdu_recv_state		recv_state;
	/* Array of num_entries requests; the array index doubles as the command identifier */
	struct nvme_tcp_req			*tcp_reqs;
	/* Statistics sink; owned (and freed) by the qpair unless shared_stats is set */
	struct spdk_nvme_tcp_stat		*stats;

	uint16_t				num_entries;
	/* Counts requests completed while the qpair was not in its completion context */
	uint16_t				async_complete;

	struct {
		uint16_t host_hdgst_enable: 1;
		uint16_t host_ddgst_enable: 1;
		uint16_t icreq_send_ack: 1;
		uint16_t in_connect_poll: 1;
		uint16_t reserved: 12;
	} flags;

	/** Specifies the maximum number of PDU-Data bytes per H2C Data Transfer PDU */
	uint32_t				maxh2cdata;

	uint32_t				maxr2t;

	/* 0 based value, which is used to guide the padding */
	uint8_t					cpda;

	enum nvme_tcp_qpair_state		state;

	/* Linkage on nvme_tcp_poll_group::needs_poll; valid only while needs_poll is true */
	TAILQ_ENTRY(nvme_tcp_qpair)		link;
	bool					needs_poll;

	uint64_t				icreq_timeout_tsc;

	/* When false, stats is freed together with the qpair */
	bool					shared_stats;
};
108 :
/* Lifecycle state of a nvme_tcp_req */
enum nvme_tcp_req_state {
	/* Sitting on the qpair's free_reqs list (set by nvme_tcp_req_put()) */
	NVME_TCP_REQ_FREE,
	/* Handed out by nvme_tcp_req_get() and in use */
	NVME_TCP_REQ_ACTIVE,
	/* NOTE(review): not set anywhere in this part of the file; presumably marks a
	 * request that is servicing an R2T - confirm against the R2T handler. */
	NVME_TCP_REQ_ACTIVE_R2T,
};
114 :
/* Per-command transport state; one entry per queue slot of a nvme_tcp_qpair */
struct nvme_tcp_req {
	/* Generic request currently bound to this slot (NULL right after nvme_tcp_req_get()) */
	struct nvme_request			*req;
	enum nvme_tcp_req_state			state;
	/* Index of this entry in tqpair->tcp_reqs; copied into the command's cid */
	uint16_t				cid;
	uint16_t				ttag;
	uint32_t				datao;
	uint32_t				expected_datao;
	uint32_t				r2tl_remain;
	uint32_t				active_r2ts;
	/* Used to hold a value received from subsequent R2T while we are still
	 * waiting for H2C complete */
	uint16_t				ttag_r2t_next;
	/* Set when the payload is small enough to travel inside the command capsule */
	bool					in_capsule_data;
	/* It is used to track whether the req can be safely freed */
	union {
		uint8_t raw;
		struct {
			/* The last send operation completed - kernel released send buffer */
			uint8_t				send_ack : 1;
			/* Data transfer completed - target send resp or last data bit */
			uint8_t				data_recv : 1;
			/* tcp_req is waiting for completion of the previous send operation (buffer reclaim notification
			 * from kernel) to send H2C */
			uint8_t				h2c_send_waiting_ack : 1;
			/* tcp_req received a subsequent R2T while it is still waiting for send_ack.
			 * Rare case, relevant when dealing with a target that can send several R2T requests.
			 * SPDK TCP target sends 1 R2T for the whole data buffer */
			uint8_t				r2t_waiting_h2c_complete : 1;
			/* Accel operation is in progress */
			uint8_t				in_progress_accel : 1;
			/* The payload carries a memory domain, so buffer addresses need translation */
			uint8_t				domain_in_use: 1;
			uint8_t				reserved : 2;
		} bits;
	} ordering;
	/* PDU dedicated to this request (entry of tqpair->send_pdus with the same index) */
	struct nvme_tcp_pdu			*pdu;
	/* Payload buffers, filled by the contig/SGL request builders */
	struct iovec				iov[NVME_TCP_MAX_SGL_DESCRIPTORS];
	uint32_t				iovcnt;
	/* Used to hold a value received from subsequent R2T while we are still
	 * waiting for H2C ack */
	uint32_t				r2tl_remain_next;
	/* Owning qpair */
	struct nvme_tcp_qpair			*tqpair;
	TAILQ_ENTRY(nvme_tcp_req)		link;
	/* Completion entry handed to nvme_tcp_req_complete() once the request may finish */
	struct spdk_nvme_cpl			rsp;
};
159 :
/* NOTE(review): not referenced in this part of the file; presumably a stand-in
 * statistics object for qpairs that have no statistics of their own - confirm. */
static struct spdk_nvme_tcp_stat g_dummy_stats = {};

/* Forward declarations of helpers that are used before their definition */
static void nvme_tcp_send_h2c_data(struct nvme_tcp_req *tcp_req);
static int64_t nvme_tcp_poll_group_process_completions(struct spdk_nvme_transport_poll_group
		*tgroup, uint32_t completions_per_qpair, spdk_nvme_disconnected_qpair_cb disconnected_qpair_cb);
static void nvme_tcp_icresp_handle(struct nvme_tcp_qpair *tqpair, struct nvme_tcp_pdu *pdu);
static void nvme_tcp_req_complete(struct nvme_tcp_req *tcp_req, struct nvme_tcp_qpair *tqpair,
				  struct spdk_nvme_cpl *rsp, bool print_on_error);
168 :
169 : static inline struct nvme_tcp_qpair *
170 46 : nvme_tcp_qpair(struct spdk_nvme_qpair *qpair)
171 : {
172 46 : assert(qpair->trtype == SPDK_NVME_TRANSPORT_TCP);
173 46 : return SPDK_CONTAINEROF(qpair, struct nvme_tcp_qpair, qpair);
174 : }
175 :
176 : static inline struct nvme_tcp_poll_group *
177 9 : nvme_tcp_poll_group(struct spdk_nvme_transport_poll_group *group)
178 : {
179 9 : return SPDK_CONTAINEROF(group, struct nvme_tcp_poll_group, group);
180 : }
181 :
182 : static inline struct nvme_tcp_ctrlr *
183 5 : nvme_tcp_ctrlr(struct spdk_nvme_ctrlr *ctrlr)
184 : {
185 5 : assert(ctrlr->trid.trtype == SPDK_NVME_TRANSPORT_TCP);
186 5 : return SPDK_CONTAINEROF(ctrlr, struct nvme_tcp_ctrlr, ctrlr);
187 : }
188 :
189 : static struct nvme_tcp_req *
190 6 : nvme_tcp_req_get(struct nvme_tcp_qpair *tqpair)
191 : {
192 : struct nvme_tcp_req *tcp_req;
193 :
194 6 : tcp_req = TAILQ_FIRST(&tqpair->free_reqs);
195 6 : if (!tcp_req) {
196 2 : return NULL;
197 : }
198 :
199 4 : assert(tcp_req->state == NVME_TCP_REQ_FREE);
200 4 : tcp_req->state = NVME_TCP_REQ_ACTIVE;
201 4 : TAILQ_REMOVE(&tqpair->free_reqs, tcp_req, link);
202 4 : tcp_req->datao = 0;
203 4 : tcp_req->expected_datao = 0;
204 4 : tcp_req->req = NULL;
205 4 : tcp_req->in_capsule_data = false;
206 4 : tcp_req->r2tl_remain = 0;
207 4 : tcp_req->r2tl_remain_next = 0;
208 4 : tcp_req->active_r2ts = 0;
209 4 : tcp_req->iovcnt = 0;
210 4 : tcp_req->ordering.raw = 0;
211 4 : memset(tcp_req->pdu, 0, sizeof(struct nvme_tcp_pdu));
212 4 : memset(&tcp_req->rsp, 0, sizeof(struct spdk_nvme_cpl));
213 :
214 4 : return tcp_req;
215 : }
216 :
217 : static void
218 10 : nvme_tcp_req_put(struct nvme_tcp_qpair *tqpair, struct nvme_tcp_req *tcp_req)
219 : {
220 10 : assert(tcp_req->state != NVME_TCP_REQ_FREE);
221 10 : tcp_req->state = NVME_TCP_REQ_FREE;
222 10 : TAILQ_INSERT_HEAD(&tqpair->free_reqs, tcp_req, link);
223 10 : }
224 :
225 : static inline void
226 0 : nvme_tcp_accel_submit_crc32c(struct nvme_tcp_poll_group *tgroup, struct nvme_tcp_req *treq,
227 : uint32_t *dst, struct iovec *iovs, uint32_t iovcnt, uint32_t seed,
228 : spdk_nvme_accel_completion_cb cb_fn, void *cb_arg)
229 : {
230 0 : struct spdk_nvme_poll_group *pg = tgroup->group.group;
231 :
232 0 : treq->ordering.bits.in_progress_accel = 1;
233 0 : pg->accel_fn_table.submit_accel_crc32c(pg->ctx, dst, iovs, iovcnt, seed, cb_fn, cb_arg);
234 0 : }
235 :
236 : static inline void
237 0 : nvme_tcp_accel_finish_sequence(struct nvme_tcp_poll_group *tgroup, struct nvme_tcp_req *treq,
238 : void *seq, spdk_nvme_accel_completion_cb cb_fn, void *cb_arg)
239 : {
240 0 : struct spdk_nvme_poll_group *pg = tgroup->group.group;
241 :
242 0 : treq->ordering.bits.in_progress_accel = 1;
243 0 : pg->accel_fn_table.finish_sequence(seq, cb_fn, cb_arg);
244 0 : }
245 :
246 : static inline void
247 0 : nvme_tcp_accel_reverse_sequence(struct nvme_tcp_poll_group *tgroup, void *seq)
248 : {
249 0 : struct spdk_nvme_poll_group *pg = tgroup->group.group;
250 :
251 0 : pg->accel_fn_table.reverse_sequence(seq);
252 0 : }
253 :
254 : static inline int
255 0 : nvme_tcp_accel_append_crc32c(struct nvme_tcp_poll_group *tgroup, void **seq, uint32_t *dst,
256 : struct iovec *iovs, uint32_t iovcnt, uint32_t seed,
257 : spdk_nvme_accel_step_cb cb_fn, void *cb_arg)
258 : {
259 0 : struct spdk_nvme_poll_group *pg = tgroup->group.group;
260 :
261 0 : return pg->accel_fn_table.append_crc32c(pg->ctx, seq, dst, iovs, iovcnt, NULL, NULL,
262 : seed, cb_fn, cb_arg);
263 : }
264 :
/*
 * Release the request array and the PDU array of a qpair and clear both
 * pointers, so calling this again (or after a partial allocation) is harmless.
 */
static void
nvme_tcp_free_reqs(struct nvme_tcp_qpair *tqpair)
{
	/* tcp_reqs comes from calloc() */
	free(tqpair->tcp_reqs);
	tqpair->tcp_reqs = NULL;

	/* send_pdus comes from spdk_zmalloc(), hence spdk_free() */
	spdk_free(tqpair->send_pdus);
	tqpair->send_pdus = NULL;
}
274 :
275 : static int
276 9 : nvme_tcp_alloc_reqs(struct nvme_tcp_qpair *tqpair)
277 : {
278 : uint16_t i;
279 : struct nvme_tcp_req *tcp_req;
280 :
281 9 : tqpair->tcp_reqs = calloc(tqpair->num_entries, sizeof(struct nvme_tcp_req));
282 9 : if (tqpair->tcp_reqs == NULL) {
283 0 : SPDK_ERRLOG("Failed to allocate tcp_reqs on tqpair=%p\n", tqpair);
284 0 : goto fail;
285 : }
286 :
287 : /* Add additional 2 member for the send_pdu, recv_pdu owned by the tqpair */
288 9 : tqpair->send_pdus = spdk_zmalloc((tqpair->num_entries + 2) * sizeof(struct nvme_tcp_pdu),
289 : 0x1000, NULL,
290 : SPDK_ENV_SOCKET_ID_ANY, SPDK_MALLOC_DMA);
291 :
292 9 : if (tqpair->send_pdus == NULL) {
293 0 : SPDK_ERRLOG("Failed to allocate send_pdus on tqpair=%p\n", tqpair);
294 0 : goto fail;
295 : }
296 :
297 9 : TAILQ_INIT(&tqpair->send_queue);
298 9 : TAILQ_INIT(&tqpair->free_reqs);
299 9 : TAILQ_INIT(&tqpair->outstanding_reqs);
300 9 : tqpair->qpair.queue_depth = 0;
301 65555 : for (i = 0; i < tqpair->num_entries; i++) {
302 65546 : tcp_req = &tqpair->tcp_reqs[i];
303 65546 : tcp_req->cid = i;
304 65546 : tcp_req->tqpair = tqpair;
305 65546 : tcp_req->pdu = &tqpair->send_pdus[i];
306 65546 : TAILQ_INSERT_TAIL(&tqpair->free_reqs, tcp_req, link);
307 : }
308 :
309 9 : tqpair->send_pdu = &tqpair->send_pdus[i];
310 9 : tqpair->recv_pdu = &tqpair->send_pdus[i + 1];
311 :
312 9 : return 0;
313 0 : fail:
314 0 : nvme_tcp_free_reqs(tqpair);
315 0 : return -ENOMEM;
316 : }
317 :
318 : static inline void
319 32 : nvme_tcp_qpair_set_recv_state(struct nvme_tcp_qpair *tqpair,
320 : enum nvme_tcp_pdu_recv_state state)
321 : {
322 32 : if (tqpair->recv_state == state) {
323 15 : SPDK_ERRLOG("The recv state of tqpair=%p is same with the state(%d) to be set\n",
324 : tqpair, state);
325 15 : return;
326 : }
327 :
328 17 : if (state == NVME_TCP_PDU_RECV_STATE_ERROR) {
329 1 : assert(TAILQ_EMPTY(&tqpair->outstanding_reqs));
330 : }
331 :
332 17 : tqpair->recv_state = state;
333 : }
334 :
335 : static void nvme_tcp_qpair_abort_reqs(struct spdk_nvme_qpair *qpair, uint32_t dnr);
336 :
337 : static void
338 5 : nvme_tcp_ctrlr_disconnect_qpair(struct spdk_nvme_ctrlr *ctrlr, struct spdk_nvme_qpair *qpair)
339 : {
340 5 : struct nvme_tcp_qpair *tqpair = nvme_tcp_qpair(qpair);
341 : struct nvme_tcp_pdu *pdu;
342 : int rc;
343 : struct nvme_tcp_poll_group *group;
344 :
345 5 : if (tqpair->needs_poll) {
346 1 : group = nvme_tcp_poll_group(qpair->poll_group);
347 1 : TAILQ_REMOVE(&group->needs_poll, tqpair, link);
348 1 : tqpair->needs_poll = false;
349 : }
350 :
351 5 : rc = spdk_sock_close(&tqpair->sock);
352 :
353 5 : if (tqpair->sock != NULL) {
354 1 : SPDK_ERRLOG("tqpair=%p, errno=%d, rc=%d\n", tqpair, errno, rc);
355 : /* Set it to NULL manually */
356 1 : tqpair->sock = NULL;
357 : }
358 :
359 : /* clear the send_queue */
360 6 : while (!TAILQ_EMPTY(&tqpair->send_queue)) {
361 1 : pdu = TAILQ_FIRST(&tqpair->send_queue);
362 : /* Remove the pdu from the send_queue to prevent the wrong sending out
363 : * in the next round connection
364 : */
365 1 : TAILQ_REMOVE(&tqpair->send_queue, pdu, tailq);
366 : }
367 :
368 5 : nvme_tcp_qpair_abort_reqs(qpair, 0);
369 :
370 : /* If the qpair is marked as asynchronous, let it go through the process_completions() to
371 : * let any outstanding requests (e.g. those with outstanding accel operations) complete.
372 : * Otherwise, there's no way of waiting for them, so tqpair->outstanding_reqs has to be
373 : * empty.
374 : */
375 5 : if (qpair->async) {
376 4 : nvme_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_QUIESCING);
377 : } else {
378 1 : assert(TAILQ_EMPTY(&tqpair->outstanding_reqs));
379 1 : nvme_transport_ctrlr_disconnect_qpair_done(qpair);
380 : }
381 5 : }
382 :
383 : static int
384 4 : nvme_tcp_ctrlr_delete_io_qpair(struct spdk_nvme_ctrlr *ctrlr, struct spdk_nvme_qpair *qpair)
385 : {
386 4 : struct nvme_tcp_qpair *tqpair = nvme_tcp_qpair(qpair);
387 :
388 4 : assert(qpair != NULL);
389 4 : nvme_tcp_qpair_abort_reqs(qpair, 0);
390 4 : assert(TAILQ_EMPTY(&tqpair->outstanding_reqs));
391 :
392 4 : nvme_qpair_deinit(qpair);
393 4 : nvme_tcp_free_reqs(tqpair);
394 4 : if (!tqpair->shared_stats) {
395 4 : free(tqpair->stats);
396 : }
397 4 : free(tqpair);
398 :
399 4 : return 0;
400 : }
401 :
/* Controller enable hook: nothing is done here for TCP, so it always reports success (0). */
static int
nvme_tcp_ctrlr_enable(struct spdk_nvme_ctrlr *ctrlr)
{
	return 0;
}
407 :
408 : static int
409 3 : nvme_tcp_ctrlr_destruct(struct spdk_nvme_ctrlr *ctrlr)
410 : {
411 3 : struct nvme_tcp_ctrlr *tctrlr = nvme_tcp_ctrlr(ctrlr);
412 :
413 3 : if (ctrlr->adminq) {
414 0 : nvme_tcp_ctrlr_delete_io_qpair(ctrlr, ctrlr->adminq);
415 : }
416 :
417 3 : nvme_ctrlr_destruct_finish(ctrlr);
418 :
419 3 : free(tctrlr);
420 :
421 3 : return 0;
422 : }
423 :
424 : static void
425 0 : pdu_write_done(void *cb_arg, int err)
426 : {
427 0 : struct nvme_tcp_pdu *pdu = cb_arg;
428 0 : struct nvme_tcp_qpair *tqpair = pdu->qpair;
429 : struct nvme_tcp_poll_group *pgroup;
430 :
431 : /* If there are queued requests, we assume they are queued because they are waiting
432 : * for resources to be released. Those resources are almost certainly released in
433 : * response to a PDU completing here. However, to attempt to make forward progress
434 : * the qpair needs to be polled and we can't rely on another network event to make
435 : * that happen. Add it to a list of qpairs to poll regardless of network activity
436 : * here.
437 : * Besides, when tqpair state is NVME_TCP_QPAIR_STATE_FABRIC_CONNECT_POLL or
438 : * NVME_TCP_QPAIR_STATE_INITIALIZING, need to add it to needs_poll list too to make
439 : * forward progress in case that the resources are released after icreq's or CONNECT's
440 : * resp is processed. */
441 0 : if (tqpair->qpair.poll_group && !tqpair->needs_poll && (!STAILQ_EMPTY(&tqpair->qpair.queued_req) ||
442 0 : tqpair->state == NVME_TCP_QPAIR_STATE_FABRIC_CONNECT_POLL ||
443 0 : tqpair->state == NVME_TCP_QPAIR_STATE_INITIALIZING)) {
444 0 : pgroup = nvme_tcp_poll_group(tqpair->qpair.poll_group);
445 :
446 0 : TAILQ_INSERT_TAIL(&pgroup->needs_poll, tqpair, link);
447 0 : tqpair->needs_poll = true;
448 : }
449 :
450 0 : TAILQ_REMOVE(&tqpair->send_queue, pdu, tailq);
451 :
452 0 : if (err != 0) {
453 0 : nvme_transport_ctrlr_disconnect_qpair(tqpair->qpair.ctrlr, &tqpair->qpair);
454 0 : return;
455 : }
456 :
457 0 : assert(pdu->cb_fn != NULL);
458 0 : pdu->cb_fn(pdu->cb_arg);
459 : }
460 :
461 : static void
462 0 : pdu_write_fail(struct nvme_tcp_pdu *pdu, int status)
463 : {
464 0 : struct nvme_tcp_qpair *tqpair = pdu->qpair;
465 :
466 : /* This function is similar to pdu_write_done(), but it should be called before a PDU is
467 : * sent over the socket */
468 0 : TAILQ_INSERT_TAIL(&tqpair->send_queue, pdu, tailq);
469 0 : pdu_write_done(pdu, status);
470 0 : }
471 :
472 : static void
473 23 : _tcp_write_pdu(struct nvme_tcp_pdu *pdu)
474 : {
475 23 : uint32_t mapped_length = 0;
476 23 : struct nvme_tcp_qpair *tqpair = pdu->qpair;
477 :
478 46 : pdu->sock_req.iovcnt = nvme_tcp_build_iovs(pdu->iov, SPDK_COUNTOF(pdu->iov), pdu,
479 23 : (bool)tqpair->flags.host_hdgst_enable, (bool)tqpair->flags.host_ddgst_enable,
480 : &mapped_length);
481 23 : TAILQ_INSERT_TAIL(&tqpair->send_queue, pdu, tailq);
482 23 : if (spdk_unlikely(mapped_length < pdu->data_len)) {
483 0 : SPDK_ERRLOG("could not map the whole %u bytes (mapped only %u bytes)\n", pdu->data_len,
484 : mapped_length);
485 0 : pdu_write_done(pdu, -EINVAL);
486 0 : return;
487 : }
488 23 : pdu->sock_req.cb_fn = pdu_write_done;
489 23 : pdu->sock_req.cb_arg = pdu;
490 23 : tqpair->stats->submitted_requests++;
491 23 : spdk_sock_writev_async(tqpair->sock, &pdu->sock_req);
492 : }
493 :
494 : static void
495 0 : tcp_write_pdu_seq_cb(void *ctx, int status)
496 : {
497 0 : struct nvme_tcp_pdu *pdu = ctx;
498 0 : struct nvme_tcp_req *treq = pdu->req;
499 0 : struct nvme_request *req = treq->req;
500 :
501 0 : assert(treq->ordering.bits.in_progress_accel);
502 0 : treq->ordering.bits.in_progress_accel = 0;
503 :
504 0 : req->accel_sequence = NULL;
505 0 : if (spdk_unlikely(status != 0)) {
506 0 : SPDK_ERRLOG("Failed to execute accel sequence: %d\n", status);
507 0 : pdu_write_fail(pdu, status);
508 0 : return;
509 : }
510 :
511 0 : _tcp_write_pdu(pdu);
512 : }
513 :
514 : static void
515 23 : tcp_write_pdu(struct nvme_tcp_pdu *pdu)
516 : {
517 23 : struct nvme_tcp_req *treq = pdu->req;
518 23 : struct nvme_tcp_qpair *tqpair = pdu->qpair;
519 : struct nvme_tcp_poll_group *tgroup;
520 : struct nvme_request *req;
521 :
522 23 : if (spdk_likely(treq != NULL)) {
523 6 : req = treq->req;
524 6 : if (req->accel_sequence != NULL &&
525 0 : spdk_nvme_opc_get_data_transfer(req->cmd.opc) == SPDK_NVME_DATA_HOST_TO_CONTROLLER &&
526 0 : pdu->data_len > 0) {
527 0 : assert(tqpair->qpair.poll_group != NULL);
528 0 : tgroup = nvme_tcp_poll_group(tqpair->qpair.poll_group);
529 0 : nvme_tcp_accel_finish_sequence(tgroup, treq, req->accel_sequence,
530 : tcp_write_pdu_seq_cb, pdu);
531 0 : return;
532 : }
533 : }
534 :
535 23 : _tcp_write_pdu(pdu);
536 : }
537 :
538 : static void
539 0 : pdu_accel_compute_crc32_done(void *cb_arg, int status)
540 : {
541 0 : struct nvme_tcp_pdu *pdu = cb_arg;
542 0 : struct nvme_tcp_req *req = pdu->req;
543 :
544 0 : assert(req->ordering.bits.in_progress_accel);
545 0 : req->ordering.bits.in_progress_accel = 0;
546 :
547 0 : if (spdk_unlikely(status)) {
548 0 : SPDK_ERRLOG("Failed to compute the data digest for pdu =%p\n", pdu);
549 0 : pdu_write_fail(pdu, status);
550 0 : return;
551 : }
552 :
553 0 : pdu->data_digest_crc32 ^= SPDK_CRC32C_XOR;
554 0 : MAKE_DIGEST_WORD(pdu->data_digest, pdu->data_digest_crc32);
555 :
556 0 : _tcp_write_pdu(pdu);
557 : }
558 :
559 : static void
560 0 : pdu_accel_compute_crc32_seq_cb(void *cb_arg, int status)
561 : {
562 0 : struct nvme_tcp_pdu *pdu = cb_arg;
563 0 : struct nvme_tcp_qpair *tqpair = pdu->qpair;
564 0 : struct nvme_tcp_poll_group *tgroup = nvme_tcp_poll_group(tqpair->qpair.poll_group);
565 0 : struct nvme_tcp_req *treq = pdu->req;
566 0 : struct nvme_request *req = treq->req;
567 :
568 0 : assert(treq->ordering.bits.in_progress_accel);
569 0 : treq->ordering.bits.in_progress_accel = 0;
570 :
571 0 : req->accel_sequence = NULL;
572 0 : if (spdk_unlikely(status != 0)) {
573 0 : SPDK_ERRLOG("Failed to execute accel sequence: %d\n", status);
574 0 : pdu_write_fail(pdu, status);
575 0 : return;
576 : }
577 :
578 0 : nvme_tcp_accel_submit_crc32c(tgroup, pdu->req, &pdu->data_digest_crc32,
579 0 : pdu->data_iov, pdu->data_iovcnt, 0,
580 : pdu_accel_compute_crc32_done, pdu);
581 : }
582 :
583 : static void
584 0 : pdu_accel_seq_compute_crc32_done(void *cb_arg)
585 : {
586 0 : struct nvme_tcp_pdu *pdu = cb_arg;
587 :
588 0 : pdu->data_digest_crc32 ^= SPDK_CRC32C_XOR;
589 0 : MAKE_DIGEST_WORD(pdu->data_digest, pdu->data_digest_crc32);
590 0 : }
591 :
592 : static bool
593 3 : pdu_accel_compute_crc32(struct nvme_tcp_pdu *pdu)
594 : {
595 3 : struct nvme_tcp_qpair *tqpair = pdu->qpair;
596 3 : struct nvme_tcp_poll_group *tgroup = nvme_tcp_poll_group(tqpair->qpair.poll_group);
597 3 : struct nvme_request *req = ((struct nvme_tcp_req *)pdu->req)->req;
598 : int rc;
599 :
600 : /* Only support this limited case for the first step */
601 3 : if (spdk_unlikely(nvme_qpair_get_state(&tqpair->qpair) < NVME_QPAIR_CONNECTED ||
602 : pdu->dif_ctx != NULL ||
603 : pdu->data_len % SPDK_NVME_TCP_DIGEST_ALIGNMENT != 0)) {
604 3 : return false;
605 : }
606 :
607 0 : if (tqpair->qpair.poll_group == NULL) {
608 0 : return false;
609 : }
610 :
611 0 : if (tgroup->group.group->accel_fn_table.append_crc32c != NULL) {
612 0 : rc = nvme_tcp_accel_append_crc32c(tgroup, &req->accel_sequence,
613 : &pdu->data_digest_crc32,
614 0 : pdu->data_iov, pdu->data_iovcnt, 0,
615 : pdu_accel_seq_compute_crc32_done, pdu);
616 0 : if (spdk_unlikely(rc != 0)) {
617 : /* If accel is out of resources, fall back to non-accelerated crc32 */
618 0 : if (rc == -ENOMEM) {
619 0 : return false;
620 : }
621 :
622 0 : SPDK_ERRLOG("Failed to append crc32c operation: %d\n", rc);
623 0 : pdu_write_fail(pdu, rc);
624 0 : return true;
625 : }
626 :
627 0 : tcp_write_pdu(pdu);
628 0 : return true;
629 0 : } else if (tgroup->group.group->accel_fn_table.submit_accel_crc32c != NULL) {
630 0 : if (req->accel_sequence != NULL) {
631 0 : nvme_tcp_accel_finish_sequence(tgroup, pdu->req, req->accel_sequence,
632 : pdu_accel_compute_crc32_seq_cb, pdu);
633 : } else {
634 0 : nvme_tcp_accel_submit_crc32c(tgroup, pdu->req, &pdu->data_digest_crc32,
635 0 : pdu->data_iov, pdu->data_iovcnt, 0,
636 : pdu_accel_compute_crc32_done, pdu);
637 : }
638 :
639 0 : return true;
640 : }
641 :
642 0 : return false;
643 : }
644 :
645 : static void
646 0 : pdu_compute_crc32_seq_cb(void *cb_arg, int status)
647 : {
648 0 : struct nvme_tcp_pdu *pdu = cb_arg;
649 0 : struct nvme_tcp_req *treq = pdu->req;
650 0 : struct nvme_request *req = treq->req;
651 : uint32_t crc32c;
652 :
653 0 : assert(treq->ordering.bits.in_progress_accel);
654 0 : treq->ordering.bits.in_progress_accel = 0;
655 :
656 0 : req->accel_sequence = NULL;
657 0 : if (spdk_unlikely(status != 0)) {
658 0 : SPDK_ERRLOG("Failed to execute accel sequence: %d\n", status);
659 0 : pdu_write_fail(pdu, status);
660 0 : return;
661 : }
662 :
663 0 : crc32c = nvme_tcp_pdu_calc_data_digest(pdu);
664 0 : crc32c = crc32c ^ SPDK_CRC32C_XOR;
665 0 : MAKE_DIGEST_WORD(pdu->data_digest, crc32c);
666 :
667 0 : _tcp_write_pdu(pdu);
668 : }
669 :
670 : static void
671 23 : pdu_compute_crc32(struct nvme_tcp_pdu *pdu)
672 : {
673 23 : struct nvme_tcp_qpair *tqpair = pdu->qpair;
674 : struct nvme_tcp_poll_group *tgroup;
675 : struct nvme_request *req;
676 : uint32_t crc32c;
677 :
678 : /* Data Digest */
679 23 : if (pdu->data_len > 0 && g_nvme_tcp_ddgst[pdu->hdr.common.pdu_type] &&
680 : tqpair->flags.host_ddgst_enable) {
681 3 : if (pdu_accel_compute_crc32(pdu)) {
682 0 : return;
683 : }
684 :
685 3 : req = ((struct nvme_tcp_req *)pdu->req)->req;
686 3 : if (req->accel_sequence != NULL) {
687 0 : tgroup = nvme_tcp_poll_group(tqpair->qpair.poll_group);
688 0 : nvme_tcp_accel_finish_sequence(tgroup, pdu->req, req->accel_sequence,
689 : pdu_compute_crc32_seq_cb, pdu);
690 0 : return;
691 : }
692 :
693 3 : crc32c = nvme_tcp_pdu_calc_data_digest(pdu);
694 3 : crc32c = crc32c ^ SPDK_CRC32C_XOR;
695 3 : MAKE_DIGEST_WORD(pdu->data_digest, crc32c);
696 : }
697 :
698 23 : tcp_write_pdu(pdu);
699 : }
700 :
701 : static int
702 23 : nvme_tcp_qpair_write_pdu(struct nvme_tcp_qpair *tqpair,
703 : struct nvme_tcp_pdu *pdu,
704 : nvme_tcp_qpair_xfer_complete_cb cb_fn,
705 : void *cb_arg)
706 : {
707 : int hlen;
708 : uint32_t crc32c;
709 :
710 23 : hlen = pdu->hdr.common.hlen;
711 23 : pdu->cb_fn = cb_fn;
712 23 : pdu->cb_arg = cb_arg;
713 23 : pdu->qpair = tqpair;
714 :
715 : /* Header Digest */
716 23 : if (g_nvme_tcp_hdgst[pdu->hdr.common.pdu_type] && tqpair->flags.host_hdgst_enable) {
717 3 : crc32c = nvme_tcp_pdu_calc_header_digest(pdu);
718 3 : MAKE_DIGEST_WORD((uint8_t *)&pdu->hdr.raw[hlen], crc32c);
719 : }
720 :
721 23 : pdu_compute_crc32(pdu);
722 :
723 23 : return 0;
724 : }
725 :
726 : static int
727 27 : nvme_tcp_try_memory_translation(struct nvme_tcp_req *tcp_req, void **addr, uint32_t length)
728 : {
729 27 : struct nvme_request *req = tcp_req->req;
730 27 : struct spdk_memory_domain_translation_result translation = {
731 : .iov_count = 0,
732 : .size = sizeof(translation)
733 : };
734 : int rc;
735 :
736 27 : if (!tcp_req->ordering.bits.domain_in_use) {
737 27 : return 0;
738 : }
739 :
740 0 : rc = spdk_memory_domain_translate_data(req->payload.opts->memory_domain,
741 0 : req->payload.opts->memory_domain_ctx, spdk_memory_domain_get_system_domain(), NULL, *addr, length,
742 : &translation);
743 0 : if (spdk_unlikely(rc || translation.iov_count != 1)) {
744 0 : SPDK_ERRLOG("DMA memory translation failed, rc %d, iov_count %u\n", rc, translation.iov_count);
745 0 : return -EFAULT;
746 : }
747 :
748 0 : assert(length == translation.iov.iov_len);
749 0 : *addr = translation.iov.iov_base;
750 0 : return 0;
751 : }
752 :
753 : /*
754 : * Build SGL describing contiguous payload buffer.
755 : */
756 : static int
757 2 : nvme_tcp_build_contig_request(struct nvme_tcp_qpair *tqpair, struct nvme_tcp_req *tcp_req)
758 : {
759 2 : struct nvme_request *req = tcp_req->req;
760 :
761 : /* ubsan complains about applying zero offset to null pointer if contig_or_cb_arg is NULL,
762 : * so just double cast it to make it go away */
763 2 : void *addr = (void *)((uintptr_t)req->payload.contig_or_cb_arg + req->payload_offset);
764 2 : size_t length = req->payload_size;
765 : int rc;
766 :
767 2 : SPDK_DEBUGLOG(nvme, "enter\n");
768 :
769 2 : assert(nvme_payload_type(&req->payload) == NVME_PAYLOAD_TYPE_CONTIG);
770 2 : rc = nvme_tcp_try_memory_translation(tcp_req, &addr, length);
771 2 : if (spdk_unlikely(rc)) {
772 0 : return rc;
773 : }
774 :
775 2 : tcp_req->iov[0].iov_base = addr;
776 2 : tcp_req->iov[0].iov_len = length;
777 2 : tcp_req->iovcnt = 1;
778 2 : return 0;
779 : }
780 :
781 : /*
782 : * Build SGL describing scattered payload buffer.
783 : */
784 : static int
785 6 : nvme_tcp_build_sgl_request(struct nvme_tcp_qpair *tqpair, struct nvme_tcp_req *tcp_req)
786 : {
787 : int rc;
788 6 : uint32_t length, remaining_size, iovcnt = 0, max_num_sgl;
789 6 : struct nvme_request *req = tcp_req->req;
790 :
791 6 : SPDK_DEBUGLOG(nvme, "enter\n");
792 :
793 6 : assert(req->payload_size != 0);
794 6 : assert(nvme_payload_type(&req->payload) == NVME_PAYLOAD_TYPE_SGL);
795 6 : assert(req->payload.reset_sgl_fn != NULL);
796 6 : assert(req->payload.next_sge_fn != NULL);
797 6 : req->payload.reset_sgl_fn(req->payload.contig_or_cb_arg, req->payload_offset);
798 :
799 6 : max_num_sgl = spdk_min(req->qpair->ctrlr->max_sges, NVME_TCP_MAX_SGL_DESCRIPTORS);
800 6 : remaining_size = req->payload_size;
801 :
802 : do {
803 25 : void *addr;
804 :
805 25 : rc = req->payload.next_sge_fn(req->payload.contig_or_cb_arg, &addr, &length);
806 25 : if (rc) {
807 0 : return -1;
808 : }
809 :
810 25 : rc = nvme_tcp_try_memory_translation(tcp_req, &addr, length);
811 25 : if (spdk_unlikely(rc)) {
812 0 : return rc;
813 : }
814 :
815 25 : length = spdk_min(length, remaining_size);
816 25 : tcp_req->iov[iovcnt].iov_base = addr;
817 25 : tcp_req->iov[iovcnt].iov_len = length;
818 25 : remaining_size -= length;
819 25 : iovcnt++;
820 25 : } while (remaining_size > 0 && iovcnt < max_num_sgl);
821 :
822 :
823 : /* Should be impossible if we did our sgl checks properly up the stack, but do a sanity check here. */
824 6 : if (remaining_size > 0) {
825 2 : SPDK_ERRLOG("Failed to construct tcp_req=%p, and the iovcnt=%u, remaining_size=%u\n",
826 : tcp_req, iovcnt, remaining_size);
827 2 : return -1;
828 : }
829 :
830 4 : tcp_req->iovcnt = iovcnt;
831 :
832 4 : return 0;
833 : }
834 :
835 : static int
836 5 : nvme_tcp_req_init(struct nvme_tcp_qpair *tqpair, struct nvme_request *req,
837 : struct nvme_tcp_req *tcp_req)
838 : {
839 5 : struct spdk_nvme_ctrlr *ctrlr = tqpair->qpair.ctrlr;
840 5 : int rc = 0;
841 : enum spdk_nvme_data_transfer xfer;
842 : uint32_t max_in_capsule_data_size;
843 :
844 5 : tcp_req->req = req;
845 5 : tcp_req->ordering.bits.domain_in_use = (req->payload.opts && req->payload.opts->memory_domain);
846 :
847 5 : req->cmd.cid = tcp_req->cid;
848 5 : req->cmd.psdt = SPDK_NVME_PSDT_SGL_MPTR_CONTIG;
849 5 : req->cmd.dptr.sgl1.unkeyed.type = SPDK_NVME_SGL_TYPE_TRANSPORT_DATA_BLOCK;
850 5 : req->cmd.dptr.sgl1.unkeyed.subtype = SPDK_NVME_SGL_SUBTYPE_TRANSPORT;
851 5 : req->cmd.dptr.sgl1.unkeyed.length = req->payload_size;
852 :
853 5 : if (spdk_unlikely(req->cmd.opc == SPDK_NVME_OPC_FABRIC)) {
854 0 : struct spdk_nvmf_capsule_cmd *nvmf_cmd = (struct spdk_nvmf_capsule_cmd *)&req->cmd;
855 :
856 0 : xfer = spdk_nvme_opc_get_data_transfer(nvmf_cmd->fctype);
857 : } else {
858 5 : xfer = spdk_nvme_opc_get_data_transfer(req->cmd.opc);
859 : }
860 :
861 : /* For c2h delay filling in the iov until the data arrives.
862 : * For h2c some delay is also possible if data doesn't fit into cmd capsule (not implemented). */
863 5 : if (nvme_payload_type(&req->payload) == NVME_PAYLOAD_TYPE_CONTIG) {
864 2 : if (xfer != SPDK_NVME_DATA_CONTROLLER_TO_HOST) {
865 2 : rc = nvme_tcp_build_contig_request(tqpair, tcp_req);
866 : }
867 3 : } else if (nvme_payload_type(&req->payload) == NVME_PAYLOAD_TYPE_SGL) {
868 3 : if (xfer != SPDK_NVME_DATA_CONTROLLER_TO_HOST) {
869 3 : rc = nvme_tcp_build_sgl_request(tqpair, tcp_req);
870 : }
871 : } else {
872 0 : rc = -1;
873 : }
874 :
875 5 : if (rc) {
876 1 : return rc;
877 : }
878 :
879 4 : if (xfer == SPDK_NVME_DATA_HOST_TO_CONTROLLER) {
880 3 : max_in_capsule_data_size = ctrlr->ioccsz_bytes;
881 3 : if (spdk_unlikely((req->cmd.opc == SPDK_NVME_OPC_FABRIC) ||
882 : nvme_qpair_is_admin_queue(&tqpair->qpair))) {
883 3 : max_in_capsule_data_size = SPDK_NVME_TCP_IN_CAPSULE_DATA_MAX_SIZE;
884 : }
885 :
886 3 : if (req->payload_size <= max_in_capsule_data_size) {
887 3 : req->cmd.dptr.sgl1.unkeyed.type = SPDK_NVME_SGL_TYPE_DATA_BLOCK;
888 3 : req->cmd.dptr.sgl1.unkeyed.subtype = SPDK_NVME_SGL_SUBTYPE_OFFSET;
889 3 : req->cmd.dptr.sgl1.address = 0;
890 3 : tcp_req->in_capsule_data = true;
891 : }
892 : }
893 :
894 4 : return 0;
895 : }
896 :
897 : static inline bool
898 8 : nvme_tcp_req_complete_safe(struct nvme_tcp_req *tcp_req)
899 : {
900 8 : if (!(tcp_req->ordering.bits.send_ack && tcp_req->ordering.bits.data_recv &&
901 7 : !tcp_req->ordering.bits.in_progress_accel)) {
902 1 : return false;
903 : }
904 :
905 7 : assert(tcp_req->state == NVME_TCP_REQ_ACTIVE);
906 7 : assert(tcp_req->tqpair != NULL);
907 7 : assert(tcp_req->req != NULL);
908 :
909 7 : SPDK_DEBUGLOG(nvme, "complete tcp_req(%p) on tqpair=%p\n", tcp_req, tcp_req->tqpair);
910 :
911 7 : if (!tcp_req->tqpair->qpair.in_completion_context) {
912 6 : tcp_req->tqpair->async_complete++;
913 : }
914 :
915 7 : nvme_tcp_req_complete(tcp_req, tcp_req->tqpair, &tcp_req->rsp, true);
916 7 : return true;
917 : }
918 :
919 : static void
920 0 : nvme_tcp_qpair_cmd_send_complete(void *cb_arg)
921 : {
922 0 : struct nvme_tcp_req *tcp_req = cb_arg;
923 :
924 0 : SPDK_DEBUGLOG(nvme, "tcp req %p, cid %u, qid %u\n", tcp_req, tcp_req->cid,
925 : tcp_req->tqpair->qpair.id);
926 0 : tcp_req->ordering.bits.send_ack = 1;
927 : /* Handle the r2t case */
928 0 : if (spdk_unlikely(tcp_req->ordering.bits.h2c_send_waiting_ack)) {
929 0 : SPDK_DEBUGLOG(nvme, "tcp req %p, send H2C data\n", tcp_req);
930 0 : nvme_tcp_send_h2c_data(tcp_req);
931 : } else {
932 0 : if (tcp_req->in_capsule_data && tcp_req->ordering.bits.domain_in_use) {
933 0 : spdk_memory_domain_invalidate_data(tcp_req->req->payload.opts->memory_domain,
934 0 : tcp_req->req->payload.opts->memory_domain_ctx, tcp_req->iov, tcp_req->iovcnt);
935 : }
936 :
937 0 : nvme_tcp_req_complete_safe(tcp_req);
938 : }
939 0 : }
940 :
941 : static int
942 4 : nvme_tcp_qpair_capsule_cmd_send(struct nvme_tcp_qpair *tqpair,
943 : struct nvme_tcp_req *tcp_req)
944 : {
945 : struct nvme_tcp_pdu *pdu;
946 : struct spdk_nvme_tcp_cmd *capsule_cmd;
947 4 : uint32_t plen = 0, alignment;
948 : uint8_t pdo;
949 :
950 4 : SPDK_DEBUGLOG(nvme, "enter\n");
951 4 : pdu = tcp_req->pdu;
952 4 : pdu->req = tcp_req;
953 :
954 4 : capsule_cmd = &pdu->hdr.capsule_cmd;
955 4 : capsule_cmd->common.pdu_type = SPDK_NVME_TCP_PDU_TYPE_CAPSULE_CMD;
956 4 : plen = capsule_cmd->common.hlen = sizeof(*capsule_cmd);
957 4 : capsule_cmd->ccsqe = tcp_req->req->cmd;
958 :
959 4 : SPDK_DEBUGLOG(nvme, "capsule_cmd cid=%u on tqpair(%p)\n", tcp_req->req->cmd.cid, tqpair);
960 :
961 4 : if (tqpair->flags.host_hdgst_enable) {
962 2 : SPDK_DEBUGLOG(nvme, "Header digest is enabled for capsule command on tcp_req=%p\n",
963 : tcp_req);
964 2 : capsule_cmd->common.flags |= SPDK_NVME_TCP_CH_FLAGS_HDGSTF;
965 2 : plen += SPDK_NVME_TCP_DIGEST_LEN;
966 : }
967 :
968 4 : if ((tcp_req->req->payload_size == 0) || !tcp_req->in_capsule_data) {
969 0 : goto end;
970 : }
971 :
972 4 : pdo = plen;
973 4 : pdu->padding_len = 0;
974 4 : if (tqpair->cpda) {
975 1 : alignment = (tqpair->cpda + 1) << 2;
976 1 : if (alignment > plen) {
977 1 : pdu->padding_len = alignment - plen;
978 1 : pdo = alignment;
979 1 : plen = alignment;
980 : }
981 : }
982 :
983 4 : capsule_cmd->common.pdo = pdo;
984 4 : plen += tcp_req->req->payload_size;
985 4 : if (tqpair->flags.host_ddgst_enable) {
986 2 : capsule_cmd->common.flags |= SPDK_NVME_TCP_CH_FLAGS_DDGSTF;
987 2 : plen += SPDK_NVME_TCP_DIGEST_LEN;
988 : }
989 :
990 4 : tcp_req->datao = 0;
991 4 : nvme_tcp_pdu_set_data_buf(pdu, tcp_req->iov, tcp_req->iovcnt,
992 4 : 0, tcp_req->req->payload_size);
993 4 : end:
994 4 : capsule_cmd->common.plen = plen;
995 4 : return nvme_tcp_qpair_write_pdu(tqpair, pdu, nvme_tcp_qpair_cmd_send_complete, tcp_req);
996 :
997 : }
998 :
999 : static int
1000 3 : nvme_tcp_qpair_submit_request(struct spdk_nvme_qpair *qpair,
1001 : struct nvme_request *req)
1002 : {
1003 : struct nvme_tcp_qpair *tqpair;
1004 : struct nvme_tcp_req *tcp_req;
1005 :
1006 3 : tqpair = nvme_tcp_qpair(qpair);
1007 3 : assert(tqpair != NULL);
1008 3 : assert(req != NULL);
1009 :
1010 3 : tcp_req = nvme_tcp_req_get(tqpair);
1011 3 : if (!tcp_req) {
1012 1 : tqpair->stats->queued_requests++;
1013 : /* Inform the upper layer to try again later. */
1014 1 : return -EAGAIN;
1015 : }
1016 :
1017 2 : if (spdk_unlikely(nvme_tcp_req_init(tqpair, req, tcp_req))) {
1018 1 : SPDK_ERRLOG("nvme_tcp_req_init() failed\n");
1019 1 : nvme_tcp_req_put(tqpair, tcp_req);
1020 1 : return -1;
1021 : }
1022 :
1023 1 : tqpair->qpair.queue_depth++;
1024 1 : spdk_trace_record(TRACE_NVME_TCP_SUBMIT, qpair->id, 0, (uintptr_t)req, req->cb_arg,
1025 : (uint32_t)req->cmd.cid, (uint32_t)req->cmd.opc,
1026 : req->cmd.cdw10, req->cmd.cdw11, req->cmd.cdw12, tqpair->qpair.queue_depth);
1027 1 : TAILQ_INSERT_TAIL(&tqpair->outstanding_reqs, tcp_req, link);
1028 1 : return nvme_tcp_qpair_capsule_cmd_send(tqpair, tcp_req);
1029 : }
1030 :
/*
 * Reset hook for a TCP qpair. Nothing is done here; it always reports success (0).
 */
static int
nvme_tcp_qpair_reset(struct spdk_nvme_qpair *qpair)
{
	(void)qpair;

	return 0;
}
1036 :
1037 : static void
1038 9 : nvme_tcp_req_complete(struct nvme_tcp_req *tcp_req,
1039 : struct nvme_tcp_qpair *tqpair,
1040 : struct spdk_nvme_cpl *rsp,
1041 : bool print_on_error)
1042 : {
1043 9 : struct spdk_nvme_cpl cpl;
1044 : struct spdk_nvme_qpair *qpair;
1045 : struct nvme_request *req;
1046 : bool print_error;
1047 :
1048 9 : assert(tcp_req->req != NULL);
1049 9 : req = tcp_req->req;
1050 9 : qpair = req->qpair;
1051 :
1052 : /* Cache arguments to be passed to nvme_complete_request since tcp_req can be zeroed when released */
1053 9 : memcpy(&cpl, rsp, sizeof(cpl));
1054 :
1055 9 : if (spdk_unlikely(spdk_nvme_cpl_is_error(rsp))) {
1056 3 : print_error = print_on_error && !qpair->ctrlr->opts.disable_error_logging;
1057 :
1058 3 : if (print_error) {
1059 3 : spdk_nvme_qpair_print_command(qpair, &req->cmd);
1060 : }
1061 :
1062 3 : if (print_error || SPDK_DEBUGLOG_FLAG_ENABLED("nvme")) {
1063 3 : spdk_nvme_qpair_print_completion(qpair, rsp);
1064 : }
1065 : }
1066 :
1067 9 : tqpair->qpair.queue_depth--;
1068 9 : spdk_trace_record(TRACE_NVME_TCP_COMPLETE, qpair->id, 0, (uintptr_t)req, req->cb_arg,
1069 : (uint32_t)req->cmd.cid, (uint32_t)cpl.status_raw, tqpair->qpair.queue_depth);
1070 9 : TAILQ_REMOVE(&tcp_req->tqpair->outstanding_reqs, tcp_req, link);
1071 9 : nvme_tcp_req_put(tqpair, tcp_req);
1072 9 : nvme_complete_request(req->cb_fn, req->cb_arg, req->qpair, req, &cpl);
1073 9 : }
1074 :
1075 : static void
1076 9 : nvme_tcp_qpair_abort_reqs(struct spdk_nvme_qpair *qpair, uint32_t dnr)
1077 : {
1078 : struct nvme_tcp_req *tcp_req, *tmp;
1079 9 : struct spdk_nvme_cpl cpl = {};
1080 9 : struct nvme_tcp_qpair *tqpair = nvme_tcp_qpair(qpair);
1081 :
1082 9 : cpl.sqid = qpair->id;
1083 9 : cpl.status.sc = SPDK_NVME_SC_ABORTED_SQ_DELETION;
1084 9 : cpl.status.sct = SPDK_NVME_SCT_GENERIC;
1085 9 : cpl.status.dnr = dnr;
1086 :
1087 13 : TAILQ_FOREACH_SAFE(tcp_req, &tqpair->outstanding_reqs, link, tmp) {
1088 : /* We cannot abort requests with accel operations in progress */
1089 4 : if (tcp_req->ordering.bits.in_progress_accel) {
1090 2 : continue;
1091 : }
1092 :
1093 2 : nvme_tcp_req_complete(tcp_req, tqpair, &cpl, true);
1094 : }
1095 9 : }
1096 :
1097 : static void
1098 0 : nvme_tcp_qpair_send_h2c_term_req_complete(void *cb_arg)
1099 : {
1100 0 : struct nvme_tcp_qpair *tqpair = cb_arg;
1101 :
1102 0 : tqpair->state = NVME_TCP_QPAIR_STATE_EXITING;
1103 0 : }
1104 :
1105 : static void
1106 15 : nvme_tcp_qpair_send_h2c_term_req(struct nvme_tcp_qpair *tqpair, struct nvme_tcp_pdu *pdu,
1107 : enum spdk_nvme_tcp_term_req_fes fes, uint32_t error_offset)
1108 : {
1109 : struct nvme_tcp_pdu *rsp_pdu;
1110 : struct spdk_nvme_tcp_term_req_hdr *h2c_term_req;
1111 15 : uint32_t h2c_term_req_hdr_len = sizeof(*h2c_term_req);
1112 : uint8_t copy_len;
1113 :
1114 15 : rsp_pdu = tqpair->send_pdu;
1115 15 : memset(rsp_pdu, 0, sizeof(*rsp_pdu));
1116 15 : h2c_term_req = &rsp_pdu->hdr.term_req;
1117 15 : h2c_term_req->common.pdu_type = SPDK_NVME_TCP_PDU_TYPE_H2C_TERM_REQ;
1118 15 : h2c_term_req->common.hlen = h2c_term_req_hdr_len;
1119 :
1120 15 : if ((fes == SPDK_NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD) ||
1121 : (fes == SPDK_NVME_TCP_TERM_REQ_FES_INVALID_DATA_UNSUPPORTED_PARAMETER)) {
1122 13 : DSET32(&h2c_term_req->fei, error_offset);
1123 : }
1124 :
1125 15 : copy_len = pdu->hdr.common.hlen;
1126 15 : if (copy_len > SPDK_NVME_TCP_TERM_REQ_ERROR_DATA_MAX_SIZE) {
1127 1 : copy_len = SPDK_NVME_TCP_TERM_REQ_ERROR_DATA_MAX_SIZE;
1128 : }
1129 :
1130 : /* Copy the error info into the buffer */
1131 15 : memcpy((uint8_t *)rsp_pdu->hdr.raw + h2c_term_req_hdr_len, pdu->hdr.raw, copy_len);
1132 15 : nvme_tcp_pdu_set_data(rsp_pdu, (uint8_t *)rsp_pdu->hdr.raw + h2c_term_req_hdr_len, copy_len);
1133 :
1134 : /* Contain the header len of the wrong received pdu */
1135 15 : h2c_term_req->common.plen = h2c_term_req->common.hlen + copy_len;
1136 15 : nvme_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_QUIESCING);
1137 15 : nvme_tcp_qpair_write_pdu(tqpair, rsp_pdu, nvme_tcp_qpair_send_h2c_term_req_complete, tqpair);
1138 15 : }
1139 :
1140 : static bool
1141 6 : nvme_tcp_qpair_recv_state_valid(struct nvme_tcp_qpair *tqpair)
1142 : {
1143 6 : switch (tqpair->state) {
1144 5 : case NVME_TCP_QPAIR_STATE_FABRIC_CONNECT_SEND:
1145 : case NVME_TCP_QPAIR_STATE_FABRIC_CONNECT_POLL:
1146 : case NVME_TCP_QPAIR_STATE_RUNNING:
1147 5 : return true;
1148 1 : default:
1149 1 : return false;
1150 : }
1151 : }
1152 :
1153 : static void
1154 11 : nvme_tcp_pdu_ch_handle(struct nvme_tcp_qpair *tqpair)
1155 : {
1156 : struct nvme_tcp_pdu *pdu;
1157 11 : uint32_t error_offset = 0;
1158 : enum spdk_nvme_tcp_term_req_fes fes;
1159 11 : uint32_t expected_hlen, hd_len = 0;
1160 11 : bool plen_error = false;
1161 :
1162 11 : pdu = tqpair->recv_pdu;
1163 :
1164 11 : SPDK_DEBUGLOG(nvme, "pdu type = %d\n", pdu->hdr.common.pdu_type);
1165 11 : if (pdu->hdr.common.pdu_type == SPDK_NVME_TCP_PDU_TYPE_IC_RESP) {
1166 5 : if (tqpair->state != NVME_TCP_QPAIR_STATE_INVALID) {
1167 1 : SPDK_ERRLOG("Already received IC_RESP PDU, and we should reject this pdu=%p\n", pdu);
1168 1 : fes = SPDK_NVME_TCP_TERM_REQ_FES_PDU_SEQUENCE_ERROR;
1169 1 : goto err;
1170 : }
1171 4 : expected_hlen = sizeof(struct spdk_nvme_tcp_ic_resp);
1172 4 : if (pdu->hdr.common.plen != expected_hlen) {
1173 1 : plen_error = true;
1174 : }
1175 : } else {
1176 6 : if (spdk_unlikely(!nvme_tcp_qpair_recv_state_valid(tqpair))) {
1177 1 : SPDK_ERRLOG("The TCP/IP tqpair connection is not negotiated\n");
1178 1 : fes = SPDK_NVME_TCP_TERM_REQ_FES_PDU_SEQUENCE_ERROR;
1179 1 : goto err;
1180 : }
1181 :
1182 5 : switch (pdu->hdr.common.pdu_type) {
1183 1 : case SPDK_NVME_TCP_PDU_TYPE_CAPSULE_RESP:
1184 1 : expected_hlen = sizeof(struct spdk_nvme_tcp_rsp);
1185 1 : if (pdu->hdr.common.flags & SPDK_NVME_TCP_CH_FLAGS_HDGSTF) {
1186 1 : hd_len = SPDK_NVME_TCP_DIGEST_LEN;
1187 : }
1188 :
1189 1 : if (pdu->hdr.common.plen != (expected_hlen + hd_len)) {
1190 1 : plen_error = true;
1191 : }
1192 1 : break;
1193 1 : case SPDK_NVME_TCP_PDU_TYPE_C2H_DATA:
1194 1 : expected_hlen = sizeof(struct spdk_nvme_tcp_c2h_data_hdr);
1195 1 : if (pdu->hdr.common.plen < pdu->hdr.common.pdo) {
1196 1 : plen_error = true;
1197 : }
1198 1 : break;
1199 1 : case SPDK_NVME_TCP_PDU_TYPE_C2H_TERM_REQ:
1200 1 : expected_hlen = sizeof(struct spdk_nvme_tcp_term_req_hdr);
1201 1 : if ((pdu->hdr.common.plen <= expected_hlen) ||
1202 0 : (pdu->hdr.common.plen > SPDK_NVME_TCP_TERM_REQ_PDU_MAX_SIZE)) {
1203 1 : plen_error = true;
1204 : }
1205 1 : break;
1206 1 : case SPDK_NVME_TCP_PDU_TYPE_R2T:
1207 1 : expected_hlen = sizeof(struct spdk_nvme_tcp_r2t_hdr);
1208 1 : if (pdu->hdr.common.flags & SPDK_NVME_TCP_CH_FLAGS_HDGSTF) {
1209 1 : hd_len = SPDK_NVME_TCP_DIGEST_LEN;
1210 : }
1211 :
1212 1 : if (pdu->hdr.common.plen != (expected_hlen + hd_len)) {
1213 1 : plen_error = true;
1214 : }
1215 1 : break;
1216 :
1217 1 : default:
1218 1 : SPDK_ERRLOG("Unexpected PDU type 0x%02x\n", tqpair->recv_pdu->hdr.common.pdu_type);
1219 1 : fes = SPDK_NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD;
1220 1 : error_offset = offsetof(struct spdk_nvme_tcp_common_pdu_hdr, pdu_type);
1221 1 : goto err;
1222 : }
1223 : }
1224 :
1225 8 : if (pdu->hdr.common.hlen != expected_hlen) {
1226 1 : SPDK_ERRLOG("Expected PDU header length %u, got %u\n",
1227 : expected_hlen, pdu->hdr.common.hlen);
1228 1 : fes = SPDK_NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD;
1229 1 : error_offset = offsetof(struct spdk_nvme_tcp_common_pdu_hdr, hlen);
1230 1 : goto err;
1231 :
1232 7 : } else if (plen_error) {
1233 5 : fes = SPDK_NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD;
1234 5 : error_offset = offsetof(struct spdk_nvme_tcp_common_pdu_hdr, plen);
1235 5 : goto err;
1236 : } else {
1237 2 : nvme_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_PSH);
1238 2 : nvme_tcp_pdu_calc_psh_len(tqpair->recv_pdu, tqpair->flags.host_hdgst_enable);
1239 2 : return;
1240 : }
1241 9 : err:
1242 9 : nvme_tcp_qpair_send_h2c_term_req(tqpair, pdu, fes, error_offset);
1243 : }
1244 :
1245 : static struct nvme_tcp_req *
1246 2 : get_nvme_active_req_by_cid(struct nvme_tcp_qpair *tqpair, uint32_t cid)
1247 : {
1248 2 : assert(tqpair != NULL);
1249 2 : if ((cid >= tqpair->num_entries) || (tqpair->tcp_reqs[cid].state == NVME_TCP_REQ_FREE)) {
1250 1 : return NULL;
1251 : }
1252 :
1253 1 : return &tqpair->tcp_reqs[cid];
1254 : }
1255 :
1256 : static void
1257 0 : nvme_tcp_recv_payload_seq_cb(void *cb_arg, int status)
1258 : {
1259 0 : struct nvme_tcp_req *treq = cb_arg;
1260 0 : struct nvme_request *req = treq->req;
1261 0 : struct nvme_tcp_qpair *tqpair = treq->tqpair;
1262 : struct nvme_tcp_poll_group *group;
1263 :
1264 0 : assert(treq->ordering.bits.in_progress_accel);
1265 0 : treq->ordering.bits.in_progress_accel = 0;
1266 :
1267 : /* We need to force poll the qpair to make sure any queued requests will be resubmitted, see
1268 : * comment in pdu_write_done(). */
1269 0 : if (tqpair->qpair.poll_group && !tqpair->needs_poll && !STAILQ_EMPTY(&tqpair->qpair.queued_req)) {
1270 0 : group = nvme_tcp_poll_group(tqpair->qpair.poll_group);
1271 0 : TAILQ_INSERT_TAIL(&group->needs_poll, tqpair, link);
1272 0 : tqpair->needs_poll = true;
1273 : }
1274 :
1275 0 : req->accel_sequence = NULL;
1276 0 : if (spdk_unlikely(status != 0)) {
1277 0 : SPDK_ERRLOG("Failed to execute accel sequence: %d\n", status);
1278 0 : treq->rsp.status.sc = SPDK_NVME_SC_INTERNAL_DEVICE_ERROR;
1279 : }
1280 :
1281 0 : nvme_tcp_req_complete_safe(treq);
1282 0 : }
1283 :
1284 : static void
1285 4 : nvme_tcp_c2h_data_payload_handle(struct nvme_tcp_qpair *tqpair,
1286 : struct nvme_tcp_pdu *pdu, uint32_t *reaped)
1287 : {
1288 : struct nvme_tcp_req *tcp_req;
1289 : struct nvme_tcp_poll_group *tgroup;
1290 : struct spdk_nvme_tcp_c2h_data_hdr *c2h_data;
1291 : uint8_t flags;
1292 :
1293 4 : tcp_req = pdu->req;
1294 4 : assert(tcp_req != NULL);
1295 :
1296 4 : SPDK_DEBUGLOG(nvme, "enter\n");
1297 4 : c2h_data = &pdu->hdr.c2h_data;
1298 4 : tcp_req->datao += pdu->data_len;
1299 4 : flags = c2h_data->common.flags;
1300 :
1301 4 : if (flags & SPDK_NVME_TCP_C2H_DATA_FLAGS_LAST_PDU) {
1302 4 : if (tcp_req->datao == tcp_req->req->payload_size) {
1303 2 : tcp_req->rsp.status.p = 0;
1304 : } else {
1305 2 : tcp_req->rsp.status.p = 1;
1306 : }
1307 :
1308 4 : tcp_req->rsp.cid = tcp_req->cid;
1309 4 : tcp_req->rsp.sqid = tqpair->qpair.id;
1310 4 : if (flags & SPDK_NVME_TCP_C2H_DATA_FLAGS_SUCCESS) {
1311 3 : tcp_req->ordering.bits.data_recv = 1;
1312 3 : if (tcp_req->req->accel_sequence != NULL) {
1313 0 : tgroup = nvme_tcp_poll_group(tqpair->qpair.poll_group);
1314 0 : nvme_tcp_accel_reverse_sequence(tgroup, tcp_req->req->accel_sequence);
1315 0 : nvme_tcp_accel_finish_sequence(tgroup, tcp_req,
1316 0 : tcp_req->req->accel_sequence,
1317 : nvme_tcp_recv_payload_seq_cb,
1318 : tcp_req);
1319 0 : return;
1320 : }
1321 :
1322 3 : if (nvme_tcp_req_complete_safe(tcp_req)) {
1323 3 : (*reaped)++;
1324 : }
1325 : }
1326 : }
1327 : }
1328 :
1329 : static const char *spdk_nvme_tcp_term_req_fes_str[] = {
1330 : "Invalid PDU Header Field",
1331 : "PDU Sequence Error",
1332 : "Header Digest Error",
1333 : "Data Transfer Out of Range",
1334 : "Data Transfer Limit Exceeded",
1335 : "Unsupported parameter",
1336 : };
1337 :
1338 : static void
1339 2 : nvme_tcp_c2h_term_req_dump(struct spdk_nvme_tcp_term_req_hdr *c2h_term_req)
1340 : {
1341 2 : SPDK_ERRLOG("Error info of pdu(%p): %s\n", c2h_term_req,
1342 : spdk_nvme_tcp_term_req_fes_str[c2h_term_req->fes]);
1343 2 : if ((c2h_term_req->fes == SPDK_NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD) ||
1344 0 : (c2h_term_req->fes == SPDK_NVME_TCP_TERM_REQ_FES_INVALID_DATA_UNSUPPORTED_PARAMETER)) {
1345 2 : SPDK_DEBUGLOG(nvme, "The offset from the start of the PDU header is %u\n",
1346 : DGET32(c2h_term_req->fei));
1347 : }
1348 : /* we may also need to dump some other info here */
1349 2 : }
1350 :
1351 : static void
1352 2 : nvme_tcp_c2h_term_req_payload_handle(struct nvme_tcp_qpair *tqpair,
1353 : struct nvme_tcp_pdu *pdu)
1354 : {
1355 2 : nvme_tcp_c2h_term_req_dump(&pdu->hdr.term_req);
1356 2 : nvme_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_QUIESCING);
1357 2 : }
1358 :
1359 : static void
1360 2 : _nvme_tcp_pdu_payload_handle(struct nvme_tcp_qpair *tqpair, uint32_t *reaped)
1361 : {
1362 : struct nvme_tcp_pdu *pdu;
1363 :
1364 2 : assert(tqpair != NULL);
1365 2 : pdu = tqpair->recv_pdu;
1366 :
1367 2 : switch (pdu->hdr.common.pdu_type) {
1368 1 : case SPDK_NVME_TCP_PDU_TYPE_C2H_DATA:
1369 1 : nvme_tcp_c2h_data_payload_handle(tqpair, pdu, reaped);
1370 1 : nvme_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_READY);
1371 1 : break;
1372 :
1373 1 : case SPDK_NVME_TCP_PDU_TYPE_C2H_TERM_REQ:
1374 1 : nvme_tcp_c2h_term_req_payload_handle(tqpair, pdu);
1375 1 : break;
1376 :
1377 0 : default:
1378 : /* The code should not go to here */
1379 0 : SPDK_ERRLOG("The code should not go to here\n");
1380 0 : break;
1381 : }
1382 2 : }
1383 :
1384 : static void
1385 0 : nvme_tcp_accel_recv_compute_crc32_done(void *cb_arg, int status)
1386 : {
1387 0 : struct nvme_tcp_req *tcp_req = cb_arg;
1388 : struct nvme_tcp_pdu *pdu;
1389 : struct nvme_tcp_qpair *tqpair;
1390 : int rc;
1391 : struct nvme_tcp_poll_group *pgroup;
1392 0 : int dummy_reaped = 0;
1393 :
1394 0 : pdu = tcp_req->pdu;
1395 0 : assert(pdu != NULL);
1396 :
1397 0 : tqpair = tcp_req->tqpair;
1398 0 : assert(tqpair != NULL);
1399 :
1400 0 : assert(tcp_req->ordering.bits.in_progress_accel);
1401 0 : tcp_req->ordering.bits.in_progress_accel = 0;
1402 :
1403 : /* We need to force poll the qpair to make sure any queued requests will be resubmitted, see
1404 : * comment in pdu_write_done(). */
1405 0 : if (tqpair->qpair.poll_group && !tqpair->needs_poll && !STAILQ_EMPTY(&tqpair->qpair.queued_req)) {
1406 0 : pgroup = nvme_tcp_poll_group(tqpair->qpair.poll_group);
1407 0 : TAILQ_INSERT_TAIL(&pgroup->needs_poll, tqpair, link);
1408 0 : tqpair->needs_poll = true;
1409 : }
1410 :
1411 0 : if (spdk_unlikely(status)) {
1412 0 : SPDK_ERRLOG("Failed to compute the data digest for pdu =%p\n", pdu);
1413 0 : tcp_req->rsp.status.sc = SPDK_NVME_SC_COMMAND_TRANSIENT_TRANSPORT_ERROR;
1414 0 : goto end;
1415 : }
1416 :
1417 0 : pdu->data_digest_crc32 ^= SPDK_CRC32C_XOR;
1418 0 : rc = MATCH_DIGEST_WORD(pdu->data_digest, pdu->data_digest_crc32);
1419 0 : if (rc == 0) {
1420 0 : SPDK_ERRLOG("data digest error on tqpair=(%p) with pdu=%p\n", tqpair, pdu);
1421 0 : tcp_req->rsp.status.sc = SPDK_NVME_SC_COMMAND_TRANSIENT_TRANSPORT_ERROR;
1422 : }
1423 :
1424 0 : end:
1425 0 : nvme_tcp_c2h_data_payload_handle(tqpair, tcp_req->pdu, &dummy_reaped);
1426 0 : }
1427 :
1428 : static void
1429 0 : nvme_tcp_req_copy_pdu(struct nvme_tcp_req *treq, struct nvme_tcp_pdu *pdu)
1430 : {
1431 0 : treq->pdu->hdr = pdu->hdr;
1432 0 : treq->pdu->req = treq;
1433 0 : memcpy(treq->pdu->data_digest, pdu->data_digest, sizeof(pdu->data_digest));
1434 0 : memcpy(treq->pdu->data_iov, pdu->data_iov, sizeof(pdu->data_iov[0]) * pdu->data_iovcnt);
1435 0 : treq->pdu->data_iovcnt = pdu->data_iovcnt;
1436 0 : treq->pdu->data_len = pdu->data_len;
1437 0 : }
1438 :
1439 : static void
1440 0 : nvme_tcp_accel_seq_recv_compute_crc32_done(void *cb_arg)
1441 : {
1442 0 : struct nvme_tcp_req *treq = cb_arg;
1443 0 : struct nvme_tcp_qpair *tqpair = treq->tqpair;
1444 0 : struct nvme_tcp_pdu *pdu = treq->pdu;
1445 : bool result;
1446 :
1447 0 : pdu->data_digest_crc32 ^= SPDK_CRC32C_XOR;
1448 0 : result = MATCH_DIGEST_WORD(pdu->data_digest, pdu->data_digest_crc32);
1449 0 : if (spdk_unlikely(!result)) {
1450 0 : SPDK_ERRLOG("data digest error on tqpair=(%p)\n", tqpair);
1451 0 : treq->rsp.status.sc = SPDK_NVME_SC_COMMAND_TRANSIENT_TRANSPORT_ERROR;
1452 : }
1453 0 : }
1454 :
1455 : static bool
1456 0 : nvme_tcp_accel_recv_compute_crc32(struct nvme_tcp_req *treq, struct nvme_tcp_pdu *pdu)
1457 : {
1458 0 : struct nvme_tcp_qpair *tqpair = treq->tqpair;
1459 0 : struct nvme_tcp_poll_group *tgroup = nvme_tcp_poll_group(tqpair->qpair.poll_group);
1460 0 : struct nvme_request *req = treq->req;
1461 0 : int rc, dummy = 0;
1462 :
1463 : /* Only support this limited case that the request has only one c2h pdu */
1464 0 : if (spdk_unlikely(nvme_qpair_get_state(&tqpair->qpair) < NVME_QPAIR_CONNECTED ||
1465 : tqpair->qpair.poll_group == NULL || pdu->dif_ctx != NULL ||
1466 : pdu->data_len % SPDK_NVME_TCP_DIGEST_ALIGNMENT != 0 ||
1467 : pdu->data_len != req->payload_size)) {
1468 0 : return false;
1469 : }
1470 :
1471 0 : if (tgroup->group.group->accel_fn_table.append_crc32c != NULL) {
1472 0 : nvme_tcp_req_copy_pdu(treq, pdu);
1473 0 : rc = nvme_tcp_accel_append_crc32c(tgroup, &req->accel_sequence,
1474 0 : &treq->pdu->data_digest_crc32,
1475 0 : treq->pdu->data_iov, treq->pdu->data_iovcnt, 0,
1476 : nvme_tcp_accel_seq_recv_compute_crc32_done, treq);
1477 0 : if (spdk_unlikely(rc != 0)) {
1478 : /* If accel is out of resources, fall back to non-accelerated crc32 */
1479 0 : if (rc == -ENOMEM) {
1480 0 : return false;
1481 : }
1482 :
1483 0 : SPDK_ERRLOG("Failed to append crc32c operation: %d\n", rc);
1484 0 : treq->rsp.status.sc = SPDK_NVME_SC_COMMAND_TRANSIENT_TRANSPORT_ERROR;
1485 : }
1486 :
1487 0 : nvme_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_READY);
1488 0 : nvme_tcp_c2h_data_payload_handle(tqpair, treq->pdu, &dummy);
1489 0 : return true;
1490 0 : } else if (tgroup->group.group->accel_fn_table.submit_accel_crc32c != NULL) {
1491 0 : nvme_tcp_req_copy_pdu(treq, pdu);
1492 0 : nvme_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_READY);
1493 0 : nvme_tcp_accel_submit_crc32c(tgroup, treq, &treq->pdu->data_digest_crc32,
1494 0 : treq->pdu->data_iov, treq->pdu->data_iovcnt, 0,
1495 : nvme_tcp_accel_recv_compute_crc32_done, treq);
1496 0 : return true;
1497 : }
1498 :
1499 0 : return false;
1500 : }
1501 :
1502 : static void
1503 2 : nvme_tcp_pdu_payload_handle(struct nvme_tcp_qpair *tqpair,
1504 : uint32_t *reaped)
1505 : {
1506 2 : int rc = 0;
1507 2 : struct nvme_tcp_pdu *pdu = tqpair->recv_pdu;
1508 : uint32_t crc32c;
1509 2 : struct nvme_tcp_req *tcp_req = pdu->req;
1510 :
1511 2 : assert(tqpair->recv_state == NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_PAYLOAD);
1512 2 : SPDK_DEBUGLOG(nvme, "enter\n");
1513 :
1514 : /* The request can be NULL, e.g. in case of C2HTermReq */
1515 2 : if (spdk_likely(tcp_req != NULL)) {
1516 2 : tcp_req->expected_datao += pdu->data_len;
1517 : }
1518 :
1519 : /* check data digest if need */
1520 2 : if (pdu->ddgst_enable) {
1521 : /* But if the data digest is enabled, tcp_req cannot be NULL */
1522 0 : assert(tcp_req != NULL);
1523 0 : if (nvme_tcp_accel_recv_compute_crc32(tcp_req, pdu)) {
1524 0 : return;
1525 : }
1526 :
1527 0 : crc32c = nvme_tcp_pdu_calc_data_digest(pdu);
1528 0 : crc32c = crc32c ^ SPDK_CRC32C_XOR;
1529 0 : rc = MATCH_DIGEST_WORD(pdu->data_digest, crc32c);
1530 0 : if (rc == 0) {
1531 0 : SPDK_ERRLOG("data digest error on tqpair=(%p) with pdu=%p\n", tqpair, pdu);
1532 0 : tcp_req = pdu->req;
1533 0 : assert(tcp_req != NULL);
1534 0 : tcp_req->rsp.status.sc = SPDK_NVME_SC_COMMAND_TRANSIENT_TRANSPORT_ERROR;
1535 : }
1536 : }
1537 :
1538 2 : _nvme_tcp_pdu_payload_handle(tqpair, reaped);
1539 : }
1540 :
1541 : static void
1542 0 : nvme_tcp_send_icreq_complete(void *cb_arg)
1543 : {
1544 0 : struct nvme_tcp_qpair *tqpair = cb_arg;
1545 :
1546 0 : SPDK_DEBUGLOG(nvme, "Complete the icreq send for tqpair=%p %u\n", tqpair, tqpair->qpair.id);
1547 :
1548 0 : tqpair->flags.icreq_send_ack = true;
1549 :
1550 0 : if (tqpair->state == NVME_TCP_QPAIR_STATE_INITIALIZING) {
1551 0 : SPDK_DEBUGLOG(nvme, "tqpair %p %u, finalize icresp\n", tqpair, tqpair->qpair.id);
1552 0 : tqpair->state = NVME_TCP_QPAIR_STATE_FABRIC_CONNECT_SEND;
1553 : }
1554 0 : }
1555 :
1556 : static void
1557 6 : nvme_tcp_icresp_handle(struct nvme_tcp_qpair *tqpair,
1558 : struct nvme_tcp_pdu *pdu)
1559 : {
1560 6 : struct spdk_nvme_tcp_ic_resp *ic_resp = &pdu->hdr.ic_resp;
1561 6 : uint32_t error_offset = 0;
1562 : enum spdk_nvme_tcp_term_req_fes fes;
1563 : int recv_buf_size;
1564 :
1565 : /* Only PFV 0 is defined currently */
1566 6 : if (ic_resp->pfv != 0) {
1567 1 : SPDK_ERRLOG("Expected ICResp PFV %u, got %u\n", 0u, ic_resp->pfv);
1568 1 : fes = SPDK_NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD;
1569 1 : error_offset = offsetof(struct spdk_nvme_tcp_ic_resp, pfv);
1570 1 : goto end;
1571 : }
1572 :
1573 5 : if (ic_resp->maxh2cdata < NVME_TCP_PDU_H2C_MIN_DATA_SIZE) {
1574 1 : SPDK_ERRLOG("Expected ICResp maxh2cdata >=%u, got %u\n", NVME_TCP_PDU_H2C_MIN_DATA_SIZE,
1575 : ic_resp->maxh2cdata);
1576 1 : fes = SPDK_NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD;
1577 1 : error_offset = offsetof(struct spdk_nvme_tcp_ic_resp, maxh2cdata);
1578 1 : goto end;
1579 : }
1580 4 : tqpair->maxh2cdata = ic_resp->maxh2cdata;
1581 :
1582 4 : if (ic_resp->cpda > SPDK_NVME_TCP_CPDA_MAX) {
1583 1 : SPDK_ERRLOG("Expected ICResp cpda <=%u, got %u\n", SPDK_NVME_TCP_CPDA_MAX, ic_resp->cpda);
1584 1 : fes = SPDK_NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD;
1585 1 : error_offset = offsetof(struct spdk_nvme_tcp_ic_resp, cpda);
1586 1 : goto end;
1587 : }
1588 3 : tqpair->cpda = ic_resp->cpda;
1589 :
1590 3 : tqpair->flags.host_hdgst_enable = ic_resp->dgst.bits.hdgst_enable ? true : false;
1591 3 : tqpair->flags.host_ddgst_enable = ic_resp->dgst.bits.ddgst_enable ? true : false;
1592 3 : SPDK_DEBUGLOG(nvme, "host_hdgst_enable: %u\n", tqpair->flags.host_hdgst_enable);
1593 3 : SPDK_DEBUGLOG(nvme, "host_ddgst_enable: %u\n", tqpair->flags.host_ddgst_enable);
1594 :
1595 : /* Now that we know whether digests are enabled, properly size the receive buffer to
1596 : * handle several incoming 4K read commands according to SPDK_NVMF_TCP_RECV_BUF_SIZE_FACTOR
1597 : * parameter. */
1598 3 : recv_buf_size = 0x1000 + sizeof(struct spdk_nvme_tcp_c2h_data_hdr);
1599 :
1600 3 : if (tqpair->flags.host_hdgst_enable) {
1601 2 : recv_buf_size += SPDK_NVME_TCP_DIGEST_LEN;
1602 : }
1603 :
1604 3 : if (tqpair->flags.host_ddgst_enable) {
1605 2 : recv_buf_size += SPDK_NVME_TCP_DIGEST_LEN;
1606 : }
1607 :
1608 3 : if (spdk_sock_set_recvbuf(tqpair->sock, recv_buf_size * SPDK_NVMF_TCP_RECV_BUF_SIZE_FACTOR) < 0) {
1609 0 : SPDK_WARNLOG("Unable to allocate enough memory for receive buffer on tqpair=%p with size=%d\n",
1610 : tqpair,
1611 : recv_buf_size);
1612 : /* Not fatal. */
1613 : }
1614 :
1615 3 : nvme_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_READY);
1616 :
1617 3 : if (!tqpair->flags.icreq_send_ack) {
1618 1 : tqpair->state = NVME_TCP_QPAIR_STATE_INITIALIZING;
1619 1 : SPDK_DEBUGLOG(nvme, "tqpair %p %u, waiting icreq ack\n", tqpair, tqpair->qpair.id);
1620 1 : return;
1621 : }
1622 :
1623 2 : tqpair->state = NVME_TCP_QPAIR_STATE_FABRIC_CONNECT_SEND;
1624 2 : return;
1625 3 : end:
1626 3 : nvme_tcp_qpair_send_h2c_term_req(tqpair, pdu, fes, error_offset);
1627 : }
1628 :
1629 : static void
1630 2 : nvme_tcp_capsule_resp_hdr_handle(struct nvme_tcp_qpair *tqpair, struct nvme_tcp_pdu *pdu,
1631 : uint32_t *reaped)
1632 : {
1633 : struct nvme_tcp_req *tcp_req;
1634 : struct nvme_tcp_poll_group *tgroup;
1635 2 : struct spdk_nvme_tcp_rsp *capsule_resp = &pdu->hdr.capsule_resp;
1636 2 : uint32_t cid, error_offset = 0;
1637 : enum spdk_nvme_tcp_term_req_fes fes;
1638 :
1639 2 : SPDK_DEBUGLOG(nvme, "enter\n");
1640 2 : cid = capsule_resp->rccqe.cid;
1641 2 : tcp_req = get_nvme_active_req_by_cid(tqpair, cid);
1642 :
1643 2 : if (!tcp_req) {
1644 1 : SPDK_ERRLOG("no tcp_req is found with cid=%u for tqpair=%p\n", cid, tqpair);
1645 1 : fes = SPDK_NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD;
1646 1 : error_offset = offsetof(struct spdk_nvme_tcp_rsp, rccqe);
1647 1 : goto end;
1648 : }
1649 :
1650 1 : assert(tcp_req->req != NULL);
1651 :
1652 1 : tcp_req->rsp = capsule_resp->rccqe;
1653 1 : tcp_req->ordering.bits.data_recv = 1;
1654 :
1655 : /* Recv the pdu again */
1656 1 : nvme_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_READY);
1657 :
1658 1 : if (tcp_req->req->accel_sequence != NULL) {
1659 0 : tgroup = nvme_tcp_poll_group(tqpair->qpair.poll_group);
1660 0 : nvme_tcp_accel_reverse_sequence(tgroup, tcp_req->req->accel_sequence);
1661 0 : nvme_tcp_accel_finish_sequence(tgroup, tcp_req, tcp_req->req->accel_sequence,
1662 : nvme_tcp_recv_payload_seq_cb, tcp_req);
1663 0 : return;
1664 : }
1665 :
1666 1 : if (nvme_tcp_req_complete_safe(tcp_req)) {
1667 1 : (*reaped)++;
1668 : }
1669 :
1670 1 : return;
1671 :
1672 1 : end:
1673 1 : nvme_tcp_qpair_send_h2c_term_req(tqpair, pdu, fes, error_offset);
1674 : }
1675 :
1676 : static void
1677 0 : nvme_tcp_c2h_term_req_hdr_handle(struct nvme_tcp_qpair *tqpair,
1678 : struct nvme_tcp_pdu *pdu)
1679 : {
1680 0 : struct spdk_nvme_tcp_term_req_hdr *c2h_term_req = &pdu->hdr.term_req;
1681 0 : uint32_t error_offset = 0;
1682 : enum spdk_nvme_tcp_term_req_fes fes;
1683 :
1684 0 : if (c2h_term_req->fes > SPDK_NVME_TCP_TERM_REQ_FES_INVALID_DATA_UNSUPPORTED_PARAMETER) {
1685 0 : SPDK_ERRLOG("Fatal Error Status(FES) is unknown for c2h_term_req pdu=%p\n", pdu);
1686 0 : fes = SPDK_NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD;
1687 0 : error_offset = offsetof(struct spdk_nvme_tcp_term_req_hdr, fes);
1688 0 : goto end;
1689 : }
1690 :
1691 : /* set the data buffer */
1692 0 : nvme_tcp_pdu_set_data(pdu, (uint8_t *)pdu->hdr.raw + c2h_term_req->common.hlen,
1693 0 : c2h_term_req->common.plen - c2h_term_req->common.hlen);
1694 0 : nvme_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_PAYLOAD);
1695 0 : return;
1696 0 : end:
1697 0 : nvme_tcp_qpair_send_h2c_term_req(tqpair, pdu, fes, error_offset);
1698 : }
1699 :
1700 : static void
1701 0 : nvme_tcp_c2h_data_hdr_handle(struct nvme_tcp_qpair *tqpair, struct nvme_tcp_pdu *pdu)
1702 : {
1703 : struct nvme_tcp_req *tcp_req;
1704 0 : struct spdk_nvme_tcp_c2h_data_hdr *c2h_data = &pdu->hdr.c2h_data;
1705 0 : uint32_t error_offset = 0;
1706 : enum spdk_nvme_tcp_term_req_fes fes;
1707 0 : int flags = c2h_data->common.flags;
1708 : int rc;
1709 :
1710 0 : SPDK_DEBUGLOG(nvme, "enter\n");
1711 0 : SPDK_DEBUGLOG(nvme, "c2h_data info on tqpair(%p): datao=%u, datal=%u, cccid=%d\n",
1712 : tqpair, c2h_data->datao, c2h_data->datal, c2h_data->cccid);
1713 0 : tcp_req = get_nvme_active_req_by_cid(tqpair, c2h_data->cccid);
1714 0 : if (!tcp_req) {
1715 0 : SPDK_ERRLOG("no tcp_req found for c2hdata cid=%d\n", c2h_data->cccid);
1716 0 : fes = SPDK_NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD;
1717 0 : error_offset = offsetof(struct spdk_nvme_tcp_c2h_data_hdr, cccid);
1718 0 : goto end;
1719 :
1720 : }
1721 :
1722 0 : SPDK_DEBUGLOG(nvme, "tcp_req(%p) on tqpair(%p): expected_datao=%u, payload_size=%u\n",
1723 : tcp_req, tqpair, tcp_req->expected_datao, tcp_req->req->payload_size);
1724 :
1725 0 : if (spdk_unlikely((flags & SPDK_NVME_TCP_C2H_DATA_FLAGS_SUCCESS) &&
1726 : !(flags & SPDK_NVME_TCP_C2H_DATA_FLAGS_LAST_PDU))) {
1727 0 : SPDK_ERRLOG("Invalid flag flags=%d in c2h_data=%p\n", flags, c2h_data);
1728 0 : fes = SPDK_NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD;
1729 0 : error_offset = offsetof(struct spdk_nvme_tcp_c2h_data_hdr, common);
1730 0 : goto end;
1731 : }
1732 :
1733 0 : if (c2h_data->datal > tcp_req->req->payload_size) {
1734 0 : SPDK_ERRLOG("Invalid datal for tcp_req(%p), datal(%u) exceeds payload_size(%u)\n",
1735 : tcp_req, c2h_data->datal, tcp_req->req->payload_size);
1736 0 : fes = SPDK_NVME_TCP_TERM_REQ_FES_DATA_TRANSFER_OUT_OF_RANGE;
1737 0 : goto end;
1738 : }
1739 :
1740 0 : if (tcp_req->expected_datao != c2h_data->datao) {
1741 0 : SPDK_ERRLOG("Invalid datao for tcp_req(%p), received datal(%u) != expected datao(%u) in tcp_req\n",
1742 : tcp_req, c2h_data->datao, tcp_req->expected_datao);
1743 0 : fes = SPDK_NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD;
1744 0 : error_offset = offsetof(struct spdk_nvme_tcp_c2h_data_hdr, datao);
1745 0 : goto end;
1746 : }
1747 :
1748 0 : if ((c2h_data->datao + c2h_data->datal) > tcp_req->req->payload_size) {
1749 0 : SPDK_ERRLOG("Invalid data range for tcp_req(%p), received (datao(%u) + datal(%u)) > datao(%u) in tcp_req\n",
1750 : tcp_req, c2h_data->datao, c2h_data->datal, tcp_req->req->payload_size);
1751 0 : fes = SPDK_NVME_TCP_TERM_REQ_FES_DATA_TRANSFER_OUT_OF_RANGE;
1752 0 : error_offset = offsetof(struct spdk_nvme_tcp_c2h_data_hdr, datal);
1753 0 : goto end;
1754 :
1755 : }
1756 :
1757 0 : if (nvme_payload_type(&tcp_req->req->payload) == NVME_PAYLOAD_TYPE_CONTIG) {
1758 0 : rc = nvme_tcp_build_contig_request(tqpair, tcp_req);
1759 : } else {
1760 0 : assert(nvme_payload_type(&tcp_req->req->payload) == NVME_PAYLOAD_TYPE_SGL);
1761 0 : rc = nvme_tcp_build_sgl_request(tqpair, tcp_req);
1762 : }
1763 :
1764 0 : if (rc) {
1765 : /* Not the right error message but at least it handles the failure. */
1766 0 : fes = SPDK_NVME_TCP_TERM_REQ_FES_DATA_TRANSFER_LIMIT_EXCEEDED;
1767 0 : goto end;
1768 : }
1769 :
1770 0 : nvme_tcp_pdu_set_data_buf(pdu, tcp_req->iov, tcp_req->iovcnt,
1771 : c2h_data->datao, c2h_data->datal);
1772 0 : pdu->req = tcp_req;
1773 :
1774 0 : nvme_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_PAYLOAD);
1775 0 : return;
1776 :
1777 0 : end:
1778 0 : nvme_tcp_qpair_send_h2c_term_req(tqpair, pdu, fes, error_offset);
1779 : }
1780 :
1781 : static void
1782 0 : nvme_tcp_qpair_h2c_data_send_complete(void *cb_arg)
1783 : {
1784 0 : struct nvme_tcp_req *tcp_req = cb_arg;
1785 :
1786 0 : assert(tcp_req != NULL);
1787 :
1788 0 : tcp_req->ordering.bits.send_ack = 1;
1789 0 : if (tcp_req->r2tl_remain) {
1790 0 : nvme_tcp_send_h2c_data(tcp_req);
1791 : } else {
1792 0 : assert(tcp_req->active_r2ts > 0);
1793 0 : tcp_req->active_r2ts--;
1794 0 : tcp_req->state = NVME_TCP_REQ_ACTIVE;
1795 :
1796 0 : if (tcp_req->ordering.bits.r2t_waiting_h2c_complete) {
1797 0 : tcp_req->ordering.bits.r2t_waiting_h2c_complete = 0;
1798 0 : SPDK_DEBUGLOG(nvme, "tcp_req %p: continue r2t\n", tcp_req);
1799 0 : assert(tcp_req->active_r2ts > 0);
1800 0 : tcp_req->ttag = tcp_req->ttag_r2t_next;
1801 0 : tcp_req->r2tl_remain = tcp_req->r2tl_remain_next;
1802 0 : tcp_req->state = NVME_TCP_REQ_ACTIVE_R2T;
1803 0 : nvme_tcp_send_h2c_data(tcp_req);
1804 0 : return;
1805 : }
1806 :
1807 0 : if (tcp_req->ordering.bits.domain_in_use) {
1808 0 : spdk_memory_domain_invalidate_data(tcp_req->req->payload.opts->memory_domain,
1809 0 : tcp_req->req->payload.opts->memory_domain_ctx, tcp_req->iov, tcp_req->iovcnt);
1810 : }
1811 :
1812 : /* Need also call this function to free the resource */
1813 0 : nvme_tcp_req_complete_safe(tcp_req);
1814 : }
1815 : }
1816 :
1817 : static void
1818 0 : nvme_tcp_send_h2c_data(struct nvme_tcp_req *tcp_req)
1819 : {
1820 0 : struct nvme_tcp_qpair *tqpair = nvme_tcp_qpair(tcp_req->req->qpair);
1821 : struct nvme_tcp_pdu *rsp_pdu;
1822 : struct spdk_nvme_tcp_h2c_data_hdr *h2c_data;
1823 : uint32_t plen, pdo, alignment;
1824 :
1825 : /* Reinit the send_ack and h2c_send_waiting_ack bits */
1826 0 : tcp_req->ordering.bits.send_ack = 0;
1827 0 : tcp_req->ordering.bits.h2c_send_waiting_ack = 0;
1828 0 : rsp_pdu = tcp_req->pdu;
1829 0 : memset(rsp_pdu, 0, sizeof(*rsp_pdu));
1830 0 : rsp_pdu->req = tcp_req;
1831 0 : h2c_data = &rsp_pdu->hdr.h2c_data;
1832 :
1833 0 : h2c_data->common.pdu_type = SPDK_NVME_TCP_PDU_TYPE_H2C_DATA;
1834 0 : plen = h2c_data->common.hlen = sizeof(*h2c_data);
1835 0 : h2c_data->cccid = tcp_req->cid;
1836 0 : h2c_data->ttag = tcp_req->ttag;
1837 0 : h2c_data->datao = tcp_req->datao;
1838 :
1839 0 : h2c_data->datal = spdk_min(tcp_req->r2tl_remain, tqpair->maxh2cdata);
1840 0 : nvme_tcp_pdu_set_data_buf(rsp_pdu, tcp_req->iov, tcp_req->iovcnt,
1841 : h2c_data->datao, h2c_data->datal);
1842 0 : tcp_req->r2tl_remain -= h2c_data->datal;
1843 :
1844 0 : if (tqpair->flags.host_hdgst_enable) {
1845 0 : h2c_data->common.flags |= SPDK_NVME_TCP_CH_FLAGS_HDGSTF;
1846 0 : plen += SPDK_NVME_TCP_DIGEST_LEN;
1847 : }
1848 :
1849 0 : rsp_pdu->padding_len = 0;
1850 0 : pdo = plen;
1851 0 : if (tqpair->cpda) {
1852 0 : alignment = (tqpair->cpda + 1) << 2;
1853 0 : if (alignment > plen) {
1854 0 : rsp_pdu->padding_len = alignment - plen;
1855 0 : pdo = plen = alignment;
1856 : }
1857 : }
1858 :
1859 0 : h2c_data->common.pdo = pdo;
1860 0 : plen += h2c_data->datal;
1861 0 : if (tqpair->flags.host_ddgst_enable) {
1862 0 : h2c_data->common.flags |= SPDK_NVME_TCP_CH_FLAGS_DDGSTF;
1863 0 : plen += SPDK_NVME_TCP_DIGEST_LEN;
1864 : }
1865 :
1866 0 : h2c_data->common.plen = plen;
1867 0 : tcp_req->datao += h2c_data->datal;
1868 0 : if (!tcp_req->r2tl_remain) {
1869 0 : h2c_data->common.flags |= SPDK_NVME_TCP_H2C_DATA_FLAGS_LAST_PDU;
1870 : }
1871 :
1872 0 : SPDK_DEBUGLOG(nvme, "h2c_data info: datao=%u, datal=%u, pdu_len=%u for tqpair=%p\n",
1873 : h2c_data->datao, h2c_data->datal, h2c_data->common.plen, tqpair);
1874 :
1875 0 : nvme_tcp_qpair_write_pdu(tqpair, rsp_pdu, nvme_tcp_qpair_h2c_data_send_complete, tcp_req);
1876 0 : }
1877 :
1878 : static void
1879 0 : nvme_tcp_r2t_hdr_handle(struct nvme_tcp_qpair *tqpair, struct nvme_tcp_pdu *pdu)
1880 : {
1881 : struct nvme_tcp_req *tcp_req;
1882 0 : struct spdk_nvme_tcp_r2t_hdr *r2t = &pdu->hdr.r2t;
1883 0 : uint32_t cid, error_offset = 0;
1884 : enum spdk_nvme_tcp_term_req_fes fes;
1885 :
1886 0 : SPDK_DEBUGLOG(nvme, "enter\n");
1887 0 : cid = r2t->cccid;
1888 0 : tcp_req = get_nvme_active_req_by_cid(tqpair, cid);
1889 0 : if (!tcp_req) {
1890 0 : SPDK_ERRLOG("Cannot find tcp_req for tqpair=%p\n", tqpair);
1891 0 : fes = SPDK_NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD;
1892 0 : error_offset = offsetof(struct spdk_nvme_tcp_r2t_hdr, cccid);
1893 0 : goto end;
1894 : }
1895 :
1896 0 : SPDK_DEBUGLOG(nvme, "r2t info: r2to=%u, r2tl=%u for tqpair=%p\n", r2t->r2to, r2t->r2tl,
1897 : tqpair);
1898 :
1899 0 : if (tcp_req->state == NVME_TCP_REQ_ACTIVE) {
1900 0 : assert(tcp_req->active_r2ts == 0);
1901 0 : tcp_req->state = NVME_TCP_REQ_ACTIVE_R2T;
1902 : }
1903 :
1904 0 : if (tcp_req->datao != r2t->r2to) {
1905 0 : fes = SPDK_NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD;
1906 0 : error_offset = offsetof(struct spdk_nvme_tcp_r2t_hdr, r2to);
1907 0 : goto end;
1908 :
1909 : }
1910 :
1911 0 : if ((r2t->r2tl + r2t->r2to) > tcp_req->req->payload_size) {
1912 0 : SPDK_ERRLOG("Invalid R2T info for tcp_req=%p: (r2to(%u) + r2tl(%u)) exceeds payload_size(%u)\n",
1913 : tcp_req, r2t->r2to, r2t->r2tl, tqpair->maxh2cdata);
1914 0 : fes = SPDK_NVME_TCP_TERM_REQ_FES_DATA_TRANSFER_OUT_OF_RANGE;
1915 0 : error_offset = offsetof(struct spdk_nvme_tcp_r2t_hdr, r2tl);
1916 0 : goto end;
1917 : }
1918 :
1919 0 : tcp_req->active_r2ts++;
1920 0 : if (spdk_unlikely(tcp_req->active_r2ts > tqpair->maxr2t)) {
1921 0 : if (tcp_req->state == NVME_TCP_REQ_ACTIVE_R2T && !tcp_req->ordering.bits.send_ack) {
1922 : /* We receive a subsequent R2T while we are waiting for H2C transfer to complete */
1923 0 : SPDK_DEBUGLOG(nvme, "received a subsequent R2T\n");
1924 0 : assert(tcp_req->active_r2ts == tqpair->maxr2t + 1);
1925 0 : tcp_req->ttag_r2t_next = r2t->ttag;
1926 0 : tcp_req->r2tl_remain_next = r2t->r2tl;
1927 0 : tcp_req->ordering.bits.r2t_waiting_h2c_complete = 1;
1928 0 : nvme_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_READY);
1929 0 : return;
1930 : } else {
1931 0 : fes = SPDK_NVME_TCP_TERM_REQ_FES_R2T_LIMIT_EXCEEDED;
1932 0 : SPDK_ERRLOG("Invalid R2T: Maximum number of R2T exceeded! Max: %u for tqpair=%p\n", tqpair->maxr2t,
1933 : tqpair);
1934 0 : goto end;
1935 : }
1936 : }
1937 :
1938 0 : tcp_req->ttag = r2t->ttag;
1939 0 : tcp_req->r2tl_remain = r2t->r2tl;
1940 0 : nvme_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_READY);
1941 :
1942 0 : if (spdk_likely(tcp_req->ordering.bits.send_ack)) {
1943 0 : nvme_tcp_send_h2c_data(tcp_req);
1944 : } else {
1945 0 : tcp_req->ordering.bits.h2c_send_waiting_ack = 1;
1946 : }
1947 :
1948 0 : return;
1949 :
1950 0 : end:
1951 0 : nvme_tcp_qpair_send_h2c_term_req(tqpair, pdu, fes, error_offset);
1952 :
1953 : }
1954 :
1955 : static void
1956 1 : nvme_tcp_pdu_psh_handle(struct nvme_tcp_qpair *tqpair, uint32_t *reaped)
1957 : {
1958 : struct nvme_tcp_pdu *pdu;
1959 : int rc;
1960 1 : uint32_t crc32c, error_offset = 0;
1961 : enum spdk_nvme_tcp_term_req_fes fes;
1962 :
1963 1 : assert(tqpair->recv_state == NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_PSH);
1964 1 : pdu = tqpair->recv_pdu;
1965 :
1966 1 : SPDK_DEBUGLOG(nvme, "enter: pdu type =%u\n", pdu->hdr.common.pdu_type);
1967 : /* check header digest if needed */
1968 1 : if (pdu->has_hdgst) {
1969 0 : crc32c = nvme_tcp_pdu_calc_header_digest(pdu);
1970 0 : rc = MATCH_DIGEST_WORD((uint8_t *)pdu->hdr.raw + pdu->hdr.common.hlen, crc32c);
1971 0 : if (rc == 0) {
1972 0 : SPDK_ERRLOG("header digest error on tqpair=(%p) with pdu=%p\n", tqpair, pdu);
1973 0 : fes = SPDK_NVME_TCP_TERM_REQ_FES_HDGST_ERROR;
1974 0 : nvme_tcp_qpair_send_h2c_term_req(tqpair, pdu, fes, error_offset);
1975 0 : return;
1976 :
1977 : }
1978 : }
1979 :
1980 1 : switch (pdu->hdr.common.pdu_type) {
1981 1 : case SPDK_NVME_TCP_PDU_TYPE_IC_RESP:
1982 1 : nvme_tcp_icresp_handle(tqpair, pdu);
1983 1 : break;
1984 0 : case SPDK_NVME_TCP_PDU_TYPE_CAPSULE_RESP:
1985 0 : nvme_tcp_capsule_resp_hdr_handle(tqpair, pdu, reaped);
1986 0 : break;
1987 0 : case SPDK_NVME_TCP_PDU_TYPE_C2H_DATA:
1988 0 : nvme_tcp_c2h_data_hdr_handle(tqpair, pdu);
1989 0 : break;
1990 :
1991 0 : case SPDK_NVME_TCP_PDU_TYPE_C2H_TERM_REQ:
1992 0 : nvme_tcp_c2h_term_req_hdr_handle(tqpair, pdu);
1993 0 : break;
1994 0 : case SPDK_NVME_TCP_PDU_TYPE_R2T:
1995 0 : nvme_tcp_r2t_hdr_handle(tqpair, pdu);
1996 0 : break;
1997 :
1998 0 : default:
1999 0 : SPDK_ERRLOG("Unexpected PDU type 0x%02x\n", tqpair->recv_pdu->hdr.common.pdu_type);
2000 0 : fes = SPDK_NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD;
2001 0 : error_offset = 1;
2002 0 : nvme_tcp_qpair_send_h2c_term_req(tqpair, pdu, fes, error_offset);
2003 0 : break;
2004 : }
2005 :
2006 : }
2007 :
2008 : static int
2009 4 : nvme_tcp_read_pdu(struct nvme_tcp_qpair *tqpair, uint32_t *reaped, uint32_t max_completions)
2010 : {
2011 4 : int rc = 0;
2012 : struct nvme_tcp_pdu *pdu;
2013 : uint32_t data_len;
2014 : enum nvme_tcp_pdu_recv_state prev_state;
2015 :
2016 4 : *reaped = tqpair->async_complete;
2017 4 : tqpair->async_complete = 0;
2018 :
2019 : /* The loop here is to allow for several back-to-back state changes. */
2020 : do {
2021 8 : if (*reaped >= max_completions) {
2022 0 : break;
2023 : }
2024 :
2025 8 : prev_state = tqpair->recv_state;
2026 8 : pdu = tqpair->recv_pdu;
2027 8 : switch (tqpair->recv_state) {
2028 : /* If in a new state */
2029 1 : case NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_READY:
2030 1 : memset(pdu, 0, sizeof(struct nvme_tcp_pdu));
2031 1 : nvme_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_CH);
2032 1 : break;
2033 : /* Wait for the pdu common header */
2034 3 : case NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_CH:
2035 3 : assert(pdu->ch_valid_bytes < sizeof(struct spdk_nvme_tcp_common_pdu_hdr));
2036 3 : rc = nvme_tcp_read_data(tqpair->sock,
2037 3 : sizeof(struct spdk_nvme_tcp_common_pdu_hdr) - pdu->ch_valid_bytes,
2038 3 : (uint8_t *)&pdu->hdr.common + pdu->ch_valid_bytes);
2039 3 : if (rc < 0) {
2040 0 : nvme_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_QUIESCING);
2041 0 : break;
2042 : }
2043 3 : pdu->ch_valid_bytes += rc;
2044 3 : if (pdu->ch_valid_bytes < sizeof(struct spdk_nvme_tcp_common_pdu_hdr)) {
2045 2 : return NVME_TCP_PDU_IN_PROGRESS;
2046 : }
2047 :
2048 : /* The command header of this PDU has now been read from the socket. */
2049 1 : nvme_tcp_pdu_ch_handle(tqpair);
2050 1 : break;
2051 : /* Wait for the pdu specific header */
2052 1 : case NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_PSH:
2053 1 : assert(pdu->psh_valid_bytes < pdu->psh_len);
2054 1 : rc = nvme_tcp_read_data(tqpair->sock,
2055 1 : pdu->psh_len - pdu->psh_valid_bytes,
2056 1 : (uint8_t *)&pdu->hdr.raw + sizeof(struct spdk_nvme_tcp_common_pdu_hdr) + pdu->psh_valid_bytes);
2057 1 : if (rc < 0) {
2058 0 : nvme_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_QUIESCING);
2059 0 : break;
2060 : }
2061 :
2062 1 : pdu->psh_valid_bytes += rc;
2063 1 : if (pdu->psh_valid_bytes < pdu->psh_len) {
2064 0 : return NVME_TCP_PDU_IN_PROGRESS;
2065 : }
2066 :
2067 : /* All header(ch, psh, head digist) of this PDU has now been read from the socket. */
2068 1 : nvme_tcp_pdu_psh_handle(tqpair, reaped);
2069 1 : break;
2070 0 : case NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_PAYLOAD:
2071 : /* check whether the data is valid, if not we just return */
2072 0 : if (!pdu->data_len) {
2073 0 : return NVME_TCP_PDU_IN_PROGRESS;
2074 : }
2075 :
2076 0 : data_len = pdu->data_len;
2077 : /* data digest */
2078 0 : if (spdk_unlikely((pdu->hdr.common.pdu_type == SPDK_NVME_TCP_PDU_TYPE_C2H_DATA) &&
2079 : tqpair->flags.host_ddgst_enable)) {
2080 0 : data_len += SPDK_NVME_TCP_DIGEST_LEN;
2081 0 : pdu->ddgst_enable = true;
2082 : }
2083 :
2084 0 : rc = nvme_tcp_read_payload_data(tqpair->sock, pdu);
2085 0 : if (rc < 0) {
2086 0 : nvme_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_QUIESCING);
2087 0 : break;
2088 : }
2089 :
2090 0 : pdu->rw_offset += rc;
2091 0 : if (pdu->rw_offset < data_len) {
2092 0 : return NVME_TCP_PDU_IN_PROGRESS;
2093 : }
2094 :
2095 0 : assert(pdu->rw_offset == data_len);
2096 : /* All of this PDU has now been read from the socket. */
2097 0 : nvme_tcp_pdu_payload_handle(tqpair, reaped);
2098 0 : break;
2099 2 : case NVME_TCP_PDU_RECV_STATE_QUIESCING:
2100 2 : if (TAILQ_EMPTY(&tqpair->outstanding_reqs)) {
2101 1 : if (nvme_qpair_get_state(&tqpair->qpair) == NVME_QPAIR_DISCONNECTING) {
2102 1 : nvme_transport_ctrlr_disconnect_qpair_done(&tqpair->qpair);
2103 : }
2104 :
2105 1 : nvme_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_ERROR);
2106 : }
2107 2 : break;
2108 1 : case NVME_TCP_PDU_RECV_STATE_ERROR:
2109 1 : memset(pdu, 0, sizeof(struct nvme_tcp_pdu));
2110 1 : return NVME_TCP_PDU_FATAL;
2111 0 : default:
2112 0 : assert(0);
2113 : break;
2114 : }
2115 5 : } while (prev_state != tqpair->recv_state);
2116 :
2117 1 : return rc > 0 ? 0 : rc;
2118 : }
2119 :
2120 : static void
2121 0 : nvme_tcp_qpair_check_timeout(struct spdk_nvme_qpair *qpair)
2122 : {
2123 : uint64_t t02;
2124 : struct nvme_tcp_req *tcp_req, *tmp;
2125 0 : struct nvme_tcp_qpair *tqpair = nvme_tcp_qpair(qpair);
2126 0 : struct spdk_nvme_ctrlr *ctrlr = qpair->ctrlr;
2127 : struct spdk_nvme_ctrlr_process *active_proc;
2128 :
2129 : /* Don't check timeouts during controller initialization. */
2130 0 : if (ctrlr->state != NVME_CTRLR_STATE_READY) {
2131 0 : return;
2132 : }
2133 :
2134 0 : if (nvme_qpair_is_admin_queue(qpair)) {
2135 0 : active_proc = nvme_ctrlr_get_current_process(ctrlr);
2136 : } else {
2137 0 : active_proc = qpair->active_proc;
2138 : }
2139 :
2140 : /* Only check timeouts if the current process has a timeout callback. */
2141 0 : if (active_proc == NULL || active_proc->timeout_cb_fn == NULL) {
2142 0 : return;
2143 : }
2144 :
2145 0 : t02 = spdk_get_ticks();
2146 0 : TAILQ_FOREACH_SAFE(tcp_req, &tqpair->outstanding_reqs, link, tmp) {
2147 0 : if (ctrlr->is_failed) {
2148 : /* The controller state may be changed to failed in one of the nvme_request_check_timeout callbacks. */
2149 0 : return;
2150 : }
2151 0 : assert(tcp_req->req != NULL);
2152 :
2153 0 : if (nvme_request_check_timeout(tcp_req->req, tcp_req->cid, active_proc, t02)) {
2154 : /*
2155 : * The requests are in order, so as soon as one has not timed out,
2156 : * stop iterating.
2157 : */
2158 0 : break;
2159 : }
2160 : }
2161 : }
2162 :
2163 : static int nvme_tcp_ctrlr_connect_qpair_poll(struct spdk_nvme_ctrlr *ctrlr,
2164 : struct spdk_nvme_qpair *qpair);
2165 :
2166 : static int
2167 6 : nvme_tcp_qpair_process_completions(struct spdk_nvme_qpair *qpair, uint32_t max_completions)
2168 : {
2169 6 : struct nvme_tcp_qpair *tqpair = nvme_tcp_qpair(qpair);
2170 6 : uint32_t reaped;
2171 : int rc;
2172 :
2173 6 : if (qpair->poll_group == NULL) {
2174 6 : rc = spdk_sock_flush(tqpair->sock);
2175 6 : if (rc < 0 && errno != EAGAIN) {
2176 2 : SPDK_ERRLOG("Failed to flush tqpair=%p (%d): %s\n", tqpair,
2177 : errno, spdk_strerror(errno));
2178 2 : if (spdk_unlikely(tqpair->qpair.ctrlr->timeout_enabled)) {
2179 0 : nvme_tcp_qpair_check_timeout(qpair);
2180 : }
2181 :
2182 2 : if (nvme_qpair_get_state(qpair) == NVME_QPAIR_DISCONNECTING) {
2183 1 : if (TAILQ_EMPTY(&tqpair->outstanding_reqs)) {
2184 1 : nvme_transport_ctrlr_disconnect_qpair_done(qpair);
2185 : }
2186 :
2187 : /* Don't return errors until the qpair gets disconnected */
2188 1 : return 0;
2189 : }
2190 :
2191 1 : goto fail;
2192 : }
2193 : }
2194 :
2195 4 : if (max_completions == 0) {
2196 4 : max_completions = spdk_max(tqpair->num_entries, 1);
2197 : } else {
2198 0 : max_completions = spdk_min(max_completions, tqpair->num_entries);
2199 : }
2200 :
2201 4 : reaped = 0;
2202 4 : rc = nvme_tcp_read_pdu(tqpair, &reaped, max_completions);
2203 4 : if (rc < 0) {
2204 1 : SPDK_DEBUGLOG(nvme, "Error polling CQ! (%d): %s\n",
2205 : errno, spdk_strerror(errno));
2206 1 : goto fail;
2207 : }
2208 :
2209 3 : if (spdk_unlikely(tqpair->qpair.ctrlr->timeout_enabled)) {
2210 0 : nvme_tcp_qpair_check_timeout(qpair);
2211 : }
2212 :
2213 3 : if (spdk_unlikely(nvme_qpair_get_state(qpair) == NVME_QPAIR_CONNECTING)) {
2214 2 : rc = nvme_tcp_ctrlr_connect_qpair_poll(qpair->ctrlr, qpair);
2215 2 : if (rc != 0 && rc != -EAGAIN) {
2216 0 : SPDK_ERRLOG("Failed to connect tqpair=%p\n", tqpair);
2217 0 : goto fail;
2218 2 : } else if (rc == 0) {
2219 : /* Once the connection is completed, we can submit queued requests */
2220 1 : nvme_qpair_resubmit_requests(qpair, tqpair->num_entries);
2221 : }
2222 : }
2223 :
2224 3 : return reaped;
2225 2 : fail:
2226 :
2227 : /*
2228 : * Since admin queues take the ctrlr_lock before entering this function,
2229 : * we can call nvme_transport_ctrlr_disconnect_qpair. For other qpairs we need
2230 : * to call the generic function which will take the lock for us.
2231 : */
2232 2 : qpair->transport_failure_reason = SPDK_NVME_QPAIR_FAILURE_UNKNOWN;
2233 :
2234 2 : if (nvme_qpair_is_admin_queue(qpair)) {
2235 2 : enum nvme_qpair_state state_prev = nvme_qpair_get_state(qpair);
2236 :
2237 2 : nvme_transport_ctrlr_disconnect_qpair(qpair->ctrlr, qpair);
2238 :
2239 2 : if (state_prev == NVME_QPAIR_CONNECTING && qpair->poll_status != NULL) {
2240 : /* Needed to free the poll_status */
2241 0 : nvme_tcp_ctrlr_connect_qpair_poll(qpair->ctrlr, qpair);
2242 : }
2243 : } else {
2244 0 : nvme_ctrlr_disconnect_qpair(qpair);
2245 : }
2246 2 : return -ENXIO;
2247 : }
2248 :
2249 : static void
2250 0 : nvme_tcp_qpair_sock_cb(void *ctx, struct spdk_sock_group *group, struct spdk_sock *sock)
2251 : {
2252 0 : struct spdk_nvme_qpair *qpair = ctx;
2253 0 : struct nvme_tcp_poll_group *pgroup = nvme_tcp_poll_group(qpair->poll_group);
2254 : int32_t num_completions;
2255 0 : struct nvme_tcp_qpair *tqpair = nvme_tcp_qpair(qpair);
2256 :
2257 0 : if (tqpair->needs_poll) {
2258 0 : TAILQ_REMOVE(&pgroup->needs_poll, tqpair, link);
2259 0 : tqpair->needs_poll = false;
2260 : }
2261 :
2262 0 : num_completions = spdk_nvme_qpair_process_completions(qpair, pgroup->completions_per_qpair);
2263 :
2264 0 : if (pgroup->num_completions >= 0 && num_completions >= 0) {
2265 0 : pgroup->num_completions += num_completions;
2266 0 : pgroup->stats.nvme_completions += num_completions;
2267 : } else {
2268 0 : pgroup->num_completions = -ENXIO;
2269 : }
2270 0 : }
2271 :
2272 : static int
2273 2 : nvme_tcp_qpair_icreq_send(struct nvme_tcp_qpair *tqpair)
2274 : {
2275 : struct spdk_nvme_tcp_ic_req *ic_req;
2276 : struct nvme_tcp_pdu *pdu;
2277 : uint32_t timeout_in_sec;
2278 :
2279 2 : pdu = tqpair->send_pdu;
2280 2 : memset(tqpair->send_pdu, 0, sizeof(*tqpair->send_pdu));
2281 2 : ic_req = &pdu->hdr.ic_req;
2282 :
2283 2 : ic_req->common.pdu_type = SPDK_NVME_TCP_PDU_TYPE_IC_REQ;
2284 2 : ic_req->common.hlen = ic_req->common.plen = sizeof(*ic_req);
2285 2 : ic_req->pfv = 0;
2286 2 : ic_req->maxr2t = NVME_TCP_MAX_R2T_DEFAULT - 1;
2287 2 : ic_req->hpda = NVME_TCP_HPDA_DEFAULT;
2288 :
2289 2 : ic_req->dgst.bits.hdgst_enable = tqpair->qpair.ctrlr->opts.header_digest;
2290 2 : ic_req->dgst.bits.ddgst_enable = tqpair->qpair.ctrlr->opts.data_digest;
2291 :
2292 2 : nvme_tcp_qpair_write_pdu(tqpair, pdu, nvme_tcp_send_icreq_complete, tqpair);
2293 :
2294 2 : timeout_in_sec = tqpair->qpair.async ? ICREQ_TIMEOUT_ASYNC : ICREQ_TIMEOUT_SYNC;
2295 2 : tqpair->icreq_timeout_tsc = spdk_get_ticks() + (timeout_in_sec * spdk_get_ticks_hz());
2296 2 : return 0;
2297 : }
2298 :
2299 : static int
2300 10 : nvme_tcp_qpair_connect_sock(struct spdk_nvme_ctrlr *ctrlr, struct spdk_nvme_qpair *qpair)
2301 : {
2302 10 : struct sockaddr_storage dst_addr;
2303 10 : struct sockaddr_storage src_addr;
2304 : int rc;
2305 : struct nvme_tcp_qpair *tqpair;
2306 : int family;
2307 10 : long int port, src_port;
2308 : char *sock_impl_name;
2309 10 : struct spdk_sock_impl_opts impl_opts = {};
2310 10 : size_t impl_opts_size = sizeof(impl_opts);
2311 10 : struct spdk_sock_opts opts;
2312 : struct nvme_tcp_ctrlr *tcp_ctrlr;
2313 :
2314 10 : tqpair = nvme_tcp_qpair(qpair);
2315 :
2316 10 : switch (ctrlr->trid.adrfam) {
2317 8 : case SPDK_NVMF_ADRFAM_IPV4:
2318 8 : family = AF_INET;
2319 8 : break;
2320 0 : case SPDK_NVMF_ADRFAM_IPV6:
2321 0 : family = AF_INET6;
2322 0 : break;
2323 2 : default:
2324 2 : SPDK_ERRLOG("Unhandled ADRFAM %d\n", ctrlr->trid.adrfam);
2325 2 : rc = -1;
2326 2 : return rc;
2327 : }
2328 :
2329 8 : SPDK_DEBUGLOG(nvme, "adrfam %d ai_family %d\n", ctrlr->trid.adrfam, family);
2330 :
2331 8 : memset(&dst_addr, 0, sizeof(dst_addr));
2332 :
2333 8 : SPDK_DEBUGLOG(nvme, "trsvcid is %s\n", ctrlr->trid.trsvcid);
2334 8 : rc = nvme_parse_addr(&dst_addr, family, ctrlr->trid.traddr, ctrlr->trid.trsvcid, &port);
2335 8 : if (rc != 0) {
2336 2 : SPDK_ERRLOG("dst_addr nvme_parse_addr() failed\n");
2337 2 : return rc;
2338 : }
2339 :
2340 6 : if (ctrlr->opts.src_addr[0] || ctrlr->opts.src_svcid[0]) {
2341 6 : memset(&src_addr, 0, sizeof(src_addr));
2342 6 : rc = nvme_parse_addr(&src_addr, family, ctrlr->opts.src_addr, ctrlr->opts.src_svcid, &src_port);
2343 6 : if (rc != 0) {
2344 0 : SPDK_ERRLOG("src_addr nvme_parse_addr() failed\n");
2345 0 : return rc;
2346 : }
2347 : }
2348 :
2349 6 : tcp_ctrlr = SPDK_CONTAINEROF(ctrlr, struct nvme_tcp_ctrlr, ctrlr);
2350 6 : sock_impl_name = tcp_ctrlr->psk[0] ? "ssl" : NULL;
2351 6 : SPDK_DEBUGLOG(nvme, "sock_impl_name is %s\n", sock_impl_name);
2352 :
2353 6 : if (sock_impl_name) {
2354 0 : spdk_sock_impl_get_opts(sock_impl_name, &impl_opts, &impl_opts_size);
2355 0 : impl_opts.tls_version = SPDK_TLS_VERSION_1_3;
2356 0 : impl_opts.psk_identity = tcp_ctrlr->psk_identity;
2357 0 : impl_opts.psk_key = tcp_ctrlr->psk;
2358 0 : impl_opts.psk_key_size = tcp_ctrlr->psk_size;
2359 0 : impl_opts.tls_cipher_suites = tcp_ctrlr->tls_cipher_suite;
2360 : }
2361 6 : opts.opts_size = sizeof(opts);
2362 6 : spdk_sock_get_default_opts(&opts);
2363 6 : opts.priority = ctrlr->trid.priority;
2364 6 : opts.zcopy = !nvme_qpair_is_admin_queue(qpair);
2365 6 : if (ctrlr->opts.transport_ack_timeout) {
2366 3 : opts.ack_timeout = 1ULL << ctrlr->opts.transport_ack_timeout;
2367 : }
2368 6 : if (sock_impl_name) {
2369 0 : opts.impl_opts = &impl_opts;
2370 0 : opts.impl_opts_size = sizeof(impl_opts);
2371 : }
2372 6 : tqpair->sock = spdk_sock_connect_ext(ctrlr->trid.traddr, port, sock_impl_name, &opts);
2373 6 : if (!tqpair->sock) {
2374 1 : SPDK_ERRLOG("sock connection error of tqpair=%p with addr=%s, port=%ld\n",
2375 : tqpair, ctrlr->trid.traddr, port);
2376 1 : rc = -1;
2377 1 : return rc;
2378 : }
2379 :
2380 5 : return 0;
2381 : }
2382 :
2383 : static int
2384 2 : nvme_tcp_ctrlr_connect_qpair_poll(struct spdk_nvme_ctrlr *ctrlr, struct spdk_nvme_qpair *qpair)
2385 : {
2386 : struct nvme_tcp_qpair *tqpair;
2387 : int rc;
2388 :
2389 2 : tqpair = nvme_tcp_qpair(qpair);
2390 :
2391 : /* Prevent this function from being called recursively, as it could lead to issues with
2392 : * nvme_fabric_qpair_connect_poll() if the connect response is received in the recursive
2393 : * call.
2394 : */
2395 2 : if (tqpair->flags.in_connect_poll) {
2396 0 : return -EAGAIN;
2397 : }
2398 :
2399 2 : tqpair->flags.in_connect_poll = 1;
2400 :
2401 2 : switch (tqpair->state) {
2402 0 : case NVME_TCP_QPAIR_STATE_INVALID:
2403 : case NVME_TCP_QPAIR_STATE_INITIALIZING:
2404 0 : if (spdk_get_ticks() > tqpair->icreq_timeout_tsc) {
2405 0 : SPDK_ERRLOG("Failed to construct the tqpair=%p via correct icresp\n", tqpair);
2406 0 : rc = -ETIMEDOUT;
2407 0 : break;
2408 : }
2409 0 : rc = -EAGAIN;
2410 0 : break;
2411 1 : case NVME_TCP_QPAIR_STATE_FABRIC_CONNECT_SEND:
2412 1 : rc = nvme_fabric_qpair_connect_async(&tqpair->qpair, tqpair->num_entries + 1);
2413 1 : if (rc < 0) {
2414 0 : SPDK_ERRLOG("Failed to send an NVMe-oF Fabric CONNECT command\n");
2415 0 : break;
2416 : }
2417 1 : tqpair->state = NVME_TCP_QPAIR_STATE_FABRIC_CONNECT_POLL;
2418 1 : rc = -EAGAIN;
2419 1 : break;
2420 1 : case NVME_TCP_QPAIR_STATE_FABRIC_CONNECT_POLL:
2421 1 : rc = nvme_fabric_qpair_connect_poll(&tqpair->qpair);
2422 1 : if (rc == 0) {
2423 1 : tqpair->state = NVME_TCP_QPAIR_STATE_RUNNING;
2424 1 : nvme_qpair_set_state(qpair, NVME_QPAIR_CONNECTED);
2425 0 : } else if (rc != -EAGAIN) {
2426 0 : SPDK_ERRLOG("Failed to poll NVMe-oF Fabric CONNECT command\n");
2427 : }
2428 1 : break;
2429 0 : case NVME_TCP_QPAIR_STATE_RUNNING:
2430 0 : rc = 0;
2431 0 : break;
2432 0 : default:
2433 0 : assert(false);
2434 : rc = -EINVAL;
2435 : break;
2436 : }
2437 :
2438 2 : tqpair->flags.in_connect_poll = 0;
2439 2 : return rc;
2440 : }
2441 :
2442 : static int
2443 1 : nvme_tcp_ctrlr_connect_qpair(struct spdk_nvme_ctrlr *ctrlr, struct spdk_nvme_qpair *qpair)
2444 : {
2445 1 : int rc = 0;
2446 : struct nvme_tcp_qpair *tqpair;
2447 : struct nvme_tcp_poll_group *tgroup;
2448 :
2449 1 : tqpair = nvme_tcp_qpair(qpair);
2450 :
2451 1 : if (!tqpair->sock) {
2452 0 : rc = nvme_tcp_qpair_connect_sock(ctrlr, qpair);
2453 0 : if (rc < 0) {
2454 0 : return rc;
2455 : }
2456 : }
2457 :
2458 1 : if (qpair->poll_group) {
2459 0 : rc = nvme_poll_group_connect_qpair(qpair);
2460 0 : if (rc) {
2461 0 : SPDK_ERRLOG("Unable to activate the tcp qpair.\n");
2462 0 : return rc;
2463 : }
2464 0 : tgroup = nvme_tcp_poll_group(qpair->poll_group);
2465 0 : tqpair->stats = &tgroup->stats;
2466 0 : tqpair->shared_stats = true;
2467 : } else {
2468 : /* When resetting a controller, we disconnect adminq and then reconnect. The stats
2469 : * is not freed when disconnecting. So when reconnecting, don't allocate memory
2470 : * again.
2471 : */
2472 1 : if (tqpair->stats == NULL) {
2473 1 : tqpair->stats = calloc(1, sizeof(*tqpair->stats));
2474 1 : if (!tqpair->stats) {
2475 0 : SPDK_ERRLOG("tcp stats memory allocation failed\n");
2476 0 : return -ENOMEM;
2477 : }
2478 : }
2479 : }
2480 :
2481 1 : tqpair->maxr2t = NVME_TCP_MAX_R2T_DEFAULT;
2482 : /* Explicitly set the state and recv_state of tqpair */
2483 1 : tqpair->state = NVME_TCP_QPAIR_STATE_INVALID;
2484 1 : if (tqpair->recv_state != NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_READY) {
2485 0 : nvme_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_READY);
2486 : }
2487 1 : rc = nvme_tcp_qpair_icreq_send(tqpair);
2488 1 : if (rc != 0) {
2489 0 : SPDK_ERRLOG("Unable to connect the tqpair\n");
2490 0 : return rc;
2491 : }
2492 :
2493 1 : return rc;
2494 : }
2495 :
2496 : static struct spdk_nvme_qpair *
2497 9 : nvme_tcp_ctrlr_create_qpair(struct spdk_nvme_ctrlr *ctrlr,
2498 : uint16_t qid, uint32_t qsize,
2499 : enum spdk_nvme_qprio qprio,
2500 : uint32_t num_requests, bool async)
2501 : {
2502 : struct nvme_tcp_qpair *tqpair;
2503 : struct spdk_nvme_qpair *qpair;
2504 : int rc;
2505 :
2506 9 : if (qsize < SPDK_NVME_QUEUE_MIN_ENTRIES) {
2507 3 : SPDK_ERRLOG("Failed to create qpair with size %u. Minimum queue size is %d.\n",
2508 : qsize, SPDK_NVME_QUEUE_MIN_ENTRIES);
2509 3 : return NULL;
2510 : }
2511 :
2512 6 : tqpair = calloc(1, sizeof(struct nvme_tcp_qpair));
2513 6 : if (!tqpair) {
2514 0 : SPDK_ERRLOG("failed to get create tqpair\n");
2515 0 : return NULL;
2516 : }
2517 :
2518 : /* Set num_entries one less than queue size. According to NVMe
2519 : * and NVMe-oF specs we can not submit queue size requests,
2520 : * one slot shall always remain empty.
2521 : */
2522 6 : tqpair->num_entries = qsize - 1;
2523 6 : qpair = &tqpair->qpair;
2524 6 : rc = nvme_qpair_init(qpair, qid, ctrlr, qprio, num_requests, async);
2525 6 : if (rc != 0) {
2526 0 : free(tqpair);
2527 0 : return NULL;
2528 : }
2529 :
2530 6 : rc = nvme_tcp_alloc_reqs(tqpair);
2531 6 : if (rc) {
2532 0 : nvme_tcp_ctrlr_delete_io_qpair(ctrlr, qpair);
2533 0 : return NULL;
2534 : }
2535 :
2536 : /* spdk_nvme_qpair_get_optimal_poll_group needs socket information.
2537 : * So create the socket first when creating a qpair. */
2538 6 : rc = nvme_tcp_qpair_connect_sock(ctrlr, qpair);
2539 6 : if (rc) {
2540 2 : nvme_tcp_ctrlr_delete_io_qpair(ctrlr, qpair);
2541 2 : return NULL;
2542 : }
2543 :
2544 4 : return qpair;
2545 : }
2546 :
2547 : static struct spdk_nvme_qpair *
2548 4 : nvme_tcp_ctrlr_create_io_qpair(struct spdk_nvme_ctrlr *ctrlr, uint16_t qid,
2549 : const struct spdk_nvme_io_qpair_opts *opts)
2550 : {
2551 8 : return nvme_tcp_ctrlr_create_qpair(ctrlr, qid, opts->io_queue_size, opts->qprio,
2552 4 : opts->io_queue_requests, opts->async_mode);
2553 : }
2554 :
2555 1 : SPDK_LOG_DEPRECATION_REGISTER(nvme_ctrlr_psk, "spdk_nvme_ctrlr_opts.psk", "v24.09", 0);
2556 :
2557 : static int
2558 0 : nvme_tcp_generate_tls_credentials(struct nvme_tcp_ctrlr *tctrlr)
2559 : {
2560 0 : struct spdk_nvme_ctrlr *ctrlr = &tctrlr->ctrlr;
2561 : int rc;
2562 0 : uint8_t psk_retained[SPDK_TLS_PSK_MAX_LEN] = {};
2563 0 : uint8_t psk_configured[SPDK_TLS_PSK_MAX_LEN] = {};
2564 0 : uint8_t pskbuf[SPDK_TLS_PSK_MAX_LEN + 1] = {};
2565 : uint8_t tls_cipher_suite;
2566 0 : uint8_t psk_retained_hash;
2567 0 : uint64_t psk_configured_size;
2568 : uint8_t *psk;
2569 :
2570 0 : if (ctrlr->opts.tls_psk != NULL) {
2571 0 : rc = spdk_key_get_key(ctrlr->opts.tls_psk, pskbuf, SPDK_TLS_PSK_MAX_LEN);
2572 0 : if (rc < 0) {
2573 0 : SPDK_ERRLOG("Failed to obtain key '%s': %s\n",
2574 : spdk_key_get_name(ctrlr->opts.tls_psk), spdk_strerror(-rc));
2575 0 : goto finish;
2576 : }
2577 :
2578 0 : psk = pskbuf;
2579 : } else {
2580 0 : SPDK_LOG_DEPRECATED(nvme_ctrlr_psk);
2581 0 : psk = ctrlr->opts.psk;
2582 : }
2583 :
2584 0 : rc = nvme_tcp_parse_interchange_psk(psk, psk_configured, sizeof(psk_configured),
2585 : &psk_configured_size, &psk_retained_hash);
2586 0 : if (rc < 0) {
2587 0 : SPDK_ERRLOG("Failed to parse PSK interchange!\n");
2588 0 : goto finish;
2589 : }
2590 :
2591 : /* The Base64 string encodes the configured PSK (32 or 48 bytes binary).
2592 : * This check also ensures that psk_configured_size is smaller than
2593 : * psk_retained buffer size. */
2594 0 : if (psk_configured_size == SHA256_DIGEST_LENGTH) {
2595 0 : tls_cipher_suite = NVME_TCP_CIPHER_AES_128_GCM_SHA256;
2596 0 : tctrlr->tls_cipher_suite = "TLS_AES_128_GCM_SHA256";
2597 0 : } else if (psk_configured_size == SHA384_DIGEST_LENGTH) {
2598 0 : tls_cipher_suite = NVME_TCP_CIPHER_AES_256_GCM_SHA384;
2599 0 : tctrlr->tls_cipher_suite = "TLS_AES_256_GCM_SHA384";
2600 : } else {
2601 0 : SPDK_ERRLOG("Unrecognized cipher suite!\n");
2602 0 : rc = -ENOTSUP;
2603 0 : goto finish;
2604 : }
2605 :
2606 0 : rc = nvme_tcp_generate_psk_identity(tctrlr->psk_identity, sizeof(tctrlr->psk_identity),
2607 0 : ctrlr->opts.hostnqn, ctrlr->trid.subnqn,
2608 : tls_cipher_suite);
2609 0 : if (rc) {
2610 0 : SPDK_ERRLOG("could not generate PSK identity\n");
2611 0 : goto finish;
2612 : }
2613 :
2614 : /* No hash indicates that Configured PSK must be used as Retained PSK. */
2615 0 : if (psk_retained_hash == NVME_TCP_HASH_ALGORITHM_NONE) {
2616 0 : assert(psk_configured_size < sizeof(psk_retained));
2617 0 : memcpy(psk_retained, psk_configured, psk_configured_size);
2618 0 : rc = psk_configured_size;
2619 : } else {
2620 : /* Derive retained PSK. */
2621 0 : rc = nvme_tcp_derive_retained_psk(psk_configured, psk_configured_size, ctrlr->opts.hostnqn,
2622 : psk_retained, sizeof(psk_retained), psk_retained_hash);
2623 0 : if (rc < 0) {
2624 0 : SPDK_ERRLOG("Unable to derive retained PSK!\n");
2625 0 : goto finish;
2626 : }
2627 : }
2628 :
2629 0 : rc = nvme_tcp_derive_tls_psk(psk_retained, rc, tctrlr->psk_identity, tctrlr->psk,
2630 : sizeof(tctrlr->psk), tls_cipher_suite);
2631 0 : if (rc < 0) {
2632 0 : SPDK_ERRLOG("Could not generate TLS PSK!\n");
2633 0 : goto finish;
2634 : }
2635 :
2636 0 : tctrlr->psk_size = rc;
2637 0 : rc = 0;
2638 0 : finish:
2639 0 : spdk_memset_s(psk_configured, sizeof(psk_configured), 0, sizeof(psk_configured));
2640 0 : spdk_memset_s(pskbuf, sizeof(pskbuf), 0, sizeof(pskbuf));
2641 :
2642 0 : return rc;
2643 : }
2644 :
2645 : /* Alias for struct spdk_nvme_ctrlr, used as the return type of nvme_tcp_ctrlr_construct() below. We have to use the typedef in the function declaration to appease astyle. */
2646 : typedef struct spdk_nvme_ctrlr spdk_nvme_ctrlr_t;
2647 :
/*
 * Allocate and construct an NVMe/TCP controller.
 *
 * Steps performed, in order:
 *  - allocate a zeroed struct nvme_tcp_ctrlr and copy *opts and *trid into it;
 *  - if opts->psk or opts->tls_psk is set (setting both is rejected), generate
 *    the TLS credentials and wipe the controller's copy of opts.psk;
 *  - clamp transport_ack_timeout to NVME_TCP_CTRLR_MAX_TRANSPORT_ACK_TIMEOUT;
 *  - run nvme_ctrlr_construct(), create the admin qpair and call
 *    nvme_ctrlr_add_process().
 *
 * \param trid transport ID copied into the new controller.
 * \param opts controller options copied into the new controller.
 * \param devhandle not referenced by this function.
 *
 * \return pointer to the spdk_nvme_ctrlr embedded in the new nvme_tcp_ctrlr,
 * or NULL if any step fails.
 */
2648 : static spdk_nvme_ctrlr_t *
2649 5 : nvme_tcp_ctrlr_construct(const struct spdk_nvme_transport_id *trid,
2650 : const struct spdk_nvme_ctrlr_opts *opts,
2651 : void *devhandle)
2652 : {
2653 : struct nvme_tcp_ctrlr *tctrlr;
2654 : int rc;
2655 :
2656 5 : tctrlr = calloc(1, sizeof(*tctrlr));
2657 5 : if (tctrlr == NULL) {
2658 0 : SPDK_ERRLOG("could not allocate ctrlr\n");
2659 0 : return NULL;
2660 : }
2661 :
/* The controller keeps its own copies of the caller's options and transport ID. */
2662 5 : tctrlr->ctrlr.opts = *opts;
2663 5 : tctrlr->ctrlr.trid = *trid;
2664 :
2665 5 : if (opts->psk[0] != '\0' || opts->tls_psk != NULL) {
2666 : /* Only allow either one at a time */
2667 0 : if (opts->tls_psk != NULL && opts->psk[0] != '\0') {
2668 0 : SPDK_ERRLOG("Either spdk_nvme_ctrlr_opts.tls_psk or .psk can be set at "
2669 : "the same time\n");
2670 0 : free(tctrlr);
2671 0 : return NULL;
2672 : }
2673 0 : rc = nvme_tcp_generate_tls_credentials(tctrlr);
/* The controller's copy of opts.psk is wiped before rc is examined, i.e. on
 * both the success and the failure path. */
2674 0 : spdk_memset_s(&tctrlr->ctrlr.opts.psk, sizeof(tctrlr->ctrlr.opts.psk), 0,
2675 : sizeof(tctrlr->ctrlr.opts.psk));
2676 :
2677 0 : if (rc != 0) {
2678 0 : free(tctrlr);
2679 0 : return NULL;
2680 : }
2681 : }
2682 :
/* The check reads the caller's opts, the clamp is applied to the controller's copy. */
2683 5 : if (opts->transport_ack_timeout > NVME_TCP_CTRLR_MAX_TRANSPORT_ACK_TIMEOUT) {
2684 5 : SPDK_NOTICELOG("transport_ack_timeout exceeds max value %d, use max value\n",
2685 : NVME_TCP_CTRLR_MAX_TRANSPORT_ACK_TIMEOUT);
2686 5 : tctrlr->ctrlr.opts.transport_ack_timeout = NVME_TCP_CTRLR_MAX_TRANSPORT_ACK_TIMEOUT;
2687 : }
2688 :
2689 5 : rc = nvme_ctrlr_construct(&tctrlr->ctrlr);
2690 5 : if (rc != 0) {
2691 0 : free(tctrlr);
2692 0 : return NULL;
2693 : }
2694 :
2695 : /* Sequence might be used not only for data digest offload purposes but
2696 : * to handle a potential COPY operation appended as the result of translation. */
2697 5 : tctrlr->ctrlr.flags |= SPDK_NVME_CTRLR_ACCEL_SEQUENCE_SUPPORTED;
/* The admin qpair is created with qid 0 and admin_queue_size passed twice.
 * From here on, failures are cleaned up through the destruct routines instead
 * of a bare free() - presumably because nvme_ctrlr_construct() has set up
 * state that must be torn down; TODO confirm. */
2698 10 : tctrlr->ctrlr.adminq = nvme_tcp_ctrlr_create_qpair(&tctrlr->ctrlr, 0,
2699 5 : tctrlr->ctrlr.opts.admin_queue_size, 0,
2700 5 : tctrlr->ctrlr.opts.admin_queue_size, true);
2701 5 : if (!tctrlr->ctrlr.adminq) {
2702 3 : SPDK_ERRLOG("failed to create admin qpair\n");
2703 3 : nvme_tcp_ctrlr_destruct(&tctrlr->ctrlr);
2704 3 : return NULL;
2705 : }
2706 :
2707 2 : if (nvme_ctrlr_add_process(&tctrlr->ctrlr, 0) != 0) {
2708 0 : SPDK_ERRLOG("nvme_ctrlr_add_process() failed\n");
/* NOTE(review): this path calls the generic nvme_ctrlr_destruct() while the
 * admin-qpair failure path above calls nvme_tcp_ctrlr_destruct(); confirm the
 * generic call also releases tctrlr. */
2709 0 : nvme_ctrlr_destruct(&tctrlr->ctrlr);
2710 0 : return NULL;
2711 : }
2712 :
2713 2 : return &tctrlr->ctrlr;
2714 : }
2715 :
/*
 * Report the maximum I/O transfer size for the TCP transport.
 *
 * \param ctrlr not referenced by this function.
 *
 * \return always UINT32_MAX.
 */
2716 : static uint32_t
2717 0 : nvme_tcp_ctrlr_get_max_xfer_size(struct spdk_nvme_ctrlr *ctrlr)
2718 : {
2719 : /* TCP transport doesn't limit maximum IO transfer size. */
2720 0 : return UINT32_MAX;
2721 : }
2722 :
/*
 * Report the maximum number of SGL descriptors for the TCP transport.
 *
 * \param ctrlr not referenced by this function.
 *
 * \return always NVME_TCP_MAX_SGL_DESCRIPTORS.
 */
2723 : static uint16_t
2724 0 : nvme_tcp_ctrlr_get_max_sges(struct spdk_nvme_ctrlr *ctrlr)
2725 : {
2726 0 : return NVME_TCP_MAX_SGL_DESCRIPTORS;
2727 : }
2728 :
/*
 * Call iter_fn(req, arg) for the nvme_request attached to every entry on the
 * qpair's outstanding_reqs list.
 *
 * \param qpair qpair whose outstanding requests are visited.
 * \param iter_fn callback invoked per request; must not be NULL (asserted).
 * \param arg opaque pointer forwarded to iter_fn unchanged.
 *
 * \return the first non-zero value returned by iter_fn (iteration stops
 * there), or 0 if every callback returned 0 or the list was empty.
 */
2729 : static int
2730 0 : nvme_tcp_qpair_iterate_requests(struct spdk_nvme_qpair *qpair,
2731 : int (*iter_fn)(struct nvme_request *req, void *arg),
2732 : void *arg)
2733 : {
2734 0 : struct nvme_tcp_qpair *tqpair = nvme_tcp_qpair(qpair);
2735 : struct nvme_tcp_req *tcp_req, *tmp;
2736 : int rc;
2737 :
2738 0 : assert(iter_fn != NULL);
2739 :
/* The _SAFE variant keeps the next entry in tmp - presumably so that iter_fn
 * may cause the current entry to be removed; TODO confirm against callers. */
2740 0 : TAILQ_FOREACH_SAFE(tcp_req, &tqpair->outstanding_reqs, link, tmp) {
2741 0 : assert(tcp_req->req != NULL);
2742 :
2743 0 : rc = iter_fn(tcp_req->req, arg);
2744 0 : if (rc != 0) {
2745 0 : return rc;
2746 : }
2747 : }
2748 :
2749 0 : return 0;
2750 : }
2751 :
/*
 * Complete every outstanding Asynchronous Event Request on the qpair with a
 * generic "aborted - SQ deletion" status. Outstanding requests with any other
 * opcode are left untouched.
 *
 * \param qpair qpair whose outstanding_reqs list is scanned.
 */
2752 : static void
2753 0 : nvme_tcp_admin_qpair_abort_aers(struct spdk_nvme_qpair *qpair)
2754 : {
2755 : struct nvme_tcp_req *tcp_req, *tmp;
2756 0 : struct spdk_nvme_cpl cpl = {};
2757 0 : struct nvme_tcp_qpair *tqpair = nvme_tcp_qpair(qpair);
2758 :
/* One zero-initialized completion entry is reused for every aborted AER. */
2759 0 : cpl.status.sc = SPDK_NVME_SC_ABORTED_SQ_DELETION;
2760 0 : cpl.status.sct = SPDK_NVME_SCT_GENERIC;
2761 :
2762 0 : TAILQ_FOREACH_SAFE(tcp_req, &tqpair->outstanding_reqs, link, tmp) {
2763 0 : assert(tcp_req->req != NULL);
2764 0 : if (tcp_req->req->cmd.opc != SPDK_NVME_OPC_ASYNC_EVENT_REQUEST) {
2765 0 : continue;
2766 : }
2767 :
2768 0 : nvme_tcp_req_complete(tcp_req, tqpair, &cpl, false);
2769 : }
2770 0 : }
2771 :
/*
 * Allocate a TCP poll group: a zeroed struct nvme_tcp_poll_group with an empty
 * needs_poll list and a newly created sock group. The poll group itself is
 * passed as the argument to spdk_sock_group_create().
 *
 * \return pointer to the embedded spdk_nvme_transport_poll_group, or NULL if
 * either allocation fails (nothing is leaked in that case).
 */
2772 : static struct spdk_nvme_transport_poll_group *
2773 1 : nvme_tcp_poll_group_create(void)
2774 : {
2775 1 : struct nvme_tcp_poll_group *group = calloc(1, sizeof(*group));
2776 :
2777 1 : if (group == NULL) {
2778 0 : SPDK_ERRLOG("Unable to allocate poll group.\n");
2779 0 : return NULL;
2780 : }
2781 :
2782 1 : TAILQ_INIT(&group->needs_poll);
2783 :
2784 1 : group->sock_group = spdk_sock_group_create(group);
2785 1 : if (group->sock_group == NULL) {
2786 0 : free(group);
2787 0 : SPDK_ERRLOG("Unable to allocate sock group.\n");
2788 0 : return NULL;
2789 : }
2790 :
2791 1 : return &group->group;
2792 : }
2793 :
/*
 * Look up the poll group that the sock layer considers optimal for this
 * qpair's socket.
 *
 * \param qpair qpair whose socket is queried.
 *
 * \return the context pointer of the sock group reported by
 * spdk_sock_get_optimal_sock_group(), or NULL if that call fails or reports
 * no group.
 */
2794 : static struct spdk_nvme_transport_poll_group *
2795 0 : nvme_tcp_qpair_get_optimal_poll_group(struct spdk_nvme_qpair *qpair)
2796 : {
2797 0 : struct nvme_tcp_qpair *tqpair = nvme_tcp_qpair(qpair);
2798 0 : struct spdk_sock_group *group = NULL;
2799 : int rc;
2800 :
2801 0 : rc = spdk_sock_get_optimal_sock_group(tqpair->sock, &group, NULL);
2802 0 : if (!rc && group != NULL) {
/* Presumably the ctx is the nvme_tcp_poll_group handed to
 * spdk_sock_group_create() in nvme_tcp_poll_group_create() - TODO confirm. */
2803 0 : return spdk_sock_group_get_ctx(group);
2804 : }
2805 :
2806 0 : return NULL;
2807 : }
2808 :
/*
 * Add the qpair's socket to the sock group of the poll group the qpair belongs
 * to (qpair->poll_group), with nvme_tcp_qpair_sock_cb as the callback and the
 * qpair as its argument.
 *
 * \param qpair qpair being connected.
 *
 * \return 0 on success, -EPROTO if spdk_sock_group_add_sock() fails.
 */
2809 : static int
2810 0 : nvme_tcp_poll_group_connect_qpair(struct spdk_nvme_qpair *qpair)
2811 : {
2812 0 : struct nvme_tcp_poll_group *group = nvme_tcp_poll_group(qpair->poll_group);
2813 0 : struct nvme_tcp_qpair *tqpair = nvme_tcp_qpair(qpair);
2814 :
2815 0 : if (spdk_sock_group_add_sock(group->sock_group, tqpair->sock, nvme_tcp_qpair_sock_cb, qpair)) {
2816 0 : return -EPROTO;
2817 : }
2818 0 : return 0;
2819 : }
2820 :
/*
 * Detach a qpair from its poll group's polling machinery: drop it from the
 * needs_poll list if it is flagged, then remove its socket from the sock group.
 *
 * \param qpair qpair being disconnected; its poll group is taken from
 * qpair->poll_group.
 *
 * \return 0 on success (including when there is no socket or sock group to
 * remove from), -EPROTO if spdk_sock_group_remove_sock() fails.
 */
2821 : static int
2822 0 : nvme_tcp_poll_group_disconnect_qpair(struct spdk_nvme_qpair *qpair)
2823 : {
2824 0 : struct nvme_tcp_poll_group *group = nvme_tcp_poll_group(qpair->poll_group);
2825 0 : struct nvme_tcp_qpair *tqpair = nvme_tcp_qpair(qpair);
2826 :
2827 0 : if (tqpair->needs_poll) {
2828 0 : TAILQ_REMOVE(&group->needs_poll, tqpair, link);
2829 0 : tqpair->needs_poll = false;
2830 : }
2831 :
/* The removal is skipped when either the socket or the sock group is NULL. */
2832 0 : if (tqpair->sock && group->sock_group) {
2833 0 : if (spdk_sock_group_remove_sock(group->sock_group, tqpair->sock)) {
2834 0 : return -EPROTO;
2835 : }
2836 : }
2837 0 : return 0;
2838 : }
2839 :
/*
 * Add a qpair to a TCP poll group. The qpair's socket is added to the group's
 * sock group only when the qpair state is NVME_QPAIR_CONNECTED or later;
 * otherwise nothing is done.
 *
 * \param tgroup transport poll group to add to.
 * \param qpair qpair being added.
 *
 * \return 0 on success, -EPROTO if spdk_sock_group_add_sock() fails.
 */
2840 : static int
2841 0 : nvme_tcp_poll_group_add(struct spdk_nvme_transport_poll_group *tgroup,
2842 : struct spdk_nvme_qpair *qpair)
2843 : {
2844 0 : struct nvme_tcp_qpair *tqpair = nvme_tcp_qpair(qpair);
2845 0 : struct nvme_tcp_poll_group *group = nvme_tcp_poll_group(tgroup);
2846 :
2847 : /* disconnected qpairs won't have a sock to add. */
2848 0 : if (nvme_qpair_get_state(qpair) >= NVME_QPAIR_CONNECTED) {
2849 0 : if (spdk_sock_group_add_sock(group->sock_group, tqpair->sock, nvme_tcp_qpair_sock_cb, qpair)) {
2850 0 : return -EPROTO;
2851 : }
2852 : }
2853 :
2854 0 : return 0;
2855 : }
2856 :
/*
 * Remove a qpair from a TCP poll group. The qpair is expected (asserted) to be
 * on the group's disconnected_qpairs list and to be using shared stats. Its
 * stats pointer is redirected to g_dummy_stats and it is dropped from the
 * needs_poll list if flagged.
 *
 * \param tgroup transport poll group the qpair is removed from.
 * \param qpair qpair being removed.
 *
 * \return always 0.
 */
2857 : static int
2858 0 : nvme_tcp_poll_group_remove(struct spdk_nvme_transport_poll_group *tgroup,
2859 : struct spdk_nvme_qpair *qpair)
2860 : {
2861 : struct nvme_tcp_qpair *tqpair;
2862 : struct nvme_tcp_poll_group *group;
2863 :
2864 0 : assert(qpair->poll_group_tailq_head == &tgroup->disconnected_qpairs);
2865 :
2866 0 : tqpair = nvme_tcp_qpair(qpair);
2867 0 : group = nvme_tcp_poll_group(tgroup);
2868 :
2869 0 : assert(tqpair->shared_stats == true);
/* Presumably so the qpair no longer references the group's stats once it has
 * left the group - TODO confirm. */
2870 0 : tqpair->stats = &g_dummy_stats;
2871 :
2872 0 : if (tqpair->needs_poll) {
2873 0 : TAILQ_REMOVE(&group->needs_poll, tqpair, link);
2874 0 : tqpair->needs_poll = false;
2875 : }
2876 :
2877 0 : return 0;
2878 : }
2879 :
/*
 * Poll a TCP poll group once.
 *
 * Records completions_per_qpair in the group, resets the group's completion
 * counter, polls the sock group, finishes or reports disconnected qpairs, and
 * then services every qpair still on the needs_poll list.
 *
 * \param tgroup transport poll group to poll.
 * \param completions_per_qpair value stored in group->completions_per_qpair
 * for this poll.
 * \param disconnected_qpair_cb called with (qpair, tgroup->group->ctx) for each
 * qpair on the disconnected list that is in the NVME_QPAIR_DISCONNECTED state.
 *
 * \return the negative value from spdk_sock_group_poll() if it failed,
 * otherwise group->num_completions.
 */
2880 : static int64_t
2881 2 : nvme_tcp_poll_group_process_completions(struct spdk_nvme_transport_poll_group *tgroup,
2882 : uint32_t completions_per_qpair, spdk_nvme_disconnected_qpair_cb disconnected_qpair_cb)
2883 : {
2884 2 : struct nvme_tcp_poll_group *group = nvme_tcp_poll_group(tgroup);
2885 : struct spdk_nvme_qpair *qpair, *tmp_qpair;
2886 : struct nvme_tcp_qpair *tqpair, *tmp_tqpair;
2887 : int num_events;
2888 :
/* num_completions is only reset and returned in this function; presumably
 * nvme_tcp_qpair_sock_cb accumulates into it - TODO confirm. */
2889 2 : group->completions_per_qpair = completions_per_qpair;
2890 2 : group->num_completions = 0;
2891 2 : group->stats.polls++;
2892 :
2893 2 : num_events = spdk_sock_group_poll(group->sock_group);
2894 :
2895 4 : STAILQ_FOREACH_SAFE(qpair, &tgroup->disconnected_qpairs, poll_group_stailq, tmp_qpair) {
2896 2 : tqpair = nvme_tcp_qpair(qpair);
/* A DISCONNECTING qpair is finalized only once it has no outstanding requests. */
2897 2 : if (nvme_qpair_get_state(qpair) == NVME_QPAIR_DISCONNECTING) {
2898 2 : if (TAILQ_EMPTY(&tqpair->outstanding_reqs)) {
2899 1 : nvme_transport_ctrlr_disconnect_qpair_done(qpair);
2900 : }
2901 : }
2902 : /* Wait until the qpair transitions to the DISCONNECTED state, otherwise user might
2903 : * want to free it from disconnect_qpair_cb, while it's not fully disconnected (and
2904 : * might still have outstanding requests) */
2905 2 : if (nvme_qpair_get_state(qpair) == NVME_QPAIR_DISCONNECTED) {
2906 1 : disconnected_qpair_cb(qpair, tgroup->group->ctx);
2907 : }
2908 : }
2909 :
2910 : /* If any qpairs were marked as needing to be polled due to an asynchronous write completion
2911 : * and they weren't polled as a consequence of calling spdk_sock_group_poll above, poll them now. */
2912 2 : TAILQ_FOREACH_SAFE(tqpair, &group->needs_poll, link, tmp_tqpair) {
2913 0 : nvme_tcp_qpair_sock_cb(&tqpair->qpair, group->sock_group, tqpair->sock);
2914 : }
2915 :
/* The poll error is reported only here, after the disconnected and needs_poll
 * qpairs above have been serviced. */
2916 2 : if (spdk_unlikely(num_events < 0)) {
2917 0 : return num_events;
2918 : }
2919 :
/* A poll that returned zero events counts as one idle poll. */
2920 2 : group->stats.idle_polls += !num_events;
2921 2 : group->stats.socket_completions += num_events;
2922 :
2923 2 : return group->num_completions;
2924 : }
2925 :
/*
 * Destroy a TCP poll group: close its sock group and free the group.
 *
 * \param tgroup transport poll group to destroy.
 *
 * \return -EBUSY (and nothing is released) if the group still has connected
 * or disconnected qpairs, otherwise 0.
 */
2926 : static int
2927 1 : nvme_tcp_poll_group_destroy(struct spdk_nvme_transport_poll_group *tgroup)
2928 : {
2929 : int rc;
2930 1 : struct nvme_tcp_poll_group *group = nvme_tcp_poll_group(tgroup);
2931 :
2932 1 : if (!STAILQ_EMPTY(&tgroup->connected_qpairs) || !STAILQ_EMPTY(&tgroup->disconnected_qpairs)) {
2933 0 : return -EBUSY;
2934 : }
2935 :
2936 1 : rc = spdk_sock_group_close(&group->sock_group);
2937 1 : if (rc != 0) {
2938 0 : SPDK_ERRLOG("Failed to close the sock group for a tcp poll group.\n");
/* When assertions are compiled out, execution continues: the group is still
 * freed and 0 is returned. */
2939 0 : assert(false);
2940 : }
2941 :
/* tgroup is the first member of struct nvme_tcp_poll_group, so this releases
 * the object allocated in nvme_tcp_poll_group_create(). */
2942 1 : free(tgroup);
2943 :
2944 1 : return 0;
2945 : }
2946 :
/*
 * Return a heap-allocated snapshot of the poll group's TCP statistics.
 *
 * \param tgroup transport poll group to read statistics from.
 * \param _stats output; on success set to a newly allocated stat structure
 * with trtype SPDK_NVME_TRANSPORT_TCP and a copy of the group's stats. It is
 * released with nvme_tcp_poll_group_free_stats().
 *
 * \return 0 on success, -EINVAL if tgroup or _stats is NULL, -ENOMEM if the
 * allocation fails. *_stats is not written on failure.
 */
2947 : static int
2948 3 : nvme_tcp_poll_group_get_stats(struct spdk_nvme_transport_poll_group *tgroup,
2949 : struct spdk_nvme_transport_poll_group_stat **_stats)
2950 : {
2951 : struct nvme_tcp_poll_group *group;
2952 : struct spdk_nvme_transport_poll_group_stat *stats;
2953 :
2954 3 : if (tgroup == NULL || _stats == NULL) {
2955 2 : SPDK_ERRLOG("Invalid stats or group pointer\n");
2956 2 : return -EINVAL;
2957 : }
2958 :
2959 1 : group = nvme_tcp_poll_group(tgroup);
2960 :
2961 1 : stats = calloc(1, sizeof(*stats));
2962 1 : if (!stats) {
2963 0 : SPDK_ERRLOG("Can't allocate memory for TCP stats\n");
2964 0 : return -ENOMEM;
2965 : }
2966 1 : stats->trtype = SPDK_NVME_TRANSPORT_TCP;
2967 1 : memcpy(&stats->tcp, &group->stats, sizeof(group->stats));
2968 :
2969 1 : *_stats = stats;
2970 :
2971 1 : return 0;
2972 : }
2973 :
/*
 * Release a statistics structure with free().
 *
 * \param tgroup not referenced by this function.
 * \param stats structure to free; NULL is accepted because free(NULL) is a no-op.
 */
2974 : static void
2975 1 : nvme_tcp_poll_group_free_stats(struct spdk_nvme_transport_poll_group *tgroup,
2976 : struct spdk_nvme_transport_poll_group_stat *stats)
2977 : {
2978 1 : free(stats);
2979 1 : }
2980 :
/*
 * Report the memory domains used by a TCP controller.
 *
 * \param ctrlr not referenced by this function.
 * \param domains optional output array; when non-NULL and array_size > 0, the
 * first element is set to the system memory domain.
 * \param array_size number of elements available in domains.
 *
 * \return always 1, regardless of whether domains was written - presumably
 * the number of memory domains available; TODO confirm against the caller.
 */
2981 : static int
2982 0 : nvme_tcp_ctrlr_get_memory_domains(const struct spdk_nvme_ctrlr *ctrlr,
2983 : struct spdk_memory_domain **domains, int array_size)
2984 : {
2985 0 : if (domains && array_size > 0) {
2986 0 : domains[0] = spdk_memory_domain_get_system_domain();
2987 : }
2988 :
2989 0 : return 1;
2990 : }
2991 :
/*
 * Transport operations table for NVMe/TCP, registered via
 * SPDK_NVME_TRANSPORT_REGISTER right after the table.
 *
 * Controller scan and the register get/set accessors (synchronous and
 * asynchronous) use the nvme_fabric_* implementations; every other callback is
 * a TCP-specific nvme_tcp_* function from this file.
 */
2992 : const struct spdk_nvme_transport_ops tcp_ops = {
2993 : .name = "TCP",
2994 : .type = SPDK_NVME_TRANSPORT_TCP,
2995 : .ctrlr_construct = nvme_tcp_ctrlr_construct,
2996 : .ctrlr_scan = nvme_fabric_ctrlr_scan,
2997 : .ctrlr_destruct = nvme_tcp_ctrlr_destruct,
2998 : .ctrlr_enable = nvme_tcp_ctrlr_enable,
2999 :
3000 : .ctrlr_set_reg_4 = nvme_fabric_ctrlr_set_reg_4,
3001 : .ctrlr_set_reg_8 = nvme_fabric_ctrlr_set_reg_8,
3002 : .ctrlr_get_reg_4 = nvme_fabric_ctrlr_get_reg_4,
3003 : .ctrlr_get_reg_8 = nvme_fabric_ctrlr_get_reg_8,
3004 : .ctrlr_set_reg_4_async = nvme_fabric_ctrlr_set_reg_4_async,
3005 : .ctrlr_set_reg_8_async = nvme_fabric_ctrlr_set_reg_8_async,
3006 : .ctrlr_get_reg_4_async = nvme_fabric_ctrlr_get_reg_4_async,
3007 : .ctrlr_get_reg_8_async = nvme_fabric_ctrlr_get_reg_8_async,
3008 :
3009 : .ctrlr_get_max_xfer_size = nvme_tcp_ctrlr_get_max_xfer_size,
3010 : .ctrlr_get_max_sges = nvme_tcp_ctrlr_get_max_sges,
3011 :
3012 : .ctrlr_create_io_qpair = nvme_tcp_ctrlr_create_io_qpair,
3013 : .ctrlr_delete_io_qpair = nvme_tcp_ctrlr_delete_io_qpair,
3014 : .ctrlr_connect_qpair = nvme_tcp_ctrlr_connect_qpair,
3015 : .ctrlr_disconnect_qpair = nvme_tcp_ctrlr_disconnect_qpair,
3016 :
3017 : .ctrlr_get_memory_domains = nvme_tcp_ctrlr_get_memory_domains,
3018 :
3019 : .qpair_abort_reqs = nvme_tcp_qpair_abort_reqs,
3020 : .qpair_reset = nvme_tcp_qpair_reset,
3021 : .qpair_submit_request = nvme_tcp_qpair_submit_request,
3022 : .qpair_process_completions = nvme_tcp_qpair_process_completions,
3023 : .qpair_iterate_requests = nvme_tcp_qpair_iterate_requests,
3024 : .admin_qpair_abort_aers = nvme_tcp_admin_qpair_abort_aers,
3025 :
3026 : .poll_group_create = nvme_tcp_poll_group_create,
3027 : .qpair_get_optimal_poll_group = nvme_tcp_qpair_get_optimal_poll_group,
3028 : .poll_group_connect_qpair = nvme_tcp_poll_group_connect_qpair,
3029 : .poll_group_disconnect_qpair = nvme_tcp_poll_group_disconnect_qpair,
3030 : .poll_group_add = nvme_tcp_poll_group_add,
3031 : .poll_group_remove = nvme_tcp_poll_group_remove,
3032 : .poll_group_process_completions = nvme_tcp_poll_group_process_completions,
3033 : .poll_group_destroy = nvme_tcp_poll_group_destroy,
3034 : .poll_group_get_stats = nvme_tcp_poll_group_get_stats,
3035 : .poll_group_free_stats = nvme_tcp_poll_group_free_stats,
3036 : };
3037 :
/* Register the table above under the transport name "tcp". */
3038 1 : SPDK_NVME_TRANSPORT_REGISTER(tcp, &tcp_ops);
3039 :
/*
 * Trace registration for the NVMe/TCP transport (trace group
 * TRACE_GROUP_NVME_TCP). Registers the request object type, the qpair owner
 * type and two tracepoints, NVME_TCP_SUBMIT and NVME_TCP_COMPLETE, together
 * with the name and type of each tracepoint argument.
 *
 * NOTE(review): the integer following the object type in each entry (1 for
 * SUBMIT, 0 for COMPLETE) and the trailing number of each argument (8 or 4)
 * are presumably a "new object" flag and the argument size in bytes - confirm
 * against struct spdk_trace_tpoint_opts.
 */
3040 1 : SPDK_TRACE_REGISTER_FN(nvme_tcp, "nvme_tcp", TRACE_GROUP_NVME_TCP)
3041 : {
3042 0 : struct spdk_trace_tpoint_opts opts[] = {
3043 : {
3044 : "NVME_TCP_SUBMIT", TRACE_NVME_TCP_SUBMIT,
3045 : OWNER_TYPE_NVME_TCP_QP, OBJECT_NVME_TCP_REQ, 1,
3046 : { { "ctx", SPDK_TRACE_ARG_TYPE_PTR, 8 },
3047 : { "cid", SPDK_TRACE_ARG_TYPE_INT, 4 },
3048 : { "opc", SPDK_TRACE_ARG_TYPE_INT, 4 },
3049 : { "dw10", SPDK_TRACE_ARG_TYPE_PTR, 4 },
3050 : { "dw11", SPDK_TRACE_ARG_TYPE_PTR, 4 },
3051 : { "dw12", SPDK_TRACE_ARG_TYPE_PTR, 4 },
3052 : { "qd", SPDK_TRACE_ARG_TYPE_INT, 4 }
3053 : }
3054 : },
3055 : {
3056 : "NVME_TCP_COMPLETE", TRACE_NVME_TCP_COMPLETE,
3057 : OWNER_TYPE_NVME_TCP_QP, OBJECT_NVME_TCP_REQ, 0,
3058 : { { "ctx", SPDK_TRACE_ARG_TYPE_PTR, 8 },
3059 : { "cid", SPDK_TRACE_ARG_TYPE_INT, 4 },
3060 : { "cpl", SPDK_TRACE_ARG_TYPE_PTR, 4 },
3061 : { "qd", SPDK_TRACE_ARG_TYPE_INT, 4 }
3062 : }
3063 : },
3064 : };
3065 :
/* 'p' and 'q' are the single-character identifiers passed for the request
 * object type and the qpair owner type. */
3066 0 : spdk_trace_register_object(OBJECT_NVME_TCP_REQ, 'p');
3067 0 : spdk_trace_register_owner_type(OWNER_TYPE_NVME_TCP_QP, 'q');
3068 0 : spdk_trace_register_description_ext(opts, SPDK_COUNTOF(opts));
3069 0 : }
|