Line data Source code
1 : /* SPDX-License-Identifier: BSD-3-Clause
2 : * Copyright (C) 2016 Intel Corporation.
3 : * All rights reserved.
4 : */
5 :
6 : #include "spdk/stdinc.h"
7 : #include "spdk/likely.h"
8 :
9 : #include "spdk_internal/event.h"
10 : #include "spdk_internal/usdt.h"
11 :
12 : #include "spdk/log.h"
13 : #include "spdk/thread.h"
14 : #include "spdk/env.h"
15 : #include "spdk/util.h"
16 : #include "spdk/scheduler.h"
17 : #include "spdk/string.h"
18 : #include "spdk/fd_group.h"
19 :
20 : #ifdef __linux__
21 : #include <sys/prctl.h>
22 : #include <sys/eventfd.h>
23 : #endif
24 :
25 : #ifdef __FreeBSD__
26 : #include <pthread_np.h>
27 : #endif
28 :
29 : #define SPDK_EVENT_BATCH_SIZE 8
30 :
31 : static struct spdk_reactor *g_reactors;
32 : static uint32_t g_reactor_count;
33 : static struct spdk_cpuset g_reactor_core_mask;
34 : static enum spdk_reactor_state g_reactor_state = SPDK_REACTOR_STATE_UNINITIALIZED;
35 :
36 : static bool g_framework_context_switch_monitor_enabled = true;
37 :
38 : static struct spdk_mempool *g_spdk_event_mempool = NULL;
39 :
40 : TAILQ_HEAD(, spdk_scheduler) g_scheduler_list
41 : = TAILQ_HEAD_INITIALIZER(g_scheduler_list);
42 :
43 : static struct spdk_scheduler *g_scheduler = NULL;
44 : static struct spdk_reactor *g_scheduling_reactor;
45 : bool g_scheduling_in_progress = false;
46 : static uint64_t g_scheduler_period = 0;
47 : static uint32_t g_scheduler_core_number;
48 : static struct spdk_scheduler_core_info *g_core_infos = NULL;
49 :
50 : TAILQ_HEAD(, spdk_governor) g_governor_list
51 : = TAILQ_HEAD_INITIALIZER(g_governor_list);
52 :
53 : static struct spdk_governor *g_governor = NULL;
54 :
55 : static int reactor_interrupt_init(struct spdk_reactor *reactor);
56 : static void reactor_interrupt_fini(struct spdk_reactor *reactor);
57 :
58 : static pthread_mutex_t g_stopping_reactors_mtx = PTHREAD_MUTEX_INITIALIZER;
59 : static bool g_stopping_reactors = false;
60 :
61 : static struct spdk_scheduler *
62 5 : _scheduler_find(const char *name)
63 : {
64 : struct spdk_scheduler *tmp;
65 :
66 9 : TAILQ_FOREACH(tmp, &g_scheduler_list, link) {
67 7 : if (strcmp(name, tmp->name) == 0) {
68 3 : return tmp;
69 : }
70 : }
71 :
72 2 : return NULL;
73 : }
74 :
75 : int
76 3 : spdk_scheduler_set(const char *name)
77 : {
78 : struct spdk_scheduler *scheduler;
79 3 : int rc = 0;
80 :
81 : /* NULL scheduler was specifically requested */
82 3 : if (name == NULL) {
83 0 : if (g_scheduler) {
84 0 : g_scheduler->deinit();
85 : }
86 0 : g_scheduler = NULL;
87 0 : return 0;
88 : }
89 :
90 3 : scheduler = _scheduler_find(name);
91 3 : if (scheduler == NULL) {
92 0 : SPDK_ERRLOG("Requested scheduler is missing\n");
93 0 : return -EINVAL;
94 : }
95 :
96 3 : if (g_scheduler == scheduler) {
97 2 : return 0;
98 : }
99 :
100 1 : rc = scheduler->init();
101 1 : if (rc == 0) {
102 1 : if (g_scheduler) {
103 0 : g_scheduler->deinit();
104 : }
105 1 : g_scheduler = scheduler;
106 : }
107 :
108 1 : return rc;
109 : }
110 :
111 : struct spdk_scheduler *
112 6 : spdk_scheduler_get(void)
113 : {
114 6 : return g_scheduler;
115 : }
116 :
117 : uint64_t
118 0 : spdk_scheduler_get_period(void)
119 : {
120 : /* Convert from ticks to microseconds */
121 0 : return (g_scheduler_period * SPDK_SEC_TO_USEC / spdk_get_ticks_hz());
122 : }
123 :
124 : void
125 0 : spdk_scheduler_set_period(uint64_t period)
126 : {
127 : /* Convert microseconds to ticks */
128 0 : g_scheduler_period = period * spdk_get_ticks_hz() / SPDK_SEC_TO_USEC;
129 0 : }
130 :
131 : void
132 2 : spdk_scheduler_register(struct spdk_scheduler *scheduler)
133 : {
134 2 : if (_scheduler_find(scheduler->name)) {
135 0 : SPDK_ERRLOG("scheduler named '%s' already registered.\n", scheduler->name);
136 0 : assert(false);
137 : return;
138 : }
139 :
140 2 : TAILQ_INSERT_TAIL(&g_scheduler_list, scheduler, link);
141 : }
142 :
143 : static void
144 27 : reactor_construct(struct spdk_reactor *reactor, uint32_t lcore)
145 : {
146 27 : reactor->lcore = lcore;
147 27 : reactor->flags.is_valid = true;
148 :
149 27 : TAILQ_INIT(&reactor->threads);
150 27 : reactor->thread_count = 0;
151 27 : spdk_cpuset_zero(&reactor->notify_cpuset);
152 :
153 27 : reactor->events = spdk_ring_create(SPDK_RING_TYPE_MP_SC, 65536, SPDK_ENV_SOCKET_ID_ANY);
154 27 : if (reactor->events == NULL) {
155 0 : SPDK_ERRLOG("Failed to allocate events ring\n");
156 0 : assert(false);
157 : }
158 :
159 : /* Always initialize interrupt facilities for reactor */
160 27 : if (reactor_interrupt_init(reactor) != 0) {
161 : /* Reactor interrupt facilities are necessary if seting app to interrupt mode. */
162 0 : if (spdk_interrupt_mode_is_enabled()) {
163 0 : SPDK_ERRLOG("Failed to prepare intr facilities\n");
164 0 : assert(false);
165 : }
166 0 : return;
167 : }
168 :
169 : /* If application runs with full interrupt ability,
170 : * all reactors are going to run in interrupt mode.
171 : */
172 27 : if (spdk_interrupt_mode_is_enabled()) {
173 : uint32_t i;
174 :
175 0 : SPDK_ENV_FOREACH_CORE(i) {
176 0 : spdk_cpuset_set_cpu(&reactor->notify_cpuset, i, true);
177 : }
178 0 : reactor->in_interrupt = true;
179 : }
180 : }
181 :
182 : struct spdk_reactor *
183 405 : spdk_reactor_get(uint32_t lcore)
184 : {
185 : struct spdk_reactor *reactor;
186 :
187 405 : if (g_reactors == NULL) {
188 0 : SPDK_WARNLOG("Called spdk_reactor_get() while the g_reactors array was NULL!\n");
189 0 : return NULL;
190 : }
191 :
192 405 : if (lcore >= g_reactor_count) {
193 0 : return NULL;
194 : }
195 :
196 405 : reactor = &g_reactors[lcore];
197 :
198 405 : if (reactor->flags.is_valid == false) {
199 0 : return NULL;
200 : }
201 :
202 405 : return reactor;
203 : }
204 :
205 : static int reactor_thread_op(struct spdk_thread *thread, enum spdk_thread_op op);
206 : static bool reactor_thread_op_supported(enum spdk_thread_op op);
207 :
208 : int
209 9 : spdk_reactors_init(size_t msg_mempool_size)
210 : {
211 : struct spdk_reactor *reactor;
212 : int rc;
213 : uint32_t i, current_core;
214 9 : char mempool_name[32];
215 :
216 9 : snprintf(mempool_name, sizeof(mempool_name), "evtpool_%d", getpid());
217 9 : g_spdk_event_mempool = spdk_mempool_create(mempool_name,
218 : 262144 - 1, /* Power of 2 minus 1 is optimal for memory consumption */
219 : sizeof(struct spdk_event),
220 : SPDK_MEMPOOL_DEFAULT_CACHE_SIZE,
221 : SPDK_ENV_SOCKET_ID_ANY);
222 :
223 9 : if (g_spdk_event_mempool == NULL) {
224 0 : SPDK_ERRLOG("spdk_event_mempool creation failed\n");
225 0 : return -1;
226 : }
227 :
228 : /* struct spdk_reactor must be aligned on 64 byte boundary */
229 9 : g_reactor_count = spdk_env_get_last_core() + 1;
230 9 : rc = posix_memalign((void **)&g_reactors, 64,
231 : g_reactor_count * sizeof(struct spdk_reactor));
232 9 : if (rc != 0) {
233 0 : SPDK_ERRLOG("Could not allocate array size=%u for g_reactors\n",
234 : g_reactor_count);
235 0 : spdk_mempool_free(g_spdk_event_mempool);
236 0 : return -1;
237 : }
238 :
239 9 : g_core_infos = calloc(g_reactor_count, sizeof(*g_core_infos));
240 9 : if (g_core_infos == NULL) {
241 0 : SPDK_ERRLOG("Could not allocate memory for g_core_infos\n");
242 0 : spdk_mempool_free(g_spdk_event_mempool);
243 0 : free(g_reactors);
244 0 : return -ENOMEM;
245 : }
246 :
247 9 : memset(g_reactors, 0, (g_reactor_count) * sizeof(struct spdk_reactor));
248 :
249 9 : rc = spdk_thread_lib_init_ext(reactor_thread_op, reactor_thread_op_supported,
250 : sizeof(struct spdk_lw_thread), msg_mempool_size);
251 9 : if (rc != 0) {
252 0 : SPDK_ERRLOG("Initialize spdk thread lib failed\n");
253 0 : spdk_mempool_free(g_spdk_event_mempool);
254 0 : free(g_reactors);
255 0 : free(g_core_infos);
256 0 : return rc;
257 : }
258 :
259 35 : SPDK_ENV_FOREACH_CORE(i) {
260 26 : reactor_construct(&g_reactors[i], i);
261 : }
262 :
263 9 : current_core = spdk_env_get_current_core();
264 9 : reactor = spdk_reactor_get(current_core);
265 9 : assert(reactor != NULL);
266 9 : g_scheduling_reactor = reactor;
267 :
268 9 : g_reactor_state = SPDK_REACTOR_STATE_INITIALIZED;
269 :
270 9 : return 0;
271 : }
272 :
273 : void
274 9 : spdk_reactors_fini(void)
275 : {
276 : uint32_t i;
277 : struct spdk_reactor *reactor;
278 :
279 9 : if (g_reactor_state == SPDK_REACTOR_STATE_UNINITIALIZED) {
280 0 : return;
281 : }
282 :
283 9 : spdk_thread_lib_fini();
284 :
285 35 : SPDK_ENV_FOREACH_CORE(i) {
286 26 : reactor = spdk_reactor_get(i);
287 26 : assert(reactor != NULL);
288 26 : assert(reactor->thread_count == 0);
289 26 : if (reactor->events != NULL) {
290 26 : spdk_ring_free(reactor->events);
291 : }
292 :
293 26 : reactor_interrupt_fini(reactor);
294 :
295 26 : if (g_core_infos != NULL) {
296 26 : free(g_core_infos[i].thread_infos);
297 : }
298 : }
299 :
300 9 : spdk_mempool_free(g_spdk_event_mempool);
301 :
302 9 : free(g_reactors);
303 9 : g_reactors = NULL;
304 9 : free(g_core_infos);
305 9 : g_core_infos = NULL;
306 : }
307 :
308 : static void _reactor_set_interrupt_mode(void *arg1, void *arg2);
309 :
310 : static void
311 4 : _reactor_set_notify_cpuset(void *arg1, void *arg2)
312 : {
313 4 : struct spdk_reactor *target = arg1;
314 4 : struct spdk_reactor *reactor = spdk_reactor_get(spdk_env_get_current_core());
315 :
316 4 : assert(reactor != NULL);
317 4 : spdk_cpuset_set_cpu(&reactor->notify_cpuset, target->lcore, target->new_in_interrupt);
318 4 : }
319 :
320 : static void
321 20 : _event_call(uint32_t lcore, spdk_event_fn fn, void *arg1, void *arg2)
322 : {
323 : struct spdk_event *ev;
324 :
325 20 : ev = spdk_event_allocate(lcore, fn, arg1, arg2);
326 20 : assert(ev);
327 20 : spdk_event_call(ev);
328 20 : }
329 :
330 : static void
331 2 : _reactor_set_notify_cpuset_cpl(void *arg1, void *arg2)
332 : {
333 2 : struct spdk_reactor *target = arg1;
334 :
335 2 : if (target->new_in_interrupt == false) {
336 1 : target->set_interrupt_mode_in_progress = false;
337 1 : spdk_thread_send_msg(spdk_thread_get_app_thread(), target->set_interrupt_mode_cb_fn,
338 : target->set_interrupt_mode_cb_arg);
339 : } else {
340 1 : _event_call(target->lcore, _reactor_set_interrupt_mode, target, NULL);
341 : }
342 2 : }
343 :
344 : static void
345 0 : _reactor_set_thread_interrupt_mode(void *ctx)
346 : {
347 0 : struct spdk_reactor *reactor = ctx;
348 :
349 0 : spdk_thread_set_interrupt_mode(reactor->in_interrupt);
350 0 : }
351 :
352 : static void
353 2 : _reactor_set_interrupt_mode(void *arg1, void *arg2)
354 : {
355 2 : struct spdk_reactor *target = arg1;
356 : struct spdk_thread *thread;
357 : struct spdk_fd_group *grp;
358 : struct spdk_lw_thread *lw_thread, *tmp;
359 :
360 2 : assert(target == spdk_reactor_get(spdk_env_get_current_core()));
361 2 : assert(target != NULL);
362 2 : assert(target->in_interrupt != target->new_in_interrupt);
363 2 : SPDK_DEBUGLOG(reactor, "Do reactor set on core %u from %s to state %s\n",
364 : target->lcore, target->in_interrupt ? "intr" : "poll", target->new_in_interrupt ? "intr" : "poll");
365 :
366 2 : target->in_interrupt = target->new_in_interrupt;
367 :
368 2 : if (spdk_interrupt_mode_is_enabled()) {
369 : /* Align spdk_thread with reactor to interrupt mode or poll mode */
370 0 : TAILQ_FOREACH_SAFE(lw_thread, &target->threads, link, tmp) {
371 0 : thread = spdk_thread_get_from_ctx(lw_thread);
372 0 : if (target->in_interrupt) {
373 0 : grp = spdk_thread_get_interrupt_fd_group(thread);
374 0 : spdk_fd_group_nest(target->fgrp, grp);
375 : } else {
376 0 : grp = spdk_thread_get_interrupt_fd_group(thread);
377 0 : spdk_fd_group_unnest(target->fgrp, grp);
378 : }
379 :
380 0 : spdk_thread_send_msg(thread, _reactor_set_thread_interrupt_mode, target);
381 : }
382 : }
383 :
384 2 : if (target->new_in_interrupt == false) {
385 : /* Reactor is no longer in interrupt mode. Refresh the tsc_last to accurately
386 : * track reactor stats. */
387 1 : target->tsc_last = spdk_get_ticks();
388 1 : spdk_for_each_reactor(_reactor_set_notify_cpuset, target, NULL, _reactor_set_notify_cpuset_cpl);
389 : } else {
390 1 : uint64_t notify = 1;
391 1 : int rc = 0;
392 :
393 : /* Always trigger spdk_event and resched event in case of race condition */
394 1 : rc = write(target->events_fd, ¬ify, sizeof(notify));
395 1 : if (rc < 0) {
396 0 : SPDK_ERRLOG("failed to notify event queue: %s.\n", spdk_strerror(errno));
397 : }
398 1 : rc = write(target->resched_fd, ¬ify, sizeof(notify));
399 1 : if (rc < 0) {
400 0 : SPDK_ERRLOG("failed to notify reschedule: %s.\n", spdk_strerror(errno));
401 : }
402 :
403 1 : target->set_interrupt_mode_in_progress = false;
404 1 : spdk_thread_send_msg(spdk_thread_get_app_thread(), target->set_interrupt_mode_cb_fn,
405 : target->set_interrupt_mode_cb_arg);
406 : }
407 2 : }
408 :
409 : int
410 2 : spdk_reactor_set_interrupt_mode(uint32_t lcore, bool new_in_interrupt,
411 : spdk_reactor_set_interrupt_mode_cb cb_fn, void *cb_arg)
412 : {
413 : struct spdk_reactor *target;
414 :
415 2 : target = spdk_reactor_get(lcore);
416 2 : if (target == NULL) {
417 0 : return -EINVAL;
418 : }
419 :
420 : /* Eventfd has to be supported in order to use interrupt functionality. */
421 2 : if (target->fgrp == NULL) {
422 0 : return -ENOTSUP;
423 : }
424 :
425 2 : if (!spdk_thread_is_app_thread(NULL)) {
426 0 : SPDK_ERRLOG("It is only permitted within spdk application thread.\n");
427 0 : return -EPERM;
428 : }
429 :
430 2 : if (target->in_interrupt == new_in_interrupt) {
431 0 : cb_fn(cb_arg);
432 0 : return 0;
433 : }
434 :
435 2 : if (target->set_interrupt_mode_in_progress) {
436 0 : SPDK_NOTICELOG("Reactor(%u) is already in progress to set interrupt mode\n", lcore);
437 0 : return -EBUSY;
438 : }
439 2 : target->set_interrupt_mode_in_progress = true;
440 :
441 2 : target->new_in_interrupt = new_in_interrupt;
442 2 : target->set_interrupt_mode_cb_fn = cb_fn;
443 2 : target->set_interrupt_mode_cb_arg = cb_arg;
444 :
445 2 : SPDK_DEBUGLOG(reactor, "Starting reactor event from %d to %d\n",
446 : spdk_env_get_current_core(), lcore);
447 :
448 2 : if (new_in_interrupt == false) {
449 : /* For potential race cases, when setting the reactor to poll mode,
450 : * first change the mode of the reactor and then clear the corresponding
451 : * bit of the notify_cpuset of each reactor.
452 : */
453 1 : _event_call(lcore, _reactor_set_interrupt_mode, target, NULL);
454 : } else {
455 : /* For race cases, when setting the reactor to interrupt mode, first set the
456 : * corresponding bit of the notify_cpuset of each reactor and then change the mode.
457 : */
458 1 : spdk_for_each_reactor(_reactor_set_notify_cpuset, target, NULL, _reactor_set_notify_cpuset_cpl);
459 : }
460 :
461 2 : return 0;
462 : }
463 :
464 : struct spdk_event *
465 51 : spdk_event_allocate(uint32_t lcore, spdk_event_fn fn, void *arg1, void *arg2)
466 : {
467 51 : struct spdk_event *event = NULL;
468 51 : struct spdk_reactor *reactor = spdk_reactor_get(lcore);
469 :
470 51 : if (!reactor) {
471 0 : assert(false);
472 : return NULL;
473 : }
474 :
475 51 : event = spdk_mempool_get(g_spdk_event_mempool);
476 51 : if (event == NULL) {
477 0 : assert(false);
478 : return NULL;
479 : }
480 :
481 51 : event->lcore = lcore;
482 51 : event->fn = fn;
483 51 : event->arg1 = arg1;
484 51 : event->arg2 = arg2;
485 :
486 51 : return event;
487 : }
488 :
489 : void
490 51 : spdk_event_call(struct spdk_event *event)
491 : {
492 : int rc;
493 : struct spdk_reactor *reactor;
494 51 : struct spdk_reactor *local_reactor = NULL;
495 51 : uint32_t current_core = spdk_env_get_current_core();
496 :
497 51 : reactor = spdk_reactor_get(event->lcore);
498 :
499 51 : assert(reactor != NULL);
500 51 : assert(reactor->events != NULL);
501 :
502 51 : rc = spdk_ring_enqueue(reactor->events, (void **)&event, 1, NULL);
503 51 : if (rc != 1) {
504 0 : assert(false);
505 : }
506 :
507 51 : if (current_core != SPDK_ENV_LCORE_ID_ANY) {
508 51 : local_reactor = spdk_reactor_get(current_core);
509 : }
510 :
511 : /* If spdk_event_call isn't called on a reactor, always send a notification.
512 : * If it is called on a reactor, send a notification if the destination reactor
513 : * is indicated in interrupt mode state.
514 : */
515 51 : if (spdk_unlikely(local_reactor == NULL) ||
516 51 : spdk_unlikely(spdk_cpuset_get_cpu(&local_reactor->notify_cpuset, event->lcore))) {
517 4 : uint64_t notify = 1;
518 :
519 4 : rc = write(reactor->events_fd, ¬ify, sizeof(notify));
520 4 : if (rc < 0) {
521 0 : SPDK_ERRLOG("failed to notify event queue: %s.\n", spdk_strerror(errno));
522 : }
523 : }
524 51 : }
525 :
526 : static inline int
527 118 : event_queue_run_batch(void *arg)
528 : {
529 118 : struct spdk_reactor *reactor = arg;
530 : size_t count, i;
531 118 : void *events[SPDK_EVENT_BATCH_SIZE];
532 : struct spdk_thread *thread;
533 : struct spdk_lw_thread *lw_thread;
534 :
535 : #ifdef DEBUG
536 : /*
537 : * spdk_ring_dequeue() fills events and returns how many entries it wrote,
538 : * so we will never actually read uninitialized data from events, but just to be sure
539 : * (and to silence a static analyzer false positive), initialize the array to NULL pointers.
540 : */
541 118 : memset(events, 0, sizeof(events));
542 : #endif
543 :
544 : /* Operate event notification if this reactor currently runs in interrupt state */
545 118 : if (spdk_unlikely(reactor->in_interrupt)) {
546 3 : uint64_t notify = 1;
547 : int rc;
548 :
549 : /* There may be race between event_acknowledge and another producer's event_notify,
550 : * so event_acknowledge should be applied ahead. And then check for self's event_notify.
551 : * This can avoid event notification missing.
552 : */
553 3 : rc = read(reactor->events_fd, ¬ify, sizeof(notify));
554 3 : if (rc < 0) {
555 0 : SPDK_ERRLOG("failed to acknowledge event queue: %s.\n", spdk_strerror(errno));
556 0 : return -errno;
557 : }
558 :
559 3 : count = spdk_ring_dequeue(reactor->events, events, SPDK_EVENT_BATCH_SIZE);
560 :
561 3 : if (spdk_ring_count(reactor->events) != 0) {
562 : /* Trigger new notification if there are still events in event-queue waiting for processing. */
563 0 : rc = write(reactor->events_fd, ¬ify, sizeof(notify));
564 0 : if (rc < 0) {
565 0 : SPDK_ERRLOG("failed to notify event queue: %s.\n", spdk_strerror(errno));
566 0 : return -errno;
567 : }
568 : }
569 : } else {
570 115 : count = spdk_ring_dequeue(reactor->events, events, SPDK_EVENT_BATCH_SIZE);
571 : }
572 :
573 118 : if (count == 0) {
574 69 : return 0;
575 : }
576 :
577 : /* Execute the events. There are still some remaining events
578 : * that must occur on an SPDK thread. To accommodate those, try to
579 : * run them on the first thread in the list, if it exists. */
580 49 : lw_thread = TAILQ_FIRST(&reactor->threads);
581 49 : if (lw_thread) {
582 17 : thread = spdk_thread_get_from_ctx(lw_thread);
583 : } else {
584 32 : thread = NULL;
585 : }
586 :
587 100 : for (i = 0; i < count; i++) {
588 51 : struct spdk_event *event = events[i];
589 :
590 51 : assert(event != NULL);
591 51 : spdk_set_thread(thread);
592 :
593 : SPDK_DTRACE_PROBE3(event_exec, event->fn,
594 : event->arg1, event->arg2);
595 51 : event->fn(event->arg1, event->arg2);
596 51 : spdk_set_thread(NULL);
597 : }
598 :
599 49 : spdk_mempool_put_bulk(g_spdk_event_mempool, events, count);
600 :
601 49 : return (int)count;
602 : }
603 :
604 : /* 1s */
605 : #define CONTEXT_SWITCH_MONITOR_PERIOD 1000000
606 :
607 : static int
608 8 : get_rusage(struct spdk_reactor *reactor)
609 : {
610 8 : struct rusage rusage;
611 :
612 8 : if (getrusage(RUSAGE_THREAD, &rusage) != 0) {
613 0 : return -1;
614 : }
615 :
616 8 : if (rusage.ru_nvcsw != reactor->rusage.ru_nvcsw || rusage.ru_nivcsw != reactor->rusage.ru_nivcsw) {
617 8 : SPDK_INFOLOG(reactor,
618 : "Reactor %d: %ld voluntary context switches and %ld involuntary context switches in the last second.\n",
619 : reactor->lcore, rusage.ru_nvcsw - reactor->rusage.ru_nvcsw,
620 : rusage.ru_nivcsw - reactor->rusage.ru_nivcsw);
621 : }
622 8 : reactor->rusage = rusage;
623 :
624 8 : return -1;
625 : }
626 :
627 : void
628 0 : spdk_framework_enable_context_switch_monitor(bool enable)
629 : {
630 : /* This global is being read by multiple threads, so this isn't
631 : * strictly thread safe. However, we're toggling between true and
632 : * false here, and if a thread sees the value update later than it
633 : * should, it's no big deal. */
634 0 : g_framework_context_switch_monitor_enabled = enable;
635 0 : }
636 :
637 : bool
638 0 : spdk_framework_context_switch_monitor_enabled(void)
639 : {
640 0 : return g_framework_context_switch_monitor_enabled;
641 : }
642 :
/* Name the calling POSIX thread `thread_name`, using whichever API the
 * platform provides (prctl on Linux, pthread_set_name_np on FreeBSD,
 * pthread_setname_np elsewhere).
 */
static void
_set_thread_name(const char *thread_name)
{
#ifdef __linux__
	prctl(PR_SET_NAME, thread_name, 0, 0, 0);
#else
#ifdef __FreeBSD__
	pthread_set_name_np(pthread_self(), thread_name);
#else
	pthread_setname_np(pthread_self(), thread_name);
#endif
#endif
}
654 :
655 : static void
656 15 : _init_thread_stats(struct spdk_reactor *reactor, struct spdk_lw_thread *lw_thread)
657 : {
658 15 : struct spdk_thread *thread = spdk_thread_get_from_ctx(lw_thread);
659 : struct spdk_thread_stats prev_total_stats;
660 :
661 : /* Read total_stats before updating it to calculate stats during the last scheduling period. */
662 15 : prev_total_stats = lw_thread->total_stats;
663 :
664 15 : spdk_set_thread(thread);
665 15 : spdk_thread_get_stats(&lw_thread->total_stats);
666 15 : spdk_set_thread(NULL);
667 :
668 15 : lw_thread->current_stats.busy_tsc = lw_thread->total_stats.busy_tsc - prev_total_stats.busy_tsc;
669 15 : lw_thread->current_stats.idle_tsc = lw_thread->total_stats.idle_tsc - prev_total_stats.idle_tsc;
670 15 : }
671 :
672 : static void
673 8 : _threads_reschedule_thread(struct spdk_scheduler_thread_info *thread_info)
674 : {
675 : struct spdk_lw_thread *lw_thread;
676 : struct spdk_thread *thread;
677 :
678 8 : thread = spdk_thread_get_by_id(thread_info->thread_id);
679 8 : if (thread == NULL) {
680 : /* Thread no longer exists. */
681 0 : return;
682 : }
683 8 : lw_thread = spdk_thread_get_ctx(thread);
684 8 : assert(lw_thread != NULL);
685 :
686 8 : lw_thread->lcore = thread_info->lcore;
687 8 : lw_thread->resched = true;
688 : }
689 :
690 : static void
691 6 : _threads_reschedule(struct spdk_scheduler_core_info *cores_info)
692 : {
693 : struct spdk_scheduler_core_info *core;
694 : struct spdk_scheduler_thread_info *thread_info;
695 : uint32_t i, j;
696 :
697 21 : SPDK_ENV_FOREACH_CORE(i) {
698 15 : core = &cores_info[i];
699 30 : for (j = 0; j < core->threads_count; j++) {
700 15 : thread_info = &core->thread_infos[j];
701 15 : if (thread_info->lcore != i) {
702 8 : _threads_reschedule_thread(thread_info);
703 : }
704 : }
705 15 : core->threads_count = 0;
706 15 : free(core->thread_infos);
707 15 : core->thread_infos = NULL;
708 : }
709 6 : }
710 :
711 : static void
712 6 : _reactors_scheduler_fini(void)
713 : {
714 : /* Reschedule based on the balancing output */
715 6 : _threads_reschedule(g_core_infos);
716 :
717 6 : g_scheduling_in_progress = false;
718 6 : }
719 :
720 : static void
721 8 : _reactors_scheduler_update_core_mode(void *ctx)
722 : {
723 : struct spdk_reactor *reactor;
724 : uint32_t i;
725 8 : int rc = 0;
726 :
727 21 : for (i = g_scheduler_core_number; i < SPDK_ENV_LCORE_ID_ANY; i = spdk_env_get_next_core(i)) {
728 15 : reactor = spdk_reactor_get(i);
729 15 : assert(reactor != NULL);
730 15 : if (reactor->in_interrupt != g_core_infos[i].interrupt_mode) {
731 : /* Switch next found reactor to new state */
732 2 : rc = spdk_reactor_set_interrupt_mode(i, g_core_infos[i].interrupt_mode,
733 : _reactors_scheduler_update_core_mode, NULL);
734 2 : if (rc == 0) {
735 : /* Set core to start with after callback completes */
736 2 : g_scheduler_core_number = spdk_env_get_next_core(i);
737 2 : return;
738 : }
739 : }
740 : }
741 6 : _reactors_scheduler_fini();
742 : }
743 :
744 : static void
745 0 : _reactors_scheduler_cancel(void *arg1, void *arg2)
746 : {
747 : struct spdk_scheduler_core_info *core;
748 : uint32_t i;
749 :
750 0 : SPDK_ENV_FOREACH_CORE(i) {
751 0 : core = &g_core_infos[i];
752 0 : core->threads_count = 0;
753 0 : free(core->thread_infos);
754 0 : core->thread_infos = NULL;
755 : }
756 :
757 0 : g_scheduling_in_progress = false;
758 0 : }
759 :
760 : static void
761 6 : _reactors_scheduler_balance(void *arg1, void *arg2)
762 : {
763 6 : struct spdk_scheduler *scheduler = spdk_scheduler_get();
764 :
765 6 : if (g_reactor_state != SPDK_REACTOR_STATE_RUNNING || scheduler == NULL) {
766 0 : _reactors_scheduler_cancel(NULL, NULL);
767 0 : return;
768 : }
769 :
770 6 : scheduler->balance(g_core_infos, g_reactor_count);
771 :
772 6 : g_scheduler_core_number = spdk_env_get_first_core();
773 6 : _reactors_scheduler_update_core_mode(NULL);
774 : }
775 :
776 : /* Phase 1 of thread scheduling is to gather metrics on the existing threads */
777 : static void
778 15 : _reactors_scheduler_gather_metrics(void *arg1, void *arg2)
779 : {
780 : struct spdk_scheduler_core_info *core_info;
781 : struct spdk_lw_thread *lw_thread;
782 : struct spdk_thread *thread;
783 : struct spdk_reactor *reactor;
784 : uint32_t next_core;
785 15 : uint32_t i = 0;
786 :
787 15 : reactor = spdk_reactor_get(spdk_env_get_current_core());
788 15 : assert(reactor != NULL);
789 15 : core_info = &g_core_infos[reactor->lcore];
790 15 : core_info->lcore = reactor->lcore;
791 15 : core_info->current_idle_tsc = reactor->idle_tsc - core_info->total_idle_tsc;
792 15 : core_info->total_idle_tsc = reactor->idle_tsc;
793 15 : core_info->current_busy_tsc = reactor->busy_tsc - core_info->total_busy_tsc;
794 15 : core_info->total_busy_tsc = reactor->busy_tsc;
795 15 : core_info->interrupt_mode = reactor->in_interrupt;
796 15 : core_info->threads_count = 0;
797 :
798 15 : SPDK_DEBUGLOG(reactor, "Gathering metrics on %u\n", reactor->lcore);
799 :
800 15 : if (reactor->thread_count > 0) {
801 11 : core_info->thread_infos = calloc(reactor->thread_count, sizeof(*core_info->thread_infos));
802 11 : if (core_info->thread_infos == NULL) {
803 0 : SPDK_ERRLOG("Failed to allocate memory when gathering metrics on %u\n", reactor->lcore);
804 :
805 : /* Cancel this round of schedule work */
806 0 : _event_call(g_scheduling_reactor->lcore, _reactors_scheduler_cancel, NULL, NULL);
807 0 : return;
808 : }
809 :
810 26 : TAILQ_FOREACH(lw_thread, &reactor->threads, link) {
811 15 : _init_thread_stats(reactor, lw_thread);
812 :
813 15 : core_info->thread_infos[i].lcore = lw_thread->lcore;
814 15 : thread = spdk_thread_get_from_ctx(lw_thread);
815 15 : assert(thread != NULL);
816 15 : core_info->thread_infos[i].thread_id = spdk_thread_get_id(thread);
817 15 : core_info->thread_infos[i].total_stats = lw_thread->total_stats;
818 15 : core_info->thread_infos[i].current_stats = lw_thread->current_stats;
819 15 : core_info->threads_count++;
820 15 : assert(core_info->threads_count <= reactor->thread_count);
821 15 : i++;
822 : }
823 : }
824 :
825 15 : next_core = spdk_env_get_next_core(reactor->lcore);
826 15 : if (next_core == UINT32_MAX) {
827 6 : next_core = spdk_env_get_first_core();
828 : }
829 :
830 : /* If we've looped back around to the scheduler thread, move to the next phase */
831 15 : if (next_core == g_scheduling_reactor->lcore) {
832 : /* Phase 2 of scheduling is rebalancing - deciding which threads to move where */
833 6 : _event_call(next_core, _reactors_scheduler_balance, NULL, NULL);
834 6 : return;
835 : }
836 :
837 9 : _event_call(next_core, _reactors_scheduler_gather_metrics, NULL, NULL);
838 : }
839 :
840 : static int _reactor_schedule_thread(struct spdk_thread *thread);
841 : static uint64_t g_rusage_period;
842 :
843 : static void
844 19 : _reactor_remove_lw_thread(struct spdk_reactor *reactor, struct spdk_lw_thread *lw_thread)
845 : {
846 19 : struct spdk_thread *thread = spdk_thread_get_from_ctx(lw_thread);
847 : struct spdk_fd_group *grp;
848 :
849 19 : TAILQ_REMOVE(&reactor->threads, lw_thread, link);
850 19 : assert(reactor->thread_count > 0);
851 19 : reactor->thread_count--;
852 :
853 : /* Operate thread intr if running with full interrupt ability */
854 19 : if (spdk_interrupt_mode_is_enabled()) {
855 0 : if (reactor->in_interrupt) {
856 0 : grp = spdk_thread_get_interrupt_fd_group(thread);
857 0 : spdk_fd_group_unnest(reactor->fgrp, grp);
858 : }
859 : }
860 19 : }
861 :
862 : static bool
863 57 : reactor_post_process_lw_thread(struct spdk_reactor *reactor, struct spdk_lw_thread *lw_thread)
864 : {
865 57 : struct spdk_thread *thread = spdk_thread_get_from_ctx(lw_thread);
866 :
867 57 : if (spdk_unlikely(spdk_thread_is_exited(thread) &&
868 : spdk_thread_is_idle(thread))) {
869 11 : _reactor_remove_lw_thread(reactor, lw_thread);
870 11 : spdk_thread_destroy(thread);
871 11 : return true;
872 : }
873 :
874 46 : if (spdk_unlikely(lw_thread->resched && !spdk_thread_is_bound(thread))) {
875 8 : lw_thread->resched = false;
876 8 : _reactor_remove_lw_thread(reactor, lw_thread);
877 8 : _reactor_schedule_thread(thread);
878 8 : return true;
879 : }
880 :
881 38 : return false;
882 : }
883 :
884 : static void
885 0 : reactor_interrupt_run(struct spdk_reactor *reactor)
886 : {
887 0 : int block_timeout = -1; /* _EPOLL_WAIT_FOREVER */
888 :
889 0 : spdk_fd_group_wait(reactor->fgrp, block_timeout);
890 0 : }
891 :
892 : static void
893 43 : _reactor_run(struct spdk_reactor *reactor)
894 : {
895 : struct spdk_thread *thread;
896 : struct spdk_lw_thread *lw_thread, *tmp;
897 : uint64_t now;
898 : int rc;
899 :
900 43 : event_queue_run_batch(reactor);
901 :
902 : /* If no threads are present on the reactor,
903 : * tsc_last gets outdated. Update it to track
904 : * thread execution time correctly. */
905 43 : if (spdk_unlikely(TAILQ_EMPTY(&reactor->threads))) {
906 4 : now = spdk_get_ticks();
907 4 : reactor->idle_tsc += now - reactor->tsc_last;
908 4 : reactor->tsc_last = now;
909 4 : return;
910 : }
911 :
912 96 : TAILQ_FOREACH_SAFE(lw_thread, &reactor->threads, link, tmp) {
913 57 : thread = spdk_thread_get_from_ctx(lw_thread);
914 57 : rc = spdk_thread_poll(thread, 0, reactor->tsc_last);
915 :
916 57 : now = spdk_thread_get_last_tsc(thread);
917 57 : if (rc == 0) {
918 49 : reactor->idle_tsc += now - reactor->tsc_last;
919 8 : } else if (rc > 0) {
920 8 : reactor->busy_tsc += now - reactor->tsc_last;
921 : }
922 57 : reactor->tsc_last = now;
923 :
924 57 : reactor_post_process_lw_thread(reactor, lw_thread);
925 : }
926 : }
927 :
928 : static int
929 9 : reactor_run(void *arg)
930 : {
931 9 : struct spdk_reactor *reactor = arg;
932 : struct spdk_thread *thread;
933 : struct spdk_lw_thread *lw_thread, *tmp;
934 9 : char thread_name[32];
935 9 : uint64_t last_sched = 0;
936 :
937 9 : SPDK_NOTICELOG("Reactor started on core %u\n", reactor->lcore);
938 :
939 : /* Rename the POSIX thread because the reactor is tied to the POSIX
940 : * thread in the SPDK event library.
941 : */
942 9 : snprintf(thread_name, sizeof(thread_name), "reactor_%u", reactor->lcore);
943 9 : _set_thread_name(thread_name);
944 :
945 9 : reactor->tsc_last = spdk_get_ticks();
946 :
947 : while (1) {
948 : /* Execute interrupt process fn if this reactor currently runs in interrupt state */
949 9 : if (spdk_unlikely(reactor->in_interrupt)) {
950 0 : reactor_interrupt_run(reactor);
951 : } else {
952 9 : _reactor_run(reactor);
953 : }
954 :
955 9 : if (g_framework_context_switch_monitor_enabled) {
956 9 : if ((reactor->last_rusage + g_rusage_period) < reactor->tsc_last) {
957 8 : get_rusage(reactor);
958 8 : reactor->last_rusage = reactor->tsc_last;
959 : }
960 : }
961 :
962 9 : if (spdk_unlikely(g_scheduler_period > 0 &&
963 : (reactor->tsc_last - last_sched) > g_scheduler_period &&
964 : reactor == g_scheduling_reactor &&
965 : !g_scheduling_in_progress)) {
966 0 : last_sched = reactor->tsc_last;
967 0 : g_scheduling_in_progress = true;
968 0 : _reactors_scheduler_gather_metrics(NULL, NULL);
969 : }
970 :
971 9 : if (g_reactor_state != SPDK_REACTOR_STATE_RUNNING) {
972 9 : break;
973 : }
974 : }
975 :
976 9 : TAILQ_FOREACH(lw_thread, &reactor->threads, link) {
977 0 : thread = spdk_thread_get_from_ctx(lw_thread);
978 : /* All threads should have already had spdk_thread_exit() called on them, except
979 : * for the app thread.
980 : */
981 0 : if (spdk_thread_is_running(thread)) {
982 0 : if (!spdk_thread_is_app_thread(thread)) {
983 0 : SPDK_ERRLOG("spdk_thread_exit() was not called on thread '%s'\n",
984 : spdk_thread_get_name(thread));
985 0 : SPDK_ERRLOG("This will result in a non-zero exit code in a future release.\n");
986 : }
987 0 : spdk_set_thread(thread);
988 0 : spdk_thread_exit(thread);
989 : }
990 : }
991 :
992 9 : while (!TAILQ_EMPTY(&reactor->threads)) {
993 0 : TAILQ_FOREACH_SAFE(lw_thread, &reactor->threads, link, tmp) {
994 0 : thread = spdk_thread_get_from_ctx(lw_thread);
995 0 : spdk_set_thread(thread);
996 0 : if (spdk_thread_is_exited(thread)) {
997 0 : _reactor_remove_lw_thread(reactor, lw_thread);
998 0 : spdk_thread_destroy(thread);
999 : } else {
1000 0 : if (spdk_unlikely(reactor->in_interrupt)) {
1001 0 : reactor_interrupt_run(reactor);
1002 : } else {
1003 0 : spdk_thread_poll(thread, 0, 0);
1004 : }
1005 : }
1006 : }
1007 : }
1008 :
1009 9 : return 0;
1010 : }
1011 :
/*
 * Parse the textual core mask `mask` into `cpumask`, then restrict the result
 * to the cores returned by spdk_app_get_core_mask().
 *
 * Returns 0 on success, or the negative value reported by spdk_cpuset_parse().
 */
int
spdk_app_parse_core_mask(const char *mask, struct spdk_cpuset *cpumask)
{
	int rc = spdk_cpuset_parse(cpumask, mask);

	if (rc < 0) {
		return rc;
	}

	/* Keep only the cores that are also in the application's reactor mask. */
	spdk_cpuset_and(cpumask, spdk_app_get_core_mask());

	return 0;
}
1028 :
1029 : const struct spdk_cpuset *
1030 0 : spdk_app_get_core_mask(void)
1031 : {
1032 0 : return &g_reactor_core_mask;
1033 : }
1034 :
1035 : void
1036 0 : spdk_reactors_start(void)
1037 : {
1038 : struct spdk_reactor *reactor;
1039 : uint32_t i, current_core;
1040 : int rc;
1041 :
1042 0 : g_rusage_period = (CONTEXT_SWITCH_MONITOR_PERIOD * spdk_get_ticks_hz()) / SPDK_SEC_TO_USEC;
1043 0 : g_reactor_state = SPDK_REACTOR_STATE_RUNNING;
1044 : /* Reinitialize to false, in case the app framework is restarting in the same process. */
1045 0 : g_stopping_reactors = false;
1046 :
1047 0 : current_core = spdk_env_get_current_core();
1048 0 : SPDK_ENV_FOREACH_CORE(i) {
1049 0 : if (i != current_core) {
1050 0 : reactor = spdk_reactor_get(i);
1051 0 : if (reactor == NULL) {
1052 0 : continue;
1053 : }
1054 :
1055 0 : rc = spdk_env_thread_launch_pinned(reactor->lcore, reactor_run, reactor);
1056 0 : if (rc < 0) {
1057 0 : SPDK_ERRLOG("Unable to start reactor thread on core %u\n", reactor->lcore);
1058 0 : assert(false);
1059 : return;
1060 : }
1061 : }
1062 0 : spdk_cpuset_set_cpu(&g_reactor_core_mask, i, true);
1063 : }
1064 :
1065 : /* Start the main reactor */
1066 0 : reactor = spdk_reactor_get(current_core);
1067 0 : assert(reactor != NULL);
1068 0 : reactor_run(reactor);
1069 :
1070 0 : spdk_env_thread_wait_all();
1071 :
1072 0 : g_reactor_state = SPDK_REACTOR_STATE_SHUTDOWN;
1073 : }
1074 :
1075 : static void
1076 0 : _reactors_stop(void *arg1, void *arg2)
1077 : {
1078 : uint32_t i;
1079 : int rc;
1080 : struct spdk_reactor *reactor;
1081 : struct spdk_reactor *local_reactor;
1082 0 : uint64_t notify = 1;
1083 :
1084 0 : g_reactor_state = SPDK_REACTOR_STATE_EXITING;
1085 0 : local_reactor = spdk_reactor_get(spdk_env_get_current_core());
1086 :
1087 0 : SPDK_ENV_FOREACH_CORE(i) {
1088 : /* If spdk_event_call isn't called on a reactor, always send a notification.
1089 : * If it is called on a reactor, send a notification if the destination reactor
1090 : * is indicated in interrupt mode state.
1091 : */
1092 0 : if (local_reactor == NULL || spdk_cpuset_get_cpu(&local_reactor->notify_cpuset, i)) {
1093 0 : reactor = spdk_reactor_get(i);
1094 0 : assert(reactor != NULL);
1095 0 : rc = write(reactor->events_fd, ¬ify, sizeof(notify));
1096 0 : if (rc < 0) {
1097 0 : SPDK_ERRLOG("failed to notify event queue for reactor(%u): %s.\n", i, spdk_strerror(errno));
1098 0 : continue;
1099 : }
1100 : }
1101 : }
1102 0 : }
1103 :
/* Event function that intentionally does nothing; both arguments are ignored. */
static void
nop(void *arg1, void *arg2)
{
	(void)arg1;
	(void)arg2;
}
1108 :
1109 : void
1110 0 : spdk_reactors_stop(void *arg1)
1111 : {
1112 0 : spdk_for_each_reactor(nop, NULL, NULL, _reactors_stop);
1113 0 : }
1114 :
/* Held by _reactor_schedule_thread() while it picks a target core and allocates the event. */
1115 : static pthread_mutex_t g_scheduler_mtx = PTHREAD_MUTEX_INITIALIZER;
/* Next core tried by the round-robin selection in _reactor_schedule_thread().
 * UINT32_MAX is >= g_reactor_count, so the first selection starts from
 * spdk_env_get_first_core().
 */
1116 : static uint32_t g_next_core = UINT32_MAX;
1117 :
1118 : static void
1119 21 : _schedule_thread(void *arg1, void *arg2)
1120 : {
1121 21 : struct spdk_lw_thread *lw_thread = arg1;
1122 : struct spdk_thread *thread;
1123 : struct spdk_reactor *reactor;
1124 : uint32_t current_core;
1125 : struct spdk_fd_group *grp;
1126 :
1127 21 : current_core = spdk_env_get_current_core();
1128 21 : reactor = spdk_reactor_get(current_core);
1129 21 : assert(reactor != NULL);
1130 :
1131 : /* Update total_stats to reflect state of thread
1132 : * at the end of the move. */
1133 21 : thread = spdk_thread_get_from_ctx(lw_thread);
1134 21 : spdk_set_thread(thread);
1135 21 : spdk_thread_get_stats(&lw_thread->total_stats);
1136 21 : spdk_set_thread(NULL);
1137 :
1138 21 : lw_thread->lcore = current_core;
1139 :
1140 21 : TAILQ_INSERT_TAIL(&reactor->threads, lw_thread, link);
1141 21 : reactor->thread_count++;
1142 :
1143 : /* Operate thread intr if running with full interrupt ability */
1144 21 : if (spdk_interrupt_mode_is_enabled()) {
1145 : int rc;
1146 :
1147 0 : if (reactor->in_interrupt) {
1148 0 : grp = spdk_thread_get_interrupt_fd_group(thread);
1149 0 : rc = spdk_fd_group_nest(reactor->fgrp, grp);
1150 0 : if (rc < 0) {
1151 0 : SPDK_ERRLOG("Failed to schedule spdk_thread: %s.\n", spdk_strerror(-rc));
1152 : }
1153 : }
1154 :
1155 : /* Align spdk_thread with reactor to interrupt mode or poll mode */
1156 0 : spdk_thread_send_msg(thread, _reactor_set_thread_interrupt_mode, reactor);
1157 : }
1158 21 : }
1159 :
1160 : static int
1161 21 : _reactor_schedule_thread(struct spdk_thread *thread)
1162 : {
1163 : uint32_t core;
1164 : struct spdk_lw_thread *lw_thread;
1165 21 : struct spdk_event *evt = NULL;
1166 : struct spdk_cpuset *cpumask;
1167 : uint32_t i;
1168 21 : struct spdk_reactor *local_reactor = NULL;
1169 21 : uint32_t current_lcore = spdk_env_get_current_core();
1170 21 : struct spdk_cpuset polling_cpumask;
1171 21 : struct spdk_cpuset valid_cpumask;
1172 :
1173 21 : cpumask = spdk_thread_get_cpumask(thread);
1174 :
1175 21 : lw_thread = spdk_thread_get_ctx(thread);
1176 21 : assert(lw_thread != NULL);
1177 21 : core = lw_thread->lcore;
1178 21 : memset(lw_thread, 0, sizeof(*lw_thread));
1179 :
1180 21 : if (current_lcore != SPDK_ENV_LCORE_ID_ANY) {
1181 21 : local_reactor = spdk_reactor_get(current_lcore);
1182 21 : assert(local_reactor);
1183 : }
1184 :
1185 : /* When interrupt ability of spdk_thread is not enabled and the current
1186 : * reactor runs on DPDK thread, skip reactors which are in interrupt mode.
1187 : */
1188 21 : if (!spdk_interrupt_mode_is_enabled() && local_reactor != NULL) {
1189 : /* Get the cpumask of all reactors in polling */
1190 21 : spdk_cpuset_zero(&polling_cpumask);
1191 76 : SPDK_ENV_FOREACH_CORE(i) {
1192 55 : spdk_cpuset_set_cpu(&polling_cpumask, i, true);
1193 : }
1194 21 : spdk_cpuset_xor(&polling_cpumask, &local_reactor->notify_cpuset);
1195 :
1196 21 : if (core == SPDK_ENV_LCORE_ID_ANY) {
1197 : /* Get the cpumask of all valid reactors which are suggested and also in polling */
1198 14 : spdk_cpuset_copy(&valid_cpumask, &polling_cpumask);
1199 14 : spdk_cpuset_and(&valid_cpumask, spdk_thread_get_cpumask(thread));
1200 :
1201 : /* If there are any valid reactors, spdk_thread should be scheduled
1202 : * into one of the valid reactors.
1203 : * If there is no valid reactors, spdk_thread should be scheduled
1204 : * into one of the polling reactors.
1205 : */
1206 14 : if (spdk_cpuset_count(&valid_cpumask) != 0) {
1207 14 : cpumask = &valid_cpumask;
1208 : } else {
1209 0 : cpumask = &polling_cpumask;
1210 : }
1211 7 : } else if (!spdk_cpuset_get_cpu(&polling_cpumask, core)) {
1212 : /* If specified reactor is not in polling, spdk_thread should be scheduled
1213 : * into one of the polling reactors.
1214 : */
1215 0 : core = SPDK_ENV_LCORE_ID_ANY;
1216 0 : cpumask = &polling_cpumask;
1217 : }
1218 : }
1219 :
1220 21 : pthread_mutex_lock(&g_scheduler_mtx);
1221 21 : if (core == SPDK_ENV_LCORE_ID_ANY) {
1222 19 : for (i = 0; i < spdk_env_get_core_count(); i++) {
1223 19 : if (g_next_core >= g_reactor_count) {
1224 5 : g_next_core = spdk_env_get_first_core();
1225 : }
1226 19 : core = g_next_core;
1227 19 : g_next_core = spdk_env_get_next_core(g_next_core);
1228 :
1229 19 : if (spdk_cpuset_get_cpu(cpumask, core)) {
1230 14 : break;
1231 : }
1232 : }
1233 : }
1234 :
1235 21 : evt = spdk_event_allocate(core, _schedule_thread, lw_thread, NULL);
1236 :
1237 21 : pthread_mutex_unlock(&g_scheduler_mtx);
1238 :
1239 21 : assert(evt != NULL);
1240 21 : if (evt == NULL) {
1241 0 : SPDK_ERRLOG("Unable to schedule thread on requested core mask.\n");
1242 0 : return -1;
1243 : }
1244 :
1245 21 : lw_thread->tsc_start = spdk_get_ticks();
1246 :
1247 21 : spdk_event_call(evt);
1248 :
1249 21 : return 0;
1250 : }
1251 :
1252 : static void
1253 2 : _reactor_request_thread_reschedule(struct spdk_thread *thread)
1254 : {
1255 : struct spdk_lw_thread *lw_thread;
1256 : struct spdk_reactor *reactor;
1257 : uint32_t current_core;
1258 :
1259 2 : assert(thread == spdk_get_thread());
1260 :
1261 2 : lw_thread = spdk_thread_get_ctx(thread);
1262 :
1263 2 : assert(lw_thread != NULL);
1264 2 : lw_thread->resched = true;
1265 2 : lw_thread->lcore = SPDK_ENV_LCORE_ID_ANY;
1266 :
1267 2 : current_core = spdk_env_get_current_core();
1268 2 : reactor = spdk_reactor_get(current_core);
1269 2 : assert(reactor != NULL);
1270 :
1271 : /* Send a notification if the destination reactor is indicated in intr mode state */
1272 2 : if (spdk_unlikely(spdk_cpuset_get_cpu(&reactor->notify_cpuset, reactor->lcore))) {
1273 0 : uint64_t notify = 1;
1274 :
1275 0 : if (write(reactor->resched_fd, ¬ify, sizeof(notify)) < 0) {
1276 0 : SPDK_ERRLOG("failed to notify reschedule: %s.\n", spdk_strerror(errno));
1277 : }
1278 : }
1279 2 : }
1280 :
1281 : static int
1282 15 : reactor_thread_op(struct spdk_thread *thread, enum spdk_thread_op op)
1283 : {
1284 : struct spdk_lw_thread *lw_thread;
1285 :
1286 15 : switch (op) {
1287 13 : case SPDK_THREAD_OP_NEW:
1288 13 : lw_thread = spdk_thread_get_ctx(thread);
1289 13 : lw_thread->lcore = SPDK_ENV_LCORE_ID_ANY;
1290 13 : return _reactor_schedule_thread(thread);
1291 2 : case SPDK_THREAD_OP_RESCHED:
1292 2 : _reactor_request_thread_reschedule(thread);
1293 2 : return 0;
1294 0 : default:
1295 0 : return -ENOTSUP;
1296 : }
1297 : }
1298 :
1299 : static bool
1300 15 : reactor_thread_op_supported(enum spdk_thread_op op)
1301 : {
1302 15 : switch (op) {
1303 15 : case SPDK_THREAD_OP_NEW:
1304 : case SPDK_THREAD_OP_RESCHED:
1305 15 : return true;
1306 0 : default:
1307 0 : return false;
1308 : }
1309 : }
1310 :
/* State carried across cores by spdk_for_each_reactor(); allocated there and
 * freed by on_reactor() once the last core has been visited.
 */
1311 : struct call_reactor {
/* Core the iteration currently targets; advanced by on_reactor(). */
1312 : uint32_t cur_core;
/* Function invoked on each visited core with arg1 and arg2. */
1313 : spdk_event_fn fn;
1314 : void *arg1;
1315 : void *arg2;
1316 :
/* Core on which spdk_for_each_reactor() was called; cpl is sent there. */
1317 : uint32_t orig_core;
/* Completion function, called with arg1 and arg2 after the last core. */
1318 : spdk_event_fn cpl;
1319 : };
1320 :
1321 : static void
1322 9 : on_reactor(void *arg1, void *arg2)
1323 : {
1324 9 : struct call_reactor *cr = arg1;
1325 : struct spdk_event *evt;
1326 :
1327 9 : cr->fn(cr->arg1, cr->arg2);
1328 :
1329 9 : cr->cur_core = spdk_env_get_next_core(cr->cur_core);
1330 :
1331 9 : if (cr->cur_core >= g_reactor_count) {
1332 3 : SPDK_DEBUGLOG(reactor, "Completed reactor iteration\n");
1333 :
1334 3 : evt = spdk_event_allocate(cr->orig_core, cr->cpl, cr->arg1, cr->arg2);
1335 3 : free(cr);
1336 : } else {
1337 6 : SPDK_DEBUGLOG(reactor, "Continuing reactor iteration to %d\n",
1338 : cr->cur_core);
1339 :
1340 6 : evt = spdk_event_allocate(cr->cur_core, on_reactor, arg1, NULL);
1341 : }
1342 9 : assert(evt != NULL);
1343 9 : spdk_event_call(evt);
1344 9 : }
1345 :
1346 : void
1347 3 : spdk_for_each_reactor(spdk_event_fn fn, void *arg1, void *arg2, spdk_event_fn cpl)
1348 : {
1349 : struct call_reactor *cr;
1350 :
1351 : /* When the application framework is shutting down, we will send one
1352 : * final for_each_reactor operation with completion callback _reactors_stop,
1353 : * to flush any existing for_each_reactor operations to avoid any memory
1354 : * leaks. We use a mutex here to protect a boolean flag that will ensure
1355 : * we don't start any more operations once we've started shutting down.
1356 : */
1357 3 : pthread_mutex_lock(&g_stopping_reactors_mtx);
1358 3 : if (g_stopping_reactors) {
1359 0 : pthread_mutex_unlock(&g_stopping_reactors_mtx);
1360 0 : return;
1361 3 : } else if (cpl == _reactors_stop) {
1362 0 : g_stopping_reactors = true;
1363 : }
1364 3 : pthread_mutex_unlock(&g_stopping_reactors_mtx);
1365 :
1366 3 : cr = calloc(1, sizeof(*cr));
1367 3 : if (!cr) {
1368 0 : SPDK_ERRLOG("Unable to perform reactor iteration\n");
1369 0 : cpl(arg1, arg2);
1370 0 : return;
1371 : }
1372 :
1373 3 : cr->fn = fn;
1374 3 : cr->arg1 = arg1;
1375 3 : cr->arg2 = arg2;
1376 3 : cr->cpl = cpl;
1377 3 : cr->orig_core = spdk_env_get_current_core();
1378 3 : cr->cur_core = spdk_env_get_first_core();
1379 :
1380 3 : SPDK_DEBUGLOG(reactor, "Starting reactor iteration from %d\n", cr->orig_core);
1381 :
1382 3 : _event_call(cr->cur_core, on_reactor, cr, NULL);
1383 : }
1384 :
1385 : #ifdef __linux__
1386 : static int
1387 0 : reactor_schedule_thread_event(void *arg)
1388 : {
1389 0 : struct spdk_reactor *reactor = arg;
1390 : struct spdk_lw_thread *lw_thread, *tmp;
1391 0 : uint32_t count = 0;
1392 0 : uint64_t notify = 1;
1393 :
1394 0 : assert(reactor->in_interrupt);
1395 :
1396 0 : if (read(reactor->resched_fd, ¬ify, sizeof(notify)) < 0) {
1397 0 : SPDK_ERRLOG("failed to acknowledge reschedule: %s.\n", spdk_strerror(errno));
1398 0 : return -errno;
1399 : }
1400 :
1401 0 : TAILQ_FOREACH_SAFE(lw_thread, &reactor->threads, link, tmp) {
1402 0 : count += reactor_post_process_lw_thread(reactor, lw_thread) ? 1 : 0;
1403 : }
1404 :
1405 0 : return count;
1406 : }
1407 :
1408 : static int
1409 27 : reactor_interrupt_init(struct spdk_reactor *reactor)
1410 : {
1411 : int rc;
1412 :
1413 27 : rc = spdk_fd_group_create(&reactor->fgrp);
1414 27 : if (rc != 0) {
1415 0 : return rc;
1416 : }
1417 :
1418 27 : reactor->resched_fd = eventfd(0, EFD_NONBLOCK | EFD_CLOEXEC);
1419 27 : if (reactor->resched_fd < 0) {
1420 0 : rc = -EBADF;
1421 0 : goto err;
1422 : }
1423 :
1424 27 : rc = SPDK_FD_GROUP_ADD(reactor->fgrp, reactor->resched_fd, reactor_schedule_thread_event,
1425 : reactor);
1426 27 : if (rc) {
1427 0 : close(reactor->resched_fd);
1428 0 : goto err;
1429 : }
1430 :
1431 27 : reactor->events_fd = eventfd(0, EFD_NONBLOCK | EFD_CLOEXEC);
1432 27 : if (reactor->events_fd < 0) {
1433 0 : spdk_fd_group_remove(reactor->fgrp, reactor->resched_fd);
1434 0 : close(reactor->resched_fd);
1435 :
1436 0 : rc = -EBADF;
1437 0 : goto err;
1438 : }
1439 :
1440 27 : rc = SPDK_FD_GROUP_ADD(reactor->fgrp, reactor->events_fd,
1441 : event_queue_run_batch, reactor);
1442 27 : if (rc) {
1443 0 : spdk_fd_group_remove(reactor->fgrp, reactor->resched_fd);
1444 0 : close(reactor->resched_fd);
1445 0 : close(reactor->events_fd);
1446 0 : goto err;
1447 : }
1448 :
1449 27 : return 0;
1450 :
1451 0 : err:
1452 0 : spdk_fd_group_destroy(reactor->fgrp);
1453 0 : reactor->fgrp = NULL;
1454 0 : return rc;
1455 : }
1456 : #else
/* Variant built when __linux__ is not defined: always fails with -ENOTSUP. */
static int
reactor_interrupt_init(struct spdk_reactor *reactor)
{
	(void)reactor;

	return -ENOTSUP;
}
1462 : #endif
1463 :
1464 : static void
1465 27 : reactor_interrupt_fini(struct spdk_reactor *reactor)
1466 : {
1467 27 : struct spdk_fd_group *fgrp = reactor->fgrp;
1468 :
1469 27 : if (!fgrp) {
1470 0 : return;
1471 : }
1472 :
1473 27 : spdk_fd_group_remove(fgrp, reactor->events_fd);
1474 27 : spdk_fd_group_remove(fgrp, reactor->resched_fd);
1475 :
1476 27 : close(reactor->events_fd);
1477 27 : close(reactor->resched_fd);
1478 :
1479 27 : spdk_fd_group_destroy(fgrp);
1480 27 : reactor->fgrp = NULL;
1481 : }
1482 :
1483 : static struct spdk_governor *
1484 3 : _governor_find(const char *name)
1485 : {
1486 : struct spdk_governor *governor, *tmp;
1487 :
1488 3 : TAILQ_FOREACH_SAFE(governor, &g_governor_list, link, tmp) {
1489 1 : if (strcmp(name, governor->name) == 0) {
1490 1 : return governor;
1491 : }
1492 : }
1493 :
1494 2 : return NULL;
1495 : }
1496 :
1497 : int
1498 2 : spdk_governor_set(const char *name)
1499 : {
1500 : struct spdk_governor *governor;
1501 2 : int rc = 0;
1502 :
1503 : /* NULL governor was specifically requested */
1504 2 : if (name == NULL) {
1505 0 : if (g_governor) {
1506 0 : g_governor->deinit();
1507 : }
1508 0 : g_governor = NULL;
1509 0 : return 0;
1510 : }
1511 :
1512 2 : governor = _governor_find(name);
1513 2 : if (governor == NULL) {
1514 1 : return -EINVAL;
1515 : }
1516 :
1517 1 : if (g_governor == governor) {
1518 0 : return 0;
1519 : }
1520 :
1521 1 : rc = governor->init();
1522 1 : if (rc == 0) {
1523 1 : if (g_governor) {
1524 0 : g_governor->deinit();
1525 : }
1526 1 : g_governor = governor;
1527 : }
1528 :
1529 1 : return rc;
1530 : }
1531 :
1532 : struct spdk_governor *
1533 6 : spdk_governor_get(void)
1534 : {
1535 6 : return g_governor;
1536 : }
1537 :
1538 : void
1539 1 : spdk_governor_register(struct spdk_governor *governor)
1540 : {
1541 1 : if (_governor_find(governor->name)) {
1542 0 : SPDK_ERRLOG("governor named '%s' already registered.\n", governor->name);
1543 0 : assert(false);
1544 : return;
1545 : }
1546 :
1547 1 : TAILQ_INSERT_TAIL(&g_governor_list, governor, link);
1548 : }
1549 :
/* Declares the `reactor` log component referenced by the SPDK_DEBUGLOG(reactor, ...) calls in this file. */
1550 1 : SPDK_LOG_REGISTER_COMPONENT(reactor)
|