LCOV - code coverage report
Current view: top level - lib/thread - thread.c (source / functions) Hit Total Coverage
Test: ut_cov_unit.info Lines: 845 1333 63.4 %
Date: 2024-12-06 01:29:11 Functions: 111 165 67.3 %

          Line data    Source code
       1             : /*   SPDX-License-Identifier: BSD-3-Clause
       2             :  *   Copyright (C) 2016 Intel Corporation.
       3             :  *   All rights reserved.
       4             :  *   Copyright (c) 2022, 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
       5             :  */
       6             : 
       7             : #include "spdk/stdinc.h"
       8             : 
       9             : #include "spdk/env.h"
      10             : #include "spdk/likely.h"
      11             : #include "spdk/queue.h"
      12             : #include "spdk/string.h"
      13             : #include "spdk/thread.h"
      14             : #include "spdk/trace.h"
      15             : #include "spdk/util.h"
      16             : #include "spdk/fd_group.h"
      17             : 
      18             : #include "spdk/log.h"
      19             : #include "spdk_internal/thread.h"
      20             : #include "spdk_internal/usdt.h"
      21             : #include "thread_internal.h"
      22             : 
      23             : #include "spdk_internal/trace_defs.h"
      24             : 
      25             : #ifdef __linux__
      26             : #include <sys/timerfd.h>
      27             : #include <sys/eventfd.h>
      28             : #endif
      29             : 
      30             : #ifdef SPDK_HAVE_EXECINFO_H
      31             : #include <execinfo.h>
      32             : #endif
      33             : 
      34             : #define SPDK_MSG_BATCH_SIZE             8
      35             : #define SPDK_MAX_DEVICE_NAME_LEN        256
      36             : #define SPDK_THREAD_EXIT_TIMEOUT_SEC    5
      37             : #define SPDK_MAX_POLLER_NAME_LEN        256
      38             : #define SPDK_MAX_THREAD_NAME_LEN        256
      39             : 
      40             : static struct spdk_thread *g_app_thread;
      41             : 
      42             : struct spdk_interrupt {
      43             :         int                     efd;
      44             :         struct spdk_thread      *thread;
      45             :         spdk_interrupt_fn       fn;
      46             :         void                    *arg;
      47             :         char                    name[SPDK_MAX_POLLER_NAME_LEN + 1];
      48             : };
      49             : 
      50             : enum spdk_poller_state {
      51             :         /* The poller is registered with a thread but not currently executing its fn. */
      52             :         SPDK_POLLER_STATE_WAITING,
      53             : 
      54             :         /* The poller is currently running its fn. */
      55             :         SPDK_POLLER_STATE_RUNNING,
      56             : 
      57             :         /* The poller was unregistered during the execution of its fn. */
      58             :         SPDK_POLLER_STATE_UNREGISTERED,
      59             : 
      60             :         /* The poller is in the process of being paused.  It will be paused
      61             :          * the next time it is due to be executed.
      62             :          */
      63             :         SPDK_POLLER_STATE_PAUSING,
      64             : 
      65             :         /* The poller is registered but currently paused.  It's on the
      66             :          * paused_pollers list.
      67             :          */
      68             :         SPDK_POLLER_STATE_PAUSED,
      69             : };
      70             : 
      71             : struct spdk_poller {
      72             :         TAILQ_ENTRY(spdk_poller)        tailq;
      73             :         RB_ENTRY(spdk_poller)           node;
      74             : 
      75             :         /* Current state of the poller; should only be accessed from the poller's thread. */
      76             :         enum spdk_poller_state          state;
      77             : 
      78             :         uint64_t                        period_ticks;
      79             :         uint64_t                        next_run_tick;
      80             :         uint64_t                        run_count;
      81             :         uint64_t                        busy_count;
      82             :         uint64_t                        id;
      83             :         spdk_poller_fn                  fn;
      84             :         void                            *arg;
      85             :         struct spdk_thread              *thread;
      86             :         struct spdk_interrupt           *intr;
      87             :         spdk_poller_set_interrupt_mode_cb set_intr_cb_fn;
      88             :         void                            *set_intr_cb_arg;
      89             : 
      90             :         char                            name[SPDK_MAX_POLLER_NAME_LEN + 1];
      91             : };
      92             : 
      93             : enum spdk_thread_state {
      94             :         /* The thread is processing pollers and messages in spdk_thread_poll(). */
      95             :         SPDK_THREAD_STATE_RUNNING,
      96             : 
      97             :         /* The thread is in the process of termination. It reaps pollers that
      98             :          * are being unregistered and releases I/O channels.
      99             :          */
     100             :         SPDK_THREAD_STATE_EXITING,
     101             : 
     102             :         /* The thread has exited. It is ready for spdk_thread_destroy() to be called. */
     103             :         SPDK_THREAD_STATE_EXITED,
     104             : };
     105             : 
     106             : struct spdk_thread_post_poller_handler {
     107             :         spdk_post_poller_fn fn;
     108             :         void *fn_arg;
     109             : };
     110             : 
     111             : #define SPDK_THREAD_MAX_POST_POLLER_HANDLERS (4)
     112             : 
     113             : struct spdk_thread {
     114             :         uint64_t                        tsc_last;
     115             :         struct spdk_thread_stats        stats;
     116             :         /*
     117             :          * Contains pollers actively running on this thread.  Pollers
     118             :          *  are run round-robin. The thread takes one poller from the head
     119             :          *  of the ring, executes it, then puts it back at the tail of
     120             :          *  the ring.
     121             :          */
     122             :         TAILQ_HEAD(active_pollers_head, spdk_poller)    active_pollers;
     123             :         /**
     124             :          * Contains pollers running on this thread with a periodic timer.
     125             :          */
     126             :         RB_HEAD(timed_pollers_tree, spdk_poller)        timed_pollers;
     127             :         struct spdk_poller                              *first_timed_poller;
     128             :         /*
     129             :          * Contains paused pollers.  Pollers on this queue are waiting until
     130             :          * they are resumed (in which case they're put onto the active/timer
     131             :          * queues) or unregistered.
     132             :          */
     133             :         TAILQ_HEAD(paused_pollers_head, spdk_poller)    paused_pollers;
     134             :         struct spdk_thread_post_poller_handler          pp_handlers[SPDK_THREAD_MAX_POST_POLLER_HANDLERS];
     135             :         struct spdk_ring                *messages;
     136             :         uint8_t                         num_pp_handlers;
     137             :         int                             msg_fd;
     138             :         SLIST_HEAD(, spdk_msg)          msg_cache;
     139             :         size_t                          msg_cache_count;
     140             :         spdk_msg_fn                     critical_msg;
     141             :         uint64_t                        id;
     142             :         uint64_t                        next_poller_id;
     143             :         enum spdk_thread_state          state;
     144             :         int                             pending_unregister_count;
     145             :         uint32_t                        for_each_count;
     146             : 
     147             :         RB_HEAD(io_channel_tree, spdk_io_channel)       io_channels;
     148             :         TAILQ_ENTRY(spdk_thread)                        tailq;
     149             : 
     150             :         char                            name[SPDK_MAX_THREAD_NAME_LEN + 1];
     151             :         struct spdk_cpuset              cpumask;
     152             :         uint64_t                        exit_timeout_tsc;
     153             : 
     154             :         int32_t                         lock_count;
     155             : 
     156             :         /* spdk_thread is bound to current CPU core. */
     157             :         bool                            is_bound;
     158             : 
     159             :         /* Indicates whether this spdk_thread currently runs in interrupt mode. */
     160             :         bool                            in_interrupt;
     161             :         bool                            poller_unregistered;
     162             :         struct spdk_fd_group            *fgrp;
     163             : 
     164             :         uint16_t                        trace_id;
     165             : 
     166             :         uint8_t                         reserved[6];
     167             : 
     168             :         /* User context allocated at the end */
     169             :         uint8_t                         ctx[0];
     170             : };
     171             : 
     172             : /*
     173             :  * Assert that the size of struct spdk_thread is a multiple of 8 bytes to
     174             :  * ensure the user ctx that follows it is also 8-byte aligned.
     175             :  */
     176             : SPDK_STATIC_ASSERT((sizeof(struct spdk_thread)) % 8 == 0, "Incorrect size");
     177             : 
     178             : static pthread_mutex_t g_devlist_mutex = PTHREAD_MUTEX_INITIALIZER;
     179             : 
     180             : static spdk_new_thread_fn g_new_thread_fn = NULL;
     181             : static spdk_thread_op_fn g_thread_op_fn = NULL;
     182             : static spdk_thread_op_supported_fn g_thread_op_supported_fn;
     183             : static size_t g_ctx_sz = 0;
     184             : /* A monotonically increasing ID is assigned to each created thread, beginning at 1.
     185             :  * Once the ID exceeds UINT64_MAX, further thread creation is not allowed and
     186             :  * restarting the SPDK application is required.
     187             :  */
     188             : static uint64_t g_thread_id = 1;
     189             : 
     190             : enum spin_error {
     191             :         SPIN_ERR_NONE,
     192             :         /* Trying to use an SPDK lock while not on an SPDK thread */
     193             :         SPIN_ERR_NOT_SPDK_THREAD,
     194             :         /* Trying to lock a lock already held by this SPDK thread */
     195             :         SPIN_ERR_DEADLOCK,
     196             :         /* Trying to unlock a lock not held by this SPDK thread */
     197             :         SPIN_ERR_WRONG_THREAD,
     198             :         /* pthread_spin_*() returned an error */
     199             :         SPIN_ERR_PTHREAD,
     200             :         /* Trying to destroy a lock that is held */
     201             :         SPIN_ERR_LOCK_HELD,
     202             :         /* lock_count is invalid */
     203             :         SPIN_ERR_LOCK_COUNT,
     204             :         /*
     205             :          * An spdk_thread may migrate to another pthread. A spinlock held across migration leads to
     206             :          * undefined behavior. A spinlock held when an SPDK thread goes off CPU would lead to
     207             :          * deadlock when another SPDK thread on the same pthread tries to take that lock.
     208             :          */
     209             :         SPIN_ERR_HOLD_DURING_SWITCH,
     210             :         /* Trying to use a lock that was destroyed (but not re-initialized) */
     211             :         SPIN_ERR_DESTROYED,
     212             :         /* Trying to use a lock that is not initialized */
     213             :         SPIN_ERR_NOT_INITIALIZED,
     214             : 
     215             :         /* Must be last, not an actual error code */
     216             :         SPIN_ERR_LAST
     217             : };
     218             : 
     219             : static const char *spin_error_strings[] = {
     220             :         [SPIN_ERR_NONE]                 = "No error",
     221             :         [SPIN_ERR_NOT_SPDK_THREAD]      = "Not an SPDK thread",
     222             :         [SPIN_ERR_DEADLOCK]             = "Deadlock detected",
     223             :         [SPIN_ERR_WRONG_THREAD]         = "Unlock on wrong SPDK thread",
     224             :         [SPIN_ERR_PTHREAD]              = "Error from pthread_spinlock",
     225             :         [SPIN_ERR_LOCK_HELD]            = "Destroying a held spinlock",
     226             :         [SPIN_ERR_LOCK_COUNT]           = "Lock count is invalid",
     227             :         [SPIN_ERR_HOLD_DURING_SWITCH]   = "Lock(s) held while SPDK thread going off CPU",
     228             :         [SPIN_ERR_DESTROYED]            = "Lock has been destroyed",
     229             :         [SPIN_ERR_NOT_INITIALIZED]      = "Lock has not been initialized",
     230             : };
     231             : 
     232             : #define SPIN_ERROR_STRING(err) (err < 0 || err >= SPDK_COUNTOF(spin_error_strings)) \
     233             :                                 ? "Unknown error" : spin_error_strings[err]
     234             : 
     235             : static void
     236           0 : __posix_abort(enum spin_error err)
     237             : {
     238           0 :         abort();
     239             : }
     240             : 
     241             : typedef void (*spin_abort)(enum spin_error err);
     242             : spin_abort g_spin_abort_fn = __posix_abort;
     243             : 
     244             : #define SPIN_ASSERT_IMPL(cond, err, extra_log, ret) \
     245             :         do { \
     246             :                 if (spdk_unlikely(!(cond))) { \
     247             :                         SPDK_ERRLOG("unrecoverable spinlock error %d: %s (%s)\n", err, \
     248             :                                     SPIN_ERROR_STRING(err), #cond); \
     249             :                         extra_log; \
     250             :                         g_spin_abort_fn(err); \
     251             :                         ret; \
     252             :                 } \
     253             :         } while (0)
     254             : #define SPIN_ASSERT_LOG_STACKS(cond, err, lock) \
     255             :         SPIN_ASSERT_IMPL(cond, err, sspin_stacks_print(sspin), return)
     256             : #define SPIN_ASSERT_RETURN(cond, err, ret)      SPIN_ASSERT_IMPL(cond, err, , return ret)
     257             : #define SPIN_ASSERT(cond, err)                  SPIN_ASSERT_IMPL(cond, err, ,)
     258             : 
     259             : struct io_device {
     260             :         void                            *io_device;
     261             :         char                            name[SPDK_MAX_DEVICE_NAME_LEN + 1];
     262             :         spdk_io_channel_create_cb       create_cb;
     263             :         spdk_io_channel_destroy_cb      destroy_cb;
     264             :         spdk_io_device_unregister_cb    unregister_cb;
     265             :         struct spdk_thread              *unregister_thread;
     266             :         uint32_t                        ctx_size;
     267             :         uint32_t                        for_each_count;
     268             :         RB_ENTRY(io_device)             node;
     269             : 
     270             :         uint32_t                        refcnt;
     271             : 
     272             :         bool                            pending_unregister;
     273             :         bool                            unregistered;
     274             : };
     275             : 
     276             : static RB_HEAD(io_device_tree, io_device) g_io_devices = RB_INITIALIZER(g_io_devices);
     277             : 
     278             : static int
     279       15303 : io_device_cmp(struct io_device *dev1, struct io_device *dev2)
     280             : {
     281       15303 :         return (dev1->io_device < dev2->io_device ? -1 : dev1->io_device > dev2->io_device);
     282             : }
     283             : 
     284       26796 : RB_GENERATE_STATIC(io_device_tree, io_device, node, io_device_cmp);
     285             : 
     286             : static int
     287       10034 : io_channel_cmp(struct spdk_io_channel *ch1, struct spdk_io_channel *ch2)
     288             : {
     289       10034 :         return (ch1->dev < ch2->dev ? -1 : ch1->dev > ch2->dev);
     290             : }
     291             : 
     292       27863 : RB_GENERATE_STATIC(io_channel_tree, spdk_io_channel, node, io_channel_cmp);
     293             : 
     294             : struct spdk_msg {
     295             :         spdk_msg_fn             fn;
     296             :         void                    *arg;
     297             : 
     298             :         SLIST_ENTRY(spdk_msg)   link;
     299             : };
     300             : 
     301             : static struct spdk_mempool *g_spdk_msg_mempool = NULL;
     302             : 
     303             : static TAILQ_HEAD(, spdk_thread) g_threads = TAILQ_HEAD_INITIALIZER(g_threads);
     304             : static uint32_t g_thread_count = 0;
     305             : 
     306             : static __thread struct spdk_thread *tls_thread = NULL;
     307             : 
     308             : static void
     309           0 : thread_trace(void)
     310             : {
     311           0 :         struct spdk_trace_tpoint_opts opts[] = {
     312             :                 {
     313             :                         "THREAD_IOCH_GET", TRACE_THREAD_IOCH_GET,
     314             :                         OWNER_TYPE_NONE, OBJECT_NONE, 0,
     315             :                         {{ "refcnt", SPDK_TRACE_ARG_TYPE_INT, 4 }}
     316             :                 },
     317             :                 {
     318             :                         "THREAD_IOCH_PUT", TRACE_THREAD_IOCH_PUT,
     319             :                         OWNER_TYPE_NONE, OBJECT_NONE, 0,
     320             :                         {{ "refcnt", SPDK_TRACE_ARG_TYPE_INT, 4 }}
     321             :                 }
     322             :         };
     323             : 
     324           0 :         spdk_trace_register_owner_type(OWNER_TYPE_THREAD, 't');
     325           0 :         spdk_trace_register_description_ext(opts, SPDK_COUNTOF(opts));
     326           0 : }
     327          49 : SPDK_TRACE_REGISTER_FN(thread_trace, "thread", TRACE_GROUP_THREAD)
     328             : 
     329             : /*
     330             :  * If this compare function returned zero when two next_run_ticks are equal,
     331             :  * the macro RB_INSERT() would return a pointer to the existing element with
     332             :  * the same next_run_tick instead of inserting the new one.
     333             :  *
     334             :  * Fortunately, the macro RB_REMOVE() takes a pointer to the element to remove
     335             :  * rather than a key, so elements with duplicate keys can still be removed.
     336             :  *
     337             :  * Hence we allow RB_INSERT() to insert elements with the same keys on the right
     338             :  * side by returning 1 when two next_run_ticks are equal.
     339             :  */
     340             : static inline int
     341         909 : timed_poller_compare(struct spdk_poller *poller1, struct spdk_poller *poller2)
     342             : {
     343         909 :         if (poller1->next_run_tick < poller2->next_run_tick) {
     344         502 :                 return -1;
     345             :         } else {
     346         407 :                 return 1;
     347             :         }
     348             : }
     349             : 
     350        5000 : RB_GENERATE_STATIC(timed_pollers_tree, spdk_poller, node, timed_poller_compare);
     351             : 
     352             : static inline struct spdk_thread *
     353      942598 : _get_thread(void)
     354             : {
     355      942598 :         return tls_thread;
     356             : }
     357             : 
     358             : static int
     359          97 : _thread_lib_init(size_t ctx_sz, size_t msg_mempool_sz)
     360             : {
     361          97 :         char mempool_name[SPDK_MAX_MEMZONE_NAME_LEN];
     362             : 
     363          97 :         g_ctx_sz = ctx_sz;
     364             : 
     365          97 :         snprintf(mempool_name, sizeof(mempool_name), "msgpool_%d", getpid());
     366          97 :         g_spdk_msg_mempool = spdk_mempool_create(mempool_name, msg_mempool_sz,
     367             :                              sizeof(struct spdk_msg),
     368             :                              0, /* No cache. We do our own. */
     369             :                              SPDK_ENV_NUMA_ID_ANY);
     370             : 
     371          97 :         SPDK_DEBUGLOG(thread, "spdk_msg_mempool was created with size: %zu\n",
     372             :                       msg_mempool_sz);
     373             : 
     374          97 :         if (!g_spdk_msg_mempool) {
     375           0 :                 SPDK_ERRLOG("spdk_msg_mempool creation failed\n");
     376           0 :                 return -ENOMEM;
     377             :         }
     378             : 
     379          97 :         return 0;
     380             : }
     381             : 
     382             : static void thread_interrupt_destroy(struct spdk_thread *thread);
     383             : static int thread_interrupt_create(struct spdk_thread *thread);
     384             : 
     385             : static void
     386         178 : _free_thread(struct spdk_thread *thread)
     387             : {
     388             :         struct spdk_io_channel *ch;
     389             :         struct spdk_msg *msg;
     390             :         struct spdk_poller *poller, *ptmp;
     391             : 
     392         178 :         RB_FOREACH(ch, io_channel_tree, &thread->io_channels) {
     393           0 :                 SPDK_ERRLOG("thread %s still has channel for io_device %s\n",
     394             :                             thread->name, ch->dev->name);
     395             :         }
     396             : 
     397         178 :         TAILQ_FOREACH_SAFE(poller, &thread->active_pollers, tailq, ptmp) {
     398           0 :                 if (poller->state != SPDK_POLLER_STATE_UNREGISTERED) {
     399           0 :                         SPDK_WARNLOG("active_poller %s still registered at thread exit\n",
     400             :                                      poller->name);
     401             :                 }
     402           0 :                 TAILQ_REMOVE(&thread->active_pollers, poller, tailq);
     403           0 :                 free(poller);
     404             :         }
     405             : 
     406         205 :         RB_FOREACH_SAFE(poller, timed_pollers_tree, &thread->timed_pollers, ptmp) {
     407          27 :                 if (poller->state != SPDK_POLLER_STATE_UNREGISTERED) {
     408           0 :                         SPDK_WARNLOG("timed_poller %s still registered at thread exit\n",
     409             :                                      poller->name);
     410             :                 }
     411          27 :                 RB_REMOVE(timed_pollers_tree, &thread->timed_pollers, poller);
     412          27 :                 free(poller);
     413             :         }
     414             : 
     415         178 :         TAILQ_FOREACH_SAFE(poller, &thread->paused_pollers, tailq, ptmp) {
     416           0 :                 SPDK_WARNLOG("paused_poller %s still registered at thread exit\n", poller->name);
     417           0 :                 TAILQ_REMOVE(&thread->paused_pollers, poller, tailq);
     418           0 :                 free(poller);
     419             :         }
     420             : 
     421         178 :         pthread_mutex_lock(&g_devlist_mutex);
     422         178 :         assert(g_thread_count > 0);
     423         178 :         g_thread_count--;
     424         178 :         TAILQ_REMOVE(&g_threads, thread, tailq);
     425         178 :         pthread_mutex_unlock(&g_devlist_mutex);
     426             : 
     427         178 :         msg = SLIST_FIRST(&thread->msg_cache);
     428      182302 :         while (msg != NULL) {
     429      182124 :                 SLIST_REMOVE_HEAD(&thread->msg_cache, link);
     430             : 
     431      182124 :                 assert(thread->msg_cache_count > 0);
     432      182124 :                 thread->msg_cache_count--;
     433      182124 :                 spdk_mempool_put(g_spdk_msg_mempool, msg);
     434             : 
     435      182124 :                 msg = SLIST_FIRST(&thread->msg_cache);
     436             :         }
     437             : 
     438         178 :         assert(thread->msg_cache_count == 0);
     439             : 
     440         178 :         if (spdk_interrupt_mode_is_enabled()) {
     441           0 :                 thread_interrupt_destroy(thread);
     442             :         }
     443             : 
     444         178 :         spdk_ring_free(thread->messages);
     445         178 :         free(thread);
     446         178 : }
     447             : 
     448             : int
     449          85 : spdk_thread_lib_init(spdk_new_thread_fn new_thread_fn, size_t ctx_sz)
     450             : {
     451          85 :         assert(g_new_thread_fn == NULL);
     452          85 :         assert(g_thread_op_fn == NULL);
     453             : 
     454          85 :         if (new_thread_fn == NULL) {
     455          83 :                 SPDK_INFOLOG(thread, "new_thread_fn was not specified at spdk_thread_lib_init\n");
     456             :         } else {
     457           2 :                 g_new_thread_fn = new_thread_fn;
     458             :         }
     459             : 
     460          85 :         return _thread_lib_init(ctx_sz, SPDK_DEFAULT_MSG_MEMPOOL_SIZE);
     461             : }
     462             : 
     463             : int
     464          12 : spdk_thread_lib_init_ext(spdk_thread_op_fn thread_op_fn,
     465             :                          spdk_thread_op_supported_fn thread_op_supported_fn,
     466             :                          size_t ctx_sz, size_t msg_mempool_sz)
     467             : {
     468          12 :         assert(g_new_thread_fn == NULL);
     469          12 :         assert(g_thread_op_fn == NULL);
     470          12 :         assert(g_thread_op_supported_fn == NULL);
     471             : 
     472          12 :         if ((thread_op_fn != NULL) != (thread_op_supported_fn != NULL)) {
     473           0 :                 SPDK_ERRLOG("Both must be defined or undefined together.\n");
     474           0 :                 return -EINVAL;
     475             :         }
     476             : 
     477          12 :         if (thread_op_fn == NULL && thread_op_supported_fn == NULL) {
     478           0 :                 SPDK_INFOLOG(thread, "thread_op_fn and thread_op_supported_fn were not specified\n");
     479             :         } else {
     480          12 :                 g_thread_op_fn = thread_op_fn;
     481          12 :                 g_thread_op_supported_fn = thread_op_supported_fn;
     482             :         }
     483             : 
     484          12 :         return _thread_lib_init(ctx_sz, msg_mempool_sz);
     485             : }
     486             : 
     487             : void
     488          95 : spdk_thread_lib_fini(void)
     489             : {
     490             :         struct io_device *dev;
     491             : 
     492          96 :         RB_FOREACH(dev, io_device_tree, &g_io_devices) {
     493           1 :                 SPDK_ERRLOG("io_device %s not unregistered\n", dev->name);
     494             :         }
     495             : 
     496          95 :         g_new_thread_fn = NULL;
     497          95 :         g_thread_op_fn = NULL;
     498          95 :         g_thread_op_supported_fn = NULL;
     499          95 :         g_ctx_sz = 0;
     500          95 :         if (g_app_thread != NULL) {
     501          91 :                 _free_thread(g_app_thread);
     502          91 :                 g_app_thread = NULL;
     503             :         }
     504             : 
     505          95 :         if (g_spdk_msg_mempool) {
     506          95 :                 spdk_mempool_free(g_spdk_msg_mempool);
     507          95 :                 g_spdk_msg_mempool = NULL;
     508             :         }
     509          95 : }
     510             : 
     511             : struct spdk_thread *
     512         215 : spdk_thread_create(const char *name, const struct spdk_cpuset *cpumask)
     513             : {
     514         215 :         struct spdk_thread *thread, *null_thread;
     515         215 :         size_t size = SPDK_ALIGN_CEIL(sizeof(*thread) + g_ctx_sz, SPDK_CACHE_LINE_SIZE);
     516         215 :         struct spdk_msg *msgs[SPDK_MSG_MEMPOOL_CACHE_SIZE];
     517         215 :         int rc = 0, i;
     518             : 
     519             :         /* Since this spdk_thread object will be used by another core, ensure that it won't share a
     520             :          * cache line with any other object allocated on this core */
     521         215 :         rc = posix_memalign((void **)&thread, SPDK_CACHE_LINE_SIZE, size);
     522         215 :         if (rc != 0) {
     523           0 :                 SPDK_ERRLOG("Unable to allocate memory for thread\n");
     524           0 :                 return NULL;
     525             :         }
     526         215 :         memset(thread, 0, size);
     527             : 
     528         215 :         if (cpumask) {
     529          23 :                 spdk_cpuset_copy(&thread->cpumask, cpumask);
     530             :         } else {
     531         192 :                 spdk_cpuset_negate(&thread->cpumask);
     532             :         }
     533             : 
     534         215 :         RB_INIT(&thread->io_channels);
     535         215 :         TAILQ_INIT(&thread->active_pollers);
     536         215 :         RB_INIT(&thread->timed_pollers);
     537         215 :         TAILQ_INIT(&thread->paused_pollers);
     538         215 :         SLIST_INIT(&thread->msg_cache);
     539         215 :         thread->msg_cache_count = 0;
     540             : 
     541         215 :         thread->tsc_last = spdk_get_ticks();
     542             : 
     543             :         /* A monotonically increasing ID is assigned to each created poller, beginning at 1.
     544             :          * Once the ID exceeds UINT64_MAX, a warning message is logged.
     545             :          */
     546         215 :         thread->next_poller_id = 1;
     547             : 
     548         215 :         thread->messages = spdk_ring_create(SPDK_RING_TYPE_MP_SC, 65536, SPDK_ENV_NUMA_ID_ANY);
     549         215 :         if (!thread->messages) {
     550           0 :                 SPDK_ERRLOG("Unable to allocate memory for message ring\n");
     551           0 :                 free(thread);
     552           0 :                 return NULL;
     553             :         }
     554             : 
     555             :         /* Fill the local message pool cache. */
     556         215 :         rc = spdk_mempool_get_bulk(g_spdk_msg_mempool, (void **)msgs, SPDK_MSG_MEMPOOL_CACHE_SIZE);
     557         215 :         if (rc == 0) {
     558             :                 /* If we can't populate the cache it's ok. The cache will get filled
     559             :                  * up organically as messages are passed to the thread. */
     560      220375 :                 for (i = 0; i < SPDK_MSG_MEMPOOL_CACHE_SIZE; i++) {
     561      220160 :                         SLIST_INSERT_HEAD(&thread->msg_cache, msgs[i], link);
     562      220160 :                         thread->msg_cache_count++;
     563             :                 }
     564             :         }
     565             : 
     566         215 :         if (name) {
     567          42 :                 snprintf(thread->name, sizeof(thread->name), "%s", name);
     568             :         } else {
     569         173 :                 snprintf(thread->name, sizeof(thread->name), "%p", thread);
     570             :         }
     571             : 
     572         215 :         thread->trace_id = spdk_trace_register_owner(OWNER_TYPE_THREAD, thread->name);
     573             : 
     574         215 :         pthread_mutex_lock(&g_devlist_mutex);
     575         215 :         if (g_thread_id == 0) {
     576           0 :                 SPDK_ERRLOG("Thread ID rolled over. Further thread creation is not allowed.\n");
     577           0 :                 pthread_mutex_unlock(&g_devlist_mutex);
     578           0 :                 _free_thread(thread);
     579           0 :                 return NULL;
     580             :         }
     581         215 :         thread->id = g_thread_id++;
     582         215 :         TAILQ_INSERT_TAIL(&g_threads, thread, tailq);
     583         215 :         g_thread_count++;
     584         215 :         pthread_mutex_unlock(&g_devlist_mutex);
     585             : 
     586         215 :         SPDK_DEBUGLOG(thread, "Allocating new thread (%" PRIu64 ", %s)\n",
     587             :                       thread->id, thread->name);
     588             : 
     589         215 :         if (spdk_interrupt_mode_is_enabled()) {
     590           0 :                 thread->in_interrupt = true;
     591           0 :                 rc = thread_interrupt_create(thread);
     592           0 :                 if (rc != 0) {
     593           0 :                         _free_thread(thread);
     594           0 :                         return NULL;
     595             :                 }
     596             :         }
     597             : 
     598         215 :         if (g_new_thread_fn) {
     599           5 :                 rc = g_new_thread_fn(thread);
     600         210 :         } else if (g_thread_op_supported_fn && g_thread_op_supported_fn(SPDK_THREAD_OP_NEW)) {
     601          16 :                 rc = g_thread_op_fn(thread, SPDK_THREAD_OP_NEW);
     602             :         }
     603             : 
     604         215 :         if (rc != 0) {
     605           2 :                 _free_thread(thread);
     606           2 :                 return NULL;
     607             :         }
     608             : 
     609         213 :         thread->state = SPDK_THREAD_STATE_RUNNING;
     610             : 
     611             :         /* If this is the first thread, save it as the app thread.  Use an atomic
     612             :          * compare + exchange to guard against crazy users who might try to
     613             :          * call spdk_thread_create() simultaneously on multiple threads.
     614             :          */
     615         213 :         null_thread = NULL;
     616         213 :         __atomic_compare_exchange_n(&g_app_thread, &null_thread, thread, false,
     617             :                                     __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST);
     618             : 
     619         213 :         return thread;
     620             : }
     621             : 
     622             : struct spdk_thread *
     623        1619 : spdk_thread_get_app_thread(void)
     624             : {
     625        1619 :         return g_app_thread;
     626             : }
     627             : 
     628             : bool
     629         155 : spdk_thread_is_app_thread(struct spdk_thread *thread)
     630             : {
     631         155 :         if (thread == NULL) {
     632         151 :                 thread = _get_thread();
     633             :         }
     634             : 
     635         155 :         return g_app_thread == thread;
     636             : }
     637             : 
     638             : void
     639           1 : spdk_thread_bind(struct spdk_thread *thread, bool bind)
     640             : {
     641           1 :         thread->is_bound = bind;
     642           1 : }
     643             : 
     644             : bool
     645          10 : spdk_thread_is_bound(struct spdk_thread *thread)
     646             : {
     647          10 :         return thread->is_bound;
     648             : }
     649             : 
     650             : void
     651      466764 : spdk_set_thread(struct spdk_thread *thread)
     652             : {
     653      466764 :         tls_thread = thread;
     654      466764 : }
     655             : 
     656             : static void
     657         216 : thread_exit(struct spdk_thread *thread, uint64_t now)
     658             : {
     659             :         struct spdk_poller *poller;
     660             :         struct spdk_io_channel *ch;
     661             : 
     662         216 :         if (now >= thread->exit_timeout_tsc) {
     663           1 :                 SPDK_ERRLOG("thread %s got timeout, and move it to the exited state forcefully\n",
     664             :                             thread->name);
     665           1 :                 goto exited;
     666             :         }
     667             : 
     668         215 :         if (spdk_ring_count(thread->messages) > 0) {
     669           2 :                 SPDK_INFOLOG(thread, "thread %s still has messages\n", thread->name);
     670           2 :                 return;
     671             :         }
     672             : 
     673         213 :         if (thread->for_each_count > 0) {
     674           4 :                 SPDK_INFOLOG(thread, "thread %s is still executing %u for_each_channels/threads\n",
     675             :                              thread->name, thread->for_each_count);
     676           4 :                 return;
     677             :         }
     678             : 
     679         209 :         TAILQ_FOREACH(poller, &thread->active_pollers, tailq) {
     680           2 :                 if (poller->state != SPDK_POLLER_STATE_UNREGISTERED) {
     681           2 :                         SPDK_INFOLOG(thread,
     682             :                                      "thread %s still has active poller %s\n",
     683             :                                      thread->name, poller->name);
     684           2 :                         return;
     685             :                 }
     686             :         }
     687             : 
     688         254 :         RB_FOREACH(poller, timed_pollers_tree, &thread->timed_pollers) {
     689          47 :                 if (poller->state != SPDK_POLLER_STATE_UNREGISTERED) {
     690           0 :                         SPDK_INFOLOG(thread,
     691             :                                      "thread %s still has active timed poller %s\n",
     692             :                                      thread->name, poller->name);
     693           0 :                         return;
     694             :                 }
     695             :         }
     696             : 
     697         207 :         TAILQ_FOREACH(poller, &thread->paused_pollers, tailq) {
     698           0 :                 SPDK_INFOLOG(thread,
     699             :                              "thread %s still has paused poller %s\n",
     700             :                              thread->name, poller->name);
     701           0 :                 return;
     702             :         }
     703             : 
     704         207 :         RB_FOREACH(ch, io_channel_tree, &thread->io_channels) {
     705           6 :                 SPDK_INFOLOG(thread,
     706             :                              "thread %s still has channel for io_device %s\n",
     707             :                              thread->name, ch->dev->name);
     708           6 :                 return;
     709             :         }
     710             : 
     711         201 :         if (thread->pending_unregister_count > 0) {
     712           2 :                 SPDK_INFOLOG(thread,
     713             :                              "thread %s is still unregistering io_devices\n",
     714             :                              thread->name);
     715           2 :                 return;
     716             :         }
     717             : 
     718         199 : exited:
     719         200 :         thread->state = SPDK_THREAD_STATE_EXITED;
     720         200 :         if (spdk_unlikely(thread->in_interrupt)) {
     721           0 :                 g_thread_op_fn(thread, SPDK_THREAD_OP_RESCHED);
     722             :         }
     723             : }
     724             : 
     725             : static void _thread_exit(void *ctx);
     726             : 
     727             : int
     728         211 : spdk_thread_exit(struct spdk_thread *thread)
     729             : {
     730         211 :         SPDK_DEBUGLOG(thread, "Exit thread %s\n", thread->name);
     731             : 
     732         211 :         assert(tls_thread == thread);
     733             : 
     734         211 :         if (thread->state >= SPDK_THREAD_STATE_EXITING) {
     735           9 :                 SPDK_INFOLOG(thread,
     736             :                              "thread %s is already exiting\n",
     737             :                              thread->name);
     738           9 :                 return 0;
     739             :         }
     740             : 
     741         202 :         thread->exit_timeout_tsc = spdk_get_ticks() + (spdk_get_ticks_hz() *
     742             :                                    SPDK_THREAD_EXIT_TIMEOUT_SEC);
     743         202 :         thread->state = SPDK_THREAD_STATE_EXITING;
     744             : 
     745         202 :         if (spdk_interrupt_mode_is_enabled()) {
     746           0 :                 spdk_thread_send_msg(thread, _thread_exit, thread);
     747             :         }
     748             : 
     749         202 :         return 0;
     750             : }
     751             : 
     752             : bool
     753           0 : spdk_thread_is_running(struct spdk_thread *thread)
     754             : {
     755           0 :         return thread->state == SPDK_THREAD_STATE_RUNNING;
     756             : }
     757             : 
     758             : bool
     759         401 : spdk_thread_is_exited(struct spdk_thread *thread)
     760             : {
     761         401 :         return thread->state == SPDK_THREAD_STATE_EXITED;
     762             : }
     763             : 
     764             : void
     765         181 : spdk_thread_destroy(struct spdk_thread *thread)
     766             : {
     767         181 :         assert(thread != NULL);
     768         181 :         SPDK_DEBUGLOG(thread, "Destroy thread %s\n", thread->name);
     769             : 
     770         181 :         assert(thread->state == SPDK_THREAD_STATE_EXITED);
     771             : 
     772         181 :         if (tls_thread == thread) {
     773         174 :                 tls_thread = NULL;
     774             :         }
     775             : 
     776             :         /* To be safe, do not free the app thread until spdk_thread_lib_fini(). */
     777         181 :         if (thread != g_app_thread) {
     778          85 :                 _free_thread(thread);
     779             :         }
     780         181 : }
     781             : 
     782             : void *
     783          49 : spdk_thread_get_ctx(struct spdk_thread *thread)
     784             : {
     785          49 :         if (g_ctx_sz > 0) {
     786          49 :                 return thread->ctx;
     787             :         }
     788             : 
     789           0 :         return NULL;
     790             : }
     791             : 
     792             : struct spdk_cpuset *
     793          46 : spdk_thread_get_cpumask(struct spdk_thread *thread)
     794             : {
     795          46 :         return &thread->cpumask;
     796             : }
     797             : 
     798             : int
     799           2 : spdk_thread_set_cpumask(struct spdk_cpuset *cpumask)
     800             : {
     801             :         struct spdk_thread *thread;
     802             : 
     803           2 :         if (!g_thread_op_supported_fn || !g_thread_op_supported_fn(SPDK_THREAD_OP_RESCHED)) {
     804           0 :                 SPDK_ERRLOG("Framework does not support reschedule operation.\n");
     805           0 :                 assert(false);
     806             :                 return -ENOTSUP;
     807             :         }
     808             : 
     809           2 :         thread = spdk_get_thread();
     810           2 :         if (!thread) {
     811           0 :                 SPDK_ERRLOG("Called from non-SPDK thread\n");
     812           0 :                 assert(false);
     813             :                 return -EINVAL;
     814             :         }
     815             : 
     816           2 :         spdk_cpuset_copy(&thread->cpumask, cpumask);
     817             : 
     818             :         /* Invoke framework's reschedule operation. If this function is called multiple times
     819             :          * in a single spdk_thread_poll() context, the last cpumask will be used in the
     820             :          * reschedule operation.
     821             :          */
     822           2 :         g_thread_op_fn(thread, SPDK_THREAD_OP_RESCHED);
     823             : 
     824           2 :         return 0;
     825             : }
     826             : 
     827             : struct spdk_thread *
     828         191 : spdk_thread_get_from_ctx(void *ctx)
     829             : {
     830         191 :         if (ctx == NULL) {
     831           0 :                 assert(false);
     832             :                 return NULL;
     833             :         }
     834             : 
     835         191 :         assert(g_ctx_sz > 0);
     836             : 
     837         191 :         return SPDK_CONTAINEROF(ctx, struct spdk_thread, ctx);
     838             : }
     839             : 
     840             : static inline uint32_t
     841      421403 : msg_queue_run_batch(struct spdk_thread *thread, uint32_t max_msgs)
     842             : {
     843             :         unsigned count, i;
     844      421403 :         void *messages[SPDK_MSG_BATCH_SIZE];
     845      421403 :         uint64_t notify = 1;
     846             :         int rc;
     847             : 
     848             : #ifdef DEBUG
     849             :         /*
     850             :          * spdk_ring_dequeue() fills messages and returns how many entries it wrote,
     851             :          * so we will never actually read uninitialized data from events, but just to be sure
     852             :          * (and to silence a static analyzer false positive), initialize the array to NULL pointers.
     853             :          */
     854      421403 :         memset(messages, 0, sizeof(messages));
     855             : #endif
     856             : 
     857      421403 :         if (max_msgs > 0) {
     858         365 :                 max_msgs = spdk_min(max_msgs, SPDK_MSG_BATCH_SIZE);
     859             :         } else {
     860      421038 :                 max_msgs = SPDK_MSG_BATCH_SIZE;
     861             :         }
     862             : 
     863      421403 :         count = spdk_ring_dequeue(thread->messages, messages, max_msgs);
     864      421403 :         if (spdk_unlikely(thread->in_interrupt) &&
     865           0 :             spdk_ring_count(thread->messages) != 0) {
     866           0 :                 rc = write(thread->msg_fd, &notify, sizeof(notify));
     867           0 :                 if (rc < 0) {
     868           0 :                         SPDK_ERRLOG("failed to notify msg_queue: %s.\n", spdk_strerror(errno));
     869             :                 }
     870             :         }
     871      421403 :         if (count == 0) {
     872      260849 :                 return 0;
     873             :         }
     874             : 
     875      326465 :         for (i = 0; i < count; i++) {
     876      165911 :                 struct spdk_msg *msg = messages[i];
     877             : 
     878      165911 :                 assert(msg != NULL);
     879             : 
     880             :                 SPDK_DTRACE_PROBE2(msg_exec, msg->fn, msg->arg);
     881             : 
     882      165911 :                 msg->fn(msg->arg);
     883             : 
     884      165911 :                 SPIN_ASSERT(thread->lock_count == 0, SPIN_ERR_HOLD_DURING_SWITCH);
     885             : 
     886      165911 :                 if (thread->msg_cache_count < SPDK_MSG_MEMPOOL_CACHE_SIZE) {
     887             :                         /* Insert the messages at the head. We want to re-use the hot
     888             :                          * ones. */
     889      165772 :                         SLIST_INSERT_HEAD(&thread->msg_cache, msg, link);
     890      165772 :                         thread->msg_cache_count++;
     891             :                 } else {
     892         139 :                         spdk_mempool_put(g_spdk_msg_mempool, msg);
     893             :                 }
     894             :         }
     895             : 
     896      160554 :         return count;
     897             : }
     898             : 
     899             : static void
     900         546 : poller_insert_timer(struct spdk_thread *thread, struct spdk_poller *poller, uint64_t now)
     901             : {
     902             :         struct spdk_poller *tmp __attribute__((unused));
     903             : 
     904         546 :         poller->next_run_tick = now + poller->period_ticks;
     905             : 
     906             :         /*
     907             :          * Insert poller in the thread's timed_pollers tree by next scheduled run time
     908             :          * as its key.
     909             :          */
     910         546 :         tmp = RB_INSERT(timed_pollers_tree, &thread->timed_pollers, poller);
     911         546 :         assert(tmp == NULL);
     912             : 
     913             :         /* Update the cache only if it is empty or the inserted poller is earlier than it.
     914             :          * RB_MIN() is not necessary here because all pollers, which has exactly the same
     915             :          * next_run_tick as the existing poller, are inserted on the right side.
     916             :          */
     917         546 :         if (thread->first_timed_poller == NULL ||
     918         458 :             poller->next_run_tick < thread->first_timed_poller->next_run_tick) {
     919         254 :                 thread->first_timed_poller = poller;
     920             :         }
     921         546 : }
     922             : 
     923             : static inline void
     924           0 : poller_remove_timer(struct spdk_thread *thread, struct spdk_poller *poller)
     925             : {
     926             :         struct spdk_poller *tmp __attribute__((unused));
     927             : 
     928           0 :         tmp = RB_REMOVE(timed_pollers_tree, &thread->timed_pollers, poller);
     929           0 :         assert(tmp != NULL);
     930             : 
     931             :         /* This function is not used in any case that is performance critical.
     932             :          * Update the cache simply by RB_MIN() if it needs to be changed.
     933             :          */
     934           0 :         if (thread->first_timed_poller == poller) {
     935           0 :                 thread->first_timed_poller = RB_MIN(timed_pollers_tree, &thread->timed_pollers);
     936             :         }
     937           0 : }
     938             : 
     939             : static void
     940         895 : thread_insert_poller(struct spdk_thread *thread, struct spdk_poller *poller)
     941             : {
     942         895 :         if (poller->period_ticks) {
     943         367 :                 poller_insert_timer(thread, poller, spdk_get_ticks());
     944             :         } else {
     945         528 :                 TAILQ_INSERT_TAIL(&thread->active_pollers, poller, tailq);
     946             :         }
     947         895 : }
     948             : 
     949             : static inline void
     950      421403 : thread_update_stats(struct spdk_thread *thread, uint64_t end,
     951             :                     uint64_t start, int rc)
     952             : {
     953      421403 :         if (rc == 0) {
     954             :                 /* Poller status idle */
     955      260301 :                 thread->stats.idle_tsc += end - start;
     956      161102 :         } else if (rc > 0) {
     957             :                 /* Poller status busy */
     958      161102 :                 thread->stats.busy_tsc += end - start;
     959             :         }
     960             :         /* Store end time to use it as start time of the next spdk_thread_poll(). */
     961      421403 :         thread->tsc_last = end;
     962      421403 : }
     963             : 
     964             : static inline int
     965        4389 : thread_execute_poller(struct spdk_thread *thread, struct spdk_poller *poller)
     966             : {
     967             :         int rc;
     968             : 
     969        4389 :         switch (poller->state) {
     970         108 :         case SPDK_POLLER_STATE_UNREGISTERED:
     971         108 :                 TAILQ_REMOVE(&thread->active_pollers, poller, tailq);
     972         108 :                 free(poller);
     973         108 :                 return 0;
     974           5 :         case SPDK_POLLER_STATE_PAUSING:
     975           5 :                 TAILQ_REMOVE(&thread->active_pollers, poller, tailq);
     976           5 :                 TAILQ_INSERT_TAIL(&thread->paused_pollers, poller, tailq);
     977           5 :                 poller->state = SPDK_POLLER_STATE_PAUSED;
     978           5 :                 return 0;
     979        4276 :         case SPDK_POLLER_STATE_WAITING:
     980        4276 :                 break;
     981           0 :         default:
     982           0 :                 assert(false);
     983             :                 break;
     984             :         }
     985             : 
     986        4276 :         poller->state = SPDK_POLLER_STATE_RUNNING;
     987        4276 :         rc = poller->fn(poller->arg);
     988             : 
     989        4276 :         SPIN_ASSERT(thread->lock_count == 0, SPIN_ERR_HOLD_DURING_SWITCH);
     990             : 
     991        4276 :         poller->run_count++;
     992        4276 :         if (rc > 0) {
     993         576 :                 poller->busy_count++;
     994             :         }
     995             : 
     996             : #ifdef DEBUG
     997        4276 :         if (rc == -1) {
     998          13 :                 SPDK_DEBUGLOG(thread, "Poller %s returned -1\n", poller->name);
     999             :         }
    1000             : #endif
    1001             : 
    1002        4276 :         switch (poller->state) {
    1003         415 :         case SPDK_POLLER_STATE_UNREGISTERED:
    1004         415 :                 TAILQ_REMOVE(&thread->active_pollers, poller, tailq);
    1005         415 :                 free(poller);
    1006         415 :                 break;
    1007           7 :         case SPDK_POLLER_STATE_PAUSING:
    1008           7 :                 TAILQ_REMOVE(&thread->active_pollers, poller, tailq);
    1009           7 :                 TAILQ_INSERT_TAIL(&thread->paused_pollers, poller, tailq);
    1010           7 :                 poller->state = SPDK_POLLER_STATE_PAUSED;
    1011           7 :                 break;
    1012           0 :         case SPDK_POLLER_STATE_PAUSED:
    1013             :         case SPDK_POLLER_STATE_WAITING:
    1014           0 :                 break;
    1015        3854 :         case SPDK_POLLER_STATE_RUNNING:
    1016        3854 :                 poller->state = SPDK_POLLER_STATE_WAITING;
    1017        3854 :                 break;
    1018           0 :         default:
    1019           0 :                 assert(false);
    1020             :                 break;
    1021             :         }
    1022             : 
    1023        4276 :         return rc;
    1024             : }
    1025             : 
    1026             : static inline int
    1027         499 : thread_execute_timed_poller(struct spdk_thread *thread, struct spdk_poller *poller,
    1028             :                             uint64_t now)
    1029             : {
    1030             :         int rc;
    1031             : 
    1032         499 :         switch (poller->state) {
    1033         156 :         case SPDK_POLLER_STATE_UNREGISTERED:
    1034         156 :                 free(poller);
    1035         156 :                 return 0;
    1036          13 :         case SPDK_POLLER_STATE_PAUSING:
    1037          13 :                 TAILQ_INSERT_TAIL(&thread->paused_pollers, poller, tailq);
    1038          13 :                 poller->state = SPDK_POLLER_STATE_PAUSED;
    1039          13 :                 return 0;
    1040         330 :         case SPDK_POLLER_STATE_WAITING:
    1041         330 :                 break;
    1042           0 :         default:
    1043           0 :                 assert(false);
    1044             :                 break;
    1045             :         }
    1046             : 
    1047         330 :         poller->state = SPDK_POLLER_STATE_RUNNING;
    1048         330 :         rc = poller->fn(poller->arg);
    1049             : 
    1050         330 :         SPIN_ASSERT(thread->lock_count == 0, SPIN_ERR_HOLD_DURING_SWITCH);
    1051             : 
    1052         330 :         poller->run_count++;
    1053         330 :         if (rc > 0) {
    1054         237 :                 poller->busy_count++;
    1055             :         }
    1056             : 
    1057             : #ifdef DEBUG
    1058         330 :         if (rc == -1) {
    1059           5 :                 SPDK_DEBUGLOG(thread, "Timed poller %s returned -1\n", poller->name);
    1060             :         }
    1061             : #endif
    1062             : 
    1063         330 :         switch (poller->state) {
    1064         147 :         case SPDK_POLLER_STATE_UNREGISTERED:
    1065         147 :                 free(poller);
    1066         147 :                 break;
    1067           4 :         case SPDK_POLLER_STATE_PAUSING:
    1068           4 :                 TAILQ_INSERT_TAIL(&thread->paused_pollers, poller, tailq);
    1069           4 :                 poller->state = SPDK_POLLER_STATE_PAUSED;
    1070           4 :                 break;
    1071           0 :         case SPDK_POLLER_STATE_PAUSED:
    1072           0 :                 break;
    1073         179 :         case SPDK_POLLER_STATE_RUNNING:
    1074         179 :                 poller->state = SPDK_POLLER_STATE_WAITING;
    1075             :         /* fallthrough */
    1076         179 :         case SPDK_POLLER_STATE_WAITING:
    1077         179 :                 poller_insert_timer(thread, poller, now);
    1078         179 :                 break;
    1079           0 :         default:
    1080           0 :                 assert(false);
    1081             :                 break;
    1082             :         }
    1083             : 
    1084         330 :         return rc;
    1085             : }
    1086             : 
    1087             : static inline void
    1088           0 : thread_run_pp_handlers(struct spdk_thread *thread)
    1089             : {
    1090           0 :         uint8_t i, count = thread->num_pp_handlers;
    1091             : 
    1092             :         /* Set to max value to prevent new handlers registration within the callback */
    1093           0 :         thread->num_pp_handlers = SPDK_THREAD_MAX_POST_POLLER_HANDLERS;
    1094             : 
    1095           0 :         for (i = 0; i < count; i++) {
    1096           0 :                 thread->pp_handlers[i].fn(thread->pp_handlers[i].fn_arg);
    1097           0 :                 thread->pp_handlers[i].fn = NULL;
    1098             :         }
    1099             : 
    1100           0 :         thread->num_pp_handlers = 0;
    1101           0 : }
    1102             : 
    1103             : static int
    1104      421403 : thread_poll(struct spdk_thread *thread, uint32_t max_msgs, uint64_t now)
    1105             : {
    1106             :         uint32_t msg_count;
    1107             :         struct spdk_poller *poller, *tmp;
    1108             :         spdk_msg_fn critical_msg;
    1109      421403 :         int rc = 0;
    1110             : 
    1111      421403 :         thread->tsc_last = now;
    1112             : 
    1113      421403 :         critical_msg = thread->critical_msg;
    1114      421403 :         if (spdk_unlikely(critical_msg != NULL)) {
    1115           0 :                 critical_msg(NULL);
    1116           0 :                 thread->critical_msg = NULL;
    1117           0 :                 rc = 1;
    1118             :         }
    1119             : 
    1120      421403 :         msg_count = msg_queue_run_batch(thread, max_msgs);
    1121      421403 :         if (msg_count) {
    1122      160554 :                 rc = 1;
    1123             :         }
    1124             : 
    1125      425792 :         TAILQ_FOREACH_REVERSE_SAFE(poller, &thread->active_pollers,
    1126             :                                    active_pollers_head, tailq, tmp) {
    1127             :                 int poller_rc;
    1128             : 
    1129        4389 :                 poller_rc = thread_execute_poller(thread, poller);
    1130        4389 :                 if (poller_rc > rc) {
    1131         350 :                         rc = poller_rc;
    1132             :                 }
    1133        4389 :                 if (thread->num_pp_handlers) {
    1134           0 :                         thread_run_pp_handlers(thread);
    1135             :                 }
    1136             :         }
    1137             : 
    1138      421403 :         poller = thread->first_timed_poller;
    1139      421902 :         while (poller != NULL) {
    1140        3409 :                 int timer_rc = 0;
    1141             : 
    1142        3409 :                 if (now < poller->next_run_tick) {
    1143        2910 :                         break;
    1144             :                 }
    1145             : 
    1146         499 :                 tmp = RB_NEXT(timed_pollers_tree, &thread->timed_pollers, poller);
    1147         499 :                 RB_REMOVE(timed_pollers_tree, &thread->timed_pollers, poller);
    1148             : 
    1149             :                 /* Update the cache to the next timed poller in the list
    1150             :                  * only if the current poller is still the closest, otherwise,
    1151             :                  * do nothing because the cache has been already updated.
    1152             :                  */
    1153         499 :                 if (thread->first_timed_poller == poller) {
    1154         499 :                         thread->first_timed_poller = tmp;
    1155             :                 }
    1156             : 
    1157         499 :                 timer_rc = thread_execute_timed_poller(thread, poller, now);
    1158         499 :                 if (timer_rc > rc) {
    1159         198 :                         rc = timer_rc;
    1160             :                 }
    1161             : 
    1162         499 :                 poller = tmp;
    1163             :         }
    1164             : 
    1165      421403 :         return rc;
    1166             : }
    1167             : 
    1168             : static void
    1169           0 : _thread_remove_pollers(void *ctx)
    1170             : {
    1171           0 :         struct spdk_thread *thread = ctx;
    1172             :         struct spdk_poller *poller, *tmp;
    1173             : 
    1174           0 :         TAILQ_FOREACH_REVERSE_SAFE(poller, &thread->active_pollers,
    1175             :                                    active_pollers_head, tailq, tmp) {
    1176           0 :                 if (poller->state == SPDK_POLLER_STATE_UNREGISTERED) {
    1177           0 :                         TAILQ_REMOVE(&thread->active_pollers, poller, tailq);
    1178           0 :                         free(poller);
    1179             :                 }
    1180             :         }
    1181             : 
    1182           0 :         RB_FOREACH_SAFE(poller, timed_pollers_tree, &thread->timed_pollers, tmp) {
    1183           0 :                 if (poller->state == SPDK_POLLER_STATE_UNREGISTERED) {
    1184           0 :                         poller_remove_timer(thread, poller);
    1185           0 :                         free(poller);
    1186             :                 }
    1187             :         }
    1188             : 
    1189           0 :         thread->poller_unregistered = false;
    1190           0 : }
    1191             : 
    1192             : static void
    1193           0 : _thread_exit(void *ctx)
    1194             : {
    1195           0 :         struct spdk_thread *thread = ctx;
    1196             : 
    1197           0 :         assert(thread->state == SPDK_THREAD_STATE_EXITING);
    1198             : 
    1199           0 :         thread_exit(thread, spdk_get_ticks());
    1200             : 
    1201           0 :         if (thread->state != SPDK_THREAD_STATE_EXITED) {
    1202           0 :                 spdk_thread_send_msg(thread, _thread_exit, thread);
    1203             :         }
    1204           0 : }
    1205             : 
    1206             : int
    1207      421403 : spdk_thread_poll(struct spdk_thread *thread, uint32_t max_msgs, uint64_t now)
    1208             : {
    1209             :         struct spdk_thread *orig_thread;
    1210             :         int rc;
    1211             : 
    1212      421403 :         orig_thread = _get_thread();
    1213      421403 :         tls_thread = thread;
    1214             : 
    1215      421403 :         if (now == 0) {
    1216      414222 :                 now = spdk_get_ticks();
    1217             :         }
    1218             : 
    1219      421403 :         if (spdk_likely(!thread->in_interrupt)) {
    1220      421403 :                 rc = thread_poll(thread, max_msgs, now);
    1221      421403 :                 if (spdk_unlikely(thread->in_interrupt)) {
    1222             :                         /* The thread transitioned to interrupt mode during the above poll.
    1223             :                          * Poll it one more time in case that during the transition time
    1224             :                          * there is msg received without notification.
    1225             :                          */
    1226           0 :                         rc = thread_poll(thread, max_msgs, now);
    1227             :                 }
    1228             : 
    1229      421403 :                 if (spdk_unlikely(thread->state == SPDK_THREAD_STATE_EXITING)) {
    1230         216 :                         thread_exit(thread, now);
    1231             :                 }
    1232             :         } else {
    1233             :                 /* Non-block wait on thread's fd_group */
    1234           0 :                 rc = spdk_fd_group_wait(thread->fgrp, 0);
    1235             :         }
    1236             : 
    1237      421403 :         thread_update_stats(thread, spdk_get_ticks(), now, rc);
    1238             : 
    1239      421403 :         tls_thread = orig_thread;
    1240             : 
    1241      421403 :         return rc;
    1242             : }
    1243             : 
    1244             : uint64_t
    1245           0 : spdk_thread_next_poller_expiration(struct spdk_thread *thread)
    1246             : {
    1247             :         struct spdk_poller *poller;
    1248             : 
    1249           0 :         poller = thread->first_timed_poller;
    1250           0 :         if (poller) {
    1251           0 :                 return poller->next_run_tick;
    1252             :         }
    1253             : 
    1254           0 :         return 0;
    1255             : }
    1256             : 
    1257             : int
    1258           0 : spdk_thread_has_active_pollers(struct spdk_thread *thread)
    1259             : {
    1260           0 :         return !TAILQ_EMPTY(&thread->active_pollers);
    1261             : }
    1262             : 
    1263             : static bool
    1264          25 : thread_has_unpaused_pollers(struct spdk_thread *thread)
    1265             : {
    1266          25 :         if (TAILQ_EMPTY(&thread->active_pollers) &&
    1267          25 :             RB_EMPTY(&thread->timed_pollers)) {
    1268          25 :                 return false;
    1269             :         }
    1270             : 
    1271           0 :         return true;
    1272             : }
    1273             : 
    1274             : bool
    1275           2 : spdk_thread_has_pollers(struct spdk_thread *thread)
    1276             : {
    1277           2 :         if (!thread_has_unpaused_pollers(thread) &&
    1278           2 :             TAILQ_EMPTY(&thread->paused_pollers)) {
    1279           2 :                 return false;
    1280             :         }
    1281             : 
    1282           0 :         return true;
    1283             : }
    1284             : 
    1285             : bool
    1286          23 : spdk_thread_is_idle(struct spdk_thread *thread)
    1287             : {
    1288          46 :         if (spdk_ring_count(thread->messages) ||
    1289          23 :             thread_has_unpaused_pollers(thread) ||
    1290          23 :             thread->critical_msg != NULL) {
    1291           0 :                 return false;
    1292             :         }
    1293             : 
    1294          23 :         return true;
    1295             : }
    1296             : 
    1297             : uint32_t
    1298          17 : spdk_thread_get_count(void)
    1299             : {
    1300             :         /*
    1301             :          * Return cached value of the current thread count.  We could acquire the
    1302             :          *  lock and iterate through the TAILQ of threads to count them, but that
    1303             :          *  count could still be invalidated after we release the lock.
    1304             :          */
    1305          17 :         return g_thread_count;
    1306             : }
    1307             : 
    1308             : struct spdk_thread *
    1309      348685 : spdk_get_thread(void)
    1310             : {
    1311      348685 :         return _get_thread();
    1312             : }
    1313             : 
    1314             : const char *
    1315           4 : spdk_thread_get_name(const struct spdk_thread *thread)
    1316             : {
    1317           4 :         return thread->name;
    1318             : }
    1319             : 
    1320             : uint64_t
    1321          15 : spdk_thread_get_id(const struct spdk_thread *thread)
    1322             : {
    1323          15 :         return thread->id;
    1324             : }
    1325             : 
    1326             : struct spdk_thread *
    1327          13 : spdk_thread_get_by_id(uint64_t id)
    1328             : {
    1329             :         struct spdk_thread *thread;
    1330             : 
    1331          13 :         if (id == 0 || id >= g_thread_id) {
    1332           0 :                 SPDK_ERRLOG("invalid thread id: %" PRIu64 ".\n", id);
    1333           0 :                 return NULL;
    1334             :         }
    1335          13 :         pthread_mutex_lock(&g_devlist_mutex);
    1336          28 :         TAILQ_FOREACH(thread, &g_threads, tailq) {
    1337          28 :                 if (thread->id == id) {
    1338          13 :                         break;
    1339             :                 }
    1340             :         }
    1341          13 :         pthread_mutex_unlock(&g_devlist_mutex);
    1342          13 :         return thread;
    1343             : }
    1344             : 
    1345             : int
    1346          57 : spdk_thread_get_stats(struct spdk_thread_stats *stats)
    1347             : {
    1348             :         struct spdk_thread *thread;
    1349             : 
    1350          57 :         thread = _get_thread();
    1351          57 :         if (!thread) {
    1352           0 :                 SPDK_ERRLOG("No thread allocated\n");
    1353           0 :                 return -EINVAL;
    1354             :         }
    1355             : 
    1356          57 :         if (stats == NULL) {
    1357           0 :                 return -EINVAL;
    1358             :         }
    1359             : 
    1360          57 :         *stats = thread->stats;
    1361             : 
    1362          57 :         return 0;
    1363             : }
    1364             : 
    1365             : uint64_t
    1366      168056 : spdk_thread_get_last_tsc(struct spdk_thread *thread)
    1367             : {
    1368      168056 :         if (thread == NULL) {
    1369           0 :                 thread = _get_thread();
    1370             :         }
    1371             : 
    1372      168056 :         return thread->tsc_last;
    1373             : }
    1374             : 
    1375             : static inline int
    1376      165920 : thread_send_msg_notification(const struct spdk_thread *target_thread)
    1377             : {
    1378      165920 :         uint64_t notify = 1;
    1379             :         int rc;
    1380             : 
    1381             :         /* Not necessary to do notification if interrupt facility is not enabled */
    1382      165920 :         if (spdk_likely(!spdk_interrupt_mode_is_enabled())) {
    1383      165920 :                 return 0;
    1384             :         }
    1385             : 
    1386             :         /* When each spdk_thread can switch between poll and interrupt mode dynamically,
    1387             :          * after sending thread msg, it is necessary to check whether target thread runs in
    1388             :          * interrupt mode and then decide whether do event notification.
    1389             :          */
    1390           0 :         if (spdk_unlikely(target_thread->in_interrupt)) {
    1391           0 :                 rc = write(target_thread->msg_fd, &notify, sizeof(notify));
    1392           0 :                 if (rc < 0) {
    1393           0 :                         SPDK_ERRLOG("failed to notify msg_queue: %s.\n", spdk_strerror(errno));
    1394           0 :                         return -EIO;
    1395             :                 }
    1396             :         }
    1397             : 
    1398           0 :         return 0;
    1399             : }
    1400             : 
    1401             : int
    1402      165920 : spdk_thread_send_msg(const struct spdk_thread *thread, spdk_msg_fn fn, void *ctx)
    1403             : {
    1404             :         struct spdk_thread *local_thread;
    1405      165920 :         struct spdk_msg *msg;
    1406             :         int rc;
    1407             : 
    1408      165920 :         assert(thread != NULL);
    1409             : 
    1410      165920 :         if (spdk_unlikely(thread->state == SPDK_THREAD_STATE_EXITED)) {
    1411           0 :                 SPDK_ERRLOG("Thread %s is marked as exited.\n", thread->name);
    1412           0 :                 return -EIO;
    1413             :         }
    1414             : 
    1415      165920 :         local_thread = _get_thread();
    1416             : 
    1417      165920 :         msg = NULL;
    1418      165920 :         if (local_thread != NULL) {
    1419      165920 :                 if (local_thread->msg_cache_count > 0) {
    1420      165920 :                         msg = SLIST_FIRST(&local_thread->msg_cache);
    1421      165920 :                         assert(msg != NULL);
    1422      165920 :                         SLIST_REMOVE_HEAD(&local_thread->msg_cache, link);
    1423      165920 :                         local_thread->msg_cache_count--;
    1424             :                 }
    1425             :         }
    1426             : 
    1427      165920 :         if (msg == NULL) {
    1428           0 :                 msg = spdk_mempool_get(g_spdk_msg_mempool);
    1429           0 :                 if (!msg) {
    1430           0 :                         SPDK_ERRLOG("msg could not be allocated\n");
    1431           0 :                         return -ENOMEM;
    1432             :                 }
    1433             :         }
    1434             : 
    1435      165920 :         msg->fn = fn;
    1436      165920 :         msg->arg = ctx;
    1437             : 
    1438      165920 :         rc = spdk_ring_enqueue(thread->messages, (void **)&msg, 1, NULL);
    1439      165920 :         if (rc != 1) {
    1440           0 :                 SPDK_ERRLOG("msg could not be enqueued\n");
    1441           0 :                 spdk_mempool_put(g_spdk_msg_mempool, msg);
    1442           0 :                 return -EIO;
    1443             :         }
    1444             : 
    1445      165920 :         return thread_send_msg_notification(thread);
    1446             : }
    1447             : 
    1448             : int
    1449           0 : spdk_thread_send_critical_msg(struct spdk_thread *thread, spdk_msg_fn fn)
    1450             : {
    1451           0 :         spdk_msg_fn expected = NULL;
    1452             : 
    1453           0 :         if (!__atomic_compare_exchange_n(&thread->critical_msg, &expected, fn, false, __ATOMIC_SEQ_CST,
    1454             :                                          __ATOMIC_SEQ_CST)) {
    1455           0 :                 return -EIO;
    1456             :         }
    1457             : 
    1458           0 :         return thread_send_msg_notification(thread);
    1459             : }
    1460             : 
    1461             : #ifdef __linux__
    1462             : static int
    1463           0 : interrupt_timerfd_process(void *arg)
    1464             : {
    1465           0 :         struct spdk_poller *poller = arg;
    1466           0 :         uint64_t exp;
    1467             :         int rc;
    1468             : 
    1469             :         /* clear the level of interval timer */
    1470           0 :         rc = read(poller->intr->efd, &exp, sizeof(exp));
    1471           0 :         if (rc < 0) {
    1472           0 :                 if (rc == -EAGAIN) {
    1473           0 :                         return 0;
    1474             :                 }
    1475             : 
    1476           0 :                 return rc;
    1477             :         }
    1478             : 
    1479             :         SPDK_DTRACE_PROBE2(timerfd_exec, poller->fn, poller->arg);
    1480             : 
    1481           0 :         return poller->fn(poller->arg);
    1482             : }
    1483             : 
    1484             : static int
    1485           0 : period_poller_interrupt_init(struct spdk_poller *poller)
    1486             : {
    1487             :         int timerfd;
    1488             : 
    1489           0 :         SPDK_DEBUGLOG(thread, "timerfd init for periodic poller %s\n", poller->name);
    1490           0 :         timerfd = timerfd_create(CLOCK_MONOTONIC, TFD_NONBLOCK | TFD_CLOEXEC);
    1491           0 :         if (timerfd < 0) {
    1492           0 :                 return -errno;
    1493             :         }
    1494             : 
    1495           0 :         poller->intr = spdk_interrupt_register(timerfd, interrupt_timerfd_process, poller, poller->name);
    1496           0 :         if (poller->intr == NULL) {
    1497           0 :                 close(timerfd);
    1498           0 :                 return -1;
    1499             :         }
    1500             : 
    1501           0 :         return 0;
    1502             : }
    1503             : 
    1504             : static void
    1505           0 : period_poller_set_interrupt_mode(struct spdk_poller *poller, void *cb_arg, bool interrupt_mode)
    1506             : {
    1507             :         int timerfd;
    1508           0 :         uint64_t now_tick = spdk_get_ticks();
    1509           0 :         uint64_t ticks = spdk_get_ticks_hz();
    1510             :         int ret;
    1511           0 :         struct itimerspec new_tv = {};
    1512           0 :         struct itimerspec old_tv = {};
    1513             : 
    1514           0 :         assert(poller->intr != NULL);
    1515           0 :         assert(poller->period_ticks != 0);
    1516             : 
    1517           0 :         timerfd = poller->intr->efd;
    1518             : 
    1519           0 :         assert(timerfd >= 0);
    1520             : 
    1521           0 :         SPDK_DEBUGLOG(thread, "timerfd set poller %s into %s mode\n", poller->name,
    1522             :                       interrupt_mode ? "interrupt" : "poll");
    1523             : 
    1524           0 :         if (interrupt_mode) {
    1525             :                 /* Set repeated timer expiration */
    1526           0 :                 new_tv.it_interval.tv_sec = poller->period_ticks / ticks;
    1527           0 :                 new_tv.it_interval.tv_nsec = poller->period_ticks % ticks * SPDK_SEC_TO_NSEC / ticks;
    1528             : 
    1529             :                 /* Update next timer expiration */
    1530           0 :                 if (poller->next_run_tick == 0) {
    1531           0 :                         poller->next_run_tick = now_tick + poller->period_ticks;
    1532           0 :                 } else if (poller->next_run_tick < now_tick) {
    1533           0 :                         poller->next_run_tick = now_tick;
    1534             :                 }
    1535             : 
    1536           0 :                 new_tv.it_value.tv_sec = (poller->next_run_tick - now_tick) / ticks;
    1537           0 :                 new_tv.it_value.tv_nsec = (poller->next_run_tick - now_tick) % ticks * SPDK_SEC_TO_NSEC / ticks;
    1538             : 
    1539           0 :                 ret = timerfd_settime(timerfd, 0, &new_tv, NULL);
    1540           0 :                 if (ret < 0) {
    1541           0 :                         SPDK_ERRLOG("Failed to arm timerfd: error(%d)\n", errno);
    1542           0 :                         assert(false);
    1543             :                 }
    1544             :         } else {
    1545             :                 /* Disarm the timer */
    1546           0 :                 ret = timerfd_settime(timerfd, 0, &new_tv, &old_tv);
    1547           0 :                 if (ret < 0) {
    1548             :                         /* timerfd_settime's failure indicates that the timerfd is in error */
    1549           0 :                         SPDK_ERRLOG("Failed to disarm timerfd: error(%d)\n", errno);
    1550           0 :                         assert(false);
    1551             :                 }
    1552             : 
    1553             :                 /* In order to reuse poller_insert_timer, fix now_tick, so next_run_tick would be
    1554             :                  * now_tick + ticks * old_tv.it_value.tv_sec + (ticks * old_tv.it_value.tv_nsec) / SPDK_SEC_TO_NSEC
    1555             :                  */
    1556           0 :                 now_tick = now_tick - poller->period_ticks + ticks * old_tv.it_value.tv_sec + \
    1557           0 :                            (ticks * old_tv.it_value.tv_nsec) / SPDK_SEC_TO_NSEC;
    1558           0 :                 poller_remove_timer(poller->thread, poller);
    1559           0 :                 poller_insert_timer(poller->thread, poller, now_tick);
    1560             :         }
    1561           0 : }
    1562             : 
    1563             : static void
    1564           0 : poller_interrupt_fini(struct spdk_poller *poller)
    1565             : {
    1566             :         int fd;
    1567             : 
    1568           0 :         SPDK_DEBUGLOG(thread, "interrupt fini for poller %s\n", poller->name);
    1569           0 :         assert(poller->intr != NULL);
    1570           0 :         fd = poller->intr->efd;
    1571           0 :         spdk_interrupt_unregister(&poller->intr);
    1572           0 :         close(fd);
    1573           0 : }
    1574             : 
    1575             : static int
    1576           0 : busy_poller_interrupt_init(struct spdk_poller *poller)
    1577             : {
    1578             :         int busy_efd;
    1579             : 
    1580           0 :         SPDK_DEBUGLOG(thread, "busy_efd init for busy poller %s\n", poller->name);
    1581           0 :         busy_efd = eventfd(0, EFD_NONBLOCK | EFD_CLOEXEC);
    1582           0 :         if (busy_efd < 0) {
    1583           0 :                 SPDK_ERRLOG("Failed to create eventfd for Poller(%s).\n", poller->name);
    1584           0 :                 return -errno;
    1585             :         }
    1586             : 
    1587           0 :         poller->intr = spdk_interrupt_register(busy_efd, poller->fn, poller->arg, poller->name);
    1588           0 :         if (poller->intr == NULL) {
    1589           0 :                 close(busy_efd);
    1590           0 :                 return -1;
    1591             :         }
    1592             : 
    1593           0 :         return 0;
    1594             : }
    1595             : 
    1596             : static void
    1597           0 : busy_poller_set_interrupt_mode(struct spdk_poller *poller, void *cb_arg, bool interrupt_mode)
    1598             : {
    1599           0 :         int busy_efd = poller->intr->efd;
    1600           0 :         uint64_t notify = 1;
    1601             :         int rc __attribute__((unused));
    1602             : 
    1603           0 :         assert(busy_efd >= 0);
    1604             : 
    1605           0 :         if (interrupt_mode) {
    1606             :                 /* Write without read on eventfd will get it repeatedly triggered. */
    1607           0 :                 if (write(busy_efd, &notify, sizeof(notify)) < 0) {
    1608           0 :                         SPDK_ERRLOG("Failed to set busy wait for Poller(%s).\n", poller->name);
    1609             :                 }
    1610             :         } else {
    1611             :                 /* Read on eventfd will clear its level triggering. */
    1612           0 :                 rc = read(busy_efd, &notify, sizeof(notify));
    1613             :         }
    1614           0 : }
    1615             : 
    1616             : #else
    1617             : 
    1618             : static int
    1619             : period_poller_interrupt_init(struct spdk_poller *poller)
    1620             : {
    1621             :         return -ENOTSUP;
    1622             : }
    1623             : 
    1624             : static void
    1625             : period_poller_set_interrupt_mode(struct spdk_poller *poller, void *cb_arg, bool interrupt_mode)
    1626             : {
    1627             : }
    1628             : 
    1629             : static void
    1630             : poller_interrupt_fini(struct spdk_poller *poller)
    1631             : {
    1632             : }
    1633             : 
    1634             : static int
    1635             : busy_poller_interrupt_init(struct spdk_poller *poller)
    1636             : {
    1637             :         return -ENOTSUP;
    1638             : }
    1639             : 
    1640             : static void
    1641             : busy_poller_set_interrupt_mode(struct spdk_poller *poller, void *cb_arg, bool interrupt_mode)
    1642             : {
    1643             : }
    1644             : 
    1645             : #endif
    1646             : 
    1647             : void
    1648           7 : spdk_poller_register_interrupt(struct spdk_poller *poller,
    1649             :                                spdk_poller_set_interrupt_mode_cb cb_fn,
    1650             :                                void *cb_arg)
    1651             : {
    1652           7 :         assert(poller != NULL);
    1653           7 :         assert(spdk_get_thread() == poller->thread);
    1654             : 
    1655           7 :         if (!spdk_interrupt_mode_is_enabled()) {
    1656           7 :                 return;
    1657             :         }
    1658             : 
    1659             :         /* If this poller already had an interrupt, clean the old one up. */
    1660           0 :         if (poller->intr != NULL) {
    1661           0 :                 poller_interrupt_fini(poller);
    1662             :         }
    1663             : 
    1664           0 :         poller->set_intr_cb_fn = cb_fn;
    1665           0 :         poller->set_intr_cb_arg = cb_arg;
    1666             : 
    1667             :         /* Set poller into interrupt mode if thread is in interrupt. */
    1668           0 :         if (poller->thread->in_interrupt && poller->set_intr_cb_fn) {
    1669           0 :                 poller->set_intr_cb_fn(poller, poller->set_intr_cb_arg, true);
    1670             :         }
    1671             : }
    1672             : 
    1673             : static uint64_t
    1674         875 : convert_us_to_ticks(uint64_t us)
    1675             : {
    1676             :         uint64_t quotient, remainder, ticks;
    1677             : 
    1678         875 :         if (us) {
    1679         353 :                 quotient = us / SPDK_SEC_TO_USEC;
    1680         353 :                 remainder = us % SPDK_SEC_TO_USEC;
    1681         353 :                 ticks = spdk_get_ticks_hz();
    1682             : 
    1683         353 :                 return ticks * quotient + (ticks * remainder) / SPDK_SEC_TO_USEC;
    1684             :         } else {
    1685         522 :                 return 0;
    1686             :         }
    1687             : }
    1688             : 
    1689             : static struct spdk_poller *
    1690         875 : poller_register(spdk_poller_fn fn,
    1691             :                 void *arg,
    1692             :                 uint64_t period_microseconds,
    1693             :                 const char *name)
    1694             : {
    1695             :         struct spdk_thread *thread;
    1696             :         struct spdk_poller *poller;
    1697             : 
    1698         875 :         thread = spdk_get_thread();
    1699         875 :         if (!thread) {
    1700           0 :                 assert(false);
    1701             :                 return NULL;
    1702             :         }
    1703             : 
    1704         875 :         if (spdk_unlikely(thread->state == SPDK_THREAD_STATE_EXITED)) {
    1705           0 :                 SPDK_ERRLOG("thread %s is marked as exited\n", thread->name);
    1706           0 :                 return NULL;
    1707             :         }
    1708             : 
    1709         875 :         poller = calloc(1, sizeof(*poller));
    1710         875 :         if (poller == NULL) {
    1711           0 :                 SPDK_ERRLOG("Poller memory allocation failed\n");
    1712           0 :                 return NULL;
    1713             :         }
    1714             : 
    1715         875 :         if (name) {
    1716         814 :                 snprintf(poller->name, sizeof(poller->name), "%s", name);
    1717             :         } else {
    1718          61 :                 snprintf(poller->name, sizeof(poller->name), "%p", fn);
    1719             :         }
    1720             : 
    1721         875 :         poller->state = SPDK_POLLER_STATE_WAITING;
    1722         875 :         poller->fn = fn;
    1723         875 :         poller->arg = arg;
    1724         875 :         poller->thread = thread;
    1725         875 :         poller->intr = NULL;
    1726         875 :         if (thread->next_poller_id == 0) {
    1727           0 :                 SPDK_WARNLOG("Poller ID rolled over. Poller ID is duplicated.\n");
    1728           0 :                 thread->next_poller_id = 1;
    1729             :         }
    1730         875 :         poller->id = thread->next_poller_id++;
    1731             : 
    1732         875 :         poller->period_ticks = convert_us_to_ticks(period_microseconds);
    1733             : 
    1734         875 :         if (spdk_interrupt_mode_is_enabled()) {
    1735             :                 int rc;
    1736             : 
    1737           0 :                 if (period_microseconds) {
    1738           0 :                         rc = period_poller_interrupt_init(poller);
    1739           0 :                         if (rc < 0) {
    1740           0 :                                 SPDK_ERRLOG("Failed to register interruptfd for periodic poller: %s\n", spdk_strerror(-rc));
    1741           0 :                                 free(poller);
    1742           0 :                                 return NULL;
    1743             :                         }
    1744             : 
    1745           0 :                         poller->set_intr_cb_fn = period_poller_set_interrupt_mode;
    1746           0 :                         poller->set_intr_cb_arg = NULL;
    1747             : 
    1748             :                 } else {
    1749             :                         /* If the poller doesn't have a period, create interruptfd that's always
    1750             :                          * busy automatically when running in interrupt mode.
    1751             :                          */
    1752           0 :                         rc = busy_poller_interrupt_init(poller);
    1753           0 :                         if (rc > 0) {
    1754           0 :                                 SPDK_ERRLOG("Failed to register interruptfd for busy poller: %s\n", spdk_strerror(-rc));
    1755           0 :                                 free(poller);
    1756           0 :                                 return NULL;
    1757             :                         }
    1758             : 
    1759           0 :                         poller->set_intr_cb_fn = busy_poller_set_interrupt_mode;
    1760           0 :                         poller->set_intr_cb_arg = NULL;
    1761             :                 }
    1762             : 
    1763             :                 /* Set poller into interrupt mode if thread is in interrupt. */
    1764           0 :                 if (poller->thread->in_interrupt) {
    1765           0 :                         poller->set_intr_cb_fn(poller, poller->set_intr_cb_arg, true);
    1766             :                 }
    1767             :         }
    1768             : 
    1769         875 :         thread_insert_poller(thread, poller);
    1770             : 
    1771         875 :         return poller;
    1772             : }
    1773             : /* Register a poller without an explicit name.
                     :  *
                     :  * Forwards fn, arg and period_microseconds to poller_register() with a
                     :  * NULL name and returns its result unchanged, including NULL on failure.
                     :  * With a NULL name poller_register() formats the fn pointer (%p) into
                     :  * poller->name.
                     :  */
    1774             : struct spdk_poller *
    1775          61 : spdk_poller_register(spdk_poller_fn fn,
    1776             :                      void *arg,
    1777             :                      uint64_t period_microseconds)
    1778             : {
    1779          61 :         return poller_register(fn, arg, period_microseconds, NULL);
    1780             : }
    1781             : /* Register a poller under the given name.
                     :  *
                     :  * Forwards all four arguments to poller_register() and returns its result
                     :  * unchanged, including NULL on failure. name is copied into poller->name
                     :  * by poller_register(); a NULL name behaves like spdk_poller_register().
                     :  */
    1782             : struct spdk_poller *
    1783         814 : spdk_poller_register_named(spdk_poller_fn fn,
    1784             :                            void *arg,
    1785             :                            uint64_t period_microseconds,
    1786             :                            const char *name)
    1787             : {
    1788         814 :         return poller_register(fn, arg, period_microseconds, name);
    1789             : }
    1790             : /* Report that a poller API was called from a thread other than the one
                     :  * stored in the poller.
                     :  *
                     :  * func is the name of the calling function and name is the poller name
                     :  * (that is what the callers in this file pass). thread is the thread the
                     :  * call should have come from, curthread the thread it actually came from.
                     :  *
                     :  * A NULL thread is logged and then abort() is called. Otherwise both
                     :  * threads are logged by name and id, followed by assert(false); when
                     :  * assertions are compiled out the function returns normally and the
                     :  * callers in this file return right after it.
                     :  */
    1791             : static void
    1792           0 : wrong_thread(const char *func, const char *name, struct spdk_thread *thread,
    1793             :              struct spdk_thread *curthread)
    1794             : {
    1795           0 :         if (thread == NULL) {
    1796           0 :                 SPDK_ERRLOG("%s(%s) called with NULL thread\n", func, name);
    1797           0 :                 abort();
    1798             :         }
    1799           0 :         SPDK_ERRLOG("%s(%s) called from wrong thread %s:%" PRIu64 " (should be "
    1800             :                     "%s:%" PRIu64 ")\n", func, name, curthread->name, curthread->id,
    1801             :                     thread->name, thread->id);
    1802           0 :         assert(false);
    1803             : }
    1804             : /* Unregister the poller referenced by *ppoller and clear the caller handle.
                     :  *
                     :  * ppoller itself is dereferenced unconditionally; a NULL *ppoller is a
                     :  * no-op. *ppoller is set to NULL before the thread checks run, so the
                     :  * handle is cleared even when one of the error paths below is taken.
                     :  * Must be called on the thread stored in poller->thread; otherwise
                     :  * wrong_thread() is invoked and the poller is left as it was.
                     :  *
                     :  * The poller is not freed here: its state is set to
                     :  * SPDK_POLLER_STATE_UNREGISTERED and, per the comments below, the actual
                     :  * cleanup happens in a later spdk_thread_poll() call.
                     :  */
    1805             : void
    1806        1322 : spdk_poller_unregister(struct spdk_poller **ppoller)
    1807             : {
    1808             :         struct spdk_thread *thread;
    1809             :         struct spdk_poller *poller;
    1810             : 
    1811        1322 :         poller = *ppoller;
    1812        1322 :         if (poller == NULL) {
    1813         447 :                 return;
    1814             :         }
    1815             : 
    1816         875 :         *ppoller = NULL;
    1817             : 
    1818         875 :         thread = spdk_get_thread();
    1819         875 :         if (!thread) {
    1820           0 :                 assert(false);
    1821             :                 return;
    1822             :         }
    1823             : 
    1824         875 :         if (poller->thread != thread) {
    1825           0 :                 wrong_thread(__func__, poller->name, poller->thread, thread);
    1826           0 :                 return;
    1827             :         }
    1828             : 
    1829         875 :         if (spdk_interrupt_mode_is_enabled()) {
    1830             :                 /* Release the interrupt resource for period or busy poller */
    1831           0 :                 if (poller->intr != NULL) {
    1832           0 :                         poller_interrupt_fini(poller);
    1833             :                 }
    1834             : 
    1835             :                 /* If there is not already a pending poller removal, generate
    1836             :                  * a message to go process removals. */
    1837           0 :                 if (!thread->poller_unregistered) {
    1838           0 :                         thread->poller_unregistered = true;
    1839           0 :                         spdk_thread_send_msg(thread, _thread_remove_pollers, thread);
    1840             :                 }
    1841             :         }
    1842             : 
    1843             :         /* If the poller was paused, put it on the active_pollers list so that
    1844             :          * its unregistration can be processed by spdk_thread_poll().
    1845             :          */
    1846         875 :         if (poller->state == SPDK_POLLER_STATE_PAUSED) {
    1847           9 :                 TAILQ_REMOVE(&thread->paused_pollers, poller, tailq);
    1848           9 :                 TAILQ_INSERT_TAIL(&thread->active_pollers, poller, tailq);
    1849           9 :                 poller->period_ticks = 0; /* period cleared once it sits on the active list */
    1850             :         }
    1851             : 
    1852             :         /* Simply set the state to unregistered. The poller will get cleaned up
    1853             :          * in a subsequent call to spdk_thread_poll().
    1854             :          */
    1855         875 :         poller->state = SPDK_POLLER_STATE_UNREGISTERED;
    1856             : }
    1857             : /* Request that a poller stop running.
                     :  *
                     :  * Must be called on the thread stored in poller->thread; a call without a
                     :  * current thread asserts, a call from another thread goes to
                     :  * wrong_thread(), and in both cases the poller is left unchanged.
                     :  *
                     :  * A RUNNING or WAITING poller is only marked PAUSING here; a PAUSED or
                     :  * PAUSING poller is left untouched. Any other state trips assert(false).
                     :  */
    1858             : void
    1859          41 : spdk_poller_pause(struct spdk_poller *poller)
    1860             : {
    1861             :         struct spdk_thread *thread;
    1862             : 
    1863          41 :         thread = spdk_get_thread();
    1864          41 :         if (!thread) {
    1865           0 :                 assert(false);
    1866             :                 return;
    1867             :         }
    1868             : 
    1869          41 :         if (poller->thread != thread) {
    1870           0 :                 wrong_thread(__func__, poller->name, poller->thread, thread);
    1871           0 :                 return;
    1872             :         }
    1873             : 
    1874             :         /* We just set its state to SPDK_POLLER_STATE_PAUSING and let
    1875             :          * spdk_thread_poll() move it. It allows a poller to be paused from
    1876             :          * another one's context without breaking the TAILQ_FOREACH_REVERSE_SAFE
    1877             :          * iteration, or from within itself without breaking the logic to always
    1878             :          * remove the closest timed poller in the TAILQ_FOREACH_SAFE iteration.
    1879             :          */
    1880          41 :         switch (poller->state) {
    1881           2 :         case SPDK_POLLER_STATE_PAUSED:
    1882             :         case SPDK_POLLER_STATE_PAUSING:
    1883           2 :                 break;
    1884          39 :         case SPDK_POLLER_STATE_RUNNING:
    1885             :         case SPDK_POLLER_STATE_WAITING:
    1886          39 :                 poller->state = SPDK_POLLER_STATE_PAUSING;
    1887          39 :                 break;
    1888           0 :         default:
    1889           0 :                 assert(false);
    1890             :                 break;
    1891             :         }
    1892             : }
    1893             : /* Resume a poller that is paused or in the process of being paused.
                     :  *
                     :  * Must be called on the thread stored in poller->thread; a call without a
                     :  * current thread asserts, a call from another thread goes to
                     :  * wrong_thread(), and in both cases the poller is left unchanged.
                     :  *
                     :  * A PAUSED poller is taken off thread->paused_pollers and handed to
                     :  * thread_insert_poller(); PAUSED and PAUSING pollers both end up WAITING.
                     :  * RUNNING and WAITING pollers are left untouched. Any other state trips
                     :  * assert(false).
                     :  */
    1894             : void
    1895          40 : spdk_poller_resume(struct spdk_poller *poller)
    1896             : {
    1897             :         struct spdk_thread *thread;
    1898             : 
    1899          40 :         thread = spdk_get_thread();
    1900          40 :         if (!thread) {
    1901           0 :                 assert(false);
    1902             :                 return;
    1903             :         }
    1904             : 
    1905          40 :         if (poller->thread != thread) {
    1906           0 :                 wrong_thread(__func__, poller->name, poller->thread, thread);
    1907           0 :                 return;
    1908             :         }
    1909             : 
    1910             :         /* If a poller is paused it has to be removed from the paused pollers
    1911             :          * list and put on the active list or timer tree depending on its
    1912             :          * period_ticks.  If a poller is still in the process of being paused,
    1913             :          * we just need to flip its state back to waiting, as it's already on
    1914             :          * the appropriate list or tree.
    1915             :          */
    1916          40 :         switch (poller->state) {
    1917          20 :         case SPDK_POLLER_STATE_PAUSED:
    1918          20 :                 TAILQ_REMOVE(&thread->paused_pollers, poller, tailq);
    1919          20 :                 thread_insert_poller(thread, poller);
    1920             :         /* fallthrough */
    1921          29 :         case SPDK_POLLER_STATE_PAUSING:
    1922          29 :                 poller->state = SPDK_POLLER_STATE_WAITING;
    1923          29 :                 break;
    1924          11 :         case SPDK_POLLER_STATE_RUNNING:
    1925             :         case SPDK_POLLER_STATE_WAITING:
    1926          11 :                 break;
    1927           0 :         default:
    1928           0 :                 assert(false);
    1929             :                 break;
    1930             :         }
    1931             : }
    1932             : /* Return the name stored in the poller. The pointer refers to storage
                     :  * inside *poller (poller->name); nothing is copied.
                     :  */
    1933             : const char *
    1934           2 : spdk_poller_get_name(struct spdk_poller *poller)
    1935             : {
    1936           2 :         return poller->name;
    1937             : }
    1938             : /* Return the id stored in the poller (poller->id). */
    1939             : uint64_t
    1940           3 : spdk_poller_get_id(struct spdk_poller *poller)
    1941             : {
    1942           3 :         return poller->id;
    1943             : }
    1944             : /* Map the current poller state to a lower-case string literal.
                     :  *
                     :  * Returns one of waiting, running, unregistered, pausing or paused, or
                     :  * NULL when poller->state holds a value not listed in the switch below.
                     :  */
    1945             : const char *
    1946           9 : spdk_poller_get_state_str(struct spdk_poller *poller)
    1947             : {
    1948           9 :         switch (poller->state) {
    1949           4 :         case SPDK_POLLER_STATE_WAITING:
    1950           4 :                 return "waiting";
    1951           2 :         case SPDK_POLLER_STATE_RUNNING:
    1952           2 :                 return "running";
    1953           0 :         case SPDK_POLLER_STATE_UNREGISTERED:
    1954           0 :                 return "unregistered";
    1955           1 :         case SPDK_POLLER_STATE_PAUSING:
    1956           1 :                 return "pausing";
    1957           2 :         case SPDK_POLLER_STATE_PAUSED:
    1958           2 :                 return "paused";
    1959           0 :         default:
    1960           0 :                 return NULL;
    1961             :         }
    1962             : }
    1963             : /* Return the period of the poller in ticks (poller->period_ticks). */
    1964             : uint64_t
    1965           2 : spdk_poller_get_period_ticks(struct spdk_poller *poller)
    1966             : {
    1967           2 :         return poller->period_ticks;
    1968             : }
    1969             : /* Copy run_count and busy_count from the poller into *stats.
                     :  * Only these two members of *stats are written.
                     :  */
    1970             : void
    1971           2 : spdk_poller_get_stats(struct spdk_poller *poller, struct spdk_poller_stats *stats)
    1972             : {
    1973           2 :         stats->run_count = poller->run_count;
    1974           2 :         stats->busy_count = poller->busy_count;
    1975           2 : }
    1976             : /* Return the first entry of thread->active_pollers (TAILQ_FIRST result). */
    1977             : struct spdk_poller *
    1978           0 : spdk_thread_get_first_active_poller(struct spdk_thread *thread)
    1979             : {
    1980           0 :         return TAILQ_FIRST(&thread->active_pollers);
    1981             : }
    1982             : /* Return the entry that follows prev through its tailq link (TAILQ_NEXT
                     :  * result). Intended to continue a walk started with
                     :  * spdk_thread_get_first_active_poller().
                     :  */
    1983             : struct spdk_poller *
    1984           0 : spdk_thread_get_next_active_poller(struct spdk_poller *prev)
    1985             : {
    1986           0 :         return TAILQ_NEXT(prev, tailq);
    1987             : }
    1988             : /* Return the minimum node of the thread->timed_pollers tree (RB_MIN result). */
    1989             : struct spdk_poller *
    1990           0 : spdk_thread_get_first_timed_poller(struct spdk_thread *thread)
    1991             : {
    1992           0 :         return RB_MIN(timed_pollers_tree, &thread->timed_pollers);
    1993             : }
    1994             : /* Return the node that follows prev in the timed poller tree (RB_NEXT
                     :  * result).
                     :  *
                     :  * NOTE(review): the identifier thread is neither a parameter nor a local
                     :  * of this function. Presumably the RB_NEXT macro never expands its head
                     :  * argument, which is why this still builds - confirm against the tree
                     :  * macro definition before touching this line.
                     :  */
    1995             : struct spdk_poller *
    1996           0 : spdk_thread_get_next_timed_poller(struct spdk_poller *prev)
    1997             : {
    1998           0 :         return RB_NEXT(timed_pollers_tree, &thread->timed_pollers, prev);
    1999             : }
    2000             : /* Return the first entry of thread->paused_pollers (TAILQ_FIRST result). */
    2001             : struct spdk_poller *
    2002           0 : spdk_thread_get_first_paused_poller(struct spdk_thread *thread)
    2003             : {
    2004           0 :         return TAILQ_FIRST(&thread->paused_pollers);
    2005             : }
    2006             : /* Return the entry that follows prev through its tailq link (TAILQ_NEXT
                     :  * result). Intended to continue a walk started with
                     :  * spdk_thread_get_first_paused_poller().
                     :  */
    2007             : struct spdk_poller *
    2008           0 : spdk_thread_get_next_paused_poller(struct spdk_poller *prev)
    2009             : {
    2010           0 :         return TAILQ_NEXT(prev, tailq);
    2011             : }
    2012             : /* Return the minimum node of the thread->io_channels tree (RB_MIN result). */
    2013             : struct spdk_io_channel *
    2014           0 : spdk_thread_get_first_io_channel(struct spdk_thread *thread)
    2015             : {
    2016           0 :         return RB_MIN(io_channel_tree, &thread->io_channels);
    2017             : }
    2018             : /* Return the node that follows prev in the io channel tree (RB_NEXT
                     :  * result).
                     :  *
                     :  * NOTE(review): the identifier thread is neither a parameter nor a local
                     :  * of this function. Presumably the RB_NEXT macro never expands its head
                     :  * argument, which is why this still builds - confirm against the tree
                     :  * macro definition before touching this line.
                     :  */
    2019             : struct spdk_io_channel *
    2020           0 : spdk_thread_get_next_io_channel(struct spdk_io_channel *prev)
    2021             : {
    2022           0 :         return RB_NEXT(io_channel_tree, &thread->io_channels, prev);
    2023             : }
    2024             : /* Return the trace id stored in the thread (thread->trace_id). */
    2025             : uint16_t
    2026           0 : spdk_thread_get_trace_id(struct spdk_thread *thread)
    2027             : {
    2028           0 :         return thread->trace_id;
    2029             : }
    2030             : /* Bookkeeping for one spdk_for_each_thread() iteration. Allocated in
                     :  * spdk_for_each_thread(), passed from thread to thread as the message
                     :  * context, and freed in _back_to_orig_thread().
                     :  */
    2031             : struct call_thread {
    2032             :         struct spdk_thread *cur_thread; /* thread the iteration is currently on */
    2033             :         spdk_msg_fn fn; /* called as fn(ctx) on each visited thread */
    2034             :         void *ctx; /* argument handed to both fn and cpl */
    2035             : 
    2036             :         struct spdk_thread *orig_thread; /* thread that started the iteration */
    2037             :         spdk_msg_fn cpl; /* called as cpl(ctx) on orig_thread at the end */
    2038             : };
    2039             : /* Final step of a spdk_for_each_thread() iteration; sent as a message to
                     :  * ct->orig_thread by _on_thread().
                     :  *
                     :  * ctx is the struct call_thread of the iteration. Drops the
                     :  * for_each_count taken in spdk_for_each_thread(), runs the completion
                     :  * callback with the user context and frees the tracking structure.
                     :  */
    2040             : static void
    2041           2 : _back_to_orig_thread(void *ctx)
    2042             : {
    2043           2 :         struct call_thread *ct = ctx;
    2044             : 
    2045           2 :         assert(ct->orig_thread->for_each_count > 0);
    2046           2 :         ct->orig_thread->for_each_count--;
    2047             : 
    2048           2 :         ct->cpl(ct->ctx);
    2049           2 :         free(ctx);
    2050           2 : }
    2051             : /* Per-thread step of a spdk_for_each_thread() iteration.
                     :  *
                     :  * ctx is the struct call_thread of the iteration. Runs ct->fn(ct->ctx),
                     :  * then, under g_devlist_mutex, advances ct->cur_thread to the next list
                     :  * entry whose state is SPDK_THREAD_STATE_RUNNING. If such a thread exists
                     :  * this function is sent to it as a message; otherwise
                     :  * _back_to_orig_thread() is sent to ct->orig_thread.
                     :  *
                     :  * The result of spdk_thread_send_msg() is only checked by assert().
                     :  */
    2052             : static void
    2053           6 : _on_thread(void *ctx)
    2054             : {
    2055           6 :         struct call_thread *ct = ctx;
    2056             :         int rc __attribute__((unused));
    2057             : 
    2058           6 :         ct->fn(ct->ctx);
    2059             : 
    2060           6 :         pthread_mutex_lock(&g_devlist_mutex);
    2061           6 :         ct->cur_thread = TAILQ_NEXT(ct->cur_thread, tailq);
    2062           6 :         while (ct->cur_thread && ct->cur_thread->state != SPDK_THREAD_STATE_RUNNING) {
    2063           0 :                 SPDK_DEBUGLOG(thread, "thread %s is not running but still not destroyed.\n",
    2064             :                               ct->cur_thread->name);
    2065           0 :                 ct->cur_thread = TAILQ_NEXT(ct->cur_thread, tailq);
    2066             :         }
    2067           6 :         pthread_mutex_unlock(&g_devlist_mutex);
    2068             : 
    2069           6 :         if (!ct->cur_thread) {
    2070           2 :                 SPDK_DEBUGLOG(thread, "Completed thread iteration\n");
    2071             : 
    2072           2 :                 rc = spdk_thread_send_msg(ct->orig_thread, _back_to_orig_thread, ctx);
    2073             :         } else {
    2074           4 :                 SPDK_DEBUGLOG(thread, "Continuing thread iteration to %s\n",
    2075             :                               ct->cur_thread->name);
    2076             : 
    2077           4 :                 rc = spdk_thread_send_msg(ct->cur_thread, _on_thread, ctx);
    2078             :         }
    2079           6 :         assert(rc == 0);
    2080           6 : }
    2081             : /* Run fn(ctx) on the threads in g_threads, one after another, and finally
                     :  * run cpl(ctx) back on the thread that called this function.
                     :  *
                     :  * The work is chained through messages: _on_thread() runs fn on one
                     :  * thread and forwards itself to the next, and _back_to_orig_thread()
                     :  * runs cpl at the end.
                     :  *
                     :  * If the tracking structure cannot be allocated, or _get_thread() returns
                     :  * NULL, an error is logged and cpl(ctx) is called directly without fn
                     :  * having run anywhere. for_each_count of the calling thread is raised
                     :  * here and dropped again in _back_to_orig_thread().
                     :  *
                     :  * NOTE(review): the first thread taken from g_threads is not checked for
                     :  * SPDK_THREAD_STATE_RUNNING, unlike the following ones in _on_thread().
                     :  * The result of spdk_thread_send_msg() is only checked by assert().
                     :  */
    2082             : void
    2083           2 : spdk_for_each_thread(spdk_msg_fn fn, void *ctx, spdk_msg_fn cpl)
    2084             : {
    2085             :         struct call_thread *ct;
    2086             :         struct spdk_thread *thread;
    2087             :         int rc __attribute__((unused));
    2088             : 
    2089           2 :         ct = calloc(1, sizeof(*ct));
    2090           2 :         if (!ct) {
    2091           0 :                 SPDK_ERRLOG("Unable to perform thread iteration\n");
    2092           0 :                 cpl(ctx);
    2093           0 :                 return;
    2094             :         }
    2095             : 
    2096           2 :         ct->fn = fn;
    2097           2 :         ct->ctx = ctx;
    2098           2 :         ct->cpl = cpl;
    2099             : 
    2100           2 :         thread = _get_thread();
    2101           2 :         if (!thread) {
    2102           0 :                 SPDK_ERRLOG("No thread allocated\n");
    2103           0 :                 free(ct);
    2104           0 :                 cpl(ctx);
    2105           0 :                 return;
    2106             :         }
    2107           2 :         ct->orig_thread = thread;
    2108             : 
    2109           2 :         ct->orig_thread->for_each_count++;
    2110             : 
    2111           2 :         pthread_mutex_lock(&g_devlist_mutex);
    2112           2 :         ct->cur_thread = TAILQ_FIRST(&g_threads);
    2113           2 :         pthread_mutex_unlock(&g_devlist_mutex);
    2114             : 
    2115           2 :         SPDK_DEBUGLOG(thread, "Starting thread iteration from %s\n",
    2116             :                       ct->orig_thread->name);
    2117             : 
    2118           2 :         rc = spdk_thread_send_msg(ct->cur_thread, _on_thread, ct);
    2119           2 :         assert(rc == 0);
    2120             : }
    2121             : /* Ask one poller to switch to interrupt mode (true) or poll mode (false).
                     :  *
                     :  * Pollers already in SPDK_POLLER_STATE_UNREGISTERED are skipped, as are
                     :  * pollers without a set_intr_cb_fn. Otherwise the callback is invoked
                     :  * with the poller, its set_intr_cb_arg and the requested mode.
                     :  */
    2122             : static inline void
    2123           0 : poller_set_interrupt_mode(struct spdk_poller *poller, bool interrupt_mode)
    2124             : {
    2125           0 :         if (poller->state == SPDK_POLLER_STATE_UNREGISTERED) {
    2126           0 :                 return;
    2127             :         }
    2128             : 
    2129           0 :         if (poller->set_intr_cb_fn) {
    2130           0 :                 poller->set_intr_cb_fn(poller, poller->set_intr_cb_arg, interrupt_mode);
    2131             :         }
    2132             : }
    2133             : /* Switch the calling thread and its pollers between interrupt mode
                     :  * (enable_interrupt == true) and poll mode (false).
                     :  *
                     :  * Asserts that _get_thread() returned a thread and that interrupt mode is
                     :  * enabled. The notice is logged before the no-change check, so it is
                     :  * emitted even when the thread is already in the requested mode and the
                     :  * function returns without doing anything else. Otherwise every poller
                     :  * in the timed tree and on the active and paused lists is passed to
                     :  * poller_set_interrupt_mode(), and thread->in_interrupt is updated last.
                     :  */
    2134             : void
    2135           0 : spdk_thread_set_interrupt_mode(bool enable_interrupt)
    2136             : {
    2137           0 :         struct spdk_thread *thread = _get_thread();
    2138             :         struct spdk_poller *poller, *tmp;
    2139             : 
    2140           0 :         assert(thread);
    2141           0 :         assert(spdk_interrupt_mode_is_enabled());
    2142             : 
    2143           0 :         SPDK_NOTICELOG("Set spdk_thread (%s) to %s mode from %s mode.\n",
    2144             :                        thread->name,  enable_interrupt ? "intr" : "poll",
    2145             :                        thread->in_interrupt ? "intr" : "poll");
    2146             : 
    2147           0 :         if (thread->in_interrupt == enable_interrupt) {
    2148           0 :                 return;
    2149             :         }
    2150             : 
    2151             :         /* Set pollers to expected mode */
    2152           0 :         RB_FOREACH_SAFE(poller, timed_pollers_tree, &thread->timed_pollers, tmp) {
    2153           0 :                 poller_set_interrupt_mode(poller, enable_interrupt);
    2154             :         }
    2155           0 :         TAILQ_FOREACH_SAFE(poller, &thread->active_pollers, tailq, tmp) {
    2156           0 :                 poller_set_interrupt_mode(poller, enable_interrupt);
    2157             :         }
    2158             :         /* All paused pollers will go to work in interrupt mode */
    2159           0 :         TAILQ_FOREACH_SAFE(poller, &thread->paused_pollers, tailq, tmp) {
    2160           0 :                 poller_set_interrupt_mode(poller, enable_interrupt);
    2161             :         }
    2162             : 
    2163           0 :         thread->in_interrupt = enable_interrupt;
    2164           0 :         return;
    2165             : }
    2166             : /* Look up the struct io_device registered for the given io_device pointer.
                     :  *
                     :  * Builds a zeroed search key on the stack, sets its io_device member and
                     :  * returns the RB_FIND result from g_io_devices; the callers in this file
                     :  * treat NULL as not found. The callers visible in this part of the file
                     :  * hold g_devlist_mutex around the call.
                     :  */
    2167             : static struct io_device *
    2168        8996 : io_device_get(void *io_device)
    2169             : {
    2170        8996 :         struct io_device find = {};
    2171             : 
    2172        8996 :         find.io_device = io_device;
    2173        8996 :         return RB_FIND(io_device_tree, &g_io_devices, &find);
    2174             : }
    2175             : /* Register io_device together with its channel create and destroy
                     :  * callbacks.
                     :  *
                     :  * io_device, create_cb and destroy_cb are asserted to be non-NULL.
                     :  * Must be called from an SPDK thread. ctx_size is stored in the device
                     :  * record as given. name may be NULL, in which case the address of the
                     :  * newly allocated record (dev, not io_device) is formatted into dev->name.
                     :  *
                     :  * The function returns void: a missing thread, an allocation failure and
                     :  * a duplicate registration are only logged. In the duplicate case the
                     :  * new record is freed and the existing one stays in g_io_devices.
                     :  */
    2176             : void
    2177        2613 : spdk_io_device_register(void *io_device, spdk_io_channel_create_cb create_cb,
    2178             :                         spdk_io_channel_destroy_cb destroy_cb, uint32_t ctx_size,
    2179             :                         const char *name)
    2180             : {
    2181             :         struct io_device *dev, *tmp;
    2182             :         struct spdk_thread *thread;
    2183             : 
    2184        2613 :         assert(io_device != NULL);
    2185        2613 :         assert(create_cb != NULL);
    2186        2613 :         assert(destroy_cb != NULL);
    2187             : 
    2188        2613 :         thread = spdk_get_thread();
    2189        2613 :         if (!thread) {
    2190           0 :                 SPDK_ERRLOG("called from non-SPDK thread\n");
    2191           0 :                 assert(false);
    2192             :                 return;
    2193             :         }
    2194             : 
    2195        2613 :         dev = calloc(1, sizeof(struct io_device));
    2196        2613 :         if (dev == NULL) {
    2197           0 :                 SPDK_ERRLOG("could not allocate io_device\n");
    2198           0 :                 return;
    2199             :         }
    2200             : 
    2201        2613 :         dev->io_device = io_device;
    2202        2613 :         if (name) {
    2203        1701 :                 snprintf(dev->name, sizeof(dev->name), "%s", name);
    2204             :         } else {
    2205         912 :                 snprintf(dev->name, sizeof(dev->name), "%p", dev);
    2206             :         }
    2207        2613 :         dev->create_cb = create_cb;
    2208        2613 :         dev->destroy_cb = destroy_cb;
    2209        2613 :         dev->unregister_cb = NULL;
    2210        2613 :         dev->ctx_size = ctx_size;
    2211        2613 :         dev->for_each_count = 0;
    2212        2613 :         dev->unregistered = false;
    2213        2613 :         dev->refcnt = 0;
    2214             : 
    2215        2613 :         SPDK_DEBUGLOG(thread, "Registering io_device %s (%p) on thread %s\n",
    2216             :                       dev->name, dev->io_device, thread->name);
    2217             : 
    2218        2613 :         pthread_mutex_lock(&g_devlist_mutex);
    2219        2613 :         tmp = RB_INSERT(io_device_tree, &g_io_devices, dev);
    2220        2613 :         if (tmp != NULL) { /* a non-NULL result is treated as an existing entry */
    2221           2 :                 SPDK_ERRLOG("io_device %p already registered (old:%s new:%s)\n",
    2222             :                             io_device, tmp->name, dev->name);
    2223           2 :                 free(dev);
    2224             :         }
    2225             : 
    2226        2613 :         pthread_mutex_unlock(&g_devlist_mutex);
    2227             : }
    2228             : /* Message handler that completes the unregistration of an io_device; sent
                     :  * to dev->unregister_thread by io_device_free().
                     :  *
                     :  * arg is the struct io_device being released. Asserts that it runs on
                     :  * dev->unregister_thread, drops pending_unregister_count of that thread,
                     :  * invokes dev->unregister_cb with the user io_device pointer and frees
                     :  * the record. unregister_cb is called unconditionally; io_device_free()
                     :  * only sends this message when the callback is non-NULL.
                     :  */
    2229             : static void
    2230        2417 : _finish_unregister(void *arg)
    2231             : {
    2232        2417 :         struct io_device *dev = arg;
    2233             :         struct spdk_thread *thread;
    2234             : 
    2235        2417 :         thread = spdk_get_thread();
    2236        2417 :         assert(thread == dev->unregister_thread);
    2237             : 
    2238        2417 :         SPDK_DEBUGLOG(thread, "Finishing unregistration of io_device %s (%p) on thread %s\n",
    2239             :                       dev->name, dev->io_device, thread->name);
    2240             : 
    2241        2417 :         assert(thread->pending_unregister_count > 0);
    2242        2417 :         thread->pending_unregister_count--;
    2243             : 
    2244        2417 :         dev->unregister_cb(dev->io_device);
    2245        2417 :         free(dev);
    2246        2417 : }
    2247             : /* Release an io_device record.
                     :  *
                     :  * Without an unregister callback the record is freed right away.
                     :  * With one, _finish_unregister() is sent as a message to
                     :  * dev->unregister_thread, which calls the callback and frees the record
                     :  * there. The result of spdk_thread_send_msg() is only checked by assert().
                     :  */
    2248             : static void
    2249        2610 : io_device_free(struct io_device *dev)
    2250             : {
    2251             :         int rc __attribute__((unused));
    2252             : 
    2253        2610 :         if (dev->unregister_cb == NULL) {
    2254         193 :                 free(dev);
    2255             :         } else {
    2256        2417 :                 assert(dev->unregister_thread != NULL);
    2257        2417 :                 SPDK_DEBUGLOG(thread, "io_device %s (%p) needs to unregister from thread %s\n",
    2258             :                               dev->name, dev->io_device, dev->unregister_thread->name);
    2259        2417 :                 rc = spdk_thread_send_msg(dev->unregister_thread, _finish_unregister, dev);
    2260        2417 :                 assert(rc == 0);
    2261             :         }
    2262        2610 : }
    2263             : /* Unregister io_device.
                     :  *
                     :  * unregister_cb may be NULL. When set, it is recorded together with the
                     :  * calling thread and later invoked on that thread via
                     :  * _finish_unregister().
                     :  *
                     :  * Must be called from an SPDK thread. An unknown io_device, or a second
                     :  * unregister while one is pending and for_each calls are outstanding, is
                     :  * logged and asserted. If for_each calls are outstanding the request is
                     :  * only recorded (pending_unregister) and the function returns. Otherwise
                     :  * the record is marked unregistered and removed from g_io_devices; it is
                     :  * released through io_device_free() when refcnt is 0 and left allocated
                     :  * when references remain.
                     :  */
    2264             : void
    2265        2612 : spdk_io_device_unregister(void *io_device, spdk_io_device_unregister_cb unregister_cb)
    2266             : {
    2267             :         struct io_device *dev;
    2268             :         uint32_t refcnt;
    2269             :         struct spdk_thread *thread;
    2270             : 
    2271        2612 :         thread = spdk_get_thread();
    2272        2612 :         if (!thread) {
    2273           0 :                 SPDK_ERRLOG("called from non-SPDK thread\n");
    2274           0 :                 assert(false);
    2275             :                 return;
    2276             :         }
    2277             : 
    2278        2612 :         pthread_mutex_lock(&g_devlist_mutex);
    2279        2612 :         dev = io_device_get(io_device);
    2280        2612 :         if (!dev) {
    2281           0 :                 SPDK_ERRLOG("io_device %p not found\n", io_device);
    2282           0 :                 assert(false);
    2283             :                 pthread_mutex_unlock(&g_devlist_mutex);
    2284             :                 return;
    2285             :         }
    2286             : 
    2287             :         /* The for_each_count check differentiates the user attempting to unregister the
    2288             :          * device a second time, from the internal call to this function that occurs
    2289             :          * after the for_each_count reaches 0.
    2290             :          */
    2291        2612 :         if (dev->pending_unregister && dev->for_each_count > 0) {
    2292           0 :                 SPDK_ERRLOG("io_device %p already has a pending unregister\n", io_device);
    2293           0 :                 assert(false);
    2294             :                 pthread_mutex_unlock(&g_devlist_mutex);
    2295             :                 return;
    2296             :         }
    2297             : 
    2298        2612 :         dev->unregister_cb = unregister_cb;
    2299        2612 :         dev->unregister_thread = thread;
    2300             : 
    2301        2612 :         if (dev->for_each_count > 0) {
    2302           1 :                 SPDK_WARNLOG("io_device %s (%p) has %u for_each calls outstanding\n",
    2303             :                              dev->name, io_device, dev->for_each_count);
    2304           1 :                 dev->pending_unregister = true;
    2305           1 :                 pthread_mutex_unlock(&g_devlist_mutex);
    2306           1 :                 return;
    2307             :         }
    2308             : 
    2309        2611 :         dev->unregistered = true;
    2310        2611 :         RB_REMOVE(io_device_tree, &g_io_devices, dev);
    2311        2611 :         refcnt = dev->refcnt; /* snapshot taken while the mutex is still held */
    2312        2611 :         pthread_mutex_unlock(&g_devlist_mutex);
    2313             : 
    2314        2611 :         SPDK_DEBUGLOG(thread, "Unregistering io_device %s (%p) from thread %s\n",
    2315             :                       dev->name, dev->io_device, thread->name);
    2316             : 
    2317        2611 :         if (unregister_cb) {
    2318        2417 :                 thread->pending_unregister_count++; /* dropped again in _finish_unregister() */
    2319             :         }
    2320             : 
    2321        2611 :         if (refcnt > 0) {
    2322             :                 /* defer deletion */
    2323        1009 :                 return;
    2324             :         }
    2325             : 
    2326        1602 :         io_device_free(dev);
    2327             : }
    2328             : /* Return the name stored in the io_device record. The pointer refers to
                     :  * storage inside *dev (dev->name); nothing is copied.
                     :  */
    2329             : const char *
    2330           0 : spdk_io_device_get_name(struct io_device *dev)
    2331             : {
    2332           0 :         return dev->name;
    2333             : }
    2334             : /* Look up the io channel that the given thread holds for dev.
                     :  *
                     :  * Builds a zeroed search key on the stack, sets its dev member and
                     :  * returns the RB_FIND result from thread->io_channels; the caller
                     :  * visible in this file treats NULL as no channel present.
                     :  */
    2335             : static struct spdk_io_channel *
    2336       10424 : thread_get_io_channel(struct spdk_thread *thread, struct io_device *dev)
    2337             : {
    2338       10424 :         struct spdk_io_channel find = {};
    2339             : 
    2340       10424 :         find.dev = dev;
    2341       10424 :         return RB_FIND(io_channel_tree, &thread->io_channels, &find);
    2342             : }
    2343             : 
    2344             : struct spdk_io_channel *
    2345        4528 : spdk_get_io_channel(void *io_device)
    2346             : {
    2347             :         struct spdk_io_channel *ch;
    2348             :         struct spdk_thread *thread;
    2349             :         struct io_device *dev;
    2350             :         int rc;
    2351             : 
    2352        4528 :         pthread_mutex_lock(&g_devlist_mutex);
    2353        4528 :         dev = io_device_get(io_device);
    2354        4528 :         if (dev == NULL) {
    2355           1 :                 SPDK_ERRLOG("could not find io_device %p\n", io_device);
    2356           1 :                 pthread_mutex_unlock(&g_devlist_mutex);
    2357           1 :                 return NULL;
    2358             :         }
    2359             : 
    2360        4527 :         thread = _get_thread();
    2361        4527 :         if (!thread) {
    2362           0 :                 SPDK_ERRLOG("No thread allocated\n");
    2363           0 :                 pthread_mutex_unlock(&g_devlist_mutex);
    2364           0 :                 return NULL;
    2365             :         }
    2366             : 
    2367        4527 :         if (spdk_unlikely(thread->state == SPDK_THREAD_STATE_EXITED)) {
    2368           0 :                 SPDK_ERRLOG("Thread %s is marked as exited\n", thread->name);
    2369           0 :                 pthread_mutex_unlock(&g_devlist_mutex);
    2370           0 :                 return NULL;
    2371             :         }
    2372             : 
    2373        4527 :         ch = thread_get_io_channel(thread, dev);
    2374        4527 :         if (ch != NULL) {
    2375        1305 :                 ch->ref++;
    2376             : 
    2377        1305 :                 SPDK_DEBUGLOG(thread, "Get io_channel %p for io_device %s (%p) on thread %s refcnt %u\n",
    2378             :                               ch, dev->name, dev->io_device, thread->name, ch->ref);
    2379             : 
    2380             :                 /*
    2381             :                  * An I/O channel already exists for this device on this
    2382             :                  *  thread, so return it.
    2383             :                  */
    2384        1305 :                 pthread_mutex_unlock(&g_devlist_mutex);
    2385        1305 :                 spdk_trace_record(TRACE_THREAD_IOCH_GET, 0, 0,
    2386             :                                   (uint64_t)spdk_io_channel_get_ctx(ch), ch->ref);
    2387        1305 :                 return ch;
    2388             :         }
    2389             : 
    2390        3222 :         ch = calloc(1, sizeof(*ch) + dev->ctx_size);
    2391        3222 :         if (ch == NULL) {
    2392           0 :                 SPDK_ERRLOG("could not calloc spdk_io_channel\n");
    2393           0 :                 pthread_mutex_unlock(&g_devlist_mutex);
    2394           0 :                 return NULL;
    2395             :         }
    2396             : 
    2397        3222 :         ch->dev = dev;
    2398        3222 :         ch->destroy_cb = dev->destroy_cb;
    2399        3222 :         ch->thread = thread;
    2400        3222 :         ch->ref = 1;
    2401        3222 :         ch->destroy_ref = 0;
    2402        3222 :         RB_INSERT(io_channel_tree, &thread->io_channels, ch);
    2403             : 
    2404        3222 :         SPDK_DEBUGLOG(thread, "Get io_channel %p for io_device %s (%p) on thread %s refcnt %u\n",
    2405             :                       ch, dev->name, dev->io_device, thread->name, ch->ref);
    2406             : 
    2407        3222 :         dev->refcnt++;
    2408             : 
    2409        3222 :         pthread_mutex_unlock(&g_devlist_mutex);
    2410             : 
    2411        3222 :         rc = dev->create_cb(io_device, (uint8_t *)ch + sizeof(*ch));
    2412        3222 :         if (rc != 0) {
    2413           3 :                 pthread_mutex_lock(&g_devlist_mutex);
    2414           3 :                 RB_REMOVE(io_channel_tree, &ch->thread->io_channels, ch);
    2415           3 :                 dev->refcnt--;
    2416           3 :                 free(ch);
    2417           3 :                 SPDK_ERRLOG("could not create io_channel for io_device %s (%p): %s (rc=%d)\n",
    2418             :                             dev->name, io_device, spdk_strerror(-rc), rc);
    2419           3 :                 pthread_mutex_unlock(&g_devlist_mutex);
    2420           3 :                 return NULL;
    2421             :         }
    2422             : 
    2423        3219 :         spdk_trace_record(TRACE_THREAD_IOCH_GET, 0, 0, (uint64_t)spdk_io_channel_get_ctx(ch), 1);
    2424        3219 :         return ch;
    2425             : }
    2426             : 
    2427             : static void
    2428        3221 : put_io_channel(void *arg)
    2429             : {
    2430        3221 :         struct spdk_io_channel *ch = arg;
    2431        3221 :         bool do_remove_dev = true;
    2432             :         struct spdk_thread *thread;
    2433             : 
    2434        3221 :         thread = spdk_get_thread();
    2435        3221 :         if (!thread) {
    2436           0 :                 SPDK_ERRLOG("called from non-SPDK thread\n");
    2437           0 :                 assert(false);
    2438             :                 return;
    2439             :         }
    2440             : 
    2441        3221 :         SPDK_DEBUGLOG(thread,
    2442             :                       "Releasing io_channel %p for io_device %s (%p) on thread %s\n",
    2443             :                       ch, ch->dev->name, ch->dev->io_device, thread->name);
    2444             : 
    2445        3221 :         assert(ch->thread == thread);
    2446             : 
    2447        3221 :         ch->destroy_ref--;
    2448             : 
    2449        3221 :         if (ch->ref > 0 || ch->destroy_ref > 0) {
    2450             :                 /*
    2451             :                  * Another reference to the associated io_device was requested
    2452             :                  *  after this message was sent but before it had a chance to
    2453             :                  *  execute.
    2454             :                  */
    2455           4 :                 return;
    2456             :         }
    2457             : 
    2458        3217 :         pthread_mutex_lock(&g_devlist_mutex);
    2459        3217 :         RB_REMOVE(io_channel_tree, &ch->thread->io_channels, ch);
    2460        3217 :         pthread_mutex_unlock(&g_devlist_mutex);
    2461             : 
    2462             :         /* Don't hold the devlist mutex while the destroy_cb is called. */
    2463        3217 :         ch->destroy_cb(ch->dev->io_device, spdk_io_channel_get_ctx(ch));
    2464             : 
    2465        3217 :         pthread_mutex_lock(&g_devlist_mutex);
    2466        3217 :         ch->dev->refcnt--;
    2467             : 
    2468        3217 :         if (!ch->dev->unregistered) {
    2469        2202 :                 do_remove_dev = false;
    2470             :         }
    2471             : 
    2472        3217 :         if (ch->dev->refcnt > 0) {
    2473         146 :                 do_remove_dev = false;
    2474             :         }
    2475             : 
    2476        3217 :         pthread_mutex_unlock(&g_devlist_mutex);
    2477             : 
    2478        3217 :         if (do_remove_dev) {
    2479        1008 :                 io_device_free(ch->dev);
    2480             :         }
    2481        3217 :         free(ch);
    2482             : }
    2483             : 
    2484             : void
    2485        4523 : spdk_put_io_channel(struct spdk_io_channel *ch)
    2486             : {
    2487             :         struct spdk_thread *thread;
    2488             :         int rc __attribute__((unused));
    2489             : 
    2490        4523 :         spdk_trace_record(TRACE_THREAD_IOCH_PUT, 0, 0,
    2491             :                           (uint64_t)spdk_io_channel_get_ctx(ch), ch->ref);
    2492             : 
    2493        4523 :         thread = spdk_get_thread();
    2494        4523 :         if (!thread) {
    2495           0 :                 SPDK_ERRLOG("called from non-SPDK thread\n");
    2496           0 :                 assert(false);
    2497             :                 return;
    2498             :         }
    2499             : 
    2500        4523 :         if (ch->thread != thread) {
    2501           0 :                 wrong_thread(__func__, "ch", ch->thread, thread);
    2502           0 :                 return;
    2503             :         }
    2504             : 
    2505        4523 :         SPDK_DEBUGLOG(thread,
    2506             :                       "Putting io_channel %p for io_device %s (%p) on thread %s refcnt %u\n",
    2507             :                       ch, ch->dev->name, ch->dev->io_device, thread->name, ch->ref);
    2508             : 
    2509        4523 :         ch->ref--;
    2510             : 
    2511        4523 :         if (ch->ref == 0) {
    2512        3221 :                 ch->destroy_ref++;
    2513        3221 :                 rc = spdk_thread_send_msg(thread, put_io_channel, ch);
    2514        3221 :                 assert(rc == 0);
    2515             :         }
    2516             : }
    2517             : 
    2518             : struct spdk_io_channel *
    2519      656909 : spdk_io_channel_from_ctx(void *ctx)
    2520             : {
    2521      656909 :         return (struct spdk_io_channel *)((uint8_t *)ctx - sizeof(struct spdk_io_channel));
    2522             : }
    2523             : 
    2524             : struct spdk_thread *
    2525        2412 : spdk_io_channel_get_thread(struct spdk_io_channel *ch)
    2526             : {
    2527        2412 :         return ch->thread;
    2528             : }
    2529             : 
    2530             : void *
    2531      654010 : spdk_io_channel_get_io_device(struct spdk_io_channel *ch)
    2532             : {
    2533      654010 :         return ch->dev->io_device;
    2534             : }
    2535             : 
    2536             : const char *
    2537           0 : spdk_io_channel_get_io_device_name(struct spdk_io_channel *ch)
    2538             : {
    2539           0 :         return spdk_io_device_get_name(ch->dev);
    2540             : }
    2541             : 
    2542             : int
    2543           0 : spdk_io_channel_get_ref_count(struct spdk_io_channel *ch)
    2544             : {
    2545           0 :         return ch->ref;
    2546             : }
    2547             : 
    2548             : struct spdk_io_channel_iter {
    2549             :         void *io_device;
    2550             :         struct io_device *dev;
    2551             :         spdk_channel_msg fn;
    2552             :         int status;
    2553             :         void *ctx;
    2554             :         struct spdk_io_channel *ch;
    2555             : 
    2556             :         struct spdk_thread *cur_thread;
    2557             : 
    2558             :         struct spdk_thread *orig_thread;
    2559             :         spdk_channel_for_each_cpl cpl;
    2560             : };
    2561             : 
    2562             : void *
    2563         763 : spdk_io_channel_iter_get_io_device(struct spdk_io_channel_iter *i)
    2564             : {
    2565         763 :         return i->io_device;
    2566             : }
    2567             : 
    2568             : struct spdk_io_channel *
    2569        1219 : spdk_io_channel_iter_get_channel(struct spdk_io_channel_iter *i)
    2570             : {
    2571        1219 :         return i->ch;
    2572             : }
    2573             : 
    2574             : void *
    2575        3063 : spdk_io_channel_iter_get_ctx(struct spdk_io_channel_iter *i)
    2576             : {
    2577        3063 :         return i->ctx;
    2578             : }
    2579             : 
    2580             : static void
    2581        1853 : _call_completion(void *ctx)
    2582             : {
    2583        1853 :         struct spdk_io_channel_iter *i = ctx;
    2584             : 
    2585        1853 :         assert(i->orig_thread->for_each_count > 0);
    2586        1853 :         i->orig_thread->for_each_count--;
    2587             : 
    2588        1853 :         if (i->cpl != NULL) {
    2589        1853 :                 i->cpl(i, i->status);
    2590             :         }
    2591        1853 :         free(i);
    2592        1853 : }
    2593             : 
    2594             : static void
    2595        1915 : _call_channel(void *ctx)
    2596             : {
    2597        1915 :         struct spdk_io_channel_iter *i = ctx;
    2598             :         struct spdk_io_channel *ch;
    2599             : 
    2600             :         /*
    2601             :          * It is possible that the channel was deleted before this
    2602             :          *  message had a chance to execute.  If so, skip calling
    2603             :          *  the fn() on this thread.
    2604             :          */
    2605        1915 :         pthread_mutex_lock(&g_devlist_mutex);
    2606        1915 :         ch = thread_get_io_channel(i->cur_thread, i->dev);
    2607        1915 :         pthread_mutex_unlock(&g_devlist_mutex);
    2608             : 
    2609        1915 :         if (ch) {
    2610        1873 :                 i->fn(i);
    2611             :         } else {
    2612          42 :                 spdk_for_each_channel_continue(i, 0);
    2613             :         }
    2614        1915 : }
    2615             : 
    2616             : void
    2617        1853 : spdk_for_each_channel(void *io_device, spdk_channel_msg fn, void *ctx,
    2618             :                       spdk_channel_for_each_cpl cpl)
    2619             : {
    2620             :         struct spdk_thread *thread;
    2621             :         struct spdk_io_channel *ch;
    2622             :         struct spdk_io_channel_iter *i;
    2623             :         int rc __attribute__((unused));
    2624             : 
    2625        1853 :         i = calloc(1, sizeof(*i));
    2626        1853 :         if (!i) {
    2627           0 :                 SPDK_ERRLOG("Unable to allocate iterator\n");
    2628           0 :                 assert(false);
    2629             :                 return;
    2630             :         }
    2631             : 
    2632        1853 :         i->io_device = io_device;
    2633        1853 :         i->fn = fn;
    2634        1853 :         i->ctx = ctx;
    2635        1853 :         i->cpl = cpl;
    2636        1853 :         i->orig_thread = _get_thread();
    2637             : 
    2638        1853 :         i->orig_thread->for_each_count++;
    2639             : 
    2640        1853 :         pthread_mutex_lock(&g_devlist_mutex);
    2641        1853 :         i->dev = io_device_get(io_device);
    2642        1853 :         if (i->dev == NULL) {
    2643           0 :                 SPDK_ERRLOG("could not find io_device %p\n", io_device);
    2644           0 :                 assert(false);
    2645             :                 i->status = -ENODEV;
    2646             :                 goto end;
    2647             :         }
    2648             : 
    2649             :         /* Do not allow new for_each operations if we are already waiting to unregister
    2650             :          * the device for other for_each operations to complete.
    2651             :          */
    2652        1853 :         if (i->dev->pending_unregister) {
    2653           0 :                 SPDK_ERRLOG("io_device %p has a pending unregister\n", io_device);
    2654           0 :                 i->status = -ENODEV;
    2655           0 :                 goto end;
    2656             :         }
    2657             : 
    2658        2150 :         TAILQ_FOREACH(thread, &g_threads, tailq) {
    2659        2014 :                 ch = thread_get_io_channel(thread, i->dev);
    2660        2014 :                 if (ch != NULL) {
    2661        1717 :                         ch->dev->for_each_count++;
    2662        1717 :                         i->cur_thread = thread;
    2663        1717 :                         i->ch = ch;
    2664        1717 :                         pthread_mutex_unlock(&g_devlist_mutex);
    2665        1717 :                         rc = spdk_thread_send_msg(thread, _call_channel, i);
    2666        1717 :                         assert(rc == 0);
    2667        1717 :                         return;
    2668             :                 }
    2669             :         }
    2670             : 
    2671         136 : end:
    2672         136 :         pthread_mutex_unlock(&g_devlist_mutex);
    2673             : 
    2674         136 :         rc = spdk_thread_send_msg(i->orig_thread, _call_completion, i);
    2675         136 :         assert(rc == 0);
    2676             : }
    2677             : 
    2678             : static void
    2679           1 : __pending_unregister(void *arg)
    2680             : {
    2681           1 :         struct io_device *dev = arg;
    2682             : 
    2683           1 :         assert(dev->pending_unregister);
    2684           1 :         assert(dev->for_each_count == 0);
    2685           1 :         spdk_io_device_unregister(dev->io_device, dev->unregister_cb);
    2686           1 : }
    2687             : 
    2688             : void
    2689        1915 : spdk_for_each_channel_continue(struct spdk_io_channel_iter *i, int status)
    2690             : {
    2691             :         struct spdk_thread *thread;
    2692             :         struct spdk_io_channel *ch;
    2693             :         struct io_device *dev;
    2694             :         int rc __attribute__((unused));
    2695             : 
    2696        1915 :         assert(i->cur_thread == spdk_get_thread());
    2697             : 
    2698        1915 :         i->status = status;
    2699             : 
    2700        1915 :         pthread_mutex_lock(&g_devlist_mutex);
    2701        1915 :         dev = i->dev;
    2702        1915 :         if (status) {
    2703          11 :                 goto end;
    2704             :         }
    2705             : 
    2706        1904 :         thread = TAILQ_NEXT(i->cur_thread, tailq);
    2707        3674 :         while (thread) {
    2708        1968 :                 ch = thread_get_io_channel(thread, dev);
    2709        1968 :                 if (ch != NULL) {
    2710         198 :                         i->cur_thread = thread;
    2711         198 :                         i->ch = ch;
    2712         198 :                         pthread_mutex_unlock(&g_devlist_mutex);
    2713         198 :                         rc = spdk_thread_send_msg(thread, _call_channel, i);
    2714         198 :                         assert(rc == 0);
    2715         198 :                         return;
    2716             :                 }
    2717        1770 :                 thread = TAILQ_NEXT(thread, tailq);
    2718             :         }
    2719             : 
    2720        1706 : end:
    2721        1717 :         dev->for_each_count--;
    2722        1717 :         i->ch = NULL;
    2723        1717 :         pthread_mutex_unlock(&g_devlist_mutex);
    2724             : 
    2725        1717 :         rc = spdk_thread_send_msg(i->orig_thread, _call_completion, i);
    2726        1717 :         assert(rc == 0);
    2727             : 
    2728        1717 :         pthread_mutex_lock(&g_devlist_mutex);
    2729        1717 :         if (dev->pending_unregister && dev->for_each_count == 0) {
    2730           1 :                 rc = spdk_thread_send_msg(dev->unregister_thread, __pending_unregister, dev);
    2731           1 :                 assert(rc == 0);
    2732             :         }
    2733        1717 :         pthread_mutex_unlock(&g_devlist_mutex);
    2734             : }
    2735             : 
    2736             : static void
    2737           0 : thread_interrupt_destroy(struct spdk_thread *thread)
    2738             : {
    2739           0 :         struct spdk_fd_group *fgrp = thread->fgrp;
    2740             : 
    2741           0 :         SPDK_INFOLOG(thread, "destroy fgrp for thread (%s)\n", thread->name);
    2742             : 
    2743           0 :         if (thread->msg_fd < 0) {
    2744           0 :                 return;
    2745             :         }
    2746             : 
    2747           0 :         spdk_fd_group_remove(fgrp, thread->msg_fd);
    2748           0 :         close(thread->msg_fd);
    2749           0 :         thread->msg_fd = -1;
    2750             : 
    2751           0 :         spdk_fd_group_destroy(fgrp);
    2752           0 :         thread->fgrp = NULL;
    2753             : }
    2754             : 
    2755             : #ifdef __linux__
    2756             : static int
    2757           0 : thread_interrupt_msg_process(void *arg)
    2758             : {
    2759           0 :         struct spdk_thread *thread = arg;
    2760             :         struct spdk_thread *orig_thread;
    2761             :         uint32_t msg_count;
    2762             :         spdk_msg_fn critical_msg;
    2763           0 :         int rc = 0;
    2764           0 :         uint64_t notify = 1;
    2765             : 
    2766           0 :         assert(spdk_interrupt_mode_is_enabled());
    2767             : 
    2768           0 :         orig_thread = spdk_get_thread();
    2769           0 :         spdk_set_thread(thread);
    2770             : 
    2771             :         /* There may be race between msg_acknowledge and another producer's msg_notify,
    2772             :          * so msg_acknowledge should be applied ahead. And then check for self's msg_notify.
    2773             :          * This can avoid msg notification missing.
    2774             :          */
    2775           0 :         rc = read(thread->msg_fd, &notify, sizeof(notify));
    2776           0 :         if (rc < 0 && errno != EAGAIN) {
    2777           0 :                 SPDK_ERRLOG("failed to acknowledge msg event: %s.\n", spdk_strerror(errno));
    2778             :         }
    2779             : 
    2780           0 :         critical_msg = thread->critical_msg;
    2781           0 :         if (spdk_unlikely(critical_msg != NULL)) {
    2782           0 :                 critical_msg(NULL);
    2783           0 :                 thread->critical_msg = NULL;
    2784           0 :                 rc = 1;
    2785             :         }
    2786             : 
    2787           0 :         msg_count = msg_queue_run_batch(thread, 0);
    2788           0 :         if (msg_count) {
    2789           0 :                 rc = 1;
    2790             :         }
    2791             : 
    2792           0 :         SPIN_ASSERT(thread->lock_count == 0, SPIN_ERR_HOLD_DURING_SWITCH);
    2793           0 :         if (spdk_unlikely(!thread->in_interrupt)) {
    2794             :                 /* The thread transitioned to poll mode in a msg during the above processing.
    2795             :                  * Clear msg_fd since thread messages will be polled directly in poll mode.
    2796             :                  */
    2797           0 :                 rc = read(thread->msg_fd, &notify, sizeof(notify));
    2798           0 :                 if (rc < 0 && errno != EAGAIN) {
    2799           0 :                         SPDK_ERRLOG("failed to acknowledge msg queue: %s.\n", spdk_strerror(errno));
    2800             :                 }
    2801             :         }
    2802             : 
    2803           0 :         spdk_set_thread(orig_thread);
    2804           0 :         return rc;
    2805             : }
    2806             : 
    2807             : static int
    2808           0 : thread_interrupt_create(struct spdk_thread *thread)
    2809             : {
    2810             :         int rc;
    2811             : 
    2812           0 :         SPDK_INFOLOG(thread, "Create fgrp for thread (%s)\n", thread->name);
    2813             : 
    2814           0 :         rc = spdk_fd_group_create(&thread->fgrp);
    2815           0 :         if (rc) {
    2816           0 :                 return rc;
    2817             :         }
    2818             : 
    2819           0 :         thread->msg_fd = eventfd(0, EFD_NONBLOCK | EFD_CLOEXEC);
    2820           0 :         if (thread->msg_fd < 0) {
    2821           0 :                 rc = -errno;
    2822           0 :                 spdk_fd_group_destroy(thread->fgrp);
    2823           0 :                 thread->fgrp = NULL;
    2824             : 
    2825           0 :                 return rc;
    2826             :         }
    2827             : 
    2828           0 :         return SPDK_FD_GROUP_ADD(thread->fgrp, thread->msg_fd,
    2829             :                                  thread_interrupt_msg_process, thread);
    2830             : }
    2831             : #else
    2832             : static int
    2833             : thread_interrupt_create(struct spdk_thread *thread)
    2834             : {
    2835             :         return -ENOTSUP;
    2836             : }
    2837             : #endif
    2838             : 
    2839             : static int
    2840           0 : _interrupt_wrapper(void *ctx)
    2841             : {
    2842           0 :         struct spdk_interrupt *intr = ctx;
    2843             :         struct spdk_thread *orig_thread, *thread;
    2844             :         int rc;
    2845             : 
    2846           0 :         orig_thread = spdk_get_thread();
    2847           0 :         thread = intr->thread;
    2848             : 
    2849           0 :         spdk_set_thread(thread);
    2850             : 
    2851             :         SPDK_DTRACE_PROBE4(interrupt_fd_process, intr->name, intr->efd,
    2852             :                            intr->fn, intr->arg);
    2853             : 
    2854           0 :         rc = intr->fn(intr->arg);
    2855             : 
    2856           0 :         SPIN_ASSERT(thread->lock_count == 0, SPIN_ERR_HOLD_DURING_SWITCH);
    2857             : 
    2858           0 :         spdk_set_thread(orig_thread);
    2859             : 
    2860           0 :         return rc;
    2861             : }
    2862             : 
    2863             : struct spdk_interrupt *
    2864           0 : spdk_interrupt_register(int efd, spdk_interrupt_fn fn,
    2865             :                         void *arg, const char *name)
    2866             : {
    2867           0 :         return spdk_interrupt_register_for_events(efd, SPDK_INTERRUPT_EVENT_IN, fn, arg, name);
    2868             : }
    2869             : 
    2870             : struct spdk_interrupt *
    2871           0 : spdk_interrupt_register_for_events(int efd, uint32_t events, spdk_interrupt_fn fn, void *arg,
    2872             :                                    const char *name)
    2873             : {
    2874           0 :         struct spdk_event_handler_opts opts = {};
    2875             : 
    2876           0 :         spdk_fd_group_get_default_event_handler_opts(&opts, sizeof(opts));
    2877           0 :         opts.events = events;
    2878           0 :         opts.fd_type = SPDK_FD_TYPE_DEFAULT;
    2879             : 
    2880           0 :         return spdk_interrupt_register_ext(efd, fn, arg, name, &opts);
    2881             : }
    2882             : 
    2883             : struct spdk_interrupt *
    2884           0 : spdk_interrupt_register_ext(int efd, spdk_interrupt_fn fn, void *arg, const char *name,
    2885             :                             struct spdk_event_handler_opts *opts)
    2886             : {
    2887             :         struct spdk_thread *thread;
    2888             :         struct spdk_interrupt *intr;
    2889             :         int ret;
    2890             : 
    2891           0 :         thread = spdk_get_thread();
    2892           0 :         if (!thread) {
    2893           0 :                 assert(false);
    2894             :                 return NULL;
    2895             :         }
    2896             : 
    2897           0 :         if (spdk_unlikely(thread->state != SPDK_THREAD_STATE_RUNNING)) {
    2898           0 :                 SPDK_ERRLOG("thread %s is marked as exited\n", thread->name);
    2899           0 :                 return NULL;
    2900             :         }
    2901             : 
    2902           0 :         intr = calloc(1, sizeof(*intr));
    2903           0 :         if (intr == NULL) {
    2904           0 :                 SPDK_ERRLOG("Interrupt handler allocation failed\n");
    2905           0 :                 return NULL;
    2906             :         }
    2907             : 
    2908           0 :         if (name) {
    2909           0 :                 snprintf(intr->name, sizeof(intr->name), "%s", name);
    2910             :         } else {
    2911           0 :                 snprintf(intr->name, sizeof(intr->name), "%p", fn);
    2912             :         }
    2913             : 
    2914           0 :         intr->efd = efd;
    2915           0 :         intr->thread = thread;
    2916           0 :         intr->fn = fn;
    2917           0 :         intr->arg = arg;
    2918             : 
    2919           0 :         ret = spdk_fd_group_add_ext(thread->fgrp, efd, _interrupt_wrapper, intr, intr->name, opts);
    2920             : 
    2921           0 :         if (ret != 0) {
    2922           0 :                 SPDK_ERRLOG("thread %s: failed to add fd %d: %s\n",
    2923             :                             thread->name, efd, spdk_strerror(-ret));
    2924           0 :                 free(intr);
    2925           0 :                 return NULL;
    2926             :         }
    2927             : 
    2928           0 :         return intr;
    2929             : }
    2930             : 
    2931             : void
    2932           1 : spdk_interrupt_unregister(struct spdk_interrupt **pintr)
    2933             : {
    2934             :         struct spdk_thread *thread;
    2935             :         struct spdk_interrupt *intr;
    2936             : 
    2937           1 :         intr = *pintr;
    2938           1 :         if (intr == NULL) {
    2939           1 :                 return;
    2940             :         }
    2941             : 
    2942           0 :         *pintr = NULL;
    2943             : 
    2944           0 :         thread = spdk_get_thread();
    2945           0 :         if (!thread) {
    2946           0 :                 assert(false);
    2947             :                 return;
    2948             :         }
    2949             : 
    2950           0 :         if (intr->thread != thread) {
    2951           0 :                 wrong_thread(__func__, intr->name, intr->thread, thread);
    2952           0 :                 return;
    2953             :         }
    2954             : 
    2955           0 :         spdk_fd_group_remove(thread->fgrp, intr->efd);
    2956           0 :         free(intr);
    2957             : }
    2958             : /* Change the event types watched for intr->efd in the calling thread's fd group. Returns -EINVAL off-thread, else the fd-group result. */
    2959             : int
    2960           0 : spdk_interrupt_set_event_types(struct spdk_interrupt *intr,
    2961             :                                enum spdk_interrupt_event_types event_types)
    2962             : {
    2963             :         struct spdk_thread *thread;
    2964             : 
    2965           0 :         thread = spdk_get_thread();
    2966           0 :         if (!thread) {
    2967           0 :                 assert(false); /* the caller is expected to be running on an SPDK thread */
    2968             :                 return -EINVAL;
    2969             :         }
    2970             : 
    2971           0 :         if (intr->thread != thread) { /* only the thread recorded in intr->thread may modify it */
    2972           0 :                 wrong_thread(__func__, intr->name, intr->thread, thread);
    2973           0 :                 return -EINVAL;
    2974             :         }
    2975             : 
    2976           0 :         return spdk_fd_group_event_modify(thread->fgrp, intr->efd, event_types);
    2977             : }
    2978             : /* Return whatever spdk_fd_group_get_fd() reports for the thread's fd group (thread->fgrp). thread is dereferenced unchecked. */
    2979             : int
    2980           0 : spdk_thread_get_interrupt_fd(struct spdk_thread *thread)
    2981             : {
    2982           0 :         return spdk_fd_group_get_fd(thread->fgrp);
    2983             : }
    2984             : /* Return the thread's fd group pointer (thread->fgrp) unchanged. thread is dereferenced unchecked. */
    2985             : struct spdk_fd_group *
    2986           0 : spdk_thread_get_interrupt_fd_group(struct spdk_thread *thread)
    2987             : {
    2988           0 :         return thread->fgrp;
    2989             : }
    2990             : /* File-scope interrupt-mode flag: written by spdk_interrupt_mode_enable(), read by spdk_interrupt_mode_is_enabled(). */
    2991             : static bool g_interrupt_mode = false;
    2992             : /* Turn on interrupt mode. Returns 0 on Linux, -ENOTSUP on other platforms, -1 if the thread library is already initialized. */
    2993             : int
    2994           0 : spdk_interrupt_mode_enable(void)
    2995             : {
    2996             :         /* It must be called once prior to initializing the threading library.
    2997             :          * g_spdk_msg_mempool will be valid if thread library is initialized.
    2998             :          */
    2999           0 :         if (g_spdk_msg_mempool) {
    3000           0 :                 SPDK_ERRLOG("Failed due to threading library is already initialized.\n");
    3001           0 :                 return -1; /* note: a bare -1 here, not a negative errno like the -ENOTSUP path below */
    3002             :         }
    3003             : 
    3004             : #ifdef __linux__
    3005           0 :         SPDK_NOTICELOG("Set SPDK running in interrupt mode.\n");
    3006           0 :         g_interrupt_mode = true;
    3007           0 :         return 0;
    3008             : #else
    3009             :         SPDK_ERRLOG("SPDK interrupt mode supports only Linux platform now.\n");
    3010             :         g_interrupt_mode = false;
    3011             :         return -ENOTSUP;
    3012             : #endif
    3013             : }
    3014             : /* Report the current value of g_interrupt_mode. */
    3015             : bool
    3016      169096 : spdk_interrupt_mode_is_enabled(void)
    3017             : {
    3018      169096 :         return g_interrupt_mode;
    3019             : }
    3020             : /* Capacity of sspin_stack.addrs, i.e. the most return addresses kept per captured backtrace. */
    3021             : #define SSPIN_DEBUG_STACK_FRAMES 16
    3022             : /* One captured call stack, filled by backtrace() in SSPIN_GET_STACK and printed by sspin_stack_print(). */
    3023             : struct sspin_stack {
    3024             :         void *addrs[SSPIN_DEBUG_STACK_FRAMES]; /* return addresses written by backtrace() */
    3025             :         uint32_t depth; /* number of valid entries in addrs (backtrace() return value) */
    3026             : };
    3027             : /* Debug bookkeeping hung off spdk_spinlock.internal: the stacks dumped by sspin_stacks_print(). */
    3028             : struct spdk_spinlock_internal {
    3029             :         struct sspin_stack init_stack; /* captured in spdk_spin_init() */
    3030             :         struct sspin_stack lock_stack; /* captured on the most recent spdk_spin_lock() */
    3031             :         struct sspin_stack unlock_stack; /* captured on the most recent spdk_spin_unlock() */
    3032             : };
    3033             : /* DEBUG builds only: allocate zeroed debug state for sspin. In other builds this function does nothing. */
    3034             : static void
    3035        1501 : sspin_init_internal(struct spdk_spinlock *sspin)
    3036             : {
    3037             : #ifdef DEBUG
    3038        1501 :         sspin->internal = calloc(1, sizeof(*sspin->internal)); /* result unchecked; users of internal test it for NULL */
    3039             : #endif
    3040        1501 : }
    3041             : /* DEBUG builds only: free sspin's debug state and clear the pointer. In other builds this function does nothing. */
    3042             : static void
    3043        1490 : sspin_fini_internal(struct spdk_spinlock *sspin)
    3044             : {
    3045             : #ifdef DEBUG
    3046        1490 :         free(sspin->internal);
    3047        1490 :         sspin->internal = NULL; /* later sspin_stacks_print() calls then return without printing */
    3048             : #endif
    3049        1490 : }
    3050             : /* SSPIN_GET_STACK(sspin, which): store the current backtrace in sspin->internal-><which>_stack when internal is non-NULL. */
    3051             : #if defined(DEBUG) && defined(SPDK_HAVE_EXECINFO_H)
    3052             : #define SSPIN_GET_STACK(sspin, which) \
    3053             :         do { \
    3054             :                 if (sspin->internal != NULL) { \
    3055             :                         struct sspin_stack *stack = &sspin->internal->which ## _stack; \
    3056             :                         stack->depth = backtrace(stack->addrs, SPDK_COUNTOF(stack->addrs)); \
    3057             :                 } \
    3058             :         } while (0)
    3059             : #else
    3060             : #define SSPIN_GET_STACK(sspin, which) do { } while (0) /* no-op unless both DEBUG and SPDK_HAVE_EXECINFO_H are defined */
    3061             : #endif
    3062             : /* Log title, then one symbolized line per recorded frame of sspin_stack. The body is empty without SPDK_HAVE_EXECINFO_H. */
    3063             : static void
    3064          15 : sspin_stack_print(const char *title, const struct sspin_stack *sspin_stack)
    3065             : {
    3066             : #ifdef SPDK_HAVE_EXECINFO_H
    3067             :         char **stack;
    3068             :         size_t i;
    3069             : 
    3070             :         stack = backtrace_symbols(sspin_stack->addrs, sspin_stack->depth);
    3071             :         if (stack == NULL) {
    3072             :                 SPDK_ERRLOG("Out of memory while allocate stack for %s\n", title);
    3073             :                 return;
    3074             :         }
    3075             :         SPDK_ERRLOG("  %s:\n", title);
    3076             :         for (i = 0; i < sspin_stack->depth; i++) {
    3077             :                 /*
    3078             :                  * This does not print line numbers. In gdb, use something like "list *0x444b6b" or
    3079             :                  * "list *sspin_stack->addrs[0]".  Or more conveniently, load the spdk gdb macros
    3080             :                  * and use "print *sspin" or "print sspin->internal.lock_stack".  See
    3081             :                  * gdb_macros.md in the docs directory for details.
    3082             :                  */
    3083             :                 SPDK_ERRLOG("    #%zu: %s\n", i, stack[i]); /* i is size_t, so %zu rather than PRIu64 */
    3084             :         }
    3085             :         free(stack); /* the backtrace_symbols() array is released with this single free() */
    3086             : #endif /* SPDK_HAVE_EXECINFO_H */
    3087          15 : }
    3088             : /* Log the spinlock's address and its init / last-lock / last-unlock stacks; returns silently when no debug state is attached. */
    3089             : static void
    3090           5 : sspin_stacks_print(const struct spdk_spinlock *sspin)
    3091             : {
    3092           5 :         if (sspin->internal == NULL) { /* no debug state attached (never allocated, or already freed) */
    3093           0 :                 return;
    3094             :         }
    3095           5 :         SPDK_ERRLOG("spinlock %p\n", sspin);
    3096           5 :         sspin_stack_print("Lock initialized at", &sspin->internal->init_stack);
    3097           5 :         sspin_stack_print("Last locked at", &sspin->internal->lock_stack);
    3098           5 :         sspin_stack_print("Last unlocked at", &sspin->internal->unlock_stack);
    3099             : }
    3100             : /* Zero *sspin, initialize its process-private pthread spinlock, attach debug state and mark it initialized. */
    3101             : void
    3102        1501 : spdk_spin_init(struct spdk_spinlock *sspin)
    3103             : {
    3104             :         int rc;
    3105             : 
    3106        1501 :         memset(sspin, 0, sizeof(*sspin)); /* resets every field, including thread, destroyed and internal */
    3107        1501 :         rc = pthread_spin_init(&sspin->spinlock, PTHREAD_PROCESS_PRIVATE);
    3108        1501 :         SPIN_ASSERT_LOG_STACKS(rc == 0, SPIN_ERR_PTHREAD, sspin); /* macro defined outside this chunk */
    3109        1501 :         sspin_init_internal(sspin);
    3110             :         SSPIN_GET_STACK(sspin, init); /* must follow sspin_init_internal(): it needs internal to be allocated */
    3111        1501 :         sspin->initialized = true;
    3112             : }
    3113             : /* Destroy a spinlock that is initialized, not yet destroyed and not held, then flag it destroyed. */
    3114             : void
    3115        1491 : spdk_spin_destroy(struct spdk_spinlock *sspin)
    3116             : {
    3117             :         int rc;
    3118             : 
    3119        1491 :         SPIN_ASSERT_LOG_STACKS(!sspin->destroyed, SPIN_ERR_DESTROYED, sspin);
    3120        1491 :         SPIN_ASSERT_LOG_STACKS(sspin->initialized, SPIN_ERR_NOT_INITIALIZED, sspin);
    3121        1491 :         SPIN_ASSERT_LOG_STACKS(sspin->thread == NULL, SPIN_ERR_LOCK_HELD, sspin); /* a non-NULL thread means the lock is held */
    3122             : 
    3123        1490 :         rc = pthread_spin_destroy(&sspin->spinlock);
    3124        1490 :         SPIN_ASSERT_LOG_STACKS(rc == 0, SPIN_ERR_PTHREAD, sspin);
    3125             : 
    3126        1490 :         sspin_fini_internal(sspin);
    3127        1490 :         sspin->initialized = false;
    3128        1490 :         sspin->destroyed = true; /* checked first by destroy/lock/unlock to report use after destroy */
    3129             : }
    3130             : /* Take sspin on behalf of the calling SPDK thread, record that thread as holder and bump its lock_count. */
    3131             : void
    3132       33089 : spdk_spin_lock(struct spdk_spinlock *sspin)
    3133             : {
    3134       33089 :         struct spdk_thread *thread = spdk_get_thread();
    3135             :         int rc;
    3136             : 
    3137       33089 :         SPIN_ASSERT_LOG_STACKS(!sspin->destroyed, SPIN_ERR_DESTROYED, sspin);
    3138       33089 :         SPIN_ASSERT_LOG_STACKS(sspin->initialized, SPIN_ERR_NOT_INITIALIZED, sspin);
    3139       33089 :         SPIN_ASSERT_LOG_STACKS(thread != NULL, SPIN_ERR_NOT_SPDK_THREAD, sspin);
    3140       33088 :         SPIN_ASSERT_LOG_STACKS(thread != sspin->thread, SPIN_ERR_DEADLOCK, sspin); /* the current holder locking again would spin forever */
    3141             : 
    3142       33087 :         rc = pthread_spin_lock(&sspin->spinlock);
    3143       33087 :         SPIN_ASSERT_LOG_STACKS(rc == 0, SPIN_ERR_PTHREAD, sspin);
    3144             : 
    3145       33087 :         sspin->thread = thread; /* holder is recorded only after the pthread lock is acquired */
    3146       33087 :         sspin->thread->lock_count++; /* decremented again in spdk_spin_unlock() */
    3147             : 
    3148             :         SSPIN_GET_STACK(sspin, lock);
    3149             : }
    3150             : /* Release sspin; the caller must be the SPDK thread recorded as holder. Clears the holder and drops its lock_count. */
    3151             : void
    3152       33089 : spdk_spin_unlock(struct spdk_spinlock *sspin)
    3153             : {
    3154       33089 :         struct spdk_thread *thread = spdk_get_thread();
    3155             :         int rc;
    3156             : 
    3157       33089 :         SPIN_ASSERT_LOG_STACKS(!sspin->destroyed, SPIN_ERR_DESTROYED, sspin);
    3158       33089 :         SPIN_ASSERT_LOG_STACKS(sspin->initialized, SPIN_ERR_NOT_INITIALIZED, sspin);
    3159       33089 :         SPIN_ASSERT_LOG_STACKS(thread != NULL, SPIN_ERR_NOT_SPDK_THREAD, sspin);
    3160       33089 :         SPIN_ASSERT_LOG_STACKS(thread == sspin->thread, SPIN_ERR_WRONG_THREAD, sspin);
    3161             : 
    3162       33087 :         SPIN_ASSERT_LOG_STACKS(thread->lock_count > 0, SPIN_ERR_LOCK_COUNT, sspin);
    3163       33087 :         thread->lock_count--;
    3164       33087 :         sspin->thread = NULL; /* holder is cleared before the pthread unlock below, i.e. while the lock is still held */
    3165             : 
    3166             :         SSPIN_GET_STACK(sspin, unlock);
    3167             : 
    3168       33087 :         rc = pthread_spin_unlock(&sspin->spinlock);
    3169       33087 :         SPIN_ASSERT_LOG_STACKS(rc == 0, SPIN_ERR_PTHREAD, sspin);
    3170             : }
    3171             : /* Return true when the calling SPDK thread is the holder recorded in sspin->thread. */
    3172             : bool
    3173       38576 : spdk_spin_held(struct spdk_spinlock *sspin)
    3174             : {
    3175       38576 :         struct spdk_thread *thread = spdk_get_thread();
    3176             : 
    3177       38576 :         SPIN_ASSERT_RETURN(thread != NULL, SPIN_ERR_NOT_SPDK_THREAD, false); /* macro defined elsewhere; presumably returns false here - TODO confirm */
    3178             : 
    3179       38575 :         return sspin->thread == thread;
    3180             : }
    3181             : 
    3182             : void
    3183           0 : spdk_thread_register_post_poller_handler(spdk_post_poller_fn fn, void *fn_arg)
    3184             : {
    3185             :         struct spdk_thread *thr;
    3186             : 
    3187           0 :         thr = _get_thread();
    3188           0 :         assert(thr);
    3189           0 :         if (spdk_unlikely(thr->num_pp_handlers == SPDK_THREAD_MAX_POST_POLLER_HANDLERS)) {
    3190           0 :                 SPDK_ERRLOG("Too many handlers registered");
    3191           0 :                 return;
    3192             :         }
    3193             : 
    3194           0 :         thr->pp_handlers[thr->num_pp_handlers].fn = fn;
    3195           0 :         thr->pp_handlers[thr->num_pp_handlers].fn_arg = fn_arg;
    3196           0 :         thr->num_pp_handlers++;
    3197             : }
    3198             : /* Presumably registers a log component/flag named "thread" for this file; the macro is defined outside this chunk - TODO confirm. */
    3199          49 : SPDK_LOG_REGISTER_COMPONENT(thread)

Generated by: LCOV version 1.15