LCOV - code coverage report
Current view: top level - module/bdev/rbd - bdev_rbd.c (source / functions) Hit Total Coverage
Test: ut_cov_unit.info Lines: 0 828 0.0 %
Date: 2024-12-08 02:22:01 Functions: 0 54 0.0 %

          Line data    Source code
       1             : /*   SPDX-License-Identifier: BSD-3-Clause
       2             :  *   Copyright (C) 2017 Intel Corporation.
       3             :  *   All rights reserved.
       4             :  */
       5             : 
       6             : #include "spdk/stdinc.h"
       7             : 
       8             : #include "bdev_rbd.h"
       9             : 
      10             : #include <rbd/librbd.h>
      11             : #include <rados/librados.h>
      12             : 
      13             : #include "spdk/env.h"
      14             : #include "spdk/bdev.h"
      15             : #include "spdk/thread.h"
      16             : #include "spdk/json.h"
      17             : #include "spdk/string.h"
      18             : #include "spdk/util.h"
      19             : #include "spdk/likely.h"
      20             : 
      21             : #include "spdk/bdev_module.h"
      22             : #include "spdk/log.h"
      23             : 
      24             : static int bdev_rbd_count = 0;
      25             : 
      26             : struct bdev_rbd_pool_ctx {
      27             :         rados_t *cluster_p;
      28             :         char *name;
      29             :         rados_ioctx_t io_ctx;
      30             :         uint32_t ref;
      31             :         STAILQ_ENTRY(bdev_rbd_pool_ctx) link;
      32             : };
      33             : 
      34             : static STAILQ_HEAD(, bdev_rbd_pool_ctx) g_map_bdev_rbd_pool_ctx = STAILQ_HEAD_INITIALIZER(
      35             :                         g_map_bdev_rbd_pool_ctx);
      36             : 
      37             : struct bdev_rbd {
      38             :         struct spdk_bdev disk;
      39             :         char *rbd_name;
      40             :         char *user_id;
      41             :         char *pool_name;
      42             :         char **config;
      43             : 
      44             :         rados_t cluster;
      45             :         rados_t *cluster_p;
      46             :         char *cluster_name;
      47             : 
      48             :         union rbd_ctx {
      49             :                 rados_ioctx_t io_ctx;
      50             :                 struct bdev_rbd_pool_ctx *ctx;
      51             :         } rados_ctx;
      52             : 
      53             :         rbd_image_t image;
      54             : 
      55             :         rbd_image_info_t info;
      56             :         struct spdk_thread *destruct_td;
      57             : 
      58             :         TAILQ_ENTRY(bdev_rbd) tailq;
      59             :         struct spdk_poller *reset_timer;
      60             :         struct spdk_bdev_io *reset_bdev_io;
      61             : 
      62             :         uint64_t rbd_watch_handle;
      63             : };
      64             : 
      65             : struct bdev_rbd_io_channel {
      66             :         struct bdev_rbd *disk;
      67             :         struct spdk_io_channel *group_ch;
      68             : };
      69             : 
      70             : struct bdev_rbd_io {
      71             :         struct                  spdk_thread *submit_td;
      72             :         enum                    spdk_bdev_io_status status;
      73             :         rbd_completion_t        comp;
      74             :         size_t                  total_len;
      75             : };
      76             : 
      77             : struct bdev_rbd_cluster {
      78             :         char *name;
      79             :         char *user_id;
      80             :         char **config_param;
      81             :         char *config_file;
      82             :         char *key_file;
      83             :         char *core_mask;
      84             :         rados_t cluster;
      85             :         uint32_t ref;
      86             :         STAILQ_ENTRY(bdev_rbd_cluster) link;
      87             : };
      88             : 
      89             : static STAILQ_HEAD(, bdev_rbd_cluster) g_map_bdev_rbd_cluster = STAILQ_HEAD_INITIALIZER(
      90             :                         g_map_bdev_rbd_cluster);
      91             : static pthread_mutex_t g_map_bdev_rbd_cluster_mutex = PTHREAD_MUTEX_INITIALIZER;
      92             : 
      93             : static struct spdk_io_channel *bdev_rbd_get_io_channel(void *ctx);
      94             : 
      95             : static void
      96           0 : _rbd_update_callback(void *arg)
      97             : {
      98           0 :         struct bdev_rbd *rbd = arg;
      99           0 :         uint64_t current_size_in_bytes = 0;
     100           0 :         int rc;
     101             : 
     102           0 :         rc = rbd_get_size(rbd->image, &current_size_in_bytes);
     103           0 :         if (rc < 0) {
     104           0 :                 SPDK_ERRLOG("Failed getting size %d\n", rc);
     105           0 :                 return;
     106             :         }
     107             : 
     108           0 :         rc = spdk_bdev_notify_blockcnt_change(&rbd->disk, current_size_in_bytes / rbd->disk.blocklen);
     109           0 :         if (rc != 0) {
     110           0 :                 SPDK_ERRLOG("failed to notify block cnt change.\n");
     111           0 :         }
     112           0 : }
     113             : 
     114             : static void
     115           0 : rbd_update_callback(void *arg)
     116             : {
     117           0 :         spdk_thread_send_msg(spdk_thread_get_app_thread(), _rbd_update_callback, arg);
     118           0 : }
     119             : 
     120             : static void
     121           0 : bdev_rbd_cluster_free(struct bdev_rbd_cluster *entry)
     122             : {
     123           0 :         assert(entry != NULL);
     124             : 
     125           0 :         bdev_rbd_free_config(entry->config_param);
     126           0 :         free(entry->config_file);
     127           0 :         free(entry->key_file);
     128           0 :         free(entry->user_id);
     129           0 :         free(entry->name);
     130           0 :         free(entry->core_mask);
     131           0 :         free(entry);
     132           0 : }
     133             : 
     134             : static void
     135           0 : bdev_rbd_put_cluster(rados_t **cluster)
     136             : {
     137           0 :         struct bdev_rbd_cluster *entry;
     138             : 
     139           0 :         assert(cluster != NULL);
     140             : 
     141             :         /* No need go through the map if *cluster equals to NULL */
     142           0 :         if (*cluster == NULL) {
     143           0 :                 return;
     144             :         }
     145             : 
     146           0 :         pthread_mutex_lock(&g_map_bdev_rbd_cluster_mutex);
     147           0 :         STAILQ_FOREACH(entry, &g_map_bdev_rbd_cluster, link) {
     148           0 :                 if (*cluster != &entry->cluster) {
     149           0 :                         continue;
     150             :                 }
     151             : 
     152           0 :                 assert(entry->ref > 0);
     153           0 :                 entry->ref--;
     154           0 :                 *cluster = NULL;
     155           0 :                 pthread_mutex_unlock(&g_map_bdev_rbd_cluster_mutex);
     156           0 :                 return;
     157             :         }
     158             : 
     159           0 :         pthread_mutex_unlock(&g_map_bdev_rbd_cluster_mutex);
     160           0 :         SPDK_ERRLOG("Cannot find the entry for cluster=%p\n", cluster);
     161           0 : }
     162             : 
     163             : static void
     164           0 : bdev_rbd_put_pool_ctx(struct bdev_rbd_pool_ctx *entry)
     165             : {
     166           0 :         assert(spdk_get_thread() == spdk_thread_get_app_thread());
     167             : 
     168           0 :         assert(entry != NULL);
     169           0 :         assert(entry->ref > 0);
     170           0 :         entry->ref--;
     171           0 :         if (entry->ref == 0) {
     172           0 :                 STAILQ_REMOVE(&g_map_bdev_rbd_pool_ctx, entry, bdev_rbd_pool_ctx, link);
     173           0 :                 rados_ioctx_destroy(entry->io_ctx);
     174           0 :                 free(entry->name);
     175           0 :                 free(entry);
     176           0 :         }
     177           0 : }
     178             : 
     179             : static void
     180           0 : bdev_rbd_free(struct bdev_rbd *rbd)
     181             : {
     182           0 :         if (!rbd) {
     183           0 :                 return;
     184             :         }
     185             : 
     186           0 :         if (rbd->image) {
     187           0 :                 rbd_update_unwatch(rbd->image, rbd->rbd_watch_handle);
     188           0 :                 rbd_flush(rbd->image);
     189           0 :                 rbd_close(rbd->image);
     190           0 :         }
     191             : 
     192           0 :         free(rbd->disk.name);
     193           0 :         free(rbd->rbd_name);
     194           0 :         free(rbd->user_id);
     195           0 :         free(rbd->pool_name);
     196           0 :         bdev_rbd_free_config(rbd->config);
     197             : 
     198           0 :         if (rbd->cluster_name) {
     199             :                 /* When rbd is destructed by bdev_rbd_destruct, it will not enter here
     200             :                  * because the ctx will already freed by bdev_rbd_free_cb in async manner.
     201             :                  * This path only happens during the rbd initialization procedure of rbd */
     202           0 :                 if (rbd->rados_ctx.ctx) {
     203           0 :                         bdev_rbd_put_pool_ctx(rbd->rados_ctx.ctx);
     204           0 :                         rbd->rados_ctx.ctx = NULL;
     205           0 :                 }
     206             : 
     207           0 :                 bdev_rbd_put_cluster(&rbd->cluster_p);
     208           0 :                 free(rbd->cluster_name);
     209           0 :         } else if (rbd->cluster) {
     210           0 :                 if (rbd->rados_ctx.io_ctx) {
     211           0 :                         rados_ioctx_destroy(rbd->rados_ctx.io_ctx);
     212           0 :                 }
     213           0 :                 rados_shutdown(rbd->cluster);
     214           0 :         }
     215             : 
     216           0 :         free(rbd);
     217           0 : }
     218             : 
     219             : void
     220           0 : bdev_rbd_free_config(char **config)
     221             : {
     222           0 :         char **entry;
     223             : 
     224           0 :         if (config) {
     225           0 :                 for (entry = config; *entry; entry++) {
     226           0 :                         free(*entry);
     227           0 :                 }
     228           0 :                 free(config);
     229           0 :         }
     230           0 : }
     231             : 
     232             : char **
     233           0 : bdev_rbd_dup_config(const char *const *config)
     234             : {
     235           0 :         size_t count;
     236           0 :         char **copy;
     237             : 
     238           0 :         if (!config) {
     239           0 :                 return NULL;
     240             :         }
     241           0 :         for (count = 0; config[count]; count++) {}
     242           0 :         copy = calloc(count + 1, sizeof(*copy));
     243           0 :         if (!copy) {
     244           0 :                 return NULL;
     245             :         }
     246           0 :         for (count = 0; config[count]; count++) {
     247           0 :                 if (!(copy[count] = strdup(config[count]))) {
     248           0 :                         bdev_rbd_free_config(copy);
     249           0 :                         return NULL;
     250             :                 }
     251           0 :         }
     252           0 :         return copy;
     253           0 : }
     254             : 
     255             : static int
     256           0 : bdev_rados_cluster_init(const char *user_id, const char *const *config,
     257             :                         rados_t *cluster)
     258             : {
     259           0 :         int ret;
     260             : 
     261           0 :         ret = rados_create(cluster, user_id);
     262           0 :         if (ret < 0) {
     263           0 :                 SPDK_ERRLOG("Failed to create rados_t struct\n");
     264           0 :                 return -1;
     265             :         }
     266             : 
     267           0 :         if (config) {
     268           0 :                 const char *const *entry = config;
     269           0 :                 while (*entry) {
     270           0 :                         ret = rados_conf_set(*cluster, entry[0], entry[1]);
     271           0 :                         if (ret < 0) {
     272           0 :                                 SPDK_ERRLOG("Failed to set %s = %s\n", entry[0], entry[1]);
     273           0 :                                 rados_shutdown(*cluster);
     274           0 :                                 *cluster = NULL;
     275           0 :                                 return -1;
     276             :                         }
     277           0 :                         entry += 2;
     278             :                 }
     279           0 :         } else {
     280           0 :                 ret = rados_conf_read_file(*cluster, NULL);
     281           0 :                 if (ret < 0) {
     282           0 :                         SPDK_ERRLOG("Failed to read conf file\n");
     283           0 :                         rados_shutdown(*cluster);
     284           0 :                         *cluster = NULL;
     285           0 :                         return -1;
     286             :                 }
     287             :         }
     288             : 
     289           0 :         ret = rados_connect(*cluster);
     290           0 :         if (ret < 0) {
     291           0 :                 SPDK_ERRLOG("Failed to connect to rbd_pool\n");
     292           0 :                 rados_shutdown(*cluster);
     293           0 :                 *cluster = NULL;
     294           0 :                 return -1;
     295             :         }
     296             : 
     297           0 :         return 0;
     298           0 : }
     299             : 
     300             : static int
     301           0 : bdev_rbd_get_cluster(const char *cluster_name, rados_t **cluster)
     302             : {
     303           0 :         struct bdev_rbd_cluster *entry;
     304             : 
     305           0 :         if (cluster == NULL) {
     306           0 :                 SPDK_ERRLOG("cluster should not be NULL\n");
     307           0 :                 return -1;
     308             :         }
     309             : 
     310           0 :         pthread_mutex_lock(&g_map_bdev_rbd_cluster_mutex);
     311           0 :         STAILQ_FOREACH(entry, &g_map_bdev_rbd_cluster, link) {
     312           0 :                 if (strcmp(cluster_name, entry->name) == 0) {
     313           0 :                         entry->ref++;
     314           0 :                         *cluster = &entry->cluster;
     315           0 :                         pthread_mutex_unlock(&g_map_bdev_rbd_cluster_mutex);
     316           0 :                         return 0;
     317             :                 }
     318           0 :         }
     319             : 
     320           0 :         pthread_mutex_unlock(&g_map_bdev_rbd_cluster_mutex);
     321           0 :         return -1;
     322           0 : }
     323             : 
     324             : static int
     325           0 : bdev_rbd_shared_cluster_init(const char *cluster_name, rados_t **cluster)
     326             : {
     327           0 :         int ret;
     328             : 
     329           0 :         ret = bdev_rbd_get_cluster(cluster_name, cluster);
     330           0 :         if (ret < 0) {
     331           0 :                 SPDK_ERRLOG("Failed to create rados_t struct\n");
     332           0 :                 return -1;
     333             :         }
     334             : 
     335           0 :         return ret;
     336           0 : }
     337             : 
     338             : static void *
     339           0 : bdev_rbd_cluster_handle(void *arg)
     340             : {
     341           0 :         void *ret = arg;
     342           0 :         struct bdev_rbd *rbd = arg;
     343           0 :         int rc;
     344             : 
     345           0 :         rc = bdev_rados_cluster_init(rbd->user_id, (const char *const *)rbd->config,
     346           0 :                                      &rbd->cluster);
     347           0 :         if (rc < 0) {
     348           0 :                 SPDK_ERRLOG("Failed to create rados cluster for user_id=%s and rbd_pool=%s\n",
     349             :                             rbd->user_id ? rbd->user_id : "admin (the default)", rbd->pool_name);
     350           0 :                 ret = NULL;
     351           0 :         }
     352             : 
     353           0 :         return ret;
     354           0 : }
     355             : 
     356             : static int
     357           0 : bdev_rbd_get_pool_ctx(rados_t *cluster_p, const char *name,  struct bdev_rbd_pool_ctx **ctx)
     358             : {
     359           0 :         struct bdev_rbd_pool_ctx *entry;
     360             : 
     361           0 :         assert(spdk_get_thread() == spdk_thread_get_app_thread());
     362             : 
     363           0 :         if (name == NULL || ctx == NULL) {
     364           0 :                 return -1;
     365             :         }
     366             : 
     367           0 :         STAILQ_FOREACH(entry, &g_map_bdev_rbd_pool_ctx, link) {
     368           0 :                 if (strcmp(name, entry->name) == 0 && cluster_p == entry->cluster_p) {
     369           0 :                         entry->ref++;
     370           0 :                         *ctx = entry;
     371           0 :                         return 0;
     372             :                 }
     373           0 :         }
     374             : 
     375           0 :         entry = calloc(1, sizeof(*entry));
     376           0 :         if (!entry) {
     377           0 :                 SPDK_ERRLOG("Cannot allocate an entry for name=%s\n", name);
     378           0 :                 return -1;
     379             :         }
     380             : 
     381           0 :         entry->name = strdup(name);
     382           0 :         if (entry->name == NULL) {
     383           0 :                 SPDK_ERRLOG("Failed to allocate the name =%s space on entry =%p\n", name, entry);
     384           0 :                 goto err_handle;
     385             :         }
     386             : 
     387           0 :         if (rados_ioctx_create(*cluster_p, name, &entry->io_ctx) < 0) {
     388           0 :                 goto err_handle1;
     389             :         }
     390             : 
     391           0 :         entry->cluster_p = cluster_p;
     392           0 :         entry->ref = 1;
     393           0 :         *ctx = entry;
     394           0 :         STAILQ_INSERT_TAIL(&g_map_bdev_rbd_pool_ctx, entry, link);
     395             : 
     396           0 :         return 0;
     397             : 
     398             : err_handle1:
     399           0 :         free(entry->name);
     400             : err_handle:
     401           0 :         free(entry);
     402             : 
     403           0 :         return -1;
     404           0 : }
     405             : 
     406             : static void *
     407           0 : bdev_rbd_init_context(void *arg)
     408             : {
     409           0 :         struct bdev_rbd *rbd = arg;
     410           0 :         int rc;
     411           0 :         rados_ioctx_t *io_ctx = NULL;
     412             : 
     413           0 :         if (rbd->cluster_name) {
     414           0 :                 if (bdev_rbd_get_pool_ctx(rbd->cluster_p, rbd->pool_name, &rbd->rados_ctx.ctx) < 0) {
     415           0 :                         SPDK_ERRLOG("Failed to create ioctx on rbd=%p with cluster_name=%s\n",
     416             :                                     rbd, rbd->cluster_name);
     417           0 :                         return NULL;
     418             :                 }
     419           0 :                 io_ctx = &rbd->rados_ctx.ctx->io_ctx;
     420           0 :         } else {
     421           0 :                 if (rados_ioctx_create(*(rbd->cluster_p), rbd->pool_name, &rbd->rados_ctx.io_ctx) < 0) {
     422           0 :                         SPDK_ERRLOG("Failed to create ioctx on rbd=%p\n", rbd);
     423           0 :                         return NULL;
     424             :                 }
     425           0 :                 io_ctx = &rbd->rados_ctx.io_ctx;
     426             :         }
     427             : 
     428           0 :         assert(io_ctx != NULL);
     429           0 :         rc = rbd_open(*io_ctx, rbd->rbd_name, &rbd->image, NULL);
     430           0 :         if (rc < 0) {
     431           0 :                 SPDK_ERRLOG("Failed to open specified rbd device\n");
     432           0 :                 return NULL;
     433             :         }
     434             : 
     435           0 :         rc = rbd_update_watch(rbd->image, &rbd->rbd_watch_handle, rbd_update_callback, (void *)rbd);
     436           0 :         if (rc < 0) {
     437           0 :                 SPDK_ERRLOG("Failed to set up watch %d\n", rc);
     438           0 :         }
     439             : 
     440           0 :         rc = rbd_stat(rbd->image, &rbd->info, sizeof(rbd->info));
     441           0 :         if (rc < 0) {
     442           0 :                 SPDK_ERRLOG("Failed to stat specified rbd device\n");
     443           0 :                 return NULL;
     444             :         }
     445             : 
     446           0 :         return arg;
     447           0 : }
     448             : 
     449             : static int
     450           0 : bdev_rbd_init(struct bdev_rbd *rbd)
     451             : {
     452           0 :         int ret = 0;
     453             : 
     454           0 :         if (!rbd->cluster_name) {
     455           0 :                 rbd->cluster_p = &rbd->cluster;
     456             :                 /* Cluster should be created in non-SPDK thread to avoid conflict between
     457             :                  * Rados and SPDK thread */
     458           0 :                 if (spdk_call_unaffinitized(bdev_rbd_cluster_handle, rbd) == NULL) {
     459           0 :                         SPDK_ERRLOG("Cannot create the rados object on rbd=%p\n", rbd);
     460           0 :                         return -1;
     461             :                 }
     462           0 :         } else {
     463           0 :                 ret = bdev_rbd_shared_cluster_init(rbd->cluster_name, &rbd->cluster_p);
     464           0 :                 if (ret < 0) {
     465           0 :                         SPDK_ERRLOG("Failed to create rados object for rbd =%p on cluster_name=%s\n",
     466             :                                     rbd, rbd->cluster_name);
     467           0 :                         return -1;
     468             :                 }
     469             :         }
     470             : 
     471           0 :         if (spdk_call_unaffinitized(bdev_rbd_init_context, rbd) == NULL) {
     472           0 :                 SPDK_ERRLOG("Cannot init rbd context for rbd=%p\n", rbd);
     473           0 :                 return -1;
     474             :         }
     475             : 
     476           0 :         return ret;
     477           0 : }
     478             : 
     479             : static void
     480           0 : _bdev_rbd_io_complete(void *_rbd_io)
     481             : {
     482           0 :         struct bdev_rbd_io *rbd_io = _rbd_io;
     483             : 
     484           0 :         spdk_bdev_io_complete(spdk_bdev_io_from_ctx(rbd_io), rbd_io->status);
     485           0 : }
     486             : 
     487             : static void
     488           0 : bdev_rbd_io_complete(struct spdk_bdev_io *bdev_io, enum spdk_bdev_io_status status)
     489             : {
     490           0 :         struct bdev_rbd_io *rbd_io = (struct bdev_rbd_io *)bdev_io->driver_ctx;
     491           0 :         struct spdk_thread *current_thread = spdk_get_thread();
     492             : 
     493           0 :         rbd_io->status = status;
     494           0 :         assert(rbd_io->submit_td != NULL);
     495           0 :         if (rbd_io->submit_td != current_thread) {
     496           0 :                 spdk_thread_send_msg(rbd_io->submit_td, _bdev_rbd_io_complete, rbd_io);
     497           0 :         } else {
     498           0 :                 _bdev_rbd_io_complete(rbd_io);
     499             :         }
     500           0 : }
     501             : 
     502             : static void
     503           0 : bdev_rbd_finish_aiocb(rbd_completion_t cb, void *arg)
     504             : {
     505           0 :         int io_status;
     506           0 :         struct spdk_bdev_io *bdev_io;
     507           0 :         struct bdev_rbd_io *rbd_io;
     508           0 :         enum spdk_bdev_io_status bio_status;
     509             : 
     510           0 :         bdev_io = rbd_aio_get_arg(cb);
     511           0 :         rbd_io = (struct bdev_rbd_io *)bdev_io->driver_ctx;
     512           0 :         io_status = rbd_aio_get_return_value(cb);
     513           0 :         bio_status = SPDK_BDEV_IO_STATUS_SUCCESS;
     514             : 
     515           0 :         if (bdev_io->type == SPDK_BDEV_IO_TYPE_READ) {
     516           0 :                 if ((int)rbd_io->total_len != io_status) {
     517           0 :                         bio_status = SPDK_BDEV_IO_STATUS_FAILED;
     518           0 :                 }
     519             : #ifdef LIBRBD_SUPPORTS_COMPARE_AND_WRITE_IOVEC
     520           0 :         } else if (bdev_io->type == SPDK_BDEV_IO_TYPE_COMPARE_AND_WRITE && io_status == -EILSEQ) {
     521           0 :                 bio_status = SPDK_BDEV_IO_STATUS_MISCOMPARE;
     522             : #endif
     523           0 :         } else if (io_status != 0) { /* For others, 0 means success */
     524           0 :                 bio_status = SPDK_BDEV_IO_STATUS_FAILED;
     525           0 :         }
     526             : 
     527           0 :         rbd_aio_release(cb);
     528             : 
     529           0 :         bdev_rbd_io_complete(bdev_io, bio_status);
     530           0 : }
     531             : 
     532             : static void
     533           0 : _bdev_rbd_start_aio(struct bdev_rbd *disk, struct spdk_bdev_io *bdev_io,
     534             :                     struct iovec *iov, int iovcnt, uint64_t offset, size_t len)
     535             : {
     536           0 :         int ret;
     537           0 :         struct bdev_rbd_io *rbd_io = (struct bdev_rbd_io *)bdev_io->driver_ctx;
     538           0 :         rbd_image_t image = disk->image;
     539             : 
     540           0 :         ret = rbd_aio_create_completion(bdev_io, bdev_rbd_finish_aiocb,
     541           0 :                                         &rbd_io->comp);
     542           0 :         if (ret < 0) {
     543           0 :                 goto err;
     544             :         }
     545             : 
     546           0 :         switch (bdev_io->type) {
     547             :         case SPDK_BDEV_IO_TYPE_READ:
     548           0 :                 rbd_io->total_len = len;
     549           0 :                 if (spdk_likely(iovcnt == 1)) {
     550           0 :                         ret = rbd_aio_read(image, offset, iov[0].iov_len, iov[0].iov_base,
     551           0 :                                            rbd_io->comp);
     552           0 :                 } else {
     553           0 :                         ret = rbd_aio_readv(image, iov, iovcnt, offset, rbd_io->comp);
     554             :                 }
     555           0 :                 break;
     556             :         case SPDK_BDEV_IO_TYPE_WRITE:
     557           0 :                 if (spdk_likely(iovcnt == 1)) {
     558           0 :                         ret = rbd_aio_write(image, offset, iov[0].iov_len, iov[0].iov_base,
     559           0 :                                             rbd_io->comp);
     560           0 :                 } else {
     561           0 :                         ret = rbd_aio_writev(image, iov, iovcnt, offset, rbd_io->comp);
     562             :                 }
     563           0 :                 break;
     564             :         case SPDK_BDEV_IO_TYPE_UNMAP:
     565           0 :                 ret = rbd_aio_discard(image, offset, len, rbd_io->comp);
     566           0 :                 break;
     567             :         case SPDK_BDEV_IO_TYPE_FLUSH:
     568           0 :                 ret = rbd_aio_flush(image, rbd_io->comp);
     569           0 :                 break;
     570             :         case SPDK_BDEV_IO_TYPE_WRITE_ZEROES:
     571           0 :                 ret = rbd_aio_write_zeroes(image, offset, len, rbd_io->comp, /* zero_flags */ 0,
     572             :                                            /* op_flags */ 0);
     573           0 :                 break;
     574             : #ifdef LIBRBD_SUPPORTS_COMPARE_AND_WRITE_IOVEC
     575             :         case SPDK_BDEV_IO_TYPE_COMPARE_AND_WRITE:
     576           0 :                 ret = rbd_aio_compare_and_writev(image, offset, iov /* cmp */, iovcnt,
     577           0 :                                                  bdev_io->u.bdev.fused_iovs /* write */,
     578           0 :                                                  bdev_io->u.bdev.fused_iovcnt,
     579           0 :                                                  rbd_io->comp, NULL,
     580             :                                                  /* op_flags */ 0);
     581           0 :                 break;
     582             : #endif
     583             :         default:
     584             :                 /* This should not happen.
     585             :                  * Function should only be called with supported io types in bdev_rbd_submit_request
     586             :                  */
     587           0 :                 SPDK_ERRLOG("Unsupported IO type =%d\n", bdev_io->type);
     588           0 :                 ret = -ENOTSUP;
     589           0 :                 break;
     590             :         }
     591             : 
     592           0 :         if (ret < 0) {
     593           0 :                 rbd_aio_release(rbd_io->comp);
     594           0 :                 goto err;
     595             :         }
     596             : 
     597           0 :         return;
     598             : 
     599             : err:
     600           0 :         bdev_rbd_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED);
     601           0 : }
     602             : 
     603             : static void
     604           0 : bdev_rbd_start_aio(void *ctx)
     605             : {
     606           0 :         struct spdk_bdev_io *bdev_io = ctx;
     607           0 :         struct bdev_rbd *disk = (struct bdev_rbd *)bdev_io->bdev->ctxt;
     608             : 
     609           0 :         _bdev_rbd_start_aio(disk,
     610           0 :                             bdev_io,
     611           0 :                             bdev_io->u.bdev.iovs,
     612           0 :                             bdev_io->u.bdev.iovcnt,
     613           0 :                             bdev_io->u.bdev.offset_blocks * bdev_io->bdev->blocklen,
     614           0 :                             bdev_io->u.bdev.num_blocks * bdev_io->bdev->blocklen);
     615           0 : }
     616             : 
     617             : static int bdev_rbd_library_init(void);
     618             : static void bdev_rbd_library_fini(void);
     619             : 
     620             : static int
     621           0 : bdev_rbd_get_ctx_size(void)
     622             : {
     623           0 :         return sizeof(struct bdev_rbd_io);
     624             : }
     625             : 
     626             : static struct spdk_bdev_module rbd_if = {
     627             :         .name = "rbd",
     628             :         .module_init = bdev_rbd_library_init,
     629             :         .module_fini = bdev_rbd_library_fini,
     630             :         .get_ctx_size = bdev_rbd_get_ctx_size,
     631             : 
     632             : };
     633           0 : SPDK_BDEV_MODULE_REGISTER(rbd, &rbd_if)
     634             : 
     635             : static int bdev_rbd_reset_timer(void *arg);
     636             : 
     637             : static void
     638           0 : bdev_rbd_check_outstanding_ios(struct spdk_bdev *bdev, uint64_t current_qd,
     639             :                                void *cb_arg, int rc)
     640             : {
     641           0 :         struct bdev_rbd *disk = cb_arg;
     642           0 :         enum spdk_bdev_io_status bio_status;
     643             : 
     644           0 :         if (rc == 0 && current_qd > 0) {
     645           0 :                 disk->reset_timer = SPDK_POLLER_REGISTER(bdev_rbd_reset_timer, disk, 1000);
     646           0 :                 return;
     647             :         }
     648             : 
     649           0 :         if (rc != 0) {
     650           0 :                 bio_status = SPDK_BDEV_IO_STATUS_FAILED;
     651           0 :         } else {
     652           0 :                 bio_status = SPDK_BDEV_IO_STATUS_SUCCESS;
     653             :         }
     654             : 
     655           0 :         bdev_rbd_io_complete(disk->reset_bdev_io, bio_status);
     656           0 :         disk->reset_bdev_io = NULL;
     657           0 : }
     658             : 
     659             : static int
     660           0 : bdev_rbd_reset_timer(void *arg)
     661             : {
     662           0 :         struct bdev_rbd *disk = arg;
     663             : 
     664           0 :         spdk_poller_unregister(&disk->reset_timer);
     665             : 
     666           0 :         spdk_bdev_get_current_qd(&disk->disk, bdev_rbd_check_outstanding_ios, disk);
     667             : 
     668           0 :         return SPDK_POLLER_BUSY;
     669           0 : }
     670             : 
     671             : static void
     672           0 : bdev_rbd_reset(void *ctx)
     673             : {
     674           0 :         struct spdk_bdev_io *bdev_io = ctx;
     675           0 :         struct bdev_rbd *disk = (struct bdev_rbd *)bdev_io->bdev->ctxt;
     676             : 
     677             :         /*
     678             :          * HACK: Since librbd doesn't provide any way to cancel outstanding aio, just kick off a
     679             :          * poller to wait for in-flight I/O to complete.
     680             :          */
     681           0 :         assert(disk->reset_bdev_io == NULL);
     682           0 :         disk->reset_bdev_io = bdev_io;
     683             : 
     684           0 :         bdev_rbd_reset_timer(disk);
     685           0 : }
     686             : 
     687             : static void
     688           0 : _bdev_rbd_destruct_done(void *io_device)
     689             : {
     690           0 :         struct bdev_rbd *rbd = io_device;
     691             : 
     692           0 :         assert(rbd != NULL);
     693             : 
     694           0 :         spdk_bdev_destruct_done(&rbd->disk, 0);
     695           0 :         bdev_rbd_free(rbd);
     696           0 : }
     697             : 
     698             : static void
     699           0 : bdev_rbd_free_cb(void *io_device)
     700             : {
     701           0 :         struct bdev_rbd *rbd = io_device;
     702             : 
     703           0 :         assert(spdk_get_thread() == spdk_thread_get_app_thread());
     704             : 
     705             :         /* free the ctx */
     706           0 :         if (rbd->cluster_name && rbd->rados_ctx.ctx) {
     707           0 :                 bdev_rbd_put_pool_ctx(rbd->rados_ctx.ctx);
     708           0 :                 rbd->rados_ctx.ctx = NULL;
     709           0 :         }
     710             : 
     711             :         /* The io device has been unregistered.  Send a message back to the
     712             :          * original thread that started the destruct operation, so that the
     713             :          * bdev unregister callback is invoked on the same thread that started
     714             :          * this whole process.
     715             :          */
     716           0 :         spdk_thread_send_msg(rbd->destruct_td, _bdev_rbd_destruct_done, rbd);
     717           0 : }
     718             : 
     719             : static void
     720           0 : _bdev_rbd_destruct(void *ctx)
     721             : {
     722           0 :         struct bdev_rbd *rbd = ctx;
     723             : 
     724           0 :         spdk_io_device_unregister(rbd, bdev_rbd_free_cb);
     725           0 : }
     726             : 
     727             : static int
     728           0 : bdev_rbd_destruct(void *ctx)
     729             : {
     730           0 :         struct bdev_rbd *rbd = ctx;
     731             : 
     732             :         /* Start the destruct operation on the rbd bdev's
     733             :          * main thread.  This guarantees it will only start
     734             :          * executing after any messages related to channel
     735             :          * deletions have finished completing.  *Always*
     736             :          * send a message, even if this function gets called
     737             :          * from the main thread, in case there are pending
     738             :          * channel delete messages in flight to this thread.
     739             :          */
     740           0 :         assert(rbd->destruct_td == NULL);
     741           0 :         rbd->destruct_td = spdk_get_thread();
     742           0 :         spdk_thread_send_msg(spdk_thread_get_app_thread(), _bdev_rbd_destruct, rbd);
     743             : 
     744             :         /* Return 1 to indicate the destruct path is asynchronous. */
     745           0 :         return 1;
     746           0 : }
     747             : 
     748             : static void
     749           0 : bdev_rbd_get_buf_cb(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io,
     750             :                     bool success)
     751             : {
     752           0 :         if (!success) {
     753           0 :                 bdev_rbd_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED);
     754           0 :                 return;
     755             :         }
     756             : 
     757           0 :         bdev_rbd_start_aio(bdev_io);
     758           0 : }
     759             : 
     760             : static void
     761           0 : bdev_rbd_submit_request(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io)
     762             : {
     763           0 :         struct spdk_thread *submit_td = spdk_io_channel_get_thread(ch);
     764           0 :         struct bdev_rbd_io *rbd_io = (struct bdev_rbd_io *)bdev_io->driver_ctx;
     765             : 
     766           0 :         rbd_io->submit_td = submit_td;
     767           0 :         switch (bdev_io->type) {
     768             :         case SPDK_BDEV_IO_TYPE_READ:
     769           0 :                 spdk_bdev_io_get_buf(bdev_io, bdev_rbd_get_buf_cb,
     770           0 :                                      bdev_io->u.bdev.num_blocks * bdev_io->bdev->blocklen);
     771           0 :                 break;
     772             : 
     773             :         case SPDK_BDEV_IO_TYPE_WRITE:
     774             :         case SPDK_BDEV_IO_TYPE_UNMAP:
     775             :         case SPDK_BDEV_IO_TYPE_FLUSH:
     776             :         case SPDK_BDEV_IO_TYPE_WRITE_ZEROES:
     777             : #ifdef LIBRBD_SUPPORTS_COMPARE_AND_WRITE_IOVEC
     778             :         case SPDK_BDEV_IO_TYPE_COMPARE_AND_WRITE:
     779             : #endif
     780           0 :                 bdev_rbd_start_aio(bdev_io);
     781           0 :                 break;
     782             : 
     783             :         case SPDK_BDEV_IO_TYPE_RESET:
     784           0 :                 spdk_thread_exec_msg(spdk_thread_get_app_thread(), bdev_rbd_reset, bdev_io);
     785           0 :                 break;
     786             : 
     787             :         default:
     788           0 :                 SPDK_ERRLOG("Unsupported IO type =%d\n", bdev_io->type);
     789           0 :                 bdev_rbd_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED);
     790           0 :                 break;
     791             :         }
     792           0 : }
     793             : 
     794             : static bool
     795           0 : bdev_rbd_io_type_supported(void *ctx, enum spdk_bdev_io_type io_type)
     796             : {
     797           0 :         switch (io_type) {
     798             :         case SPDK_BDEV_IO_TYPE_READ:
     799             :         case SPDK_BDEV_IO_TYPE_WRITE:
     800             :         case SPDK_BDEV_IO_TYPE_UNMAP:
     801             :         case SPDK_BDEV_IO_TYPE_FLUSH:
     802             :         case SPDK_BDEV_IO_TYPE_RESET:
     803             :         case SPDK_BDEV_IO_TYPE_WRITE_ZEROES:
     804             : #ifdef LIBRBD_SUPPORTS_COMPARE_AND_WRITE_IOVEC
     805             :         case SPDK_BDEV_IO_TYPE_COMPARE_AND_WRITE:
     806             : #endif
     807           0 :                 return true;
     808             : 
     809             :         default:
     810           0 :                 return false;
     811             :         }
     812           0 : }
     813             : 
     814             : static int
     815           0 : bdev_rbd_create_cb(void *io_device, void *ctx_buf)
     816             : {
     817           0 :         struct bdev_rbd_io_channel *ch = ctx_buf;
     818           0 :         struct bdev_rbd *disk = io_device;
     819             : 
     820           0 :         ch->disk = disk;
     821           0 :         ch->group_ch = spdk_get_io_channel(&rbd_if);
     822           0 :         assert(ch->group_ch != NULL);
     823             : 
     824           0 :         return 0;
     825           0 : }
     826             : 
     827             : static void
     828           0 : bdev_rbd_destroy_cb(void *io_device, void *ctx_buf)
     829             : {
     830           0 :         struct bdev_rbd_io_channel *ch = ctx_buf;
     831             : 
     832           0 :         spdk_put_io_channel(ch->group_ch);
     833           0 : }
     834             : 
     835             : static struct spdk_io_channel *
     836           0 : bdev_rbd_get_io_channel(void *ctx)
     837             : {
     838           0 :         struct bdev_rbd *rbd_bdev = ctx;
     839             : 
     840           0 :         return spdk_get_io_channel(rbd_bdev);
     841           0 : }
     842             : 
     843             : static void
     844           0 : bdev_rbd_cluster_dump_entry(const char *cluster_name, struct spdk_json_write_ctx *w)
     845             : {
     846           0 :         struct bdev_rbd_cluster *entry;
     847             : 
     848           0 :         pthread_mutex_lock(&g_map_bdev_rbd_cluster_mutex);
     849           0 :         STAILQ_FOREACH(entry, &g_map_bdev_rbd_cluster, link) {
     850           0 :                 if (strcmp(cluster_name, entry->name)) {
     851           0 :                         continue;
     852             :                 }
     853           0 :                 if (entry->user_id) {
     854           0 :                         spdk_json_write_named_string(w, "user_id", entry->user_id);
     855           0 :                 }
     856             : 
     857           0 :                 if (entry->config_param) {
     858           0 :                         char **config_entry = entry->config_param;
     859             : 
     860           0 :                         spdk_json_write_named_object_begin(w, "config_param");
     861           0 :                         while (*config_entry) {
     862           0 :                                 spdk_json_write_named_string(w, config_entry[0], config_entry[1]);
     863           0 :                                 config_entry += 2;
     864             :                         }
     865           0 :                         spdk_json_write_object_end(w);
     866           0 :                 }
     867           0 :                 if (entry->config_file) {
     868           0 :                         spdk_json_write_named_string(w, "config_file", entry->config_file);
     869           0 :                 }
     870           0 :                 if (entry->key_file) {
     871           0 :                         spdk_json_write_named_string(w, "key_file", entry->key_file);
     872           0 :                 }
     873             : 
     874           0 :                 pthread_mutex_unlock(&g_map_bdev_rbd_cluster_mutex);
     875           0 :                 return;
     876             :         }
     877             : 
     878           0 :         pthread_mutex_unlock(&g_map_bdev_rbd_cluster_mutex);
     879           0 : }
     880             : 
     881             : static int
     882           0 : bdev_rbd_dump_info_json(void *ctx, struct spdk_json_write_ctx *w)
     883             : {
     884           0 :         struct bdev_rbd *rbd_bdev = ctx;
     885             : 
     886           0 :         spdk_json_write_named_object_begin(w, "rbd");
     887             : 
     888           0 :         spdk_json_write_named_string(w, "pool_name", rbd_bdev->pool_name);
     889             : 
     890           0 :         spdk_json_write_named_string(w, "rbd_name", rbd_bdev->rbd_name);
     891             : 
     892           0 :         if (rbd_bdev->cluster_name) {
     893           0 :                 bdev_rbd_cluster_dump_entry(rbd_bdev->cluster_name, w);
     894           0 :                 goto end;
     895             :         }
     896             : 
     897           0 :         if (rbd_bdev->user_id) {
     898           0 :                 spdk_json_write_named_string(w, "user_id", rbd_bdev->user_id);
     899           0 :         }
     900             : 
     901           0 :         if (rbd_bdev->config) {
     902           0 :                 char **entry = rbd_bdev->config;
     903             : 
     904           0 :                 spdk_json_write_named_object_begin(w, "config");
     905           0 :                 while (*entry) {
     906           0 :                         spdk_json_write_named_string(w, entry[0], entry[1]);
     907           0 :                         entry += 2;
     908             :                 }
     909           0 :                 spdk_json_write_object_end(w);
     910           0 :         }
     911             : 
     912             : end:
     913           0 :         spdk_json_write_object_end(w);
     914             : 
     915           0 :         return 0;
     916           0 : }
     917             : 
     918             : static void
     919           0 : bdev_rbd_write_config_json(struct spdk_bdev *bdev, struct spdk_json_write_ctx *w)
     920             : {
     921           0 :         struct bdev_rbd *rbd = bdev->ctxt;
     922             : 
     923           0 :         spdk_json_write_object_begin(w);
     924             : 
     925           0 :         spdk_json_write_named_string(w, "method", "bdev_rbd_create");
     926             : 
     927           0 :         spdk_json_write_named_object_begin(w, "params");
     928           0 :         spdk_json_write_named_string(w, "name", bdev->name);
     929           0 :         spdk_json_write_named_string(w, "pool_name", rbd->pool_name);
     930           0 :         spdk_json_write_named_string(w, "rbd_name", rbd->rbd_name);
     931           0 :         spdk_json_write_named_uint32(w, "block_size", bdev->blocklen);
     932           0 :         if (rbd->user_id) {
     933           0 :                 spdk_json_write_named_string(w, "user_id", rbd->user_id);
     934           0 :         }
     935             : 
     936           0 :         if (rbd->config) {
     937           0 :                 char **entry = rbd->config;
     938             : 
     939           0 :                 spdk_json_write_named_object_begin(w, "config");
     940           0 :                 while (*entry) {
     941           0 :                         spdk_json_write_named_string(w, entry[0], entry[1]);
     942           0 :                         entry += 2;
     943             :                 }
     944           0 :                 spdk_json_write_object_end(w);
     945           0 :         }
     946             : 
     947           0 :         spdk_json_write_named_uuid(w, "uuid", &bdev->uuid);
     948             : 
     949           0 :         spdk_json_write_object_end(w);
     950             : 
     951           0 :         spdk_json_write_object_end(w);
     952           0 : }
     953             : 
     954             : static void
     955           0 : dump_single_cluster_entry(struct bdev_rbd_cluster *entry, struct spdk_json_write_ctx *w)
     956             : {
     957           0 :         assert(entry != NULL);
     958             : 
     959           0 :         spdk_json_write_object_begin(w);
     960           0 :         spdk_json_write_named_string(w, "cluster_name", entry->name);
     961             : 
     962           0 :         if (entry->user_id) {
     963           0 :                 spdk_json_write_named_string(w, "user_id", entry->user_id);
     964           0 :         }
     965             : 
     966           0 :         if (entry->config_param) {
     967           0 :                 char **config_entry = entry->config_param;
     968             : 
     969           0 :                 spdk_json_write_named_object_begin(w, "config_param");
     970           0 :                 while (*config_entry) {
     971           0 :                         spdk_json_write_named_string(w, config_entry[0], config_entry[1]);
     972           0 :                         config_entry += 2;
     973             :                 }
     974           0 :                 spdk_json_write_object_end(w);
     975           0 :         }
     976           0 :         if (entry->config_file) {
     977           0 :                 spdk_json_write_named_string(w, "config_file", entry->config_file);
     978           0 :         }
     979           0 :         if (entry->key_file) {
     980           0 :                 spdk_json_write_named_string(w, "key_file", entry->key_file);
     981           0 :         }
     982             : 
     983           0 :         if (entry->core_mask) {
     984           0 :                 spdk_json_write_named_string(w, "core_mask", entry->core_mask);
     985           0 :         }
     986             : 
     987           0 :         spdk_json_write_object_end(w);
     988           0 : }
     989             : 
     990             : int
     991           0 : bdev_rbd_get_clusters_info(struct spdk_jsonrpc_request *request, const char *name)
     992             : {
     993           0 :         struct bdev_rbd_cluster *entry;
     994           0 :         struct spdk_json_write_ctx *w;
     995             : 
     996           0 :         pthread_mutex_lock(&g_map_bdev_rbd_cluster_mutex);
     997             : 
     998           0 :         if (STAILQ_EMPTY(&g_map_bdev_rbd_cluster)) {
     999           0 :                 pthread_mutex_unlock(&g_map_bdev_rbd_cluster_mutex);
    1000           0 :                 return -ENOENT;
    1001             :         }
    1002             : 
    1003             :         /* If cluster name is provided */
    1004           0 :         if (name) {
    1005           0 :                 STAILQ_FOREACH(entry, &g_map_bdev_rbd_cluster, link) {
    1006           0 :                         if (strcmp(name, entry->name) == 0) {
    1007           0 :                                 w = spdk_jsonrpc_begin_result(request);
    1008           0 :                                 dump_single_cluster_entry(entry, w);
    1009           0 :                                 spdk_jsonrpc_end_result(request, w);
    1010             : 
    1011           0 :                                 pthread_mutex_unlock(&g_map_bdev_rbd_cluster_mutex);
    1012           0 :                                 return 0;
    1013             :                         }
    1014           0 :                 }
    1015             : 
    1016           0 :                 pthread_mutex_unlock(&g_map_bdev_rbd_cluster_mutex);
    1017           0 :                 return -ENOENT;
    1018             :         }
    1019             : 
    1020           0 :         w = spdk_jsonrpc_begin_result(request);
    1021           0 :         spdk_json_write_array_begin(w);
    1022           0 :         STAILQ_FOREACH(entry, &g_map_bdev_rbd_cluster, link) {
    1023           0 :                 dump_single_cluster_entry(entry, w);
    1024           0 :         }
    1025           0 :         spdk_json_write_array_end(w);
    1026           0 :         spdk_jsonrpc_end_result(request, w);
    1027           0 :         pthread_mutex_unlock(&g_map_bdev_rbd_cluster_mutex);
    1028             : 
    1029           0 :         return 0;
    1030           0 : }
    1031             : 
    1032             : static const struct spdk_bdev_fn_table rbd_fn_table = {
    1033             :         .destruct               = bdev_rbd_destruct,
    1034             :         .submit_request         = bdev_rbd_submit_request,
    1035             :         .io_type_supported      = bdev_rbd_io_type_supported,
    1036             :         .get_io_channel         = bdev_rbd_get_io_channel,
    1037             :         .dump_info_json         = bdev_rbd_dump_info_json,
    1038             :         .write_config_json      = bdev_rbd_write_config_json,
    1039             : };
    1040             : 
    1041             : static int
    1042           0 : rbd_thread_set_cpumask(struct spdk_cpuset *set)
    1043             : {
    1044             : #ifdef __linux__
    1045           0 :         uint32_t lcore;
    1046           0 :         cpu_set_t mask;
    1047             : 
    1048           0 :         assert(set != NULL);
    1049           0 :         CPU_ZERO(&mask);
    1050             : 
    1051             :         /* get the core id on current spdk_cpuset and set to cpu_set_t */
    1052           0 :         for (lcore = 0; lcore < SPDK_CPUSET_SIZE; lcore++) {
    1053           0 :                 if (spdk_cpuset_get_cpu(set, lcore)) {
    1054           0 :                         CPU_SET(lcore, &mask);
    1055           0 :                 }
    1056           0 :         }
    1057             : 
    1058             :         /* change current thread core mask */
    1059           0 :         if (sched_setaffinity(0, sizeof(mask), &mask) < 0) {
    1060           0 :                 SPDK_ERRLOG("Set non SPDK thread cpu mask error (errno=%d)\n", errno);
    1061           0 :                 return -1;
    1062             :         }
    1063             : 
    1064           0 :         return 0;
    1065             : #else
    1066             :         SPDK_ERRLOG("SPDK non spdk thread cpumask setup supports only Linux platform now.\n");
    1067             :         return -ENOTSUP;
    1068             : #endif
    1069           0 : }
    1070             : 
    1071             : 
    1072             : static int
    1073           0 : rbd_register_cluster(const char *name, const char *user_id, const char *const *config_param,
    1074             :                      const char *config_file, const char *key_file, const char *core_mask)
    1075             : {
    1076           0 :         struct bdev_rbd_cluster *entry;
    1077           0 :         struct spdk_cpuset rbd_core_mask = {};
    1078           0 :         int rc;
    1079             : 
    1080           0 :         pthread_mutex_lock(&g_map_bdev_rbd_cluster_mutex);
    1081           0 :         STAILQ_FOREACH(entry, &g_map_bdev_rbd_cluster, link) {
    1082           0 :                 if (strcmp(name, entry->name) == 0) {
    1083           0 :                         SPDK_ERRLOG("Cluster name=%s already exists\n", name);
    1084           0 :                         pthread_mutex_unlock(&g_map_bdev_rbd_cluster_mutex);
    1085           0 :                         return -1;
    1086             :                 }
    1087           0 :         }
    1088             : 
    1089           0 :         entry = calloc(1, sizeof(*entry));
    1090           0 :         if (!entry) {
    1091           0 :                 SPDK_ERRLOG("Cannot allocate an entry for name=%s\n", name);
    1092           0 :                 pthread_mutex_unlock(&g_map_bdev_rbd_cluster_mutex);
    1093           0 :                 return -1;
    1094             :         }
    1095             : 
    1096           0 :         entry->name = strdup(name);
    1097           0 :         if (entry->name == NULL) {
    1098           0 :                 SPDK_ERRLOG("Failed to save the name =%s on entry =%p\n", name, entry);
    1099           0 :                 goto err_handle;
    1100             :         }
    1101             : 
    1102           0 :         if (user_id) {
    1103           0 :                 entry->user_id = strdup(user_id);
    1104           0 :                 if (entry->user_id == NULL) {
    1105           0 :                         SPDK_ERRLOG("Failed to save the str =%s on entry =%p\n", user_id, entry);
    1106           0 :                         goto err_handle;
    1107             :                 }
    1108           0 :         }
    1109             : 
    1110             :         /* Support specify config_param or config_file separately, or both of them. */
    1111           0 :         if (config_param) {
    1112           0 :                 entry->config_param = bdev_rbd_dup_config(config_param);
    1113           0 :                 if (entry->config_param == NULL) {
    1114           0 :                         SPDK_ERRLOG("Failed to save the config_param=%p on entry = %p\n", config_param, entry);
    1115           0 :                         goto err_handle;
    1116             :                 }
    1117           0 :         }
    1118             : 
    1119           0 :         if (config_file) {
    1120           0 :                 entry->config_file = strdup(config_file);
    1121           0 :                 if (entry->config_file == NULL) {
    1122           0 :                         SPDK_ERRLOG("Failed to save the config_file=%s on entry = %p\n", config_file, entry);
    1123           0 :                         goto err_handle;
    1124             :                 }
    1125           0 :         }
    1126             : 
    1127           0 :         if (key_file) {
    1128           0 :                 entry->key_file = strdup(key_file);
    1129           0 :                 if (entry->key_file == NULL) {
    1130           0 :                         SPDK_ERRLOG("Failed to save the key_file=%s on entry = %p\n", key_file, entry);
    1131           0 :                         goto err_handle;
    1132             :                 }
    1133           0 :         }
    1134             : 
    1135           0 :         if (core_mask) {
    1136           0 :                 entry->core_mask = strdup(core_mask);
    1137           0 :                 if (entry->core_mask == NULL) {
    1138           0 :                         SPDK_ERRLOG("Core_mask=%s allocation failed on entry = %p\n", core_mask, entry);
    1139           0 :                         goto err_handle;
    1140             :                 }
    1141             : 
    1142           0 :                 if (spdk_cpuset_parse(&rbd_core_mask, entry->core_mask) < 0) {
    1143           0 :                         SPDK_ERRLOG("Invalid cpumask=%s on entry = %p\n", entry->core_mask, entry);
    1144           0 :                         goto err_handle;
    1145             :                 }
    1146             : 
    1147           0 :                 if (rbd_thread_set_cpumask(&rbd_core_mask) < 0) {
    1148           0 :                         SPDK_ERRLOG("Failed to change rbd threads to core_mask %s on entry = %p\n", core_mask, entry);
    1149           0 :                         goto err_handle;
    1150             :                 }
    1151           0 :         }
    1152             : 
    1153             : 
    1154             :         /* If rbd thread core mask is given, rados_create() must execute with
    1155             :          * the affinity set by rbd_thread_set_cpumask(). The affinity set
    1156             :          * by rbd_thread_set_cpumask() will be reverted once rbd_register_cluster() returns
    1157             :          * and when we leave the spdk_call_unaffinitized context. */
    1158           0 :         rc = rados_create(&entry->cluster, user_id);
    1159           0 :         if (rc < 0) {
    1160           0 :                 SPDK_ERRLOG("Failed to create rados_t struct\n");
    1161           0 :                 goto err_handle;
    1162             :         }
    1163             : 
    1164             :         /* Try default location when entry->config_file is NULL, but ignore failure when it is NULL */
    1165           0 :         rc = rados_conf_read_file(entry->cluster, entry->config_file);
    1166           0 :         if (entry->config_file && rc < 0) {
    1167           0 :                 SPDK_ERRLOG("Failed to read conf file %s\n", entry->config_file);
    1168           0 :                 rados_shutdown(entry->cluster);
    1169           0 :                 goto err_handle;
    1170             :         }
    1171             : 
    1172           0 :         if (config_param) {
    1173           0 :                 const char *const *config_entry = config_param;
    1174           0 :                 while (*config_entry) {
    1175           0 :                         rc = rados_conf_set(entry->cluster, config_entry[0], config_entry[1]);
    1176           0 :                         if (rc < 0) {
    1177           0 :                                 SPDK_ERRLOG("Failed to set %s = %s\n", config_entry[0], config_entry[1]);
    1178           0 :                                 rados_shutdown(entry->cluster);
    1179           0 :                                 goto err_handle;
    1180             :                         }
    1181           0 :                         config_entry += 2;
    1182             :                 }
    1183           0 :         }
    1184             : 
    1185           0 :         if (key_file) {
    1186           0 :                 rc = rados_conf_set(entry->cluster, "keyring", key_file);
    1187           0 :                 if (rc < 0) {
    1188           0 :                         SPDK_ERRLOG("Failed to set keyring = %s\n", key_file);
    1189           0 :                         rados_shutdown(entry->cluster);
    1190           0 :                         goto err_handle;
    1191             :                 }
    1192           0 :         }
    1193             : 
    1194           0 :         rc = rados_connect(entry->cluster);
    1195           0 :         if (rc < 0) {
    1196           0 :                 SPDK_ERRLOG("Failed to connect to rbd_pool on cluster=%p\n", entry->cluster);
    1197           0 :                 rados_shutdown(entry->cluster);
    1198           0 :                 goto err_handle;
    1199             :         }
    1200             : 
    1201           0 :         STAILQ_INSERT_TAIL(&g_map_bdev_rbd_cluster, entry, link);
    1202           0 :         pthread_mutex_unlock(&g_map_bdev_rbd_cluster_mutex);
    1203             : 
    1204           0 :         return 0;
    1205             : 
    1206             : err_handle:
    1207           0 :         bdev_rbd_cluster_free(entry);
    1208           0 :         pthread_mutex_unlock(&g_map_bdev_rbd_cluster_mutex);
    1209           0 :         return -1;
    1210           0 : }
    1211             : 
    1212             : int
    1213           0 : bdev_rbd_unregister_cluster(const char *name)
    1214             : {
    1215           0 :         struct bdev_rbd_cluster *entry;
    1216           0 :         int rc = 0;
    1217             : 
    1218           0 :         if (name == NULL) {
    1219           0 :                 return -1;
    1220             :         }
    1221             : 
    1222           0 :         pthread_mutex_lock(&g_map_bdev_rbd_cluster_mutex);
    1223           0 :         STAILQ_FOREACH(entry, &g_map_bdev_rbd_cluster, link) {
    1224           0 :                 if (strcmp(name, entry->name) == 0) {
    1225           0 :                         if (entry->ref == 0) {
    1226           0 :                                 STAILQ_REMOVE(&g_map_bdev_rbd_cluster, entry, bdev_rbd_cluster, link);
    1227           0 :                                 rados_shutdown(entry->cluster);
    1228           0 :                                 bdev_rbd_cluster_free(entry);
    1229           0 :                         } else {
    1230           0 :                                 SPDK_ERRLOG("Cluster with name=%p is still used and we cannot delete it\n",
    1231             :                                             entry->name);
    1232           0 :                                 rc = -1;
    1233             :                         }
    1234             : 
    1235           0 :                         pthread_mutex_unlock(&g_map_bdev_rbd_cluster_mutex);
    1236           0 :                         return rc;
    1237             :                 }
    1238           0 :         }
    1239             : 
    1240           0 :         pthread_mutex_unlock(&g_map_bdev_rbd_cluster_mutex);
    1241             : 
    1242           0 :         SPDK_ERRLOG("Could not find the cluster name =%p\n", name);
    1243             : 
    1244           0 :         return -1;
    1245           0 : }
    1246             : 
    1247             : static void *
    1248           0 : _bdev_rbd_register_cluster(void *arg)
    1249             : {
    1250           0 :         struct cluster_register_info *info = arg;
    1251           0 :         void *ret = arg;
    1252           0 :         int rc;
    1253             : 
    1254           0 :         rc = rbd_register_cluster((const char *)info->name, (const char *)info->user_id,
    1255           0 :                                   (const char *const *)info->config_param, (const char *)info->config_file,
    1256           0 :                                   (const char *)info->key_file, info->core_mask);
    1257           0 :         if (rc) {
    1258           0 :                 ret = NULL;
    1259           0 :         }
    1260             : 
    1261           0 :         return ret;
    1262           0 : }
    1263             : 
    1264             : int
    1265           0 : bdev_rbd_register_cluster(struct cluster_register_info *info)
    1266             : {
    1267           0 :         assert(info != NULL);
    1268             : 
    1269             :         /* Rados cluster info need to be created in non SPDK-thread to avoid CPU
    1270             :          * resource contention */
    1271           0 :         if (spdk_call_unaffinitized(_bdev_rbd_register_cluster, info) == NULL) {
    1272           0 :                 return -1;
    1273             :         }
    1274             : 
    1275           0 :         return 0;
    1276           0 : }
    1277             : 
    1278             : int
    1279           0 : bdev_rbd_create(struct spdk_bdev **bdev, const char *name, const char *user_id,
    1280             :                 const char *pool_name,
    1281             :                 const char *const *config,
    1282             :                 const char *rbd_name,
    1283             :                 uint32_t block_size,
    1284             :                 const char *cluster_name,
    1285             :                 const struct spdk_uuid *uuid)
    1286             : {
    1287           0 :         struct bdev_rbd *rbd;
    1288           0 :         int ret;
    1289             : 
    1290           0 :         if ((pool_name == NULL) || (rbd_name == NULL) || (block_size == 0)) {
    1291           0 :                 return -EINVAL;
    1292             :         }
    1293             : 
    1294           0 :         rbd = calloc(1, sizeof(struct bdev_rbd));
    1295           0 :         if (rbd == NULL) {
    1296           0 :                 SPDK_ERRLOG("Failed to allocate bdev_rbd struct\n");
    1297           0 :                 return -ENOMEM;
    1298             :         }
    1299             : 
    1300           0 :         rbd->rbd_name = strdup(rbd_name);
    1301           0 :         if (!rbd->rbd_name) {
    1302           0 :                 bdev_rbd_free(rbd);
    1303           0 :                 return -ENOMEM;
    1304             :         }
    1305             : 
    1306           0 :         if (user_id) {
    1307           0 :                 rbd->user_id = strdup(user_id);
    1308           0 :                 if (!rbd->user_id) {
    1309           0 :                         bdev_rbd_free(rbd);
    1310           0 :                         return -ENOMEM;
    1311             :                 }
    1312           0 :         }
    1313             : 
    1314           0 :         if (cluster_name) {
    1315           0 :                 rbd->cluster_name = strdup(cluster_name);
    1316           0 :                 if (!rbd->cluster_name) {
    1317           0 :                         bdev_rbd_free(rbd);
    1318           0 :                         return -ENOMEM;
    1319             :                 }
    1320           0 :         }
    1321           0 :         rbd->pool_name = strdup(pool_name);
    1322           0 :         if (!rbd->pool_name) {
    1323           0 :                 bdev_rbd_free(rbd);
    1324           0 :                 return -ENOMEM;
    1325             :         }
    1326             : 
    1327           0 :         if (config && !(rbd->config = bdev_rbd_dup_config(config))) {
    1328           0 :                 bdev_rbd_free(rbd);
    1329           0 :                 return -ENOMEM;
    1330             :         }
    1331             : 
    1332           0 :         ret = bdev_rbd_init(rbd);
    1333           0 :         if (ret < 0) {
    1334           0 :                 bdev_rbd_free(rbd);
    1335           0 :                 SPDK_ERRLOG("Failed to init rbd device\n");
    1336           0 :                 return ret;
    1337             :         }
    1338             : 
    1339           0 :         rbd->disk.uuid = *uuid;
    1340           0 :         if (name) {
    1341           0 :                 rbd->disk.name = strdup(name);
    1342           0 :         } else {
    1343           0 :                 rbd->disk.name = spdk_sprintf_alloc("Ceph%d", bdev_rbd_count);
    1344             :         }
    1345           0 :         if (!rbd->disk.name) {
    1346           0 :                 bdev_rbd_free(rbd);
    1347           0 :                 return -ENOMEM;
    1348             :         }
    1349           0 :         rbd->disk.product_name = "Ceph Rbd Disk";
    1350           0 :         bdev_rbd_count++;
    1351             : 
    1352           0 :         rbd->disk.write_cache = 0;
    1353           0 :         rbd->disk.blocklen = block_size;
    1354           0 :         rbd->disk.blockcnt = rbd->info.size / rbd->disk.blocklen;
    1355           0 :         rbd->disk.ctxt = rbd;
    1356           0 :         rbd->disk.fn_table = &rbd_fn_table;
    1357           0 :         rbd->disk.module = &rbd_if;
    1358             : 
    1359           0 :         SPDK_NOTICELOG("Add %s rbd disk to lun\n", rbd->disk.name);
    1360             : 
    1361           0 :         spdk_io_device_register(rbd, bdev_rbd_create_cb,
    1362             :                                 bdev_rbd_destroy_cb,
    1363             :                                 sizeof(struct bdev_rbd_io_channel),
    1364           0 :                                 rbd_name);
    1365           0 :         ret = spdk_bdev_register(&rbd->disk);
    1366           0 :         if (ret) {
    1367           0 :                 spdk_io_device_unregister(rbd, NULL);
    1368           0 :                 bdev_rbd_free(rbd);
    1369           0 :                 return ret;
    1370             :         }
    1371             : 
    1372           0 :         *bdev = &(rbd->disk);
    1373             : 
    1374           0 :         return ret;
    1375           0 : }
    1376             : 
    1377             : void
    1378           0 : bdev_rbd_delete(const char *name, spdk_delete_rbd_complete cb_fn, void *cb_arg)
    1379             : {
    1380           0 :         int rc;
    1381             : 
    1382           0 :         rc = spdk_bdev_unregister_by_name(name, &rbd_if, cb_fn, cb_arg);
    1383           0 :         if (rc != 0) {
    1384           0 :                 cb_fn(cb_arg, rc);
    1385           0 :         }
    1386           0 : }
    1387             : 
    1388             : static void
    1389           0 : dummy_bdev_event_cb(enum spdk_bdev_event_type type, struct spdk_bdev *bdev, void *ctx)
    1390             : {
    1391           0 : }
    1392             : 
    1393             : int
    1394           0 : bdev_rbd_resize(const char *name, const uint64_t new_size_in_mb)
    1395             : {
    1396           0 :         struct spdk_bdev_desc *desc;
    1397           0 :         struct spdk_bdev *bdev;
    1398           0 :         struct bdev_rbd *rbd;
    1399           0 :         int rc = 0;
    1400           0 :         uint64_t new_size_in_byte;
    1401           0 :         uint64_t current_size_in_mb;
    1402             : 
    1403           0 :         rc = spdk_bdev_open_ext(name, false, dummy_bdev_event_cb, NULL, &desc);
    1404           0 :         if (rc != 0) {
    1405           0 :                 return rc;
    1406             :         }
    1407             : 
    1408           0 :         bdev = spdk_bdev_desc_get_bdev(desc);
    1409             : 
    1410           0 :         if (bdev->module != &rbd_if) {
    1411           0 :                 rc = -EINVAL;
    1412           0 :                 goto exit;
    1413             :         }
    1414             : 
    1415           0 :         current_size_in_mb = bdev->blocklen * bdev->blockcnt / (1024 * 1024);
    1416           0 :         if (current_size_in_mb > new_size_in_mb) {
    1417           0 :                 SPDK_ERRLOG("The new bdev size must be larger than current bdev size.\n");
    1418           0 :                 rc = -EINVAL;
    1419           0 :                 goto exit;
    1420             :         }
    1421             : 
    1422           0 :         rbd = SPDK_CONTAINEROF(bdev, struct bdev_rbd, disk);
    1423           0 :         new_size_in_byte = new_size_in_mb * 1024 * 1024;
    1424           0 :         rc = rbd_resize(rbd->image, new_size_in_byte);
    1425           0 :         if (rc != 0) {
    1426           0 :                 SPDK_ERRLOG("failed to resize the ceph bdev.\n");
    1427           0 :                 goto exit;
    1428             :         }
    1429             : 
    1430           0 :         rc = spdk_bdev_notify_blockcnt_change(bdev, new_size_in_byte / bdev->blocklen);
    1431           0 :         if (rc != 0) {
    1432           0 :                 SPDK_ERRLOG("failed to notify block cnt change.\n");
    1433           0 :         }
    1434             : 
    1435             : exit:
    1436           0 :         spdk_bdev_close(desc);
    1437           0 :         return rc;
    1438           0 : }
    1439             : 
    /* Create callback handed to spdk_io_device_register() by
     * bdev_rbd_library_init() together with a context size of 0; it has nothing
     * to set up and always returns 0. */
    static int
    bdev_rbd_group_create_cb(void *io_device, void *ctx_buf)
    {
            (void)io_device;
            (void)ctx_buf;

            return 0;
    }
    1445             : 
    /* Destroy callback handed to spdk_io_device_register() by
     * bdev_rbd_library_init(); there is nothing to tear down, so it is a no-op. */
    static void
    bdev_rbd_group_destroy_cb(void *io_device, void *ctx_buf)
    {
            (void)io_device;
            (void)ctx_buf;
    }
    1450             : 
    1451             : static int
    1452           0 : bdev_rbd_library_init(void)
    1453             : {
    1454           0 :         spdk_io_device_register(&rbd_if, bdev_rbd_group_create_cb, bdev_rbd_group_destroy_cb,
    1455             :                                 0, "bdev_rbd_poll_groups");
    1456           0 :         return 0;
    1457             : }
    1458             : 
    1459             : static void
    1460           0 : bdev_rbd_library_fini(void)
    1461             : {
    1462           0 :         spdk_io_device_unregister(&rbd_if, NULL);
    1463           0 : }
    1464             : 
    1465           0 : SPDK_LOG_REGISTER_COMPONENT(bdev_rbd)

Generated by: LCOV version 1.15