Line data Source code
1 : /* SPDX-License-Identifier: BSD-3-Clause
2 : * Copyright (C) 2017 Intel Corporation.
3 : * All rights reserved.
4 : */
5 :
6 : #include "spdk/stdinc.h"
7 :
8 : #include "spdk/blobfs.h"
9 : #include "cache_tree.h"
10 :
11 : #include "spdk/queue.h"
12 : #include "spdk/thread.h"
13 : #include "spdk/assert.h"
14 : #include "spdk/env.h"
15 : #include "spdk/util.h"
16 : #include "spdk/log.h"
17 : #include "spdk/trace.h"
18 :
19 : #include "spdk_internal/trace_defs.h"
20 :
/* Debug-log helpers: both prefix the message with "file=<name> ", taking the
 * name from file->name. They differ only in the log flag used (blobfs vs.
 * blobfs_rw).
 */
21 : #define BLOBFS_TRACE(file, str, args...) \
22 : SPDK_DEBUGLOG(blobfs, "file=%s " str, file->name, ##args)
23 :
24 : #define BLOBFS_TRACE_RW(file, str, args...) \
25 : SPDK_DEBUGLOG(blobfs_rw, "file=%s " str, file->name, ##args)
26 :
/* Default value of g_fs_cache_size: 4 GiB. */
27 : #define BLOBFS_DEFAULT_CACHE_SIZE (4ULL * 1024 * 1024 * 1024)
/* Cluster size written by spdk_fs_opts_init(): 1 MiB. */
28 : #define SPDK_BLOBFS_DEFAULT_OPTS_CLUSTER_SZ (1024 * 1024)
29 :
/* Blobstore type string; spdk_fs_init() stores it and load_cb() checks it. */
30 : #define SPDK_BLOBFS_SIGNATURE "BLOBFS"
31 :
/* Total cache size in bytes; the pool holds g_fs_cache_size / CACHE_BUFFER_SIZE buffers. */
32 : static uint64_t g_fs_cache_size = BLOBFS_DEFAULT_CACHE_SIZE;
/* Buffer pool created by allocate_cache_pool() and freed by __stop_cache_pool_mgmt(). */
33 : static struct spdk_mempool *g_cache_pool;
/* List of files; presumably those currently holding cache buffers - TODO confirm. */
34 : static TAILQ_HEAD(, spdk_file) g_caches = TAILQ_HEAD_INITIALIZER(g_caches);
/* Poller running _blobfs_cache_pool_reclaim on g_cache_pool_thread. */
35 : static struct spdk_poller *g_cache_pool_mgmt_poller;
/* Thread named "cache_pool_mgmt", created when the first filesystem is initialized. */
36 : static struct spdk_thread *g_cache_pool_thread;
/* Period, in microseconds, of the cache reclaim poller. */
37 : #define BLOBFS_CACHE_POOL_POLL_PERIOD_IN_US 1000ULL
/* Number of filesystems sharing the global cache; protected by g_cache_init_lock. */
38 : static int g_fs_count = 0;
39 : static pthread_mutex_t g_cache_init_lock = PTHREAD_MUTEX_INITIALIZER;
40 :
41 : static void
42 0 : blobfs_trace(void)
43 : {
44 0 : struct spdk_trace_tpoint_opts opts[] = {
45 : {
46 : "BLOBFS_XATTR_START", TRACE_BLOBFS_XATTR_START,
47 : OWNER_TYPE_NONE, OBJECT_NONE, 0,
48 : {{ "file", SPDK_TRACE_ARG_TYPE_STR, 40 }},
49 : },
50 : {
51 : "BLOBFS_XATTR_END", TRACE_BLOBFS_XATTR_END,
52 : OWNER_TYPE_NONE, OBJECT_NONE, 0,
53 : {{ "file", SPDK_TRACE_ARG_TYPE_STR, 40 }},
54 : },
55 : {
56 : "BLOBFS_OPEN", TRACE_BLOBFS_OPEN,
57 : OWNER_TYPE_NONE, OBJECT_NONE, 0,
58 : {{ "file", SPDK_TRACE_ARG_TYPE_STR, 40 }},
59 : },
60 : {
61 : "BLOBFS_CLOSE", TRACE_BLOBFS_CLOSE,
62 : OWNER_TYPE_NONE, OBJECT_NONE, 0,
63 : {{ "file", SPDK_TRACE_ARG_TYPE_STR, 40 }},
64 : },
65 : {
66 : "BLOBFS_DELETE_START", TRACE_BLOBFS_DELETE_START,
67 : OWNER_TYPE_NONE, OBJECT_NONE, 0,
68 : {{ "file", SPDK_TRACE_ARG_TYPE_STR, 40 }},
69 : },
70 : {
71 : "BLOBFS_DELETE_DONE", TRACE_BLOBFS_DELETE_DONE,
72 : OWNER_TYPE_NONE, OBJECT_NONE, 0,
73 : {{ "file", SPDK_TRACE_ARG_TYPE_STR, 40 }},
74 : }
75 : };
76 :
77 0 : spdk_trace_register_description_ext(opts, SPDK_COUNTOF(opts));
78 0 : }
79 2 : SPDK_TRACE_REGISTER_FN(blobfs_trace, "blobfs", TRACE_GROUP_BLOBFS)
80 :
81 : void
82 13 : cache_buffer_free(struct cache_buffer *cache_buffer)
83 : {
84 13 : spdk_mempool_put(g_cache_pool, cache_buffer->buf);
85 13 : free(cache_buffer);
86 13 : }
87 :
/* Threshold of 128 KiB; presumably the sequential-read byte count that
 * triggers readahead - TODO confirm against its users.
 */
88 : #define CACHE_READAHEAD_THRESHOLD (128 * 1024)
89 :
/* In-memory state of one blobfs file.
 *
 * Instances are created by file_alloc(), which zero-allocates the struct and
 * its cache tree, initializes the spinlock and both request lists, links the
 * file into fs->files and sets the priority to SPDK_FILE_PRIORITY_LOW.
 * When a filesystem is loaded, length, length_flushed, length_xattr and
 * append_pos all start out equal to the blob's "length" xattr.
 */
90 : struct spdk_file {
91 : struct spdk_filesystem *fs; /* owning filesystem */
92 : struct spdk_blob *blob;
93 : char *name; /* heap copy made with strdup() */
94 : uint64_t length;
95 : bool is_deleted;
96 : bool open_for_writing;
97 : uint64_t length_flushed;
98 : uint64_t length_xattr;
99 : uint64_t append_pos;
100 : uint64_t seq_byte_count;
101 : uint64_t next_seq_offset;
102 : uint32_t priority;
103 : TAILQ_ENTRY(spdk_file) tailq; /* link in fs->files */
104 : spdk_blob_id blobid; /* id of the backing blob */
105 : uint32_t ref_count;
106 : pthread_spinlock_t lock;
107 : struct cache_buffer *last;
108 : struct cache_tree *tree;
109 : TAILQ_HEAD(open_requests_head, spdk_fs_request) open_requests;
110 : TAILQ_HEAD(sync_requests_head, spdk_fs_request) sync_requests;
111 : TAILQ_ENTRY(spdk_file) cache_tailq; /* presumably the link in g_caches - TODO confirm */
112 : };
113 :
/* Records the id of a blob found with an "is_deleted" xattr while loading a
 * filesystem. Entries are queued on the load request's deleted_files list by
 * iter_cb() and consumed one at a time by _handle_deleted_files().
 */
114 : struct spdk_deleted_file {
115 : spdk_blob_id id;
116 : TAILQ_ENTRY(spdk_deleted_file) tailq;
117 : };
118 :
/* One blobfs filesystem instance layered on a blobstore.
 *
 * The addresses of md_target, sync_target and io_target are used as the
 * io_device handles registered in fs_alloc(); the channel-create callbacks
 * recover the filesystem from them with SPDK_CONTAINEROF. Each target's
 * max_ops is the number of requests preallocated per channel.
 */
119 : struct spdk_filesystem {
120 : struct spdk_blob_store *bs; /* set by common_fs_bs_init() */
121 : TAILQ_HEAD(, spdk_file) files; /* all files, linked through spdk_file.tailq */
122 : struct spdk_bs_opts bs_opts; /* only cluster_sz is filled in by the code shown here */
123 : struct spdk_bs_dev *bdev;
124 : fs_send_request_fn send_request;
125 :
126 : struct {
127 : uint32_t max_ops;
128 : struct spdk_io_channel *sync_io_channel;
129 : struct spdk_fs_channel *sync_fs_channel;
130 : } sync_target;
131 :
132 : struct {
133 : uint32_t max_ops;
134 : struct spdk_io_channel *md_io_channel;
135 : struct spdk_fs_channel *md_fs_channel;
136 : } md_target;
137 :
138 : struct {
139 : uint32_t max_ops;
140 : } io_target;
141 : };
142 :
/* Per-request callback state, embedded in every struct spdk_fs_request.
 *
 * fn      - completion callback; which union member is valid depends on the
 *           operation that filled in the request.
 * arg     - opaque argument handed back to the completion callback.
 * sem     - semaphore posted by __wake_caller() / __copy_stat() to wake a
 *           caller blocked in a synchronous API.
 * rc      - result code stored for the synchronous caller.
 * rwerrno - optional; __wake_caller() writes a non-zero error through it only
 *           when the pointed-to value is still 0.
 * iovs    - points at the embedded iov when iovcnt <= 1, otherwise at a heap
 *           array allocated by alloc_fs_request_with_iov() and released by
 *           free_fs_request().
 * op      - operation-specific scratch space; only one member is in use for
 *           any given request.
 */
143 : struct spdk_fs_cb_args {
144 : union {
145 : spdk_fs_op_with_handle_complete fs_op_with_handle;
146 : spdk_fs_op_complete fs_op;
147 : spdk_file_op_with_handle_complete file_op_with_handle;
148 : spdk_file_op_complete file_op;
149 : spdk_file_stat_op_complete stat_op;
150 : } fn;
151 : void *arg;
152 : sem_t *sem;
153 : struct spdk_filesystem *fs;
154 : struct spdk_file *file;
155 : int rc;
156 : int *rwerrno;
157 : struct iovec *iovs;
158 : uint32_t iovcnt;
159 : struct iovec iov;
160 : union {
161 : struct {
162 : TAILQ_HEAD(, spdk_deleted_file) deleted_files;
163 : } fs_load;
164 : struct {
165 : uint64_t length;
166 : } truncate;
167 : struct {
168 : struct spdk_io_channel *channel;
169 : void *pin_buf;
170 : int is_read;
171 : off_t offset;
172 : size_t length;
173 : uint64_t start_lba;
174 : uint64_t num_lba;
175 : uint32_t blocklen;
176 : } rw;
177 : struct {
178 : const char *old_name;
179 : const char *new_name;
180 : } rename;
181 : struct {
182 : struct cache_buffer *cache_buffer;
183 : uint64_t length;
184 : } flush;
185 : struct {
186 : struct cache_buffer *cache_buffer;
187 : uint64_t length;
188 : uint64_t offset;
189 : } readahead;
190 : struct {
191 : /* offset of the file when the sync request was made */
192 : uint64_t offset;
193 : TAILQ_ENTRY(spdk_fs_request) tailq;
194 : bool xattr_in_progress;
195 : /* length written to the xattr for this file - this should
196 : * always be the same as the offset if only one thread is
197 : * writing to the file, but could differ if multiple threads
198 : * are appending
199 : */
200 : uint64_t length;
201 : } sync;
202 : struct {
203 : uint32_t num_clusters;
204 : } resize;
205 : struct {
206 : const char *name;
207 : uint32_t flags;
208 : TAILQ_ENTRY(spdk_fs_request) tailq;
209 : } open;
210 : struct {
211 : const char *name;
212 : struct spdk_blob *blob;
213 : } create;
214 : struct {
215 : const char *name;
216 : } delete;
217 : struct {
218 : const char *name;
219 : } stat;
220 : } op;
221 : };
222 :
223 : static void file_free(struct spdk_file *file);
224 : static void fs_io_device_unregister(struct spdk_filesystem *fs);
225 : static void fs_free_io_channels(struct spdk_filesystem *fs);
226 :
227 : void
228 0 : spdk_fs_opts_init(struct spdk_blobfs_opts *opts)
229 : {
230 0 : opts->cluster_sz = SPDK_BLOBFS_DEFAULT_OPTS_CLUSTER_SZ;
231 0 : }
232 :
233 : static int _blobfs_cache_pool_reclaim(void *arg);
234 :
235 : static bool
236 0 : blobfs_cache_pool_need_reclaim(void)
237 : {
238 0 : size_t count;
239 :
240 0 : count = spdk_mempool_count(g_cache_pool);
241 : /* We define a aggressive policy here as the requirements from db_bench are batched, so start the poller
242 : * when the number of available cache buffer is less than 1/5 of total buffers.
243 : */
244 0 : if (count > (size_t)g_fs_cache_size / CACHE_BUFFER_SIZE / 5) {
245 0 : return false;
246 : }
247 :
248 0 : return true;
249 0 : }
250 :
251 : static void
252 20 : __start_cache_pool_mgmt(void *ctx)
253 : {
254 20 : assert(g_cache_pool_mgmt_poller == NULL);
255 20 : g_cache_pool_mgmt_poller = SPDK_POLLER_REGISTER(_blobfs_cache_pool_reclaim, NULL,
256 : BLOBFS_CACHE_POOL_POLL_PERIOD_IN_US);
257 20 : }
258 :
259 : static void
260 20 : __stop_cache_pool_mgmt(void *ctx)
261 : {
262 20 : spdk_poller_unregister(&g_cache_pool_mgmt_poller);
263 :
264 20 : assert(g_cache_pool != NULL);
265 20 : assert(spdk_mempool_count(g_cache_pool) == g_fs_cache_size / CACHE_BUFFER_SIZE);
266 20 : spdk_mempool_free(g_cache_pool);
267 20 : g_cache_pool = NULL;
268 :
269 20 : spdk_thread_exit(g_cache_pool_thread);
270 20 : }
271 :
272 : static void
273 20 : allocate_cache_pool(void)
274 : {
275 20 : assert(g_cache_pool == NULL);
276 20 : g_cache_pool = spdk_mempool_create("spdk_fs_cache",
277 20 : g_fs_cache_size / CACHE_BUFFER_SIZE,
278 20 : CACHE_BUFFER_SIZE,
279 : SPDK_MEMPOOL_DEFAULT_CACHE_SIZE,
280 : SPDK_ENV_NUMA_ID_ANY);
281 20 : if (!g_cache_pool) {
282 0 : if (spdk_mempool_lookup("spdk_fs_cache") != NULL) {
283 0 : SPDK_ERRLOG("Unable to allocate mempool: already exists\n");
284 0 : SPDK_ERRLOG("Probably running in multiprocess environment, which is "
285 : "unsupported by the blobfs library\n");
286 0 : } else {
287 0 : SPDK_ERRLOG("Create mempool failed, you may "
288 : "increase the memory and try again\n");
289 : }
290 0 : assert(false);
291 : }
292 20 : }
293 :
294 : static void
295 20 : initialize_global_cache(void)
296 : {
297 20 : pthread_mutex_lock(&g_cache_init_lock);
298 20 : if (g_fs_count == 0) {
299 20 : allocate_cache_pool();
300 20 : g_cache_pool_thread = spdk_thread_create("cache_pool_mgmt", NULL);
301 20 : assert(g_cache_pool_thread != NULL);
302 20 : spdk_thread_send_msg(g_cache_pool_thread, __start_cache_pool_mgmt, NULL);
303 20 : }
304 20 : g_fs_count++;
305 20 : pthread_mutex_unlock(&g_cache_init_lock);
306 20 : }
307 :
308 : static void
309 20 : free_global_cache(void)
310 : {
311 20 : pthread_mutex_lock(&g_cache_init_lock);
312 20 : g_fs_count--;
313 20 : if (g_fs_count == 0) {
314 20 : spdk_thread_send_msg(g_cache_pool_thread, __stop_cache_pool_mgmt, NULL);
315 20 : }
316 20 : pthread_mutex_unlock(&g_cache_init_lock);
317 20 : }
318 :
319 : static uint64_t
320 21 : __file_get_blob_size(struct spdk_file *file)
321 : {
322 21 : uint64_t cluster_sz;
323 :
324 21 : cluster_sz = file->fs->bs_opts.cluster_sz;
325 21 : return cluster_sz * spdk_blob_get_num_clusters(file->blob);
326 21 : }
327 :
/* A single filesystem request. Requests are preallocated per channel in
 * fs_channel_create() and recycled through the channel's free list.
 *
 * args must stay the first member: a pointer to args is numerically the same
 * as a pointer to the request, and some callbacks in this file may rely on
 * that when recovering the request from their context pointer.
 */
328 : struct spdk_fs_request {
329 : struct spdk_fs_cb_args args;
330 : TAILQ_ENTRY(spdk_fs_request) link; /* link in channel->reqs while the request is free */
331 : struct spdk_fs_channel *channel; /* channel the request was taken from */
332 : };
333 :
/* Per-channel context holding a fixed pool of requests.
 *
 * req_mem is the backing array allocated in fs_channel_create(); reqs is the
 * free list built from it. outstanding_reqs counts requests currently handed
 * out. lock is taken around free-list updates only when sync is true.
 */
334 : struct spdk_fs_channel {
335 : struct spdk_fs_request *req_mem;
336 : TAILQ_HEAD(, spdk_fs_request) reqs;
337 : sem_t sem; /* waited on by synchronous APIs such as spdk_fs_file_stat() */
338 : struct spdk_filesystem *fs;
339 : struct spdk_io_channel *bs_channel; /* released in fs_channel_destroy() when non-NULL */
340 : fs_send_request_fn send_request;
341 : bool sync;
342 : uint32_t outstanding_reqs;
343 : pthread_spinlock_t lock;
344 : };
345 :
/* Note: spdk_fs_file_stat() casts a struct spdk_fs_thread_ctx pointer
 * directly to struct spdk_fs_channel *, so ch must remain the first member.
 */
346 : /* For now, this is effectively an alias. But eventually we'll shift
347 : * some data members over. */
348 : struct spdk_fs_thread_ctx {
349 : struct spdk_fs_channel ch;
350 : };
351 :
352 : static struct spdk_fs_request *
353 156 : alloc_fs_request_with_iov(struct spdk_fs_channel *channel, uint32_t iovcnt)
354 : {
355 156 : struct spdk_fs_request *req;
356 156 : struct iovec *iovs = NULL;
357 :
358 156 : if (iovcnt > 1) {
359 4 : iovs = calloc(iovcnt, sizeof(struct iovec));
360 4 : if (!iovs) {
361 0 : return NULL;
362 : }
363 4 : }
364 :
365 156 : if (channel->sync) {
366 66 : pthread_spin_lock(&channel->lock);
367 66 : }
368 :
369 156 : req = TAILQ_FIRST(&channel->reqs);
370 156 : if (req) {
371 156 : channel->outstanding_reqs++;
372 156 : TAILQ_REMOVE(&channel->reqs, req, link);
373 156 : }
374 :
375 156 : if (channel->sync) {
376 66 : pthread_spin_unlock(&channel->lock);
377 66 : }
378 :
379 156 : if (req == NULL) {
380 0 : SPDK_ERRLOG("Cannot allocate req on spdk_fs_channel =%p\n", channel);
381 0 : free(iovs);
382 0 : return NULL;
383 : }
384 156 : memset(req, 0, sizeof(*req));
385 156 : req->channel = channel;
386 156 : if (iovcnt > 1) {
387 4 : req->args.iovs = iovs;
388 4 : } else {
389 152 : req->args.iovs = &req->args.iov;
390 : }
391 156 : req->args.iovcnt = iovcnt;
392 :
393 156 : return req;
394 156 : }
395 :
/* Take a request that carries no iovecs from the channel's free list.
 * Returns NULL when the channel has no free request.
 */
static struct spdk_fs_request *
alloc_fs_request(struct spdk_fs_channel *channel)
{
	const uint32_t no_iovs = 0;

	return alloc_fs_request_with_iov(channel, no_iovs);
}
401 :
402 : static void
403 156 : free_fs_request(struct spdk_fs_request *req)
404 : {
405 156 : struct spdk_fs_channel *channel = req->channel;
406 :
407 156 : if (req->args.iovcnt > 1) {
408 4 : free(req->args.iovs);
409 4 : }
410 :
411 156 : if (channel->sync) {
412 66 : pthread_spin_lock(&channel->lock);
413 66 : }
414 :
415 156 : TAILQ_INSERT_HEAD(&req->channel->reqs, req, link);
416 156 : channel->outstanding_reqs--;
417 :
418 156 : if (channel->sync) {
419 66 : pthread_spin_unlock(&channel->lock);
420 66 : }
421 156 : }
422 :
423 : static int
424 53 : fs_channel_create(struct spdk_filesystem *fs, struct spdk_fs_channel *channel,
425 : uint32_t max_ops)
426 : {
427 53 : uint32_t i;
428 :
429 53 : channel->req_mem = calloc(max_ops, sizeof(struct spdk_fs_request));
430 53 : if (!channel->req_mem) {
431 0 : return -1;
432 : }
433 :
434 53 : channel->outstanding_reqs = 0;
435 53 : TAILQ_INIT(&channel->reqs);
436 53 : sem_init(&channel->sem, 0, 0);
437 :
438 27189 : for (i = 0; i < max_ops; i++) {
439 27136 : TAILQ_INSERT_TAIL(&channel->reqs, &channel->req_mem[i], link);
440 27136 : }
441 :
442 53 : channel->fs = fs;
443 :
444 53 : return 0;
445 53 : }
446 :
447 : static int
448 20 : fs_md_channel_create(void *io_device, void *ctx_buf)
449 : {
450 20 : struct spdk_filesystem *fs;
451 20 : struct spdk_fs_channel *channel = ctx_buf;
452 :
453 20 : fs = SPDK_CONTAINEROF(io_device, struct spdk_filesystem, md_target);
454 :
455 20 : return fs_channel_create(fs, channel, fs->md_target.max_ops);
456 20 : }
457 :
458 : static int
459 20 : fs_sync_channel_create(void *io_device, void *ctx_buf)
460 : {
461 20 : struct spdk_filesystem *fs;
462 20 : struct spdk_fs_channel *channel = ctx_buf;
463 :
464 20 : fs = SPDK_CONTAINEROF(io_device, struct spdk_filesystem, sync_target);
465 :
466 20 : return fs_channel_create(fs, channel, fs->sync_target.max_ops);
467 20 : }
468 :
469 : static int
470 1 : fs_io_channel_create(void *io_device, void *ctx_buf)
471 : {
472 1 : struct spdk_filesystem *fs;
473 1 : struct spdk_fs_channel *channel = ctx_buf;
474 :
475 1 : fs = SPDK_CONTAINEROF(io_device, struct spdk_filesystem, io_target);
476 :
477 1 : return fs_channel_create(fs, channel, fs->io_target.max_ops);
478 1 : }
479 :
480 : static void
481 53 : fs_channel_destroy(void *io_device, void *ctx_buf)
482 : {
483 53 : struct spdk_fs_channel *channel = ctx_buf;
484 :
485 53 : if (channel->outstanding_reqs > 0) {
486 0 : SPDK_ERRLOG("channel freed with %" PRIu32 " outstanding requests!\n",
487 : channel->outstanding_reqs);
488 0 : }
489 :
490 53 : free(channel->req_mem);
491 53 : if (channel->bs_channel != NULL) {
492 41 : spdk_bs_free_io_channel(channel->bs_channel);
493 41 : }
494 53 : }
495 :
496 : static void
497 0 : __send_request_direct(fs_request_fn fn, void *arg)
498 : {
499 0 : fn(arg);
500 0 : }
501 :
502 : static void
503 20 : common_fs_bs_init(struct spdk_filesystem *fs, struct spdk_blob_store *bs)
504 : {
505 20 : fs->bs = bs;
506 20 : fs->bs_opts.cluster_sz = spdk_bs_get_cluster_size(bs);
507 20 : fs->md_target.md_fs_channel->bs_channel = spdk_bs_alloc_io_channel(fs->bs);
508 20 : fs->md_target.md_fs_channel->send_request = __send_request_direct;
509 20 : fs->sync_target.sync_fs_channel->bs_channel = spdk_bs_alloc_io_channel(fs->bs);
510 20 : fs->sync_target.sync_fs_channel->send_request = __send_request_direct;
511 :
512 20 : initialize_global_cache();
513 20 : }
514 :
515 : static void
516 18 : init_cb(void *ctx, struct spdk_blob_store *bs, int bserrno)
517 : {
518 18 : struct spdk_fs_request *req = ctx;
519 18 : struct spdk_fs_cb_args *args = &req->args;
520 18 : struct spdk_filesystem *fs = args->fs;
521 :
522 18 : if (bserrno == 0) {
523 18 : common_fs_bs_init(fs, bs);
524 18 : } else {
525 0 : free(fs);
526 0 : fs = NULL;
527 : }
528 :
529 18 : args->fn.fs_op_with_handle(args->arg, fs, bserrno);
530 18 : free_fs_request(req);
531 18 : }
532 :
533 : static struct spdk_filesystem *
534 20 : fs_alloc(struct spdk_bs_dev *dev, fs_send_request_fn send_request_fn)
535 : {
536 20 : struct spdk_filesystem *fs;
537 :
538 20 : fs = calloc(1, sizeof(*fs));
539 20 : if (fs == NULL) {
540 0 : return NULL;
541 : }
542 :
543 20 : fs->bdev = dev;
544 20 : fs->send_request = send_request_fn;
545 20 : TAILQ_INIT(&fs->files);
546 :
547 20 : fs->md_target.max_ops = 512;
548 20 : spdk_io_device_register(&fs->md_target, fs_md_channel_create, fs_channel_destroy,
549 : sizeof(struct spdk_fs_channel), "blobfs_md");
550 20 : fs->md_target.md_io_channel = spdk_get_io_channel(&fs->md_target);
551 20 : fs->md_target.md_fs_channel = spdk_io_channel_get_ctx(fs->md_target.md_io_channel);
552 :
553 20 : fs->sync_target.max_ops = 512;
554 20 : spdk_io_device_register(&fs->sync_target, fs_sync_channel_create, fs_channel_destroy,
555 : sizeof(struct spdk_fs_channel), "blobfs_sync");
556 20 : fs->sync_target.sync_io_channel = spdk_get_io_channel(&fs->sync_target);
557 20 : fs->sync_target.sync_fs_channel = spdk_io_channel_get_ctx(fs->sync_target.sync_io_channel);
558 :
559 20 : fs->io_target.max_ops = 512;
560 20 : spdk_io_device_register(&fs->io_target, fs_io_channel_create, fs_channel_destroy,
561 : sizeof(struct spdk_fs_channel), "blobfs_io");
562 :
563 20 : return fs;
564 20 : }
565 :
566 : static void
567 51 : __wake_caller(void *arg, int fserrno)
568 : {
569 51 : struct spdk_fs_cb_args *args = arg;
570 :
571 51 : if ((args->rwerrno != NULL) && (*(args->rwerrno) == 0) && fserrno) {
572 0 : *(args->rwerrno) = fserrno;
573 0 : }
574 51 : args->rc = fserrno;
575 51 : sem_post(args->sem);
576 51 : }
577 :
578 : void
579 18 : spdk_fs_init(struct spdk_bs_dev *dev, struct spdk_blobfs_opts *opt,
580 : fs_send_request_fn send_request_fn,
581 : spdk_fs_op_with_handle_complete cb_fn, void *cb_arg)
582 : {
583 18 : struct spdk_filesystem *fs;
584 18 : struct spdk_fs_request *req;
585 18 : struct spdk_fs_cb_args *args;
586 18 : struct spdk_bs_opts opts = {};
587 :
588 18 : fs = fs_alloc(dev, send_request_fn);
589 18 : if (fs == NULL) {
590 0 : cb_fn(cb_arg, NULL, -ENOMEM);
591 0 : return;
592 : }
593 :
594 18 : req = alloc_fs_request(fs->md_target.md_fs_channel);
595 18 : if (req == NULL) {
596 0 : fs_free_io_channels(fs);
597 0 : fs_io_device_unregister(fs);
598 0 : cb_fn(cb_arg, NULL, -ENOMEM);
599 0 : return;
600 : }
601 :
602 18 : args = &req->args;
603 18 : args->fn.fs_op_with_handle = cb_fn;
604 18 : args->arg = cb_arg;
605 18 : args->fs = fs;
606 :
607 18 : spdk_bs_opts_init(&opts, sizeof(opts));
608 18 : snprintf(opts.bstype.bstype, sizeof(opts.bstype.bstype), SPDK_BLOBFS_SIGNATURE);
609 18 : if (opt) {
610 0 : opts.cluster_sz = opt->cluster_sz;
611 0 : }
612 18 : spdk_bs_init(dev, &opts, init_cb, req);
613 18 : }
614 :
615 : static struct spdk_file *
616 18 : file_alloc(struct spdk_filesystem *fs)
617 : {
618 18 : struct spdk_file *file;
619 :
620 18 : file = calloc(1, sizeof(*file));
621 18 : if (file == NULL) {
622 0 : return NULL;
623 : }
624 :
625 18 : file->tree = calloc(1, sizeof(*file->tree));
626 18 : if (file->tree == NULL) {
627 0 : free(file);
628 0 : return NULL;
629 : }
630 :
631 18 : if (pthread_spin_init(&file->lock, 0)) {
632 0 : free(file->tree);
633 0 : free(file);
634 0 : return NULL;
635 : }
636 :
637 18 : file->fs = fs;
638 18 : TAILQ_INIT(&file->open_requests);
639 18 : TAILQ_INIT(&file->sync_requests);
640 18 : TAILQ_INSERT_TAIL(&fs->files, file, tailq);
641 18 : file->priority = SPDK_FILE_PRIORITY_LOW;
642 18 : return file;
643 18 : }
644 :
645 : static void fs_load_done(void *ctx, int bserrno);
646 :
647 : static int
648 2 : _handle_deleted_files(struct spdk_fs_request *req)
649 : {
650 2 : struct spdk_fs_cb_args *args = &req->args;
651 2 : struct spdk_filesystem *fs = args->fs;
652 :
653 2 : if (!TAILQ_EMPTY(&args->op.fs_load.deleted_files)) {
654 0 : struct spdk_deleted_file *deleted_file;
655 :
656 0 : deleted_file = TAILQ_FIRST(&args->op.fs_load.deleted_files);
657 0 : TAILQ_REMOVE(&args->op.fs_load.deleted_files, deleted_file, tailq);
658 0 : spdk_bs_delete_blob(fs->bs, deleted_file->id, fs_load_done, req);
659 0 : free(deleted_file);
660 0 : return 0;
661 0 : }
662 :
663 2 : return 1;
664 2 : }
665 :
666 : static void
667 2 : fs_load_done(void *ctx, int bserrno)
668 : {
669 2 : struct spdk_fs_request *req = ctx;
670 2 : struct spdk_fs_cb_args *args = &req->args;
671 2 : struct spdk_filesystem *fs = args->fs;
672 :
673 : /* The filesystem has been loaded. Now check if there are any files that
674 : * were marked for deletion before last unload. Do not complete the
675 : * fs_load callback until all of them have been deleted on disk.
676 : */
677 2 : if (_handle_deleted_files(req) == 0) {
678 : /* We found a file that's been marked for deleting but not actually
679 : * deleted yet. This function will get called again once the delete
680 : * operation is completed.
681 : */
682 0 : return;
683 : }
684 :
685 2 : args->fn.fs_op_with_handle(args->arg, fs, 0);
686 2 : free_fs_request(req);
687 :
688 2 : }
689 :
690 : static void
691 2 : iter_cb(void *ctx, struct spdk_blob *blob, int rc)
692 : {
693 2 : struct spdk_fs_request *req = ctx;
694 2 : struct spdk_fs_cb_args *args = &req->args;
695 2 : struct spdk_filesystem *fs = args->fs;
696 2 : uint64_t *length;
697 2 : const char *name;
698 2 : uint32_t *is_deleted;
699 2 : size_t value_len;
700 :
701 2 : if (rc < 0) {
702 0 : args->fn.fs_op_with_handle(args->arg, fs, rc);
703 0 : free_fs_request(req);
704 0 : return;
705 : }
706 :
707 2 : rc = spdk_blob_get_xattr_value(blob, "name", (const void **)&name, &value_len);
708 2 : if (rc < 0) {
709 0 : args->fn.fs_op_with_handle(args->arg, fs, rc);
710 0 : free_fs_request(req);
711 0 : return;
712 : }
713 :
714 2 : rc = spdk_blob_get_xattr_value(blob, "length", (const void **)&length, &value_len);
715 2 : if (rc < 0) {
716 0 : args->fn.fs_op_with_handle(args->arg, fs, rc);
717 0 : free_fs_request(req);
718 0 : return;
719 : }
720 :
721 2 : assert(value_len == 8);
722 :
723 : /* This file could be deleted last time without close it, then app crashed, so we delete it now */
724 2 : rc = spdk_blob_get_xattr_value(blob, "is_deleted", (const void **)&is_deleted, &value_len);
725 2 : if (rc < 0) {
726 2 : struct spdk_file *f;
727 :
728 2 : f = file_alloc(fs);
729 2 : if (f == NULL) {
730 0 : SPDK_ERRLOG("Cannot allocate file to handle deleted file on disk\n");
731 0 : args->fn.fs_op_with_handle(args->arg, fs, -ENOMEM);
732 0 : free_fs_request(req);
733 0 : return;
734 : }
735 :
736 2 : f->name = strdup(name);
737 2 : if (!f->name) {
738 0 : SPDK_ERRLOG("Cannot allocate memory for file name\n");
739 0 : args->fn.fs_op_with_handle(args->arg, fs, -ENOMEM);
740 0 : free_fs_request(req);
741 0 : file_free(f);
742 0 : return;
743 : }
744 :
745 2 : f->blobid = spdk_blob_get_id(blob);
746 2 : f->length = *length;
747 2 : f->length_flushed = *length;
748 2 : f->length_xattr = *length;
749 2 : f->append_pos = *length;
750 2 : SPDK_DEBUGLOG(blobfs, "added file %s length=%ju\n", f->name, f->length);
751 2 : } else {
752 0 : struct spdk_deleted_file *deleted_file;
753 :
754 0 : deleted_file = calloc(1, sizeof(*deleted_file));
755 0 : if (deleted_file == NULL) {
756 0 : args->fn.fs_op_with_handle(args->arg, fs, -ENOMEM);
757 0 : free_fs_request(req);
758 0 : return;
759 : }
760 0 : deleted_file->id = spdk_blob_get_id(blob);
761 0 : TAILQ_INSERT_TAIL(&args->op.fs_load.deleted_files, deleted_file, tailq);
762 0 : }
763 2 : }
764 :
765 : static void
766 2 : load_cb(void *ctx, struct spdk_blob_store *bs, int bserrno)
767 : {
768 2 : struct spdk_fs_request *req = ctx;
769 2 : struct spdk_fs_cb_args *args = &req->args;
770 2 : struct spdk_filesystem *fs = args->fs;
771 2 : struct spdk_bs_type bstype;
772 : static const struct spdk_bs_type blobfs_type = {SPDK_BLOBFS_SIGNATURE};
773 : static const struct spdk_bs_type zeros;
774 :
775 2 : if (bserrno != 0) {
776 0 : args->fn.fs_op_with_handle(args->arg, NULL, bserrno);
777 0 : free_fs_request(req);
778 0 : fs_free_io_channels(fs);
779 0 : fs_io_device_unregister(fs);
780 0 : return;
781 : }
782 :
783 2 : bstype = spdk_bs_get_bstype(bs);
784 :
785 2 : if (!memcmp(&bstype, &zeros, sizeof(bstype))) {
786 0 : SPDK_DEBUGLOG(blobfs, "assigning bstype\n");
787 0 : spdk_bs_set_bstype(bs, blobfs_type);
788 2 : } else if (memcmp(&bstype, &blobfs_type, sizeof(bstype))) {
789 0 : SPDK_ERRLOG("not blobfs\n");
790 0 : SPDK_LOGDUMP(blobfs, "bstype", &bstype, sizeof(bstype));
791 0 : args->fn.fs_op_with_handle(args->arg, NULL, -EINVAL);
792 0 : free_fs_request(req);
793 0 : fs_free_io_channels(fs);
794 0 : fs_io_device_unregister(fs);
795 0 : return;
796 : }
797 :
798 2 : common_fs_bs_init(fs, bs);
799 2 : fs_load_done(req, 0);
800 2 : }
801 :
802 : static void
803 20 : fs_io_device_unregister(struct spdk_filesystem *fs)
804 : {
805 20 : assert(fs != NULL);
806 20 : spdk_io_device_unregister(&fs->md_target, NULL);
807 20 : spdk_io_device_unregister(&fs->sync_target, NULL);
808 20 : spdk_io_device_unregister(&fs->io_target, NULL);
809 20 : free(fs);
810 20 : }
811 :
812 : static void
813 20 : fs_free_io_channels(struct spdk_filesystem *fs)
814 : {
815 20 : assert(fs != NULL);
816 20 : spdk_fs_free_io_channel(fs->md_target.md_io_channel);
817 20 : spdk_fs_free_io_channel(fs->sync_target.sync_io_channel);
818 20 : }
819 :
820 : void
821 2 : spdk_fs_load(struct spdk_bs_dev *dev, fs_send_request_fn send_request_fn,
822 : spdk_fs_op_with_handle_complete cb_fn, void *cb_arg)
823 : {
824 2 : struct spdk_filesystem *fs;
825 2 : struct spdk_fs_cb_args *args;
826 2 : struct spdk_fs_request *req;
827 2 : struct spdk_bs_opts bs_opts;
828 :
829 2 : fs = fs_alloc(dev, send_request_fn);
830 2 : if (fs == NULL) {
831 0 : cb_fn(cb_arg, NULL, -ENOMEM);
832 0 : return;
833 : }
834 :
835 2 : req = alloc_fs_request(fs->md_target.md_fs_channel);
836 2 : if (req == NULL) {
837 0 : fs_free_io_channels(fs);
838 0 : fs_io_device_unregister(fs);
839 0 : cb_fn(cb_arg, NULL, -ENOMEM);
840 0 : return;
841 : }
842 :
843 2 : args = &req->args;
844 2 : args->fn.fs_op_with_handle = cb_fn;
845 2 : args->arg = cb_arg;
846 2 : args->fs = fs;
847 2 : TAILQ_INIT(&args->op.fs_load.deleted_files);
848 2 : spdk_bs_opts_init(&bs_opts, sizeof(bs_opts));
849 2 : bs_opts.iter_cb_fn = iter_cb;
850 2 : bs_opts.iter_cb_arg = req;
851 2 : spdk_bs_load(dev, &bs_opts, load_cb, req);
852 2 : }
853 :
854 : static void
855 20 : unload_cb(void *ctx, int bserrno)
856 : {
857 20 : struct spdk_fs_request *req = ctx;
858 20 : struct spdk_fs_cb_args *args = &req->args;
859 20 : struct spdk_filesystem *fs = args->fs;
860 20 : struct spdk_file *file, *tmp;
861 :
862 26 : TAILQ_FOREACH_SAFE(file, &fs->files, tailq, tmp) {
863 6 : TAILQ_REMOVE(&fs->files, file, tailq);
864 6 : file_free(file);
865 6 : }
866 :
867 20 : free_global_cache();
868 :
869 20 : args->fn.fs_op(args->arg, bserrno);
870 20 : free(req);
871 :
872 20 : fs_io_device_unregister(fs);
873 20 : }
874 :
875 : void
876 20 : spdk_fs_unload(struct spdk_filesystem *fs, spdk_fs_op_complete cb_fn, void *cb_arg)
877 : {
878 20 : struct spdk_fs_request *req;
879 20 : struct spdk_fs_cb_args *args;
880 :
881 : /*
882 : * We must free the md_channel before unloading the blobstore, so just
883 : * allocate this request from the general heap.
884 : */
885 20 : req = calloc(1, sizeof(*req));
886 20 : if (req == NULL) {
887 0 : cb_fn(cb_arg, -ENOMEM);
888 0 : return;
889 : }
890 :
891 20 : args = &req->args;
892 20 : args->fn.fs_op = cb_fn;
893 20 : args->arg = cb_arg;
894 20 : args->fs = fs;
895 :
896 20 : fs_free_io_channels(fs);
897 20 : spdk_bs_unload(fs->bs, unload_cb, req);
898 20 : }
899 :
900 : static struct spdk_file *
901 75 : fs_find_file(struct spdk_filesystem *fs, const char *name)
902 : {
903 75 : struct spdk_file *file;
904 :
905 82 : TAILQ_FOREACH(file, &fs->files, tailq) {
906 48 : if (!strncmp(name, file->name, SPDK_FILE_NAME_MAX)) {
907 41 : return file;
908 : }
909 7 : }
910 :
911 34 : return NULL;
912 75 : }
913 :
914 : void
915 4 : spdk_fs_file_stat_async(struct spdk_filesystem *fs, const char *name,
916 : spdk_file_stat_op_complete cb_fn, void *cb_arg)
917 : {
918 4 : struct spdk_file_stat stat;
919 4 : struct spdk_file *f = NULL;
920 :
921 4 : if (strnlen(name, SPDK_FILE_NAME_MAX + 1) == SPDK_FILE_NAME_MAX + 1) {
922 0 : cb_fn(cb_arg, NULL, -ENAMETOOLONG);
923 0 : return;
924 : }
925 :
926 4 : f = fs_find_file(fs, name);
927 4 : if (f != NULL) {
928 4 : stat.blobid = f->blobid;
929 4 : stat.size = f->append_pos >= f->length ? f->append_pos : f->length;
930 4 : cb_fn(cb_arg, &stat, 0);
931 4 : return;
932 : }
933 :
934 0 : cb_fn(cb_arg, NULL, -ENOENT);
935 4 : }
936 :
937 : static void
938 4 : __copy_stat(void *arg, struct spdk_file_stat *stat, int fserrno)
939 : {
940 4 : struct spdk_fs_request *req = arg;
941 4 : struct spdk_fs_cb_args *args = &req->args;
942 :
943 4 : args->rc = fserrno;
944 4 : if (fserrno == 0) {
945 4 : memcpy(args->arg, stat, sizeof(*stat));
946 4 : }
947 4 : sem_post(args->sem);
948 4 : }
949 :
950 : static void
951 4 : __file_stat(void *arg)
952 : {
953 4 : struct spdk_fs_request *req = arg;
954 4 : struct spdk_fs_cb_args *args = &req->args;
955 :
956 8 : spdk_fs_file_stat_async(args->fs, args->op.stat.name,
957 4 : args->fn.stat_op, req);
958 4 : }
959 :
960 : int
961 4 : spdk_fs_file_stat(struct spdk_filesystem *fs, struct spdk_fs_thread_ctx *ctx,
962 : const char *name, struct spdk_file_stat *stat)
963 : {
964 4 : struct spdk_fs_channel *channel = (struct spdk_fs_channel *)ctx;
965 4 : struct spdk_fs_request *req;
966 4 : int rc;
967 :
968 4 : req = alloc_fs_request(channel);
969 4 : if (req == NULL) {
970 0 : SPDK_ERRLOG("Cannot allocate stat req on file=%s\n", name);
971 0 : return -ENOMEM;
972 : }
973 :
974 4 : req->args.fs = fs;
975 4 : req->args.op.stat.name = name;
976 4 : req->args.fn.stat_op = __copy_stat;
977 4 : req->args.arg = stat;
978 4 : req->args.sem = &channel->sem;
979 4 : channel->send_request(__file_stat, req);
980 4 : sem_wait(&channel->sem);
981 :
982 4 : rc = req->args.rc;
983 4 : free_fs_request(req);
984 :
985 4 : return rc;
986 4 : }
987 :
988 : static void
989 16 : fs_create_blob_close_cb(void *ctx, int bserrno)
990 : {
991 16 : int rc;
992 16 : struct spdk_fs_request *req = ctx;
993 16 : struct spdk_fs_cb_args *args = &req->args;
994 :
995 16 : rc = args->rc ? args->rc : bserrno;
996 16 : args->fn.file_op(args->arg, rc);
997 16 : free_fs_request(req);
998 16 : }
999 :
1000 : static void
1001 16 : fs_create_blob_resize_cb(void *ctx, int bserrno)
1002 : {
1003 16 : struct spdk_fs_request *req = ctx;
1004 16 : struct spdk_fs_cb_args *args = &req->args;
1005 16 : struct spdk_file *f = args->file;
1006 16 : struct spdk_blob *blob = args->op.create.blob;
1007 16 : uint64_t length = 0;
1008 :
1009 16 : args->rc = bserrno;
1010 16 : if (bserrno) {
1011 0 : spdk_blob_close(blob, fs_create_blob_close_cb, args);
1012 0 : return;
1013 : }
1014 :
1015 16 : spdk_blob_set_xattr(blob, "name", f->name, strlen(f->name) + 1);
1016 16 : spdk_blob_set_xattr(blob, "length", &length, sizeof(length));
1017 :
1018 16 : spdk_blob_close(blob, fs_create_blob_close_cb, args);
1019 16 : }
1020 :
1021 : static void
1022 16 : fs_create_blob_open_cb(void *ctx, struct spdk_blob *blob, int bserrno)
1023 : {
1024 16 : struct spdk_fs_request *req = ctx;
1025 16 : struct spdk_fs_cb_args *args = &req->args;
1026 :
1027 16 : if (bserrno) {
1028 0 : args->fn.file_op(args->arg, bserrno);
1029 0 : free_fs_request(req);
1030 0 : return;
1031 : }
1032 :
1033 16 : args->op.create.blob = blob;
1034 16 : spdk_blob_resize(blob, 1, fs_create_blob_resize_cb, req);
1035 16 : }
1036 :
1037 : static void
1038 16 : fs_create_blob_create_cb(void *ctx, spdk_blob_id blobid, int bserrno)
1039 : {
1040 16 : struct spdk_fs_request *req = ctx;
1041 16 : struct spdk_fs_cb_args *args = &req->args;
1042 16 : struct spdk_file *f = args->file;
1043 :
1044 16 : if (bserrno) {
1045 0 : args->fn.file_op(args->arg, bserrno);
1046 0 : free_fs_request(req);
1047 0 : return;
1048 : }
1049 :
1050 16 : f->blobid = blobid;
1051 16 : spdk_bs_open_blob(f->fs->bs, blobid, fs_create_blob_open_cb, req);
1052 16 : }
1053 :
1054 : void
1055 19 : spdk_fs_create_file_async(struct spdk_filesystem *fs, const char *name,
1056 : spdk_file_op_complete cb_fn, void *cb_arg)
1057 : {
1058 19 : struct spdk_file *file;
1059 19 : struct spdk_fs_request *req;
1060 19 : struct spdk_fs_cb_args *args;
1061 :
1062 19 : if (strnlen(name, SPDK_FILE_NAME_MAX + 1) == SPDK_FILE_NAME_MAX + 1) {
1063 1 : cb_fn(cb_arg, -ENAMETOOLONG);
1064 1 : return;
1065 : }
1066 :
1067 18 : file = fs_find_file(fs, name);
1068 18 : if (file != NULL) {
1069 2 : cb_fn(cb_arg, -EEXIST);
1070 2 : return;
1071 : }
1072 :
1073 16 : file = file_alloc(fs);
1074 16 : if (file == NULL) {
1075 0 : SPDK_ERRLOG("Cannot allocate new file for creation\n");
1076 0 : cb_fn(cb_arg, -ENOMEM);
1077 0 : return;
1078 : }
1079 :
1080 16 : req = alloc_fs_request(fs->md_target.md_fs_channel);
1081 16 : if (req == NULL) {
1082 0 : SPDK_ERRLOG("Cannot allocate create async req for file=%s\n", name);
1083 0 : TAILQ_REMOVE(&fs->files, file, tailq);
1084 0 : file_free(file);
1085 0 : cb_fn(cb_arg, -ENOMEM);
1086 0 : return;
1087 : }
1088 :
1089 16 : args = &req->args;
1090 16 : args->file = file;
1091 16 : args->fn.file_op = cb_fn;
1092 16 : args->arg = cb_arg;
1093 :
1094 16 : file->name = strdup(name);
1095 16 : if (!file->name) {
1096 0 : SPDK_ERRLOG("Cannot allocate file->name for file=%s\n", name);
1097 0 : free_fs_request(req);
1098 0 : TAILQ_REMOVE(&fs->files, file, tailq);
1099 0 : file_free(file);
1100 0 : cb_fn(cb_arg, -ENOMEM);
1101 0 : return;
1102 : }
1103 16 : spdk_bs_create_blob(fs->bs, fs_create_blob_create_cb, args);
1104 19 : }
1105 :
1106 : static void
1107 2 : __fs_create_file_done(void *arg, int fserrno)
1108 : {
1109 2 : struct spdk_fs_request *req = arg;
1110 2 : struct spdk_fs_cb_args *args = &req->args;
1111 :
1112 2 : SPDK_DEBUGLOG(blobfs, "file=%s\n", args->op.create.name);
1113 2 : __wake_caller(args, fserrno);
1114 2 : }
1115 :
1116 : static void
1117 2 : __fs_create_file(void *arg)
1118 : {
1119 2 : struct spdk_fs_request *req = arg;
1120 2 : struct spdk_fs_cb_args *args = &req->args;
1121 :
1122 2 : SPDK_DEBUGLOG(blobfs, "file=%s\n", args->op.create.name);
1123 2 : spdk_fs_create_file_async(args->fs, args->op.create.name, __fs_create_file_done, req);
1124 2 : }
1125 :
1126 : int
1127 2 : spdk_fs_create_file(struct spdk_filesystem *fs, struct spdk_fs_thread_ctx *ctx, const char *name)
1128 : {
1129 2 : struct spdk_fs_channel *channel = (struct spdk_fs_channel *)ctx;
1130 2 : struct spdk_fs_request *req;
1131 2 : struct spdk_fs_cb_args *args;
1132 2 : int rc;
1133 :
1134 2 : SPDK_DEBUGLOG(blobfs, "file=%s\n", name);
1135 :
1136 2 : req = alloc_fs_request(channel);
1137 2 : if (req == NULL) {
1138 0 : SPDK_ERRLOG("Cannot allocate req to create file=%s\n", name);
1139 0 : return -ENOMEM;
1140 : }
1141 :
1142 2 : args = &req->args;
1143 2 : args->fs = fs;
1144 2 : args->op.create.name = name;
1145 2 : args->sem = &channel->sem;
1146 2 : fs->send_request(__fs_create_file, req);
1147 2 : sem_wait(&channel->sem);
1148 2 : rc = args->rc;
1149 2 : free_fs_request(req);
1150 :
1151 2 : return rc;
1152 2 : }
1153 :
1154 : static void
1155 17 : fs_open_blob_done(void *ctx, struct spdk_blob *blob, int bserrno)
1156 : {
1157 17 : struct spdk_fs_request *req = ctx;
1158 17 : struct spdk_fs_cb_args *args = &req->args;
1159 17 : struct spdk_file *f = args->file;
1160 :
1161 17 : f->blob = blob;
1162 34 : while (!TAILQ_EMPTY(&f->open_requests)) {
1163 17 : req = TAILQ_FIRST(&f->open_requests);
1164 17 : args = &req->args;
1165 17 : TAILQ_REMOVE(&f->open_requests, req, args.op.open.tailq);
1166 17 : spdk_trace_record(TRACE_BLOBFS_OPEN, 0, 0, 0, f->name);
1167 17 : args->fn.file_op_with_handle(args->arg, f, bserrno);
1168 17 : free_fs_request(req);
1169 : }
1170 17 : }
1171 :
1172 : static void
1173 17 : fs_open_blob_create_cb(void *ctx, int bserrno)
1174 : {
1175 17 : struct spdk_fs_request *req = ctx;
1176 17 : struct spdk_fs_cb_args *args = &req->args;
1177 17 : struct spdk_file *file = args->file;
1178 17 : struct spdk_filesystem *fs = args->fs;
1179 :
1180 17 : if (file == NULL) {
1181 : /*
1182 : * This is from an open with CREATE flag - the file
1183 : * is now created so look it up in the file list for this
1184 : * filesystem.
1185 : */
1186 13 : file = fs_find_file(fs, args->op.open.name);
1187 13 : assert(file != NULL);
1188 13 : args->file = file;
1189 13 : }
1190 :
1191 17 : file->ref_count++;
1192 17 : TAILQ_INSERT_TAIL(&file->open_requests, req, args.op.open.tailq);
1193 17 : if (file->ref_count == 1) {
1194 17 : assert(file->blob == NULL);
1195 17 : spdk_bs_open_blob(fs->bs, file->blobid, fs_open_blob_done, req);
1196 17 : } else if (file->blob != NULL) {
1197 0 : fs_open_blob_done(req, file->blob, 0);
1198 0 : } else {
1199 : /*
1200 : * The blob open for this file is in progress due to a previous
1201 : * open request. When that open completes, it will invoke the
1202 : * open callback for this request.
1203 : */
1204 : }
1205 17 : }
1206 :
1207 : void
1208 21 : spdk_fs_open_file_async(struct spdk_filesystem *fs, const char *name, uint32_t flags,
1209 : spdk_file_op_with_handle_complete cb_fn, void *cb_arg)
1210 : {
1211 21 : struct spdk_file *f = NULL;
1212 21 : struct spdk_fs_request *req;
1213 21 : struct spdk_fs_cb_args *args;
1214 :
1215 21 : if (strnlen(name, SPDK_FILE_NAME_MAX + 1) == SPDK_FILE_NAME_MAX + 1) {
1216 1 : cb_fn(cb_arg, NULL, -ENAMETOOLONG);
1217 1 : return;
1218 : }
1219 :
1220 20 : f = fs_find_file(fs, name);
1221 20 : if (f == NULL && !(flags & SPDK_BLOBFS_OPEN_CREATE)) {
1222 2 : cb_fn(cb_arg, NULL, -ENOENT);
1223 2 : return;
1224 : }
1225 :
1226 18 : if (f != NULL && f->is_deleted == true) {
1227 1 : cb_fn(cb_arg, NULL, -ENOENT);
1228 1 : return;
1229 : }
1230 :
1231 17 : req = alloc_fs_request(fs->md_target.md_fs_channel);
1232 17 : if (req == NULL) {
1233 0 : SPDK_ERRLOG("Cannot allocate async open req for file=%s\n", name);
1234 0 : cb_fn(cb_arg, NULL, -ENOMEM);
1235 0 : return;
1236 : }
1237 :
1238 17 : args = &req->args;
1239 17 : args->fn.file_op_with_handle = cb_fn;
1240 17 : args->arg = cb_arg;
1241 17 : args->file = f;
1242 17 : args->fs = fs;
1243 17 : args->op.open.name = name;
1244 :
1245 17 : if (f == NULL) {
1246 13 : spdk_fs_create_file_async(fs, name, fs_open_blob_create_cb, req);
1247 13 : } else {
1248 4 : fs_open_blob_create_cb(req, 0);
1249 : }
1250 21 : }
1251 :
1252 : static void
1253 13 : __fs_open_file_done(void *arg, struct spdk_file *file, int bserrno)
1254 : {
1255 13 : struct spdk_fs_request *req = arg;
1256 13 : struct spdk_fs_cb_args *args = &req->args;
1257 :
1258 13 : args->file = file;
1259 13 : SPDK_DEBUGLOG(blobfs, "file=%s\n", args->op.open.name);
1260 13 : __wake_caller(args, bserrno);
1261 13 : }
1262 :
1263 : static void
1264 13 : __fs_open_file(void *arg)
1265 : {
1266 13 : struct spdk_fs_request *req = arg;
1267 13 : struct spdk_fs_cb_args *args = &req->args;
1268 :
1269 13 : SPDK_DEBUGLOG(blobfs, "file=%s\n", args->op.open.name);
1270 13 : spdk_fs_open_file_async(args->fs, args->op.open.name, args->op.open.flags,
1271 13 : __fs_open_file_done, req);
1272 13 : }
1273 :
1274 : int
1275 13 : spdk_fs_open_file(struct spdk_filesystem *fs, struct spdk_fs_thread_ctx *ctx,
1276 : const char *name, uint32_t flags, struct spdk_file **file)
1277 : {
1278 13 : struct spdk_fs_channel *channel = (struct spdk_fs_channel *)ctx;
1279 13 : struct spdk_fs_request *req;
1280 13 : struct spdk_fs_cb_args *args;
1281 13 : int rc;
1282 :
1283 13 : SPDK_DEBUGLOG(blobfs, "file=%s\n", name);
1284 :
1285 13 : req = alloc_fs_request(channel);
1286 13 : if (req == NULL) {
1287 0 : SPDK_ERRLOG("Cannot allocate req for opening file=%s\n", name);
1288 0 : return -ENOMEM;
1289 : }
1290 :
1291 13 : args = &req->args;
1292 13 : args->fs = fs;
1293 13 : args->op.open.name = name;
1294 13 : args->op.open.flags = flags;
1295 13 : args->sem = &channel->sem;
1296 13 : fs->send_request(__fs_open_file, req);
1297 13 : sem_wait(&channel->sem);
1298 13 : rc = args->rc;
1299 13 : if (rc == 0) {
1300 11 : *file = args->file;
1301 11 : } else {
1302 2 : *file = NULL;
1303 : }
1304 13 : free_fs_request(req);
1305 :
1306 13 : return rc;
1307 13 : }
1308 :
1309 : static void
1310 2 : fs_rename_blob_close_cb(void *ctx, int bserrno)
1311 : {
1312 2 : struct spdk_fs_request *req = ctx;
1313 2 : struct spdk_fs_cb_args *args = &req->args;
1314 :
1315 2 : args->fn.fs_op(args->arg, bserrno);
1316 2 : free_fs_request(req);
1317 2 : }
1318 :
1319 : static void
1320 2 : fs_rename_blob_open_cb(void *ctx, struct spdk_blob *blob, int bserrno)
1321 : {
1322 2 : struct spdk_fs_request *req = ctx;
1323 2 : struct spdk_fs_cb_args *args = &req->args;
1324 2 : const char *new_name = args->op.rename.new_name;
1325 :
1326 2 : spdk_blob_set_xattr(blob, "name", new_name, strlen(new_name) + 1);
1327 2 : spdk_blob_close(blob, fs_rename_blob_close_cb, req);
1328 2 : }
1329 :
1330 : static void
1331 2 : _fs_md_rename_file(struct spdk_fs_request *req)
1332 : {
1333 2 : struct spdk_fs_cb_args *args = &req->args;
1334 2 : struct spdk_file *f;
1335 :
1336 2 : f = fs_find_file(args->fs, args->op.rename.old_name);
1337 2 : if (f == NULL) {
1338 0 : args->fn.fs_op(args->arg, -ENOENT);
1339 0 : free_fs_request(req);
1340 0 : return;
1341 : }
1342 :
1343 2 : free(f->name);
1344 2 : f->name = strdup(args->op.rename.new_name);
1345 2 : if (!f->name) {
1346 0 : SPDK_ERRLOG("Cannot allocate memory for file name\n");
1347 0 : args->fn.fs_op(args->arg, -ENOMEM);
1348 0 : free_fs_request(req);
1349 0 : return;
1350 : }
1351 :
1352 2 : args->file = f;
1353 2 : spdk_bs_open_blob(args->fs->bs, f->blobid, fs_rename_blob_open_cb, req);
1354 2 : }
1355 :
1356 : static void
1357 1 : fs_rename_delete_done(void *arg, int fserrno)
1358 : {
1359 1 : _fs_md_rename_file(arg);
1360 1 : }
1361 :
1362 : void
1363 2 : spdk_fs_rename_file_async(struct spdk_filesystem *fs,
1364 : const char *old_name, const char *new_name,
1365 : spdk_file_op_complete cb_fn, void *cb_arg)
1366 : {
1367 2 : struct spdk_file *f;
1368 2 : struct spdk_fs_request *req;
1369 2 : struct spdk_fs_cb_args *args;
1370 :
1371 2 : SPDK_DEBUGLOG(blobfs, "old=%s new=%s\n", old_name, new_name);
1372 2 : if (strnlen(new_name, SPDK_FILE_NAME_MAX + 1) == SPDK_FILE_NAME_MAX + 1) {
1373 0 : cb_fn(cb_arg, -ENAMETOOLONG);
1374 0 : return;
1375 : }
1376 :
1377 2 : req = alloc_fs_request(fs->md_target.md_fs_channel);
1378 2 : if (req == NULL) {
1379 0 : SPDK_ERRLOG("Cannot allocate rename async req for renaming file from %s to %s\n", old_name,
1380 : new_name);
1381 0 : cb_fn(cb_arg, -ENOMEM);
1382 0 : return;
1383 : }
1384 :
1385 2 : args = &req->args;
1386 2 : args->fn.fs_op = cb_fn;
1387 2 : args->fs = fs;
1388 2 : args->arg = cb_arg;
1389 2 : args->op.rename.old_name = old_name;
1390 2 : args->op.rename.new_name = new_name;
1391 :
1392 2 : f = fs_find_file(fs, new_name);
1393 2 : if (f == NULL) {
1394 1 : _fs_md_rename_file(req);
1395 1 : return;
1396 : }
1397 :
1398 : /*
1399 : * The rename overwrites an existing file. So delete the existing file, then
1400 : * do the actual rename.
1401 : */
1402 1 : spdk_fs_delete_file_async(fs, new_name, fs_rename_delete_done, req);
1403 2 : }
1404 :
1405 : static void
1406 1 : __fs_rename_file_done(void *arg, int fserrno)
1407 : {
1408 1 : struct spdk_fs_request *req = arg;
1409 1 : struct spdk_fs_cb_args *args = &req->args;
1410 :
1411 1 : __wake_caller(args, fserrno);
1412 1 : }
1413 :
1414 : static void
1415 1 : __fs_rename_file(void *arg)
1416 : {
1417 1 : struct spdk_fs_request *req = arg;
1418 1 : struct spdk_fs_cb_args *args = &req->args;
1419 :
1420 1 : spdk_fs_rename_file_async(args->fs, args->op.rename.old_name, args->op.rename.new_name,
1421 1 : __fs_rename_file_done, req);
1422 1 : }
1423 :
1424 : int
1425 1 : spdk_fs_rename_file(struct spdk_filesystem *fs, struct spdk_fs_thread_ctx *ctx,
1426 : const char *old_name, const char *new_name)
1427 : {
1428 1 : struct spdk_fs_channel *channel = (struct spdk_fs_channel *)ctx;
1429 1 : struct spdk_fs_request *req;
1430 1 : struct spdk_fs_cb_args *args;
1431 1 : int rc;
1432 :
1433 1 : req = alloc_fs_request(channel);
1434 1 : if (req == NULL) {
1435 0 : SPDK_ERRLOG("Cannot allocate rename req for file=%s\n", old_name);
1436 0 : return -ENOMEM;
1437 : }
1438 :
1439 1 : args = &req->args;
1440 :
1441 1 : args->fs = fs;
1442 1 : args->op.rename.old_name = old_name;
1443 1 : args->op.rename.new_name = new_name;
1444 1 : args->sem = &channel->sem;
1445 1 : fs->send_request(__fs_rename_file, req);
1446 1 : sem_wait(&channel->sem);
1447 1 : rc = args->rc;
1448 1 : free_fs_request(req);
1449 1 : return rc;
1450 1 : }
1451 :
1452 : static void
1453 16 : blob_delete_cb(void *ctx, int bserrno)
1454 : {
1455 16 : struct spdk_fs_request *req = ctx;
1456 16 : struct spdk_fs_cb_args *args = &req->args;
1457 :
1458 16 : args->fn.file_op(args->arg, bserrno);
1459 16 : free_fs_request(req);
1460 16 : }
1461 :
1462 : void
1463 16 : spdk_fs_delete_file_async(struct spdk_filesystem *fs, const char *name,
1464 : spdk_file_op_complete cb_fn, void *cb_arg)
1465 : {
1466 16 : struct spdk_file *f;
1467 16 : spdk_blob_id blobid;
1468 16 : struct spdk_fs_request *req;
1469 16 : struct spdk_fs_cb_args *args;
1470 :
1471 16 : SPDK_DEBUGLOG(blobfs, "file=%s\n", name);
1472 :
1473 16 : if (strnlen(name, SPDK_FILE_NAME_MAX + 1) == SPDK_FILE_NAME_MAX + 1) {
1474 0 : cb_fn(cb_arg, -ENAMETOOLONG);
1475 0 : return;
1476 : }
1477 :
1478 16 : f = fs_find_file(fs, name);
1479 16 : if (f == NULL) {
1480 2 : SPDK_ERRLOG("Cannot find the file=%s to deleted\n", name);
1481 2 : cb_fn(cb_arg, -ENOENT);
1482 2 : return;
1483 : }
1484 :
1485 14 : req = alloc_fs_request(fs->md_target.md_fs_channel);
1486 14 : if (req == NULL) {
1487 0 : SPDK_ERRLOG("Cannot allocate the req for the file=%s to deleted\n", name);
1488 0 : cb_fn(cb_arg, -ENOMEM);
1489 0 : return;
1490 : }
1491 :
1492 14 : args = &req->args;
1493 14 : args->fn.file_op = cb_fn;
1494 14 : args->arg = cb_arg;
1495 :
1496 14 : if (f->ref_count > 0) {
1497 : /* If the ref > 0, we mark the file as deleted and delete it when we close it. */
1498 2 : f->is_deleted = true;
1499 2 : spdk_blob_set_xattr(f->blob, "is_deleted", &f->is_deleted, sizeof(bool));
1500 2 : spdk_blob_sync_md(f->blob, blob_delete_cb, req);
1501 2 : return;
1502 : }
1503 :
1504 12 : blobid = f->blobid;
1505 12 : TAILQ_REMOVE(&fs->files, f, tailq);
1506 :
1507 12 : file_free(f);
1508 :
1509 12 : spdk_bs_delete_blob(fs->bs, blobid, blob_delete_cb, req);
1510 16 : }
1511 :
1512 : static void
1513 8 : __fs_delete_file_done(void *arg, int fserrno)
1514 : {
1515 8 : struct spdk_fs_request *req = arg;
1516 8 : struct spdk_fs_cb_args *args = &req->args;
1517 :
1518 8 : spdk_trace_record(TRACE_BLOBFS_DELETE_DONE, 0, 0, 0, args->op.delete.name);
1519 8 : __wake_caller(args, fserrno);
1520 8 : }
1521 :
1522 : static void
1523 8 : __fs_delete_file(void *arg)
1524 : {
1525 8 : struct spdk_fs_request *req = arg;
1526 8 : struct spdk_fs_cb_args *args = &req->args;
1527 :
1528 8 : spdk_trace_record(TRACE_BLOBFS_DELETE_START, 0, 0, 0, args->op.delete.name);
1529 8 : spdk_fs_delete_file_async(args->fs, args->op.delete.name, __fs_delete_file_done, req);
1530 8 : }
1531 :
1532 : int
1533 8 : spdk_fs_delete_file(struct spdk_filesystem *fs, struct spdk_fs_thread_ctx *ctx,
1534 : const char *name)
1535 : {
1536 8 : struct spdk_fs_channel *channel = (struct spdk_fs_channel *)ctx;
1537 8 : struct spdk_fs_request *req;
1538 8 : struct spdk_fs_cb_args *args;
1539 8 : int rc;
1540 :
1541 8 : req = alloc_fs_request(channel);
1542 8 : if (req == NULL) {
1543 0 : SPDK_DEBUGLOG(blobfs, "Cannot allocate req to delete file=%s\n", name);
1544 0 : return -ENOMEM;
1545 : }
1546 :
1547 8 : args = &req->args;
1548 8 : args->fs = fs;
1549 8 : args->op.delete.name = name;
1550 8 : args->sem = &channel->sem;
1551 8 : fs->send_request(__fs_delete_file, req);
1552 8 : sem_wait(&channel->sem);
1553 8 : rc = args->rc;
1554 8 : free_fs_request(req);
1555 :
1556 8 : return rc;
1557 8 : }
1558 :
1559 : spdk_fs_iter
1560 1 : spdk_fs_iter_first(struct spdk_filesystem *fs)
1561 : {
1562 1 : struct spdk_file *f;
1563 :
1564 1 : f = TAILQ_FIRST(&fs->files);
1565 2 : return f;
1566 1 : }
1567 :
1568 : spdk_fs_iter
1569 1 : spdk_fs_iter_next(spdk_fs_iter iter)
1570 : {
1571 1 : struct spdk_file *f = iter;
1572 :
1573 1 : if (f == NULL) {
1574 0 : return NULL;
1575 : }
1576 :
1577 1 : f = TAILQ_NEXT(f, tailq);
1578 1 : return f;
1579 1 : }
1580 :
1581 : const char *
1582 2 : spdk_file_get_name(struct spdk_file *file)
1583 : {
1584 2 : return file->name;
1585 : }
1586 :
1587 : uint64_t
1588 6 : spdk_file_get_length(struct spdk_file *file)
1589 : {
1590 6 : uint64_t length;
1591 :
1592 6 : assert(file != NULL);
1593 :
1594 6 : length = file->append_pos >= file->length ? file->append_pos : file->length;
1595 6 : SPDK_DEBUGLOG(blobfs, "file=%s length=0x%jx\n", file->name, length);
1596 12 : return length;
1597 6 : }
1598 :
1599 : static void
1600 8 : fs_truncate_complete_cb(void *ctx, int bserrno)
1601 : {
1602 8 : struct spdk_fs_request *req = ctx;
1603 8 : struct spdk_fs_cb_args *args = &req->args;
1604 :
1605 8 : args->fn.file_op(args->arg, bserrno);
1606 8 : free_fs_request(req);
1607 8 : }
1608 :
1609 : static void
1610 8 : fs_truncate_resize_cb(void *ctx, int bserrno)
1611 : {
1612 8 : struct spdk_fs_request *req = ctx;
1613 8 : struct spdk_fs_cb_args *args = &req->args;
1614 8 : struct spdk_file *file = args->file;
1615 8 : uint64_t *length = &args->op.truncate.length;
1616 :
1617 8 : if (bserrno) {
1618 0 : args->fn.file_op(args->arg, bserrno);
1619 0 : free_fs_request(req);
1620 0 : return;
1621 : }
1622 :
1623 8 : spdk_blob_set_xattr(file->blob, "length", length, sizeof(*length));
1624 :
1625 8 : file->length = *length;
1626 8 : if (file->append_pos > file->length) {
1627 0 : file->append_pos = file->length;
1628 0 : }
1629 :
1630 8 : spdk_blob_sync_md(file->blob, fs_truncate_complete_cb, req);
1631 8 : }
1632 :
/*
 * Number of clusters needed to hold `length` bytes, rounding up.
 *
 * The quotient and remainder are computed separately because the usual
 * (length + cluster_sz - 1) / cluster_sz form wraps around for lengths
 * close to UINT64_MAX and would then report far too few clusters.
 *
 * \param length Size in bytes.
 * \param cluster_sz Cluster size in bytes; must be non-zero.
 * \return ceil(length / cluster_sz).
 */
static uint64_t
__bytes_to_clusters(uint64_t length, uint64_t cluster_sz)
{
	uint64_t clusters = length / cluster_sz;

	if (length % cluster_sz != 0) {
		clusters++;
	}

	return clusters;
}
1638 :
1639 : void
1640 9 : spdk_file_truncate_async(struct spdk_file *file, uint64_t length,
1641 : spdk_file_op_complete cb_fn, void *cb_arg)
1642 : {
1643 9 : struct spdk_filesystem *fs;
1644 9 : size_t num_clusters;
1645 9 : struct spdk_fs_request *req;
1646 9 : struct spdk_fs_cb_args *args;
1647 :
1648 9 : SPDK_DEBUGLOG(blobfs, "file=%s old=0x%jx new=0x%jx\n", file->name, file->length, length);
1649 9 : if (length == file->length) {
1650 1 : cb_fn(cb_arg, 0);
1651 1 : return;
1652 : }
1653 :
1654 8 : req = alloc_fs_request(file->fs->md_target.md_fs_channel);
1655 8 : if (req == NULL) {
1656 0 : cb_fn(cb_arg, -ENOMEM);
1657 0 : return;
1658 : }
1659 :
1660 8 : args = &req->args;
1661 8 : args->fn.file_op = cb_fn;
1662 8 : args->arg = cb_arg;
1663 8 : args->file = file;
1664 8 : args->op.truncate.length = length;
1665 8 : fs = file->fs;
1666 :
1667 8 : num_clusters = __bytes_to_clusters(length, fs->bs_opts.cluster_sz);
1668 :
1669 8 : spdk_blob_resize(file->blob, num_clusters, fs_truncate_resize_cb, req);
1670 9 : }
1671 :
1672 : static void
1673 3 : __truncate(void *arg)
1674 : {
1675 3 : struct spdk_fs_request *req = arg;
1676 3 : struct spdk_fs_cb_args *args = &req->args;
1677 :
1678 6 : spdk_file_truncate_async(args->file, args->op.truncate.length,
1679 3 : args->fn.file_op, args);
1680 3 : }
1681 :
1682 : int
1683 3 : spdk_file_truncate(struct spdk_file *file, struct spdk_fs_thread_ctx *ctx,
1684 : uint64_t length)
1685 : {
1686 3 : struct spdk_fs_channel *channel = (struct spdk_fs_channel *)ctx;
1687 3 : struct spdk_fs_request *req;
1688 3 : struct spdk_fs_cb_args *args;
1689 3 : int rc;
1690 :
1691 3 : req = alloc_fs_request(channel);
1692 3 : if (req == NULL) {
1693 0 : return -ENOMEM;
1694 : }
1695 :
1696 3 : args = &req->args;
1697 :
1698 3 : args->file = file;
1699 3 : args->op.truncate.length = length;
1700 3 : args->fn.file_op = __wake_caller;
1701 3 : args->sem = &channel->sem;
1702 :
1703 3 : channel->send_request(__truncate, req);
1704 3 : sem_wait(&channel->sem);
1705 3 : rc = args->rc;
1706 3 : free_fs_request(req);
1707 :
1708 3 : return rc;
1709 3 : }
1710 :
1711 : static void
1712 7 : __rw_done(void *ctx, int bserrno)
1713 : {
1714 7 : struct spdk_fs_request *req = ctx;
1715 7 : struct spdk_fs_cb_args *args = &req->args;
1716 :
1717 7 : spdk_free(args->op.rw.pin_buf);
1718 7 : args->fn.file_op(args->arg, bserrno);
1719 7 : free_fs_request(req);
1720 7 : }
1721 :
1722 : static void
1723 6 : __read_done(void *ctx, int bserrno)
1724 : {
1725 6 : struct spdk_fs_request *req = ctx;
1726 6 : struct spdk_fs_cb_args *args = &req->args;
1727 6 : void *buf;
1728 :
1729 6 : if (bserrno) {
1730 0 : __rw_done(req, bserrno);
1731 0 : return;
1732 : }
1733 :
1734 6 : assert(req != NULL);
1735 6 : buf = (void *)((uintptr_t)args->op.rw.pin_buf + (args->op.rw.offset & (args->op.rw.blocklen - 1)));
1736 6 : if (args->op.rw.is_read) {
1737 3 : spdk_copy_buf_to_iovs(args->iovs, args->iovcnt, buf, args->op.rw.length);
1738 3 : __rw_done(req, 0);
1739 3 : } else {
1740 3 : spdk_copy_iovs_to_buf(buf, args->op.rw.length, args->iovs, args->iovcnt);
1741 6 : spdk_blob_io_write(args->file->blob, args->op.rw.channel,
1742 3 : args->op.rw.pin_buf,
1743 3 : args->op.rw.start_lba, args->op.rw.num_lba,
1744 3 : __rw_done, req);
1745 : }
1746 6 : }
1747 :
1748 : static void
1749 6 : __do_blob_read(void *ctx, int fserrno)
1750 : {
1751 6 : struct spdk_fs_request *req = ctx;
1752 6 : struct spdk_fs_cb_args *args = &req->args;
1753 :
1754 6 : if (fserrno) {
1755 0 : __rw_done(req, fserrno);
1756 0 : return;
1757 : }
1758 12 : spdk_blob_io_read(args->file->blob, args->op.rw.channel,
1759 6 : args->op.rw.pin_buf,
1760 6 : args->op.rw.start_lba, args->op.rw.num_lba,
1761 6 : __read_done, req);
1762 6 : }
1763 :
1764 : static void
1765 17 : __get_page_parameters(struct spdk_file *file, uint64_t offset, uint64_t length,
1766 : uint64_t *start_lba, uint32_t *lba_size, uint64_t *num_lba)
1767 : {
1768 17 : uint64_t end_lba;
1769 :
1770 17 : *lba_size = spdk_bs_get_io_unit_size(file->fs->bs);
1771 17 : *start_lba = offset / *lba_size;
1772 17 : end_lba = (offset + length - 1) / *lba_size;
1773 17 : *num_lba = (end_lba - *start_lba + 1);
1774 17 : }
1775 :
1776 : static bool
1777 1 : __is_lba_aligned(struct spdk_file *file, uint64_t offset, uint64_t length)
1778 : {
1779 1 : uint32_t lba_size = spdk_bs_get_io_unit_size(file->fs->bs);
1780 :
1781 1 : if ((offset % lba_size == 0) && (length % lba_size == 0)) {
1782 1 : return true;
1783 : }
1784 :
1785 0 : return false;
1786 1 : }
1787 :
1788 : static void
1789 7 : _fs_request_setup_iovs(struct spdk_fs_request *req, struct iovec *iovs, uint32_t iovcnt)
1790 : {
1791 7 : uint32_t i;
1792 :
1793 18 : for (i = 0; i < iovcnt; i++) {
1794 11 : req->args.iovs[i].iov_base = iovs[i].iov_base;
1795 11 : req->args.iovs[i].iov_len = iovs[i].iov_len;
1796 11 : }
1797 7 : }
1798 :
1799 : static void
1800 7 : __readvwritev(struct spdk_file *file, struct spdk_io_channel *_channel,
1801 : struct iovec *iovs, uint32_t iovcnt, uint64_t offset, uint64_t length,
1802 : spdk_file_op_complete cb_fn, void *cb_arg, int is_read)
1803 : {
1804 7 : struct spdk_fs_request *req;
1805 7 : struct spdk_fs_cb_args *args;
1806 7 : struct spdk_fs_channel *channel = spdk_io_channel_get_ctx(_channel);
1807 7 : uint64_t start_lba, num_lba, pin_buf_length;
1808 7 : uint32_t lba_size;
1809 :
1810 7 : if (is_read && offset + length > file->length) {
1811 0 : cb_fn(cb_arg, -EINVAL);
1812 0 : return;
1813 : }
1814 :
1815 7 : req = alloc_fs_request_with_iov(channel, iovcnt);
1816 7 : if (req == NULL) {
1817 0 : cb_fn(cb_arg, -ENOMEM);
1818 0 : return;
1819 : }
1820 :
1821 7 : __get_page_parameters(file, offset, length, &start_lba, &lba_size, &num_lba);
1822 :
1823 7 : args = &req->args;
1824 7 : args->fn.file_op = cb_fn;
1825 7 : args->arg = cb_arg;
1826 7 : args->file = file;
1827 7 : args->op.rw.channel = channel->bs_channel;
1828 7 : _fs_request_setup_iovs(req, iovs, iovcnt);
1829 7 : args->op.rw.is_read = is_read;
1830 7 : args->op.rw.offset = offset;
1831 7 : args->op.rw.blocklen = lba_size;
1832 :
1833 7 : pin_buf_length = num_lba * lba_size;
1834 7 : args->op.rw.length = pin_buf_length;
1835 7 : args->op.rw.pin_buf = spdk_malloc(pin_buf_length, lba_size, NULL,
1836 : SPDK_ENV_NUMA_ID_ANY, SPDK_MALLOC_DMA);
1837 7 : if (args->op.rw.pin_buf == NULL) {
1838 0 : SPDK_DEBUGLOG(blobfs, "Failed to allocate buf for: file=%s offset=%jx length=%jx\n",
1839 : file->name, offset, length);
1840 0 : free_fs_request(req);
1841 0 : cb_fn(cb_arg, -ENOMEM);
1842 0 : return;
1843 : }
1844 :
1845 7 : args->op.rw.start_lba = start_lba;
1846 7 : args->op.rw.num_lba = num_lba;
1847 :
1848 7 : if (!is_read && file->length < offset + length) {
1849 3 : spdk_file_truncate_async(file, offset + length, __do_blob_read, req);
1850 7 : } else if (!is_read && __is_lba_aligned(file, offset, length)) {
1851 1 : spdk_copy_iovs_to_buf(args->op.rw.pin_buf, args->op.rw.length, args->iovs, args->iovcnt);
1852 2 : spdk_blob_io_write(args->file->blob, args->op.rw.channel,
1853 1 : args->op.rw.pin_buf,
1854 1 : args->op.rw.start_lba, args->op.rw.num_lba,
1855 1 : __rw_done, req);
1856 1 : } else {
1857 3 : __do_blob_read(req, 0);
1858 : }
1859 7 : }
1860 :
1861 : static void
1862 3 : __readwrite(struct spdk_file *file, struct spdk_io_channel *channel,
1863 : void *payload, uint64_t offset, uint64_t length,
1864 : spdk_file_op_complete cb_fn, void *cb_arg, int is_read)
1865 : {
1866 3 : struct iovec iov;
1867 :
1868 3 : iov.iov_base = payload;
1869 3 : iov.iov_len = (size_t)length;
1870 :
1871 3 : __readvwritev(file, channel, &iov, 1, offset, length, cb_fn, cb_arg, is_read);
1872 3 : }
1873 :
1874 : void
1875 2 : spdk_file_write_async(struct spdk_file *file, struct spdk_io_channel *channel,
1876 : void *payload, uint64_t offset, uint64_t length,
1877 : spdk_file_op_complete cb_fn, void *cb_arg)
1878 : {
1879 2 : __readwrite(file, channel, payload, offset, length, cb_fn, cb_arg, 0);
1880 2 : }
1881 :
1882 : void
1883 2 : spdk_file_writev_async(struct spdk_file *file, struct spdk_io_channel *channel,
1884 : struct iovec *iovs, uint32_t iovcnt, uint64_t offset, uint64_t length,
1885 : spdk_file_op_complete cb_fn, void *cb_arg)
1886 : {
1887 2 : SPDK_DEBUGLOG(blobfs, "file=%s offset=%jx length=%jx\n",
1888 : file->name, offset, length);
1889 :
1890 2 : __readvwritev(file, channel, iovs, iovcnt, offset, length, cb_fn, cb_arg, 0);
1891 2 : }
1892 :
1893 : void
1894 1 : spdk_file_read_async(struct spdk_file *file, struct spdk_io_channel *channel,
1895 : void *payload, uint64_t offset, uint64_t length,
1896 : spdk_file_op_complete cb_fn, void *cb_arg)
1897 : {
1898 1 : SPDK_DEBUGLOG(blobfs, "file=%s offset=%jx length=%jx\n",
1899 : file->name, offset, length);
1900 :
1901 1 : __readwrite(file, channel, payload, offset, length, cb_fn, cb_arg, 1);
1902 1 : }
1903 :
1904 : void
1905 2 : spdk_file_readv_async(struct spdk_file *file, struct spdk_io_channel *channel,
1906 : struct iovec *iovs, uint32_t iovcnt, uint64_t offset, uint64_t length,
1907 : spdk_file_op_complete cb_fn, void *cb_arg)
1908 : {
1909 2 : SPDK_DEBUGLOG(blobfs, "file=%s offset=%jx length=%jx\n",
1910 : file->name, offset, length);
1911 :
1912 2 : __readvwritev(file, channel, iovs, iovcnt, offset, length, cb_fn, cb_arg, 1);
1913 2 : }
1914 :
1915 : struct spdk_io_channel *
1916 1 : spdk_fs_alloc_io_channel(struct spdk_filesystem *fs)
1917 : {
1918 1 : struct spdk_io_channel *io_channel;
1919 1 : struct spdk_fs_channel *fs_channel;
1920 :
1921 1 : io_channel = spdk_get_io_channel(&fs->io_target);
1922 1 : fs_channel = spdk_io_channel_get_ctx(io_channel);
1923 1 : fs_channel->bs_channel = spdk_bs_alloc_io_channel(fs->bs);
1924 1 : fs_channel->send_request = __send_request_direct;
1925 :
1926 2 : return io_channel;
1927 1 : }
1928 :
/*
 * Release an I/O channel by dropping the reference held on it.
 */
void
spdk_fs_free_io_channel(struct spdk_io_channel *channel)
{
	struct spdk_io_channel *released = channel;

	spdk_put_io_channel(released);
}
1934 :
1935 : struct spdk_fs_thread_ctx *
1936 12 : spdk_fs_alloc_thread_ctx(struct spdk_filesystem *fs)
1937 : {
1938 12 : struct spdk_fs_thread_ctx *ctx;
1939 :
1940 12 : ctx = calloc(1, sizeof(*ctx));
1941 12 : if (!ctx) {
1942 0 : return NULL;
1943 : }
1944 :
1945 12 : if (pthread_spin_init(&ctx->ch.lock, 0)) {
1946 0 : free(ctx);
1947 0 : return NULL;
1948 : }
1949 :
1950 12 : fs_channel_create(fs, &ctx->ch, 512);
1951 :
1952 12 : ctx->ch.send_request = fs->send_request;
1953 12 : ctx->ch.sync = 1;
1954 :
1955 12 : return ctx;
1956 12 : }
1957 :
1958 :
1959 : void
1960 12 : spdk_fs_free_thread_ctx(struct spdk_fs_thread_ctx *ctx)
1961 : {
1962 12 : assert(ctx->ch.sync == 1);
1963 :
1964 12 : while (true) {
1965 12 : pthread_spin_lock(&ctx->ch.lock);
1966 12 : if (ctx->ch.outstanding_reqs == 0) {
1967 12 : pthread_spin_unlock(&ctx->ch.lock);
1968 12 : break;
1969 : }
1970 0 : pthread_spin_unlock(&ctx->ch.lock);
1971 0 : usleep(1000);
1972 : }
1973 :
1974 12 : fs_channel_destroy(NULL, &ctx->ch);
1975 12 : free(ctx);
1976 12 : }
1977 :
1978 : int
1979 0 : spdk_fs_set_cache_size(uint64_t size_in_mb)
1980 : {
1981 : /* setting g_fs_cache_size is only permitted if cache pool
1982 : * is already freed or hasn't been initialized
1983 : */
1984 0 : if (g_cache_pool != NULL) {
1985 0 : return -EPERM;
1986 : }
1987 :
1988 0 : g_fs_cache_size = size_in_mb * 1024 * 1024;
1989 :
1990 0 : return 0;
1991 0 : }
1992 :
1993 : uint64_t
1994 0 : spdk_fs_get_cache_size(void)
1995 : {
1996 0 : return g_fs_cache_size / (1024 * 1024);
1997 : }
1998 :
1999 : static void __file_flush(void *ctx);
2000 :
2001 : /* Try to free some cache buffers from this file.
2002 : */
2003 : static int
2004 0 : reclaim_cache_buffers(struct spdk_file *file)
2005 : {
2006 0 : int rc;
2007 :
2008 0 : BLOBFS_TRACE(file, "free=%s\n", file->name);
2009 :
2010 : /* The function is safe to be called with any threads, while the file
2011 : * lock maybe locked by other thread for now, so try to get the file
2012 : * lock here.
2013 : */
2014 0 : rc = pthread_spin_trylock(&file->lock);
2015 0 : if (rc != 0) {
2016 0 : return -1;
2017 : }
2018 :
2019 0 : if (file->tree->present_mask == 0) {
2020 0 : pthread_spin_unlock(&file->lock);
2021 0 : return -1;
2022 : }
2023 0 : tree_free_buffers(file->tree);
2024 :
2025 0 : TAILQ_REMOVE(&g_caches, file, cache_tailq);
2026 : /* If not freed, put it in the end of the queue */
2027 0 : if (file->tree->present_mask != 0) {
2028 0 : TAILQ_INSERT_TAIL(&g_caches, file, cache_tailq);
2029 0 : }
2030 :
2031 : /* tree_free_buffers() may have freed the buffer pointed to by file->last.
2032 : * So check if current append_pos is still in the cache, and if not, clear
2033 : * file->last.
2034 : */
2035 0 : if (tree_find_buffer(file->tree, file->append_pos) == NULL) {
2036 0 : file->last = NULL;
2037 0 : }
2038 :
2039 0 : pthread_spin_unlock(&file->lock);
2040 :
2041 0 : return 0;
2042 0 : }
2043 :
2044 : static int
2045 0 : _blobfs_cache_pool_reclaim(void *arg)
2046 : {
2047 0 : struct spdk_file *file, *tmp;
2048 0 : int rc;
2049 :
2050 0 : if (!blobfs_cache_pool_need_reclaim()) {
2051 0 : return SPDK_POLLER_IDLE;
2052 : }
2053 :
2054 0 : TAILQ_FOREACH_SAFE(file, &g_caches, cache_tailq, tmp) {
2055 0 : if (!file->open_for_writing &&
2056 0 : file->priority == SPDK_FILE_PRIORITY_LOW) {
2057 0 : rc = reclaim_cache_buffers(file);
2058 0 : if (rc < 0) {
2059 0 : continue;
2060 : }
2061 0 : if (!blobfs_cache_pool_need_reclaim()) {
2062 0 : return SPDK_POLLER_BUSY;
2063 : }
2064 0 : break;
2065 : }
2066 0 : }
2067 :
2068 0 : TAILQ_FOREACH_SAFE(file, &g_caches, cache_tailq, tmp) {
2069 0 : if (!file->open_for_writing) {
2070 0 : rc = reclaim_cache_buffers(file);
2071 0 : if (rc < 0) {
2072 0 : continue;
2073 : }
2074 0 : if (!blobfs_cache_pool_need_reclaim()) {
2075 0 : return SPDK_POLLER_BUSY;
2076 : }
2077 0 : break;
2078 : }
2079 0 : }
2080 :
2081 0 : TAILQ_FOREACH_SAFE(file, &g_caches, cache_tailq, tmp) {
2082 0 : rc = reclaim_cache_buffers(file);
2083 0 : if (rc < 0) {
2084 0 : continue;
2085 : }
2086 0 : break;
2087 : }
2088 :
2089 0 : return SPDK_POLLER_BUSY;
2090 0 : }
2091 :
2092 : static void
2093 5 : _add_file_to_cache_pool(void *ctx)
2094 : {
2095 5 : struct spdk_file *file = ctx;
2096 :
2097 5 : TAILQ_INSERT_TAIL(&g_caches, file, cache_tailq);
2098 5 : }
2099 :
2100 : static void
2101 0 : _remove_file_from_cache_pool(void *ctx)
2102 : {
2103 0 : struct spdk_file *file = ctx;
2104 :
2105 0 : TAILQ_REMOVE(&g_caches, file, cache_tailq);
2106 0 : }
2107 :
2108 : static struct cache_buffer *
2109 10 : cache_insert_buffer(struct spdk_file *file, uint64_t offset)
2110 : {
2111 10 : struct cache_buffer *buf;
2112 10 : int count = 0;
2113 10 : bool need_update = false;
2114 :
2115 10 : buf = calloc(1, sizeof(*buf));
2116 10 : if (buf == NULL) {
2117 0 : SPDK_DEBUGLOG(blobfs, "calloc failed\n");
2118 0 : return NULL;
2119 : }
2120 :
2121 10 : do {
2122 10 : buf->buf = spdk_mempool_get(g_cache_pool);
2123 10 : if (buf->buf) {
2124 10 : break;
2125 : }
2126 0 : if (count++ == 100) {
2127 0 : SPDK_ERRLOG("Could not allocate cache buffer for file=%p on offset=%jx\n",
2128 : file, offset);
2129 0 : free(buf);
2130 0 : return NULL;
2131 : }
2132 0 : usleep(BLOBFS_CACHE_POOL_POLL_PERIOD_IN_US);
2133 0 : } while (true);
2134 :
2135 10 : buf->buf_size = CACHE_BUFFER_SIZE;
2136 10 : buf->offset = offset;
2137 :
2138 10 : if (file->tree->present_mask == 0) {
2139 5 : need_update = true;
2140 5 : }
2141 10 : file->tree = tree_insert_buffer(file->tree, buf);
2142 :
2143 10 : if (need_update) {
2144 5 : spdk_thread_send_msg(g_cache_pool_thread, _add_file_to_cache_pool, file);
2145 5 : }
2146 :
2147 10 : return buf;
2148 10 : }
2149 :
2150 : static struct cache_buffer *
2151 10 : cache_append_buffer(struct spdk_file *file)
2152 : {
2153 10 : struct cache_buffer *last;
2154 :
2155 10 : assert(file->last == NULL || file->last->bytes_filled == file->last->buf_size);
2156 10 : assert((file->append_pos % CACHE_BUFFER_SIZE) == 0);
2157 :
2158 10 : last = cache_insert_buffer(file, file->append_pos);
2159 10 : if (last == NULL) {
2160 0 : SPDK_DEBUGLOG(blobfs, "cache_insert_buffer failed\n");
2161 0 : return NULL;
2162 : }
2163 :
2164 10 : file->last = last;
2165 :
2166 10 : return last;
2167 10 : }
2168 :
2169 : static void __check_sync_reqs(struct spdk_file *file);
2170 :
2171 : static void
2172 7 : __file_cache_finish_sync(void *ctx, int bserrno)
2173 : {
2174 7 : struct spdk_file *file;
2175 7 : struct spdk_fs_request *sync_req = ctx;
2176 7 : struct spdk_fs_cb_args *sync_args;
2177 :
2178 7 : sync_args = &sync_req->args;
2179 7 : file = sync_args->file;
2180 7 : pthread_spin_lock(&file->lock);
2181 7 : file->length_xattr = sync_args->op.sync.length;
2182 7 : assert(sync_args->op.sync.offset <= file->length_flushed);
2183 7 : spdk_trace_record(TRACE_BLOBFS_XATTR_END, 0, sync_args->op.sync.offset,
2184 : 0, file->name);
2185 7 : BLOBFS_TRACE(file, "sync done offset=%jx\n", sync_args->op.sync.offset);
2186 7 : TAILQ_REMOVE(&file->sync_requests, sync_req, args.op.sync.tailq);
2187 7 : pthread_spin_unlock(&file->lock);
2188 :
2189 7 : sync_args->fn.file_op(sync_args->arg, bserrno);
2190 :
2191 7 : free_fs_request(sync_req);
2192 7 : __check_sync_reqs(file);
2193 7 : }
2194 :
2195 : static void
2196 25 : __check_sync_reqs(struct spdk_file *file)
2197 : {
2198 25 : struct spdk_fs_request *sync_req;
2199 :
2200 25 : pthread_spin_lock(&file->lock);
2201 :
2202 26 : TAILQ_FOREACH(sync_req, &file->sync_requests, args.op.sync.tailq) {
2203 15 : if (sync_req->args.op.sync.offset <= file->length_flushed) {
2204 14 : break;
2205 : }
2206 1 : }
2207 :
2208 25 : if (sync_req != NULL && !sync_req->args.op.sync.xattr_in_progress) {
2209 7 : BLOBFS_TRACE(file, "set xattr length 0x%jx\n", file->length_flushed);
2210 7 : sync_req->args.op.sync.xattr_in_progress = true;
2211 7 : sync_req->args.op.sync.length = file->length_flushed;
2212 7 : spdk_blob_set_xattr(file->blob, "length", &file->length_flushed,
2213 : sizeof(file->length_flushed));
2214 :
2215 7 : pthread_spin_unlock(&file->lock);
2216 7 : spdk_trace_record(TRACE_BLOBFS_XATTR_START, 0, file->length_flushed,
2217 : 0, file->name);
2218 7 : spdk_blob_sync_md(file->blob, __file_cache_finish_sync, sync_req);
2219 7 : } else {
2220 18 : pthread_spin_unlock(&file->lock);
2221 : }
2222 25 : }
2223 :
2224 : static void
2225 10 : __file_flush_done(void *ctx, int bserrno)
2226 : {
2227 10 : struct spdk_fs_request *req = ctx;
2228 10 : struct spdk_fs_cb_args *args = &req->args;
2229 10 : struct spdk_file *file = args->file;
2230 10 : struct cache_buffer *next = args->op.flush.cache_buffer;
2231 :
2232 10 : BLOBFS_TRACE(file, "length=%jx\n", args->op.flush.length);
2233 :
2234 10 : pthread_spin_lock(&file->lock);
2235 10 : next->in_progress = false;
2236 10 : next->bytes_flushed += args->op.flush.length;
2237 10 : file->length_flushed += args->op.flush.length;
2238 10 : if (file->length_flushed > file->length) {
2239 0 : file->length = file->length_flushed;
2240 0 : }
2241 10 : if (next->bytes_flushed == next->buf_size) {
2242 5 : BLOBFS_TRACE(file, "write buffer fully flushed 0x%jx\n", file->length_flushed);
2243 5 : next = tree_find_buffer(file->tree, file->length_flushed);
2244 5 : }
2245 :
2246 : /*
2247 : * Assert that there is no cached data that extends past the end of the underlying
2248 : * blob.
2249 : */
2250 10 : assert(next == NULL || next->offset < __file_get_blob_size(file) ||
2251 : next->bytes_filled == 0);
2252 :
2253 10 : pthread_spin_unlock(&file->lock);
2254 :
2255 10 : __check_sync_reqs(file);
2256 :
2257 10 : __file_flush(req);
2258 10 : }
2259 :
2260 : static void
2261 20 : __file_flush(void *ctx)
2262 : {
2263 20 : struct spdk_fs_request *req = ctx;
2264 20 : struct spdk_fs_cb_args *args = &req->args;
2265 20 : struct spdk_file *file = args->file;
2266 20 : struct cache_buffer *next;
2267 20 : uint64_t offset, length, start_lba, num_lba;
2268 20 : uint32_t lba_size;
2269 :
2270 20 : pthread_spin_lock(&file->lock);
2271 20 : next = tree_find_buffer(file->tree, file->length_flushed);
2272 33 : if (next == NULL || next->in_progress ||
2273 18 : ((next->bytes_filled < next->buf_size) && TAILQ_EMPTY(&file->sync_requests))) {
2274 : /*
2275 : * There is either no data to flush, a flush I/O is already in
2276 : * progress, or the next buffer is partially filled but there's no
2277 : * outstanding request to sync it.
2278 : * So return immediately - if a flush I/O is in progress we will flush
2279 : * more data after that is completed, or a partial buffer will get flushed
2280 : * when it is either filled or the file is synced.
2281 : */
2282 3 : free_fs_request(req);
2283 3 : if (next == NULL) {
2284 : /*
2285 : * For cases where a file's cache was evicted, and then the
2286 : * file was later appended, we will write the data directly
2287 : * to disk and bypass cache. So just update length_flushed
2288 : * here to reflect that all data was already written to disk.
2289 : */
2290 1 : file->length_flushed = file->append_pos;
2291 1 : }
2292 3 : pthread_spin_unlock(&file->lock);
2293 3 : if (next == NULL) {
2294 : /*
2295 : * There is no data to flush, but we still need to check for any
2296 : * outstanding sync requests to make sure metadata gets updated.
2297 : */
2298 1 : __check_sync_reqs(file);
2299 1 : }
2300 3 : return;
2301 : }
2302 :
2303 17 : offset = next->offset + next->bytes_flushed;
2304 17 : length = next->bytes_filled - next->bytes_flushed;
2305 17 : if (length == 0) {
2306 7 : free_fs_request(req);
2307 7 : pthread_spin_unlock(&file->lock);
2308 : /*
2309 : * There is no data to flush, but we still need to check for any
2310 : * outstanding sync requests to make sure metadata gets updated.
2311 : */
2312 7 : __check_sync_reqs(file);
2313 7 : return;
2314 : }
2315 10 : args->op.flush.length = length;
2316 10 : args->op.flush.cache_buffer = next;
2317 :
2318 10 : __get_page_parameters(file, offset, length, &start_lba, &lba_size, &num_lba);
2319 :
2320 10 : next->in_progress = true;
2321 10 : BLOBFS_TRACE(file, "offset=0x%jx length=0x%jx page start=0x%jx num=0x%jx\n",
2322 : offset, length, start_lba, num_lba);
2323 10 : pthread_spin_unlock(&file->lock);
2324 20 : spdk_blob_io_write(file->blob, file->fs->sync_target.sync_fs_channel->bs_channel,
2325 10 : next->buf + (start_lba * lba_size) - next->offset,
2326 10 : start_lba, num_lba, __file_flush_done, req);
2327 20 : }
2328 :
/*
 * Final step of a blob extension: completion callback for the
 * spdk_blob_sync_md() issued by __file_extend_resize_cb(). Hands bserrno to
 * the caller through __wake_caller().
 */
static void
__file_extend_done(void *arg, int bserrno)
{
	struct spdk_fs_cb_args *cb_args = arg;

	__wake_caller(cb_args, bserrno);
}
2336 :
2337 : static void
2338 0 : __file_extend_resize_cb(void *_args, int bserrno)
2339 : {
2340 0 : struct spdk_fs_cb_args *args = _args;
2341 0 : struct spdk_file *file = args->file;
2342 :
2343 0 : if (bserrno) {
2344 0 : __wake_caller(args, bserrno);
2345 0 : return;
2346 : }
2347 :
2348 0 : spdk_blob_sync_md(file->blob, __file_extend_done, args);
2349 0 : }
2350 :
2351 : static void
2352 0 : __file_extend_blob(void *_args)
2353 : {
2354 0 : struct spdk_fs_cb_args *args = _args;
2355 0 : struct spdk_file *file = args->file;
2356 :
2357 0 : spdk_blob_resize(file->blob, args->op.resize.num_clusters, __file_extend_resize_cb, args);
2358 0 : }
2359 :
2360 : static void
2361 1 : __rw_from_file_done(void *ctx, int bserrno)
2362 : {
2363 1 : struct spdk_fs_request *req = ctx;
2364 :
2365 1 : __wake_caller(&req->args, bserrno);
2366 1 : free_fs_request(req);
2367 1 : }
2368 :
2369 : static void
2370 1 : __rw_from_file(void *ctx)
2371 : {
2372 1 : struct spdk_fs_request *req = ctx;
2373 1 : struct spdk_fs_cb_args *args = &req->args;
2374 1 : struct spdk_file *file = args->file;
2375 :
2376 1 : if (args->op.rw.is_read) {
2377 0 : spdk_file_read_async(file, file->fs->sync_target.sync_io_channel, args->iovs[0].iov_base,
2378 0 : args->op.rw.offset, (uint64_t)args->iovs[0].iov_len,
2379 0 : __rw_from_file_done, req);
2380 0 : } else {
2381 2 : spdk_file_write_async(file, file->fs->sync_target.sync_io_channel, args->iovs[0].iov_base,
2382 1 : args->op.rw.offset, (uint64_t)args->iovs[0].iov_len,
2383 1 : __rw_from_file_done, req);
2384 : }
2385 1 : }
2386 :
2387 : struct rw_from_file_arg {
2388 : struct spdk_fs_channel *channel;
2389 : int rwerrno;
2390 : };
2391 :
2392 : static int
2393 1 : __send_rw_from_file(struct spdk_file *file, void *payload,
2394 : uint64_t offset, uint64_t length, bool is_read,
2395 : struct rw_from_file_arg *arg)
2396 : {
2397 1 : struct spdk_fs_request *req;
2398 1 : struct spdk_fs_cb_args *args;
2399 :
2400 1 : req = alloc_fs_request_with_iov(arg->channel, 1);
2401 1 : if (req == NULL) {
2402 0 : sem_post(&arg->channel->sem);
2403 0 : return -ENOMEM;
2404 : }
2405 :
2406 1 : args = &req->args;
2407 1 : args->file = file;
2408 1 : args->sem = &arg->channel->sem;
2409 1 : args->iovs[0].iov_base = payload;
2410 1 : args->iovs[0].iov_len = (size_t)length;
2411 1 : args->op.rw.offset = offset;
2412 1 : args->op.rw.is_read = is_read;
2413 1 : args->rwerrno = &arg->rwerrno;
2414 1 : file->fs->send_request(__rw_from_file, req);
2415 1 : return 0;
2416 1 : }
2417 :
2418 : int
2419 11 : spdk_file_write(struct spdk_file *file, struct spdk_fs_thread_ctx *ctx,
2420 : void *payload, uint64_t offset, uint64_t length)
2421 : {
2422 11 : struct spdk_fs_channel *channel = (struct spdk_fs_channel *)ctx;
2423 11 : struct spdk_fs_request *flush_req;
2424 11 : uint64_t rem_length, copy, blob_size, cluster_sz;
2425 11 : uint32_t cache_buffers_filled = 0;
2426 11 : uint8_t *cur_payload;
2427 11 : struct cache_buffer *last;
2428 :
2429 11 : BLOBFS_TRACE_RW(file, "offset=%jx length=%jx\n", offset, length);
2430 :
2431 11 : if (length == 0) {
2432 1 : return 0;
2433 : }
2434 :
2435 10 : if (offset != file->append_pos) {
2436 0 : BLOBFS_TRACE(file, " error offset=%jx append_pos=%jx\n", offset, file->append_pos);
2437 0 : return -EINVAL;
2438 : }
2439 :
2440 10 : pthread_spin_lock(&file->lock);
2441 10 : file->open_for_writing = true;
2442 :
2443 10 : do {
2444 10 : if ((file->last == NULL) && (file->append_pos % CACHE_BUFFER_SIZE == 0)) {
2445 5 : cache_append_buffer(file);
2446 5 : }
2447 :
2448 10 : if (file->last == NULL) {
2449 1 : struct rw_from_file_arg arg = {};
2450 1 : int rc;
2451 :
2452 1 : arg.channel = channel;
2453 1 : arg.rwerrno = 0;
2454 1 : file->append_pos += length;
2455 1 : pthread_spin_unlock(&file->lock);
2456 1 : rc = __send_rw_from_file(file, payload, offset, length, false, &arg);
2457 1 : if (rc != 0) {
2458 0 : return rc;
2459 : }
2460 1 : sem_wait(&channel->sem);
2461 1 : return arg.rwerrno;
2462 1 : }
2463 :
2464 9 : blob_size = __file_get_blob_size(file);
2465 :
2466 9 : if ((offset + length) > blob_size) {
2467 0 : struct spdk_fs_cb_args extend_args = {};
2468 :
2469 0 : cluster_sz = file->fs->bs_opts.cluster_sz;
2470 0 : extend_args.sem = &channel->sem;
2471 0 : extend_args.op.resize.num_clusters = __bytes_to_clusters((offset + length), cluster_sz);
2472 0 : extend_args.file = file;
2473 0 : BLOBFS_TRACE(file, "start resize to %u clusters\n", extend_args.op.resize.num_clusters);
2474 0 : pthread_spin_unlock(&file->lock);
2475 0 : file->fs->send_request(__file_extend_blob, &extend_args);
2476 0 : sem_wait(&channel->sem);
2477 0 : if (extend_args.rc) {
2478 0 : return extend_args.rc;
2479 : }
2480 0 : pthread_spin_lock(&file->lock);
2481 0 : }
2482 9 : } while (file->last == NULL);
2483 :
2484 9 : flush_req = alloc_fs_request(channel);
2485 9 : if (flush_req == NULL) {
2486 0 : pthread_spin_unlock(&file->lock);
2487 0 : return -ENOMEM;
2488 : }
2489 :
2490 9 : last = file->last;
2491 9 : rem_length = length;
2492 9 : cur_payload = payload;
2493 22 : while (rem_length > 0) {
2494 13 : copy = last->buf_size - last->bytes_filled;
2495 13 : if (copy > rem_length) {
2496 8 : copy = rem_length;
2497 8 : }
2498 13 : BLOBFS_TRACE_RW(file, " fill offset=%jx length=%jx\n", file->append_pos, copy);
2499 13 : memcpy(&last->buf[last->bytes_filled], cur_payload, copy);
2500 13 : file->append_pos += copy;
2501 13 : if (file->length < file->append_pos) {
2502 12 : file->length = file->append_pos;
2503 12 : }
2504 13 : cur_payload += copy;
2505 13 : last->bytes_filled += copy;
2506 13 : rem_length -= copy;
2507 13 : if (last->bytes_filled == last->buf_size) {
2508 5 : cache_buffers_filled++;
2509 5 : last = cache_append_buffer(file);
2510 5 : if (last == NULL) {
2511 0 : BLOBFS_TRACE(file, "nomem\n");
2512 0 : free_fs_request(flush_req);
2513 0 : pthread_spin_unlock(&file->lock);
2514 0 : return -ENOMEM;
2515 : }
2516 5 : }
2517 : }
2518 :
2519 9 : pthread_spin_unlock(&file->lock);
2520 :
2521 9 : if (cache_buffers_filled == 0) {
2522 6 : free_fs_request(flush_req);
2523 6 : return 0;
2524 : }
2525 :
2526 3 : flush_req->args.file = file;
2527 3 : file->fs->send_request(__file_flush, flush_req);
2528 3 : return 0;
2529 11 : }
2530 :
2531 : static void
2532 0 : __readahead_done(void *ctx, int bserrno)
2533 : {
2534 0 : struct spdk_fs_request *req = ctx;
2535 0 : struct spdk_fs_cb_args *args = &req->args;
2536 0 : struct cache_buffer *cache_buffer = args->op.readahead.cache_buffer;
2537 0 : struct spdk_file *file = args->file;
2538 :
2539 0 : BLOBFS_TRACE(file, "offset=%jx\n", cache_buffer->offset);
2540 :
2541 0 : pthread_spin_lock(&file->lock);
2542 0 : cache_buffer->bytes_filled = args->op.readahead.length;
2543 0 : cache_buffer->bytes_flushed = args->op.readahead.length;
2544 0 : cache_buffer->in_progress = false;
2545 0 : pthread_spin_unlock(&file->lock);
2546 :
2547 0 : free_fs_request(req);
2548 0 : }
2549 :
2550 : static void
2551 0 : __readahead(void *ctx)
2552 : {
2553 0 : struct spdk_fs_request *req = ctx;
2554 0 : struct spdk_fs_cb_args *args = &req->args;
2555 0 : struct spdk_file *file = args->file;
2556 0 : uint64_t offset, length, start_lba, num_lba;
2557 0 : uint32_t lba_size;
2558 :
2559 0 : offset = args->op.readahead.offset;
2560 0 : length = args->op.readahead.length;
2561 0 : assert(length > 0);
2562 :
2563 0 : __get_page_parameters(file, offset, length, &start_lba, &lba_size, &num_lba);
2564 :
2565 0 : BLOBFS_TRACE(file, "offset=%jx length=%jx page start=%jx num=%jx\n",
2566 : offset, length, start_lba, num_lba);
2567 0 : spdk_blob_io_read(file->blob, file->fs->sync_target.sync_fs_channel->bs_channel,
2568 0 : args->op.readahead.cache_buffer->buf,
2569 0 : start_lba, num_lba, __readahead_done, req);
2570 0 : }
2571 :
2572 : static uint64_t
2573 0 : __next_cache_buffer_offset(uint64_t offset)
2574 : {
2575 0 : return (offset + CACHE_BUFFER_SIZE) & ~(CACHE_TREE_LEVEL_MASK(0));
2576 : }
2577 :
2578 : static void
2579 0 : check_readahead(struct spdk_file *file, uint64_t offset,
2580 : struct spdk_fs_channel *channel)
2581 : {
2582 0 : struct spdk_fs_request *req;
2583 0 : struct spdk_fs_cb_args *args;
2584 :
2585 0 : offset = __next_cache_buffer_offset(offset);
2586 0 : if (tree_find_buffer(file->tree, offset) != NULL || file->length <= offset) {
2587 0 : return;
2588 : }
2589 :
2590 0 : req = alloc_fs_request(channel);
2591 0 : if (req == NULL) {
2592 0 : return;
2593 : }
2594 0 : args = &req->args;
2595 :
2596 0 : BLOBFS_TRACE(file, "offset=%jx\n", offset);
2597 :
2598 0 : args->file = file;
2599 0 : args->op.readahead.offset = offset;
2600 0 : args->op.readahead.cache_buffer = cache_insert_buffer(file, offset);
2601 0 : if (!args->op.readahead.cache_buffer) {
2602 0 : BLOBFS_TRACE(file, "Cannot allocate buf for offset=%jx\n", offset);
2603 0 : free_fs_request(req);
2604 0 : return;
2605 : }
2606 :
2607 0 : args->op.readahead.cache_buffer->in_progress = true;
2608 0 : if (file->length < (offset + CACHE_BUFFER_SIZE)) {
2609 0 : args->op.readahead.length = file->length & (CACHE_BUFFER_SIZE - 1);
2610 0 : } else {
2611 0 : args->op.readahead.length = CACHE_BUFFER_SIZE;
2612 : }
2613 0 : file->fs->send_request(__readahead, req);
2614 0 : }
2615 :
2616 : int64_t
2617 1 : spdk_file_read(struct spdk_file *file, struct spdk_fs_thread_ctx *ctx,
2618 : void *payload, uint64_t offset, uint64_t length)
2619 : {
2620 1 : struct spdk_fs_channel *channel = (struct spdk_fs_channel *)ctx;
2621 1 : uint64_t final_offset, final_length;
2622 1 : uint32_t sub_reads = 0;
2623 1 : struct cache_buffer *buf;
2624 1 : uint64_t read_len;
2625 1 : struct rw_from_file_arg arg = {};
2626 :
2627 1 : pthread_spin_lock(&file->lock);
2628 :
2629 1 : BLOBFS_TRACE_RW(file, "offset=%ju length=%ju\n", offset, length);
2630 :
2631 1 : file->open_for_writing = false;
2632 :
2633 1 : if (length == 0 || offset >= file->append_pos) {
2634 0 : pthread_spin_unlock(&file->lock);
2635 0 : return 0;
2636 : }
2637 :
2638 1 : if (offset + length > file->append_pos) {
2639 0 : length = file->append_pos - offset;
2640 0 : }
2641 :
2642 1 : if (offset != file->next_seq_offset) {
2643 0 : file->seq_byte_count = 0;
2644 0 : }
2645 1 : file->seq_byte_count += length;
2646 1 : file->next_seq_offset = offset + length;
2647 1 : if (file->seq_byte_count >= CACHE_READAHEAD_THRESHOLD) {
2648 0 : check_readahead(file, offset, channel);
2649 0 : check_readahead(file, offset + CACHE_BUFFER_SIZE, channel);
2650 0 : }
2651 :
2652 1 : arg.channel = channel;
2653 1 : arg.rwerrno = 0;
2654 1 : final_length = 0;
2655 1 : final_offset = offset + length;
2656 2 : while (offset < final_offset) {
2657 1 : int ret = 0;
2658 1 : length = NEXT_CACHE_BUFFER_OFFSET(offset) - offset;
2659 1 : if (length > (final_offset - offset)) {
2660 1 : length = final_offset - offset;
2661 1 : }
2662 :
2663 1 : buf = tree_find_filled_buffer(file->tree, offset);
2664 1 : if (buf == NULL) {
2665 0 : pthread_spin_unlock(&file->lock);
2666 0 : ret = __send_rw_from_file(file, payload, offset, length, true, &arg);
2667 0 : pthread_spin_lock(&file->lock);
2668 0 : if (ret == 0) {
2669 0 : sub_reads++;
2670 0 : }
2671 0 : } else {
2672 1 : read_len = length;
2673 1 : if ((offset + length) > (buf->offset + buf->bytes_filled)) {
2674 0 : read_len = buf->offset + buf->bytes_filled - offset;
2675 0 : }
2676 1 : BLOBFS_TRACE(file, "read %p offset=%ju length=%ju\n", payload, offset, read_len);
2677 1 : memcpy(payload, &buf->buf[offset - buf->offset], read_len);
2678 1 : if ((offset + read_len) % CACHE_BUFFER_SIZE == 0) {
2679 0 : tree_remove_buffer(file->tree, buf);
2680 0 : if (file->tree->present_mask == 0) {
2681 0 : spdk_thread_send_msg(g_cache_pool_thread, _remove_file_from_cache_pool, file);
2682 0 : }
2683 0 : }
2684 : }
2685 :
2686 1 : if (ret == 0) {
2687 1 : final_length += length;
2688 1 : } else {
2689 0 : arg.rwerrno = ret;
2690 0 : break;
2691 : }
2692 1 : payload += length;
2693 1 : offset += length;
2694 1 : }
2695 1 : pthread_spin_unlock(&file->lock);
2696 1 : while (sub_reads > 0) {
2697 0 : sem_wait(&channel->sem);
2698 0 : sub_reads--;
2699 : }
2700 1 : if (arg.rwerrno == 0) {
2701 1 : return final_length;
2702 : } else {
2703 0 : return arg.rwerrno;
2704 : }
2705 1 : }
2706 :
2707 : static void
2708 18 : _file_sync(struct spdk_file *file, struct spdk_fs_channel *channel,
2709 : spdk_file_op_complete cb_fn, void *cb_arg)
2710 : {
2711 18 : struct spdk_fs_request *sync_req;
2712 18 : struct spdk_fs_request *flush_req;
2713 18 : struct spdk_fs_cb_args *sync_args;
2714 18 : struct spdk_fs_cb_args *flush_args;
2715 :
2716 18 : BLOBFS_TRACE(file, "offset=%jx\n", file->append_pos);
2717 :
2718 18 : pthread_spin_lock(&file->lock);
2719 18 : if (file->append_pos <= file->length_xattr) {
2720 11 : BLOBFS_TRACE(file, "done - file already synced\n");
2721 11 : pthread_spin_unlock(&file->lock);
2722 11 : cb_fn(cb_arg, 0);
2723 11 : return;
2724 : }
2725 :
2726 7 : sync_req = alloc_fs_request(channel);
2727 7 : if (!sync_req) {
2728 0 : SPDK_ERRLOG("Cannot allocate sync req for file=%s\n", file->name);
2729 0 : pthread_spin_unlock(&file->lock);
2730 0 : cb_fn(cb_arg, -ENOMEM);
2731 0 : return;
2732 : }
2733 7 : sync_args = &sync_req->args;
2734 :
2735 7 : flush_req = alloc_fs_request(channel);
2736 7 : if (!flush_req) {
2737 0 : SPDK_ERRLOG("Cannot allocate flush req for file=%s\n", file->name);
2738 0 : free_fs_request(sync_req);
2739 0 : pthread_spin_unlock(&file->lock);
2740 0 : cb_fn(cb_arg, -ENOMEM);
2741 0 : return;
2742 : }
2743 7 : flush_args = &flush_req->args;
2744 :
2745 7 : sync_args->file = file;
2746 7 : sync_args->fn.file_op = cb_fn;
2747 7 : sync_args->arg = cb_arg;
2748 7 : sync_args->op.sync.offset = file->append_pos;
2749 7 : sync_args->op.sync.xattr_in_progress = false;
2750 7 : TAILQ_INSERT_TAIL(&file->sync_requests, sync_req, args.op.sync.tailq);
2751 7 : pthread_spin_unlock(&file->lock);
2752 :
2753 7 : flush_args->file = file;
2754 7 : channel->send_request(__file_flush, flush_req);
2755 18 : }
2756 :
2757 : int
2758 12 : spdk_file_sync(struct spdk_file *file, struct spdk_fs_thread_ctx *ctx)
2759 : {
2760 12 : struct spdk_fs_channel *channel = (struct spdk_fs_channel *)ctx;
2761 12 : struct spdk_fs_cb_args args = {};
2762 :
2763 12 : args.sem = &channel->sem;
2764 12 : _file_sync(file, channel, __wake_caller, &args);
2765 12 : sem_wait(&channel->sem);
2766 :
2767 12 : return args.rc;
2768 12 : }
2769 :
2770 : void
2771 6 : spdk_file_sync_async(struct spdk_file *file, struct spdk_io_channel *_channel,
2772 : spdk_file_op_complete cb_fn, void *cb_arg)
2773 : {
2774 6 : struct spdk_fs_channel *channel = spdk_io_channel_get_ctx(_channel);
2775 :
2776 6 : _file_sync(file, channel, cb_fn, cb_arg);
2777 6 : }
2778 :
2779 : void
2780 0 : spdk_file_set_priority(struct spdk_file *file, uint32_t priority)
2781 : {
2782 0 : BLOBFS_TRACE(file, "priority=%u\n", priority);
2783 0 : file->priority = priority;
2784 :
2785 0 : }
2786 :
2787 : /*
2788 : * Close routines
2789 : */
2790 :
2791 : static void
2792 17 : __file_close_async_done(void *ctx, int bserrno)
2793 : {
2794 17 : struct spdk_fs_request *req = ctx;
2795 17 : struct spdk_fs_cb_args *args = &req->args;
2796 17 : struct spdk_file *file = args->file;
2797 :
2798 17 : spdk_trace_record(TRACE_BLOBFS_CLOSE, 0, 0, 0, file->name);
2799 :
2800 17 : if (file->is_deleted) {
2801 2 : spdk_fs_delete_file_async(file->fs, file->name, blob_delete_cb, ctx);
2802 2 : return;
2803 : }
2804 :
2805 15 : args->fn.file_op(args->arg, bserrno);
2806 15 : free_fs_request(req);
2807 17 : }
2808 :
2809 : static void
2810 17 : __file_close_async(struct spdk_file *file, struct spdk_fs_request *req)
2811 : {
2812 17 : struct spdk_blob *blob;
2813 :
2814 17 : pthread_spin_lock(&file->lock);
2815 17 : if (file->ref_count == 0) {
2816 0 : pthread_spin_unlock(&file->lock);
2817 0 : __file_close_async_done(req, -EBADF);
2818 0 : return;
2819 : }
2820 :
2821 17 : file->ref_count--;
2822 17 : if (file->ref_count > 0) {
2823 0 : pthread_spin_unlock(&file->lock);
2824 0 : req->args.fn.file_op(req->args.arg, 0);
2825 0 : free_fs_request(req);
2826 0 : return;
2827 : }
2828 :
2829 17 : pthread_spin_unlock(&file->lock);
2830 :
2831 17 : blob = file->blob;
2832 17 : file->blob = NULL;
2833 17 : spdk_blob_close(blob, __file_close_async_done, req);
2834 17 : }
2835 :
2836 : static void
2837 6 : __file_close_async__sync_done(void *arg, int fserrno)
2838 : {
2839 6 : struct spdk_fs_request *req = arg;
2840 6 : struct spdk_fs_cb_args *args = &req->args;
2841 :
2842 6 : __file_close_async(args->file, req);
2843 6 : }
2844 :
2845 : void
2846 6 : spdk_file_close_async(struct spdk_file *file, spdk_file_op_complete cb_fn, void *cb_arg)
2847 : {
2848 6 : struct spdk_fs_request *req;
2849 6 : struct spdk_fs_cb_args *args;
2850 :
2851 6 : req = alloc_fs_request(file->fs->md_target.md_fs_channel);
2852 6 : if (req == NULL) {
2853 0 : SPDK_ERRLOG("Cannot allocate close async req for file=%s\n", file->name);
2854 0 : cb_fn(cb_arg, -ENOMEM);
2855 0 : return;
2856 : }
2857 :
2858 6 : args = &req->args;
2859 6 : args->file = file;
2860 6 : args->fn.file_op = cb_fn;
2861 6 : args->arg = cb_arg;
2862 :
2863 6 : spdk_file_sync_async(file, file->fs->md_target.md_io_channel, __file_close_async__sync_done, req);
2864 6 : }
2865 :
2866 : static void
2867 11 : __file_close(void *arg)
2868 : {
2869 11 : struct spdk_fs_request *req = arg;
2870 11 : struct spdk_fs_cb_args *args = &req->args;
2871 11 : struct spdk_file *file = args->file;
2872 :
2873 11 : __file_close_async(file, req);
2874 11 : }
2875 :
2876 : int
2877 11 : spdk_file_close(struct spdk_file *file, struct spdk_fs_thread_ctx *ctx)
2878 : {
2879 11 : struct spdk_fs_channel *channel = (struct spdk_fs_channel *)ctx;
2880 11 : struct spdk_fs_request *req;
2881 11 : struct spdk_fs_cb_args *args;
2882 :
2883 11 : req = alloc_fs_request(channel);
2884 11 : if (req == NULL) {
2885 0 : SPDK_ERRLOG("Cannot allocate close req for file=%s\n", file->name);
2886 0 : return -ENOMEM;
2887 : }
2888 :
2889 11 : args = &req->args;
2890 :
2891 11 : spdk_file_sync(file, ctx);
2892 11 : BLOBFS_TRACE(file, "name=%s\n", file->name);
2893 11 : args->file = file;
2894 11 : args->sem = &channel->sem;
2895 11 : args->fn.file_op = __wake_caller;
2896 11 : args->arg = args;
2897 11 : channel->send_request(__file_close, req);
2898 11 : sem_wait(&channel->sem);
2899 :
2900 11 : return args->rc;
2901 11 : }
2902 :
2903 : int
2904 0 : spdk_file_get_id(struct spdk_file *file, void *id, size_t size)
2905 : {
2906 0 : if (size < sizeof(spdk_blob_id)) {
2907 0 : return -EINVAL;
2908 : }
2909 :
2910 0 : memcpy(id, &file->blobid, sizeof(spdk_blob_id));
2911 :
2912 0 : return sizeof(spdk_blob_id);
2913 0 : }
2914 :
2915 : static void
2916 5 : _file_free(void *ctx)
2917 : {
2918 5 : struct spdk_file *file = ctx;
2919 :
2920 5 : TAILQ_REMOVE(&g_caches, file, cache_tailq);
2921 :
2922 5 : free(file->name);
2923 5 : free(file->tree);
2924 5 : free(file);
2925 5 : }
2926 :
2927 : static void
2928 18 : file_free(struct spdk_file *file)
2929 : {
2930 18 : BLOBFS_TRACE(file, "free=%s\n", file->name);
2931 18 : pthread_spin_lock(&file->lock);
2932 18 : if (file->tree->present_mask == 0) {
2933 13 : pthread_spin_unlock(&file->lock);
2934 13 : free(file->name);
2935 13 : free(file->tree);
2936 13 : free(file);
2937 13 : return;
2938 : }
2939 :
2940 5 : tree_free_buffers(file->tree);
2941 5 : assert(file->tree->present_mask == 0);
2942 5 : spdk_thread_send_msg(g_cache_pool_thread, _file_free, file);
2943 5 : pthread_spin_unlock(&file->lock);
2944 18 : }
2945 :
2946 2 : SPDK_LOG_REGISTER_COMPONENT(blobfs)
2947 2 : SPDK_LOG_REGISTER_COMPONENT(blobfs_rw)
|