target/ppc: introduce PMUEventType and PMU overflow timers
[qemu.git] / block / export / fuse.c
blob823c126d230f99de1d07d4c9321c2cacdaec80a5
1 /*
2 * Present a block device as a raw image through FUSE
4 * Copyright (c) 2020 Max Reitz <mreitz@redhat.com>
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; under version 2 or later of the License.
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, see <http://www.gnu.org/licenses/>.
19 #define FUSE_USE_VERSION 31
21 #include "qemu/osdep.h"
22 #include "block/aio.h"
23 #include "block/block.h"
24 #include "block/export.h"
25 #include "block/fuse.h"
26 #include "block/qapi.h"
27 #include "qapi/error.h"
28 #include "qapi/qapi-commands-block.h"
29 #include "sysemu/block-backend.h"
31 #include <fuse.h>
32 #include <fuse_lowlevel.h>
34 #if defined(CONFIG_FALLOCATE_ZERO_RANGE)
35 #include <linux/falloc.h>
36 #endif
38 #ifdef __linux__
39 #include <linux/fs.h>
40 #endif
/*
 * Upper bound for a single bounce buffer, so that no overly long
 * allocation is made for one request.
 */
#define FUSE_MAX_BOUNCE_BYTES (MIN(BDRV_REQUEST_MAX_BYTES, 64 * 1024 * 1024))
46 typedef struct FuseExport {
47 BlockExport common;
49 struct fuse_session *fuse_session;
50 struct fuse_buf fuse_buf;
51 bool mounted, fd_handler_set_up;
53 char *mountpoint;
54 bool writable;
55 bool growable;
56 /* Whether allow_other was used as a mount option or not */
57 bool allow_other;
59 mode_t st_mode;
60 uid_t st_uid;
61 gid_t st_gid;
62 } FuseExport;
64 static GHashTable *exports;
65 static const struct fuse_lowlevel_ops fuse_ops;
67 static void fuse_export_shutdown(BlockExport *exp);
68 static void fuse_export_delete(BlockExport *exp);
70 static void init_exports_table(void);
72 static int setup_fuse_export(FuseExport *exp, const char *mountpoint,
73 bool allow_other, Error **errp);
74 static void read_from_fuse_export(void *opaque);
76 static bool is_regular_file(const char *path, Error **errp);
79 static int fuse_export_create(BlockExport *blk_exp,
80 BlockExportOptions *blk_exp_args,
81 Error **errp)
83 FuseExport *exp = container_of(blk_exp, FuseExport, common);
84 BlockExportOptionsFuse *args = &blk_exp_args->u.fuse;
85 int ret;
87 assert(blk_exp_args->type == BLOCK_EXPORT_TYPE_FUSE);
89 /* For growable exports, take the RESIZE permission */
90 if (args->growable) {
91 uint64_t blk_perm, blk_shared_perm;
93 blk_get_perm(exp->common.blk, &blk_perm, &blk_shared_perm);
95 ret = blk_set_perm(exp->common.blk, blk_perm | BLK_PERM_RESIZE,
96 blk_shared_perm, errp);
97 if (ret < 0) {
98 return ret;
102 init_exports_table();
105 * It is important to do this check before calling is_regular_file() --
106 * that function will do a stat(), which we would have to handle if we
107 * already exported something on @mountpoint. But we cannot, because
108 * we are currently caught up here.
109 * (Note that ideally we would want to resolve relative paths here,
110 * but bdrv_make_absolute_filename() might do the wrong thing for
111 * paths that contain colons, and realpath() would resolve symlinks,
112 * which we do not want: The mount point is not going to be the
113 * symlink's destination, but the link itself.)
114 * So this will not catch all potential clashes, but hopefully at
115 * least the most common one of specifying exactly the same path
116 * string twice.
118 if (g_hash_table_contains(exports, args->mountpoint)) {
119 error_setg(errp, "There already is a FUSE export on '%s'",
120 args->mountpoint);
121 ret = -EEXIST;
122 goto fail;
125 if (!is_regular_file(args->mountpoint, errp)) {
126 ret = -EINVAL;
127 goto fail;
130 exp->mountpoint = g_strdup(args->mountpoint);
131 exp->writable = blk_exp_args->writable;
132 exp->growable = args->growable;
134 /* set default */
135 if (!args->has_allow_other) {
136 args->allow_other = FUSE_EXPORT_ALLOW_OTHER_AUTO;
139 exp->st_mode = S_IFREG | S_IRUSR;
140 if (exp->writable) {
141 exp->st_mode |= S_IWUSR;
143 exp->st_uid = getuid();
144 exp->st_gid = getgid();
146 if (args->allow_other == FUSE_EXPORT_ALLOW_OTHER_AUTO) {
147 /* Ignore errors on our first attempt */
148 ret = setup_fuse_export(exp, args->mountpoint, true, NULL);
149 exp->allow_other = ret == 0;
150 if (ret < 0) {
151 ret = setup_fuse_export(exp, args->mountpoint, false, errp);
153 } else {
154 exp->allow_other = args->allow_other == FUSE_EXPORT_ALLOW_OTHER_ON;
155 ret = setup_fuse_export(exp, args->mountpoint, exp->allow_other, errp);
157 if (ret < 0) {
158 goto fail;
161 return 0;
163 fail:
164 fuse_export_delete(blk_exp);
165 return ret;
169 * Allocates the global @exports hash table.
171 static void init_exports_table(void)
173 if (exports) {
174 return;
177 exports = g_hash_table_new_full(g_str_hash, g_str_equal, g_free, NULL);
181 * Create exp->fuse_session and mount it.
183 static int setup_fuse_export(FuseExport *exp, const char *mountpoint,
184 bool allow_other, Error **errp)
186 const char *fuse_argv[4];
187 char *mount_opts;
188 struct fuse_args fuse_args;
189 int ret;
192 * max_read needs to match what fuse_init() sets.
193 * max_write need not be supplied.
195 mount_opts = g_strdup_printf("max_read=%zu,default_permissions%s",
196 FUSE_MAX_BOUNCE_BYTES,
197 allow_other ? ",allow_other" : "");
199 fuse_argv[0] = ""; /* Dummy program name */
200 fuse_argv[1] = "-o";
201 fuse_argv[2] = mount_opts;
202 fuse_argv[3] = NULL;
203 fuse_args = (struct fuse_args)FUSE_ARGS_INIT(3, (char **)fuse_argv);
205 exp->fuse_session = fuse_session_new(&fuse_args, &fuse_ops,
206 sizeof(fuse_ops), exp);
207 g_free(mount_opts);
208 if (!exp->fuse_session) {
209 error_setg(errp, "Failed to set up FUSE session");
210 ret = -EIO;
211 goto fail;
214 ret = fuse_session_mount(exp->fuse_session, mountpoint);
215 if (ret < 0) {
216 error_setg(errp, "Failed to mount FUSE session to export");
217 ret = -EIO;
218 goto fail;
220 exp->mounted = true;
222 g_hash_table_insert(exports, g_strdup(mountpoint), NULL);
224 aio_set_fd_handler(exp->common.ctx,
225 fuse_session_fd(exp->fuse_session), true,
226 read_from_fuse_export, NULL, NULL, exp);
227 exp->fd_handler_set_up = true;
229 return 0;
231 fail:
232 fuse_export_shutdown(&exp->common);
233 return ret;
237 * Callback to be invoked when the FUSE session FD can be read from.
238 * (This is basically the FUSE event loop.)
240 static void read_from_fuse_export(void *opaque)
242 FuseExport *exp = opaque;
243 int ret;
245 blk_exp_ref(&exp->common);
247 do {
248 ret = fuse_session_receive_buf(exp->fuse_session, &exp->fuse_buf);
249 } while (ret == -EINTR);
250 if (ret < 0) {
251 goto out;
254 fuse_session_process_buf(exp->fuse_session, &exp->fuse_buf);
256 out:
257 blk_exp_unref(&exp->common);
260 static void fuse_export_shutdown(BlockExport *blk_exp)
262 FuseExport *exp = container_of(blk_exp, FuseExport, common);
264 if (exp->fuse_session) {
265 fuse_session_exit(exp->fuse_session);
267 if (exp->fd_handler_set_up) {
268 aio_set_fd_handler(exp->common.ctx,
269 fuse_session_fd(exp->fuse_session), true,
270 NULL, NULL, NULL, NULL);
271 exp->fd_handler_set_up = false;
275 if (exp->mountpoint) {
277 * Safe to drop now, because we will not handle any requests
278 * for this export anymore anyway.
280 g_hash_table_remove(exports, exp->mountpoint);
284 static void fuse_export_delete(BlockExport *blk_exp)
286 FuseExport *exp = container_of(blk_exp, FuseExport, common);
288 if (exp->fuse_session) {
289 if (exp->mounted) {
290 fuse_session_unmount(exp->fuse_session);
293 fuse_session_destroy(exp->fuse_session);
296 free(exp->fuse_buf.mem);
297 g_free(exp->mountpoint);
301 * Check whether @path points to a regular file. If not, put an
302 * appropriate message into *errp.
304 static bool is_regular_file(const char *path, Error **errp)
306 struct stat statbuf;
307 int ret;
309 ret = stat(path, &statbuf);
310 if (ret < 0) {
311 error_setg_errno(errp, errno, "Failed to stat '%s'", path);
312 return false;
315 if (!S_ISREG(statbuf.st_mode)) {
316 error_setg(errp, "'%s' is not a regular file", path);
317 return false;
320 return true;
324 * A chance to set change some parameters supplied to FUSE_INIT.
326 static void fuse_init(void *userdata, struct fuse_conn_info *conn)
329 * MIN_NON_ZERO() would not be wrong here, but what we set here
330 * must equal what has been passed to fuse_session_new().
331 * Therefore, as long as max_read must be passed as a mount option
332 * (which libfuse claims will be changed at some point), we have
333 * to set max_read to a fixed value here.
335 conn->max_read = FUSE_MAX_BOUNCE_BYTES;
337 conn->max_write = MIN_NON_ZERO(BDRV_REQUEST_MAX_BYTES, conn->max_write);
341 * Let clients look up files. Always return ENOENT because we only
342 * care about the mountpoint itself.
344 static void fuse_lookup(fuse_req_t req, fuse_ino_t parent, const char *name)
346 fuse_reply_err(req, ENOENT);
350 * Let clients get file attributes (i.e., stat() the file).
352 static void fuse_getattr(fuse_req_t req, fuse_ino_t inode,
353 struct fuse_file_info *fi)
355 struct stat statbuf;
356 int64_t length, allocated_blocks;
357 time_t now = time(NULL);
358 FuseExport *exp = fuse_req_userdata(req);
360 length = blk_getlength(exp->common.blk);
361 if (length < 0) {
362 fuse_reply_err(req, -length);
363 return;
366 allocated_blocks = bdrv_get_allocated_file_size(blk_bs(exp->common.blk));
367 if (allocated_blocks <= 0) {
368 allocated_blocks = DIV_ROUND_UP(length, 512);
369 } else {
370 allocated_blocks = DIV_ROUND_UP(allocated_blocks, 512);
373 statbuf = (struct stat) {
374 .st_ino = inode,
375 .st_mode = exp->st_mode,
376 .st_nlink = 1,
377 .st_uid = exp->st_uid,
378 .st_gid = exp->st_gid,
379 .st_size = length,
380 .st_blksize = blk_bs(exp->common.blk)->bl.request_alignment,
381 .st_blocks = allocated_blocks,
382 .st_atime = now,
383 .st_mtime = now,
384 .st_ctime = now,
387 fuse_reply_attr(req, &statbuf, 1.);
390 static int fuse_do_truncate(const FuseExport *exp, int64_t size,
391 bool req_zero_write, PreallocMode prealloc)
393 uint64_t blk_perm, blk_shared_perm;
394 BdrvRequestFlags truncate_flags = 0;
395 int ret;
397 if (req_zero_write) {
398 truncate_flags |= BDRV_REQ_ZERO_WRITE;
401 /* Growable exports have a permanent RESIZE permission */
402 if (!exp->growable) {
403 blk_get_perm(exp->common.blk, &blk_perm, &blk_shared_perm);
405 ret = blk_set_perm(exp->common.blk, blk_perm | BLK_PERM_RESIZE,
406 blk_shared_perm, NULL);
407 if (ret < 0) {
408 return ret;
412 ret = blk_truncate(exp->common.blk, size, true, prealloc,
413 truncate_flags, NULL);
415 if (!exp->growable) {
416 /* Must succeed, because we are only giving up the RESIZE permission */
417 blk_set_perm(exp->common.blk, blk_perm, blk_shared_perm, &error_abort);
420 return ret;
424 * Let clients set file attributes. Only resizing and changing
425 * permissions (st_mode, st_uid, st_gid) is allowed.
426 * Changing permissions is only allowed as far as it will actually
427 * permit access: Read-only exports cannot be given +w, and exports
428 * without allow_other cannot be given a different UID or GID, and
429 * they cannot be given non-owner access.
431 static void fuse_setattr(fuse_req_t req, fuse_ino_t inode, struct stat *statbuf,
432 int to_set, struct fuse_file_info *fi)
434 FuseExport *exp = fuse_req_userdata(req);
435 int supported_attrs;
436 int ret;
438 supported_attrs = FUSE_SET_ATTR_SIZE | FUSE_SET_ATTR_MODE;
439 if (exp->allow_other) {
440 supported_attrs |= FUSE_SET_ATTR_UID | FUSE_SET_ATTR_GID;
443 if (to_set & ~supported_attrs) {
444 fuse_reply_err(req, ENOTSUP);
445 return;
448 /* Do some argument checks first before committing to anything */
449 if (to_set & FUSE_SET_ATTR_MODE) {
451 * Without allow_other, non-owners can never access the export, so do
452 * not allow setting permissions for them
454 if (!exp->allow_other &&
455 (statbuf->st_mode & (S_IRWXG | S_IRWXO)) != 0)
457 fuse_reply_err(req, EPERM);
458 return;
461 /* +w for read-only exports makes no sense, disallow it */
462 if (!exp->writable &&
463 (statbuf->st_mode & (S_IWUSR | S_IWGRP | S_IWOTH)) != 0)
465 fuse_reply_err(req, EROFS);
466 return;
470 if (to_set & FUSE_SET_ATTR_SIZE) {
471 if (!exp->writable) {
472 fuse_reply_err(req, EACCES);
473 return;
476 ret = fuse_do_truncate(exp, statbuf->st_size, true, PREALLOC_MODE_OFF);
477 if (ret < 0) {
478 fuse_reply_err(req, -ret);
479 return;
483 if (to_set & FUSE_SET_ATTR_MODE) {
484 /* Ignore FUSE-supplied file type, only change the mode */
485 exp->st_mode = (statbuf->st_mode & 07777) | S_IFREG;
488 if (to_set & FUSE_SET_ATTR_UID) {
489 exp->st_uid = statbuf->st_uid;
492 if (to_set & FUSE_SET_ATTR_GID) {
493 exp->st_gid = statbuf->st_gid;
496 fuse_getattr(req, inode, fi);
500 * Let clients open a file (i.e., the exported image).
502 static void fuse_open(fuse_req_t req, fuse_ino_t inode,
503 struct fuse_file_info *fi)
505 fuse_reply_open(req, fi);
509 * Handle client reads from the exported image.
511 static void fuse_read(fuse_req_t req, fuse_ino_t inode,
512 size_t size, off_t offset, struct fuse_file_info *fi)
514 FuseExport *exp = fuse_req_userdata(req);
515 int64_t length;
516 void *buf;
517 int ret;
519 /* Limited by max_read, should not happen */
520 if (size > FUSE_MAX_BOUNCE_BYTES) {
521 fuse_reply_err(req, EINVAL);
522 return;
526 * Clients will expect short reads at EOF, so we have to limit
527 * offset+size to the image length.
529 length = blk_getlength(exp->common.blk);
530 if (length < 0) {
531 fuse_reply_err(req, -length);
532 return;
535 if (offset + size > length) {
536 size = length - offset;
539 buf = qemu_try_blockalign(blk_bs(exp->common.blk), size);
540 if (!buf) {
541 fuse_reply_err(req, ENOMEM);
542 return;
545 ret = blk_pread(exp->common.blk, offset, buf, size);
546 if (ret >= 0) {
547 fuse_reply_buf(req, buf, size);
548 } else {
549 fuse_reply_err(req, -ret);
552 qemu_vfree(buf);
556 * Handle client writes to the exported image.
558 static void fuse_write(fuse_req_t req, fuse_ino_t inode, const char *buf,
559 size_t size, off_t offset, struct fuse_file_info *fi)
561 FuseExport *exp = fuse_req_userdata(req);
562 int64_t length;
563 int ret;
565 /* Limited by max_write, should not happen */
566 if (size > BDRV_REQUEST_MAX_BYTES) {
567 fuse_reply_err(req, EINVAL);
568 return;
571 if (!exp->writable) {
572 fuse_reply_err(req, EACCES);
573 return;
577 * Clients will expect short writes at EOF, so we have to limit
578 * offset+size to the image length.
580 length = blk_getlength(exp->common.blk);
581 if (length < 0) {
582 fuse_reply_err(req, -length);
583 return;
586 if (offset + size > length) {
587 if (exp->growable) {
588 ret = fuse_do_truncate(exp, offset + size, true, PREALLOC_MODE_OFF);
589 if (ret < 0) {
590 fuse_reply_err(req, -ret);
591 return;
593 } else {
594 size = length - offset;
598 ret = blk_pwrite(exp->common.blk, offset, buf, size, 0);
599 if (ret >= 0) {
600 fuse_reply_write(req, size);
601 } else {
602 fuse_reply_err(req, -ret);
607 * Let clients perform various fallocate() operations.
609 static void fuse_fallocate(fuse_req_t req, fuse_ino_t inode, int mode,
610 off_t offset, off_t length,
611 struct fuse_file_info *fi)
613 FuseExport *exp = fuse_req_userdata(req);
614 int64_t blk_len;
615 int ret;
617 if (!exp->writable) {
618 fuse_reply_err(req, EACCES);
619 return;
622 blk_len = blk_getlength(exp->common.blk);
623 if (blk_len < 0) {
624 fuse_reply_err(req, -blk_len);
625 return;
628 if (mode & FALLOC_FL_KEEP_SIZE) {
629 length = MIN(length, blk_len - offset);
632 if (mode & FALLOC_FL_PUNCH_HOLE) {
633 if (!(mode & FALLOC_FL_KEEP_SIZE)) {
634 fuse_reply_err(req, EINVAL);
635 return;
638 do {
639 int size = MIN(length, BDRV_REQUEST_MAX_BYTES);
641 ret = blk_pdiscard(exp->common.blk, offset, size);
642 offset += size;
643 length -= size;
644 } while (ret == 0 && length > 0);
646 #ifdef CONFIG_FALLOCATE_ZERO_RANGE
647 else if (mode & FALLOC_FL_ZERO_RANGE) {
648 if (!(mode & FALLOC_FL_KEEP_SIZE) && offset + length > blk_len) {
649 /* No need for zeroes, we are going to write them ourselves */
650 ret = fuse_do_truncate(exp, offset + length, false,
651 PREALLOC_MODE_OFF);
652 if (ret < 0) {
653 fuse_reply_err(req, -ret);
654 return;
658 do {
659 int size = MIN(length, BDRV_REQUEST_MAX_BYTES);
661 ret = blk_pwrite_zeroes(exp->common.blk,
662 offset, size, 0);
663 offset += size;
664 length -= size;
665 } while (ret == 0 && length > 0);
667 #endif /* CONFIG_FALLOCATE_ZERO_RANGE */
668 else if (!mode) {
669 /* We can only fallocate at the EOF with a truncate */
670 if (offset < blk_len) {
671 fuse_reply_err(req, EOPNOTSUPP);
672 return;
675 if (offset > blk_len) {
676 /* No preallocation needed here */
677 ret = fuse_do_truncate(exp, offset, true, PREALLOC_MODE_OFF);
678 if (ret < 0) {
679 fuse_reply_err(req, -ret);
680 return;
684 ret = fuse_do_truncate(exp, offset + length, true,
685 PREALLOC_MODE_FALLOC);
686 } else {
687 ret = -EOPNOTSUPP;
690 fuse_reply_err(req, ret < 0 ? -ret : 0);
694 * Let clients fsync the exported image.
696 static void fuse_fsync(fuse_req_t req, fuse_ino_t inode, int datasync,
697 struct fuse_file_info *fi)
699 FuseExport *exp = fuse_req_userdata(req);
700 int ret;
702 ret = blk_flush(exp->common.blk);
703 fuse_reply_err(req, ret < 0 ? -ret : 0);
707 * Called before an FD to the exported image is closed. (libfuse
708 * notes this to be a way to return last-minute errors.)
710 static void fuse_flush(fuse_req_t req, fuse_ino_t inode,
711 struct fuse_file_info *fi)
713 fuse_fsync(req, inode, 1, fi);
#ifdef CONFIG_FUSE_LSEEK
/**
 * Let clients inquire allocation status.
 *
 * Only SEEK_HOLE and SEEK_DATA are handled; any other @whence yields
 * EINVAL.  Starting at @offset, the block status is queried extent by
 * extent until an extent of the requested kind or the end of the
 * image is found.
 */
static void fuse_lseek(fuse_req_t req, fuse_ino_t inode, off_t offset,
                       int whence, struct fuse_file_info *fi)
{
    FuseExport *exp = fuse_req_userdata(req);

    if (whence != SEEK_HOLE && whence != SEEK_DATA) {
        fuse_reply_err(req, EINVAL);
        return;
    }

    for (;;) {
        int64_t pnum;
        bool is_data;
        int status;

        status = bdrv_block_status_above(blk_bs(exp->common.blk), NULL,
                                         offset, INT64_MAX, &pnum, NULL, NULL);
        if (status < 0) {
            fuse_reply_err(req, -status);
            return;
        }

        if (!pnum && (status & BDRV_BLOCK_EOF)) {
            int64_t blk_len;

            /*
             * If blk_getlength() rounds (e.g. by sectors), then the
             * export length will be rounded, too.  However,
             * bdrv_block_status_above() may return EOF at unaligned
             * offsets.  We must not let this become visible and thus
             * always simulate a hole between @offset (the real EOF)
             * and @blk_len (the client-visible EOF).
             */
            blk_len = blk_getlength(exp->common.blk);
            if (blk_len < 0) {
                fuse_reply_err(req, -blk_len);
                return;
            }

            if (offset > blk_len || whence == SEEK_DATA) {
                fuse_reply_err(req, ENXIO);
            } else {
                fuse_reply_lseek(req, offset);
            }
            return;
        }

        /* Done as soon as the current extent is of the requested kind */
        is_data = status & BDRV_BLOCK_DATA;
        if (is_data ? whence == SEEK_DATA : whence == SEEK_HOLE) {
            fuse_reply_lseek(req, offset);
            return;
        }

        /* Safety check against infinite loops */
        if (!pnum) {
            fuse_reply_err(req, ENXIO);
            return;
        }

        offset += pnum;
    }
}
#endif
790 static const struct fuse_lowlevel_ops fuse_ops = {
791 .init = fuse_init,
792 .lookup = fuse_lookup,
793 .getattr = fuse_getattr,
794 .setattr = fuse_setattr,
795 .open = fuse_open,
796 .read = fuse_read,
797 .write = fuse_write,
798 .fallocate = fuse_fallocate,
799 .flush = fuse_flush,
800 .fsync = fuse_fsync,
801 #ifdef CONFIG_FUSE_LSEEK
802 .lseek = fuse_lseek,
803 #endif
806 const BlockExportDriver blk_exp_fuse = {
807 .type = BLOCK_EXPORT_TYPE_FUSE,
808 .instance_size = sizeof(FuseExport),
809 .create = fuse_export_create,
810 .delete = fuse_export_delete,
811 .request_shutdown = fuse_export_shutdown,