2 * Copyright (C) 2013-2020 Red Hat Inc.
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions are
8 * * Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
11 * * Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution.
15 * * Neither the name of Red Hat nor the names of its contributors may be
16 * used to endorse or promote products derived from this software without
17 * specific prior written permission.
19 * THIS SOFTWARE IS PROVIDED BY RED HAT AND CONTRIBUTORS ''AS IS'' AND
20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
21 * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
22 * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL RED HAT OR
23 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
26 * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
27 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
28 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
29 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
42 #include <sys/types.h>
44 #include <sys/ioctl.h>
49 #if defined (__linux__) && !defined (FALLOC_FL_PUNCH_HOLE)
50 #include <linux/falloc.h> /* For FALLOC_FL_*, glibc < 2.18 */
53 #if defined (__linux__)
54 #include <linux/fs.h> /* For BLKZEROOUT */
57 #define NBDKIT_API_VERSION 2
59 #include <nbdkit-plugin.h>
62 #include "isaligned.h"
64 #ifndef HAVE_FDATASYNC
65 #define fdatasync fsync
68 static char *filename
= NULL
;
70 /* Any callbacks using lseek must be protected by this lock. */
71 static pthread_mutex_t lseek_lock
= PTHREAD_MUTEX_INITIALIZER
;
73 /* to enable: -D file.zero=1 */
79 return err
== ENOTSUP
|| err
== EOPNOTSUPP
;
88 /* Called for each key=value passed on the command line. This plugin
89 * only accepts file=<filename>, which is required.
92 file_config (const char *key
, const char *value
)
94 if (strcmp (key
, "file") == 0) {
95 /* See FILENAMES AND PATHS in nbdkit-plugin(3). */
97 filename
= nbdkit_realpath (value
);
101 else if (strcmp (key
, "rdelay") == 0 ||
102 strcmp (key
, "wdelay") == 0) {
103 nbdkit_error ("add --filter=delay on the command line");
107 nbdkit_error ("unknown parameter '%s'", key
);
114 /* Check the user did pass a file=<FILENAME> parameter. */
116 file_config_complete (void)
118 if (filename
== NULL
) {
119 nbdkit_error ("you must supply the file=<FILENAME> parameter "
120 "after the plugin name on the command line");
127 #define file_config_help \
128 "file=<FILENAME> (required) The filename to serve." \
130 /* Print some extra information about how the plugin was compiled. */
132 file_dump_plugin (void)
135 printf ("file_blksszget=yes\n");
138 printf ("file_blkzeroout=yes\n");
140 #ifdef FALLOC_FL_PUNCH_HOLE
141 printf ("file_falloc_fl_punch_hole=yes\n");
143 #ifdef FALLOC_FL_ZERO_RANGE
144 printf ("file_falloc_fl_zero_range=yes\n");
148 /* The per-connection handle. */
151 bool is_block_device
;
162 /* Create the per-connection handle. */
164 file_open (int readonly
)
170 h
= malloc (sizeof *h
);
172 nbdkit_error ("malloc: %m");
176 flags
= O_CLOEXEC
|O_NOCTTY
;
182 h
->fd
= open (filename
, flags
);
184 nbdkit_error ("open: %s: %m", filename
);
189 if (fstat (h
->fd
, &statbuf
) == -1) {
190 nbdkit_error ("fstat: %s: %m", filename
);
195 h
->is_block_device
= S_ISBLK (statbuf
.st_mode
);
196 h
->sector_size
= 4096; /* Start with safe guess */
199 if (h
->is_block_device
) {
200 if (ioctl (h
->fd
, BLKSSZGET
, &h
->sector_size
))
201 nbdkit_debug ("cannot get sector size: %s: %m", filename
);
205 #ifdef FALLOC_FL_PUNCH_HOLE
206 h
->can_punch_hole
= true;
208 h
->can_punch_hole
= false;
211 #ifdef FALLOC_FL_ZERO_RANGE
212 h
->can_zero_range
= true;
214 h
->can_zero_range
= false;
217 h
->can_fallocate
= true;
218 h
->can_zeroout
= h
->is_block_device
;
220 h
->can_extents
= false;
221 h
->init_sparse
= false;
222 h
->init_zero
= false;
224 if (!h
->is_block_device
) {
227 /* A simple test to see whether SEEK_DATA/SEEK_HOLE are likely to work on
228 * the current filesystem, and to see if the image is sparse or zero.
230 ACQUIRE_LOCK_FOR_CURRENT_SCOPE (&lseek_lock
);
231 r
= lseek (h
->fd
, 0, SEEK_DATA
);
233 if (errno
== ENXIO
) {
234 nbdkit_debug ("extents enabled, entire image is hole");
235 h
->can_extents
= true;
236 h
->init_sparse
= true;
239 nbdkit_debug ("extents disabled: lseek(SEEK_DATA): %m");
243 h
->can_extents
= true;
245 nbdkit_debug ("extents enabled, image includes hole before data");
246 h
->init_sparse
= true;
249 r
= lseek (h
->fd
, 0, SEEK_HOLE
);
251 nbdkit_debug ("extents disabled: lseek(SEEK_HOLE): %m");
252 h
->can_extents
= false;
254 else if (r
== statbuf
.st_size
) {
255 nbdkit_debug ("extents enabled, image currently all data");
258 nbdkit_debug ("extents enabled, image includes data before hole");
259 h
->init_sparse
= true;
269 /* Free up the per-connection handle. */
271 file_close (void *handle
)
273 struct handle
*h
= handle
;
279 #define THREAD_MODEL NBDKIT_THREAD_MODEL_PARALLEL
/* For block devices, stat->st_size is not the true size, so find the
 * size by seeking to the end of the device instead.  This moves the
 * file offset, so the caller grabs the lseek_lock before calling.
 *
 * Returns the offset reported by lseek, or -1 after reporting the
 * failure through nbdkit_error.
 */
static int64_t
block_device_size (int fd)
{
  const off_t end = lseek (fd, 0, SEEK_END);

  if (end == -1) {
    nbdkit_error ("lseek (to find device size): %m");
    return -1;
  }

  return end;
}
298 /* Get the file size. */
300 file_get_size (void *handle
)
302 struct handle
*h
= handle
;
304 if (h
->is_block_device
) {
305 ACQUIRE_LOCK_FOR_CURRENT_SCOPE (&lseek_lock
);
306 return block_device_size (h
->fd
);
311 if (fstat (h
->fd
, &statbuf
) == -1) {
312 nbdkit_error ("fstat: %m");
316 return statbuf
.st_size
;
320 /* Allow multiple parallel connections from a single client. */
322 file_can_multi_conn (void *handle
)
328 file_can_trim (void *handle
)
330 /* Trim is advisory, but we prefer to advertise it only when we can
331 * actually (attempt to) punch holes. Since not all filesystems
332 * support all fallocate modes, it would be nice if we had a way
333 * from fpathconf() to definitively learn what will work on a given
334 * fd for a more precise answer; oh well. */
335 #ifdef FALLOC_FL_PUNCH_HOLE
343 file_can_fua (void *handle
)
345 return NBDKIT_FUA_NATIVE
;
349 file_can_cache (void *handle
)
351 /* Prefer posix_fadvise(), but letting nbdkit call .pread on our
352 * behalf also tends to work well for the local file system
355 #if HAVE_POSIX_FADVISE
356 return NBDKIT_FUA_NATIVE
;
358 return NBDKIT_FUA_EMULATE
;
362 /* Flush the file to disk. */
364 file_flush (void *handle
, uint32_t flags
)
366 struct handle
*h
= handle
;
368 if (fdatasync (h
->fd
) == -1) {
369 nbdkit_error ("fdatasync: %m");
376 /* Read data from the file. */
378 file_pread (void *handle
, void *buf
, uint32_t count
, uint64_t offset
,
381 struct handle
*h
= handle
;
384 ssize_t r
= pread (h
->fd
, buf
, count
, offset
);
386 nbdkit_error ("pread: %m");
390 nbdkit_error ("pread: unexpected end of file");
401 /* Write data to the file. */
403 file_pwrite (void *handle
, const void *buf
, uint32_t count
, uint64_t offset
,
406 struct handle
*h
= handle
;
409 ssize_t r
= pwrite (h
->fd
, buf
, count
, offset
);
411 nbdkit_error ("pwrite: %m");
419 if ((flags
& NBDKIT_FLAG_FUA
) && file_flush (handle
, 0) == -1)
#if defined (FALLOC_FL_PUNCH_HOLE) || defined (FALLOC_FL_ZERO_RANGE)
/* Call fallocate and hand its result back unchanged, except that an
 * ENODEV failure is reported to the caller as EOPNOTSUPP.
 */
static int
do_fallocate (int fd, int mode, off_t offset, off_t len)
{
  const int rc = fallocate (fd, mode, offset, len);

  /* kernel 3.10 fails with ENODEV for block device. Kernel >= 4.9 fails
     with EOPNOTSUPP in this case. Normalize errno to simplify callers. */
  if (rc == -1 && errno == ENODEV)
    errno = EOPNOTSUPP;

  return rc;
}
#endif
439 /* Write zeroes to the file. */
441 file_zero (void *handle
, uint32_t count
, uint64_t offset
, uint32_t flags
)
443 struct handle
*h
= handle
;
446 #ifdef FALLOC_FL_PUNCH_HOLE
447 if (h
->can_punch_hole
&& (flags
& NBDKIT_FLAG_MAY_TRIM
)) {
448 r
= do_fallocate (h
->fd
, FALLOC_FL_PUNCH_HOLE
| FALLOC_FL_KEEP_SIZE
,
452 nbdkit_debug ("h->can_punch_hole && may_trim: "
453 "zero succeeded using fallocate");
457 if (!is_enotsup (errno
)) {
458 nbdkit_error ("zero: %m");
462 h
->can_punch_hole
= false;
466 #ifdef FALLOC_FL_ZERO_RANGE
467 if (h
->can_zero_range
) {
468 r
= do_fallocate (h
->fd
, FALLOC_FL_ZERO_RANGE
, offset
, count
);
471 nbdkit_debug ("h->can_zero-range: "
472 "zero succeeded using fallocate");
476 if (!is_enotsup (errno
)) {
477 nbdkit_error ("zero: %m");
481 h
->can_zero_range
= false;
485 #ifdef FALLOC_FL_PUNCH_HOLE
486 /* If we can punch hole but may not trim, we can combine punching hole and
487 * fallocate to zero a range. This is expected to be more efficient than
488 * writing zeroes manually. */
489 if (h
->can_punch_hole
&& h
->can_fallocate
) {
490 r
= do_fallocate (h
->fd
, FALLOC_FL_PUNCH_HOLE
| FALLOC_FL_KEEP_SIZE
,
493 r
= do_fallocate (h
->fd
, 0, offset
, count
);
496 nbdkit_debug ("h->can_punch_hole && h->can_fallocate: "
497 "zero succeeded using fallocate");
501 if (!is_enotsup (errno
)) {
502 nbdkit_error ("zero: %m");
506 h
->can_fallocate
= false;
508 if (!is_enotsup (errno
)) {
509 nbdkit_error ("zero: %m");
513 h
->can_punch_hole
= false;
519 /* For aligned range and block device, we can use BLKZEROOUT. */
520 if (h
->can_zeroout
&& IS_ALIGNED (offset
| count
, h
->sector_size
)) {
521 uint64_t range
[2] = {offset
, count
};
523 r
= ioctl (h
->fd
, BLKZEROOUT
, &range
);
526 nbdkit_debug ("h->can_zeroout && IS_ALIGNED: "
527 "zero succeeded using BLKZEROOUT");
531 if (errno
!= ENOTTY
) {
532 nbdkit_error ("zero: %m");
536 h
->can_zeroout
= false;
540 /* Trigger a fall back to writing */
542 nbdkit_debug ("zero falling back to writing");
547 if ((flags
& NBDKIT_FLAG_FUA
) && file_flush (handle
, 0) == -1)
552 /* Punch a hole in the file. */
554 file_trim (void *handle
, uint32_t count
, uint64_t offset
, uint32_t flags
)
556 #ifdef FALLOC_FL_PUNCH_HOLE
557 struct handle
*h
= handle
;
560 if (h
->can_punch_hole
) {
561 r
= do_fallocate (h
->fd
, FALLOC_FL_PUNCH_HOLE
| FALLOC_FL_KEEP_SIZE
,
564 /* Trim is advisory; we don't care if it fails for anything other
565 * than EIO or EPERM. */
566 if (errno
== EPERM
|| errno
== EIO
) {
567 nbdkit_error ("fallocate: %m");
571 if (is_enotsup (EOPNOTSUPP
))
572 h
->can_punch_hole
= false;
574 nbdkit_debug ("ignoring failed fallocate during trim: %m");
579 if ((flags
& NBDKIT_FLAG_FUA
) && file_flush (handle
, 0) == -1)
589 file_can_extents (void *handle
)
591 struct handle
*h
= handle
;
593 return h
->can_extents
;
597 do_extents (void *handle
, uint32_t count
, uint64_t offset
,
598 uint32_t flags
, struct nbdkit_extents
*extents
)
600 struct handle
*h
= handle
;
601 const bool req_one
= flags
& NBDKIT_FLAG_REQ_ONE
;
602 uint64_t end
= offset
+ count
;
607 pos
= lseek (h
->fd
, offset
, SEEK_DATA
);
609 if (errno
== ENXIO
) {
610 /* The current man page does not describe this situation well,
611 * but a proposed change to POSIX adds these words for ENXIO:
612 * "or the whence argument is SEEK_DATA and the offset falls
613 * within the final hole of the file."
618 nbdkit_error ("lseek: SEEK_DATA: %" PRIu64
": %m", offset
);
623 /* We know there is a hole from offset to pos-1. */
625 if (nbdkit_add_extent (extents
, offset
, pos
- offset
,
626 NBDKIT_EXTENT_HOLE
| NBDKIT_EXTENT_ZERO
) == -1)
636 pos
= lseek (h
->fd
, offset
, SEEK_HOLE
);
638 nbdkit_error ("lseek: SEEK_HOLE: %" PRIu64
": %m", offset
);
642 /* We know there is data from offset to pos-1. */
644 if (nbdkit_add_extent (extents
, offset
, pos
- offset
,
645 0 /* allocated data */) == -1)
652 } while (offset
< end
);
658 file_extents (void *handle
, uint32_t count
, uint64_t offset
,
659 uint32_t flags
, struct nbdkit_extents
*extents
)
661 ACQUIRE_LOCK_FOR_CURRENT_SCOPE (&lseek_lock
);
662 return do_extents (handle
, count
, offset
, flags
, extents
);
667 file_init_sparse (void *handle
)
669 struct handle
*h
= handle
;
671 return h
->init_sparse
;
675 file_init_zero (void *handle
)
677 struct handle
*h
= handle
;
681 #endif /* SEEK_HOLE */
683 #if HAVE_POSIX_FADVISE
686 file_cache (void *handle
, uint32_t count
, uint64_t offset
, uint32_t flags
)
688 struct handle
*h
= handle
;
691 /* Cache is advisory, we don't care if this fails */
692 r
= posix_fadvise (h
->fd
, offset
, count
, POSIX_FADV_WILLNEED
);
695 nbdkit_error ("posix_fadvise: %m");
700 #endif /* HAVE_POSIX_FADVISE */
702 static struct nbdkit_plugin plugin
= {
704 .longname
= "nbdkit file plugin",
705 .version
= PACKAGE_VERSION
,
706 .unload
= file_unload
,
707 .config
= file_config
,
708 .config_complete
= file_config_complete
,
709 .config_help
= file_config_help
,
710 .magic_config_key
= "file",
711 .dump_plugin
= file_dump_plugin
,
714 .get_size
= file_get_size
,
715 .can_multi_conn
= file_can_multi_conn
,
716 .can_trim
= file_can_trim
,
717 .can_fua
= file_can_fua
,
718 .can_cache
= file_can_cache
,
720 .pwrite
= file_pwrite
,
725 .can_extents
= file_can_extents
,
726 .extents
= file_extents
,
727 .init_sparse
= file_init_sparse
,
728 .init_zero
= file_init_zero
,
730 #if HAVE_POSIX_FADVISE
733 .errno_is_preserved
= 1,
736 NBDKIT_REGISTER_PLUGIN(plugin
)