From fd3615548bd8d5f4662ac73ea2de66f2bb078202 Mon Sep 17 00:00:00 2001 From: Eric Blake Date: Mon, 2 Sep 2019 19:12:20 -0500 Subject: [PATCH] filters: Cache and change semantics of can_zero The tri-state return pattern has proved valuable in other cases where a backend wants to opt in or out of nbdkit fallbacks. Using a tri-state return pattern at the backend level unifies the different semantics between plugin and filter .can_zero return values, and makes it possible for plugins to use cached results of .can_zero rather than calling into the plugin on each .zero request. As in other recent patches, it is easy to write an sh script that demonstrates a resulting speedup from the caching. All filters are in-tree and do not have a promise of API compatability, so it's easy to update all of them (the log filter is okay as-is, the nozero filter is easy to update, and no other filter overrides .can_zero). But for backwards compatibility reasons, we cannot change the semantics of the plugin's return value for .can_zero while remaining at API version 2; so that remains a bool, and merely selects between EMULATE or NATIVE at the backend level. $ cat script case "$1" in get_size) echo 1m;; can_zero) sleep 1;; can_write | zero) ;; *) exit 2 ;; esac Pre-patch: $ /bin/time -f %e ./nbdkit -U - sh script \ --run 'qemu-io -f raw -c "w -z 0 512" -c "w -z 512 512" $uri >/dev/null' 3.10 Post-patch: $ /bin/time -f %e ./nbdkit -U - sh script \ --run 'qemu-io -f raw -c "w -z 0 512" -c "w -z 512 512" $uri >/dev/null' 1.08 Signed-off-by: Eric Blake --- docs/nbdkit-filter.pod | 44 +++++++++++++++++++++++++++++++++----------- filters/nozero/nozero.c | 9 ++++++++- include/nbdkit-filter.h | 4 ++++ server/backend.c | 15 +++++++++++---- server/internal.h | 2 +- server/plugins.c | 31 +++++++++++++++---------------- server/protocol-handshake.c | 4 +--- 7 files changed, 73 insertions(+), 36 deletions(-) diff --git a/docs/nbdkit-filter.pod b/docs/nbdkit-filter.pod index e4ab508d..912625a7 100644 --- a/docs/nbdkit-filter.pod +++ b/docs/nbdkit-filter.pod @@ -397,16 +397,37 @@ cached value. These intercept the corresponding plugin methods, and control feature bits advertised to the client. -Of note, the C<.can_zero> callback in the filter controls whether the -server advertises zero support to the client, which is slightly -different semantics than the plugin; that is, -Ccan_zero> always returns true for a plugin, even when -the plugin's own C<.can_zero> callback returned false, because nbdkit -implements a fallback to C<.pwrite> at the plugin layer. +Of note, the semantics of C<.can_zero> callback in the filter are +slightly different from the plugin, and must be one of three success +values visible only to filters: + +=over 4 + +=item C + +Completely suppress advertisement of write zero support (this can only +be done from filters, not plugins). + +=item C + +Inform nbdkit that write zeroes should immediately fall back to +C<.pwrite> emulation without trying C<.zero> (this value is returned +by Ccan_zero> if the plugin returned false in its +C<.can_zero>). + +=item C + +Inform nbdkit that write zeroes should attempt to use C<.zero>, +although it may still fall back to C<.pwrite> emulation for C +or C failures (this value is returned by +Ccan_zero> if the plugin returned true in its +C<.can_zero>). + +=back Remember that most of the feature check functions return merely a -boolean success value, while C<.can_fua> and C<.can_cache> have three -success values. +boolean success value, while C<.can_zero>, C<.can_fua> and +C<.can_cache> have three success values. The difference between C<.can_fua> values may affect choices made in the filter: when splitting a write request that requested FUA from the @@ -517,9 +538,10 @@ value to return to the client. This intercepts the plugin C<.zero> method and can be used to modify zero requests. -This function will not be called if C<.can_zero> returned false; in -turn, the filter should not call Czero> if -Ccan_zero> did not return true. +This function will not be called if C<.can_zero> returned +C; in turn, the filter should not call +Czero> if Ccan_zero> returned +C. On input, the parameter C may include C unconditionally, and C based on the result of diff --git a/filters/nozero/nozero.c b/filters/nozero/nozero.c index 2a518763..d0d48720 100644 --- a/filters/nozero/nozero.c +++ b/filters/nozero/nozero.c @@ -99,7 +99,14 @@ nozero_prepare (struct nbdkit_next_ops *next_ops, void *nxdata, void *handle, static int nozero_can_zero (struct nbdkit_next_ops *next_ops, void *nxdata, void *handle) { - return zeromode != NONE; + switch (zeromode) { + case NONE: + return NBDKIT_ZERO_NONE; + case EMULATE: + return NBDKIT_ZERO_EMULATE; + default: + return next_ops->can_zero (nxdata); + } } static int diff --git a/include/nbdkit-filter.h b/include/nbdkit-filter.h index 550c7048..67170bb1 100644 --- a/include/nbdkit-filter.h +++ b/include/nbdkit-filter.h @@ -45,6 +45,10 @@ extern "C" { #define NBDKIT_FILTER_API_VERSION 6 /* Corresponding to v1.16+ */ +#define NBDKIT_ZERO_NONE 0 +#define NBDKIT_ZERO_EMULATE 1 +#define NBDKIT_ZERO_NATIVE 2 + struct nbdkit_extent { uint64_t offset; uint64_t length; diff --git a/server/backend.c b/server/backend.c index 5da23d9e..0e64bfe5 100644 --- a/server/backend.c +++ b/server/backend.c @@ -257,14 +257,20 @@ backend_can_trim (struct backend *b, struct connection *conn) int backend_can_zero (struct backend *b, struct connection *conn) { + struct b_conn_handle *h = &conn->handles[b->i]; int r; debug ("%s: can_zero", b->name); - r = backend_can_write (b, conn); - if (r != 1) - return r; - return b->can_zero (b, conn); + if (h->can_zero == -1) { + r = backend_can_write (b, conn); + if (r != 1) { + h->can_zero = NBDKIT_ZERO_NONE; + return r; /* Relies on 0 == NBDKIT_ZERO_NONE */ + } + h->can_zero = b->can_zero (b, conn); + } + return h->can_zero; } int @@ -391,6 +397,7 @@ backend_zero (struct backend *b, struct connection *conn, int r; assert (h->can_write == 1); + assert (h->can_zero > NBDKIT_ZERO_NONE); assert (!(flags & ~(NBDKIT_FLAG_MAY_TRIM | NBDKIT_FLAG_FUA))); debug ("%s: zero count=%" PRIu32 " offset=%" PRIu64 " may_trim=%d fua=%d", b->name, count, offset, !!(flags & NBDKIT_FLAG_MAY_TRIM), diff --git a/server/internal.h b/server/internal.h index 23350417..eef40212 100644 --- a/server/internal.h +++ b/server/internal.h @@ -156,6 +156,7 @@ struct b_conn_handle { uint64_t exportsize; int can_write; + int can_zero; int can_extents; int can_cache; }; @@ -178,7 +179,6 @@ struct connection { bool can_flush; bool is_rotational; bool can_trim; - bool can_zero; bool can_fua; bool can_multi_conn; bool using_tls; diff --git a/server/plugins.c b/server/plugins.c index a96d9e36..08d6bfa6 100644 --- a/server/plugins.c +++ b/server/plugins.c @@ -347,19 +347,23 @@ static int plugin_can_zero (struct backend *b, struct connection *conn) { struct backend_plugin *p = container_of (b, struct backend_plugin, backend); + int r; assert (connection_get_handle (conn, 0)); - /* Note the special case here: the plugin's .can_zero controls only - * whether we call .zero; while the backend expects .can_zero to - * return whether to advertise zero support. Since we ALWAYS know - * how to fall back to .pwrite in plugin_zero(), we ignore the - * difference between the plugin's true or false return, and only - * call it to catch a -1 failure during negotiation. */ - if (p->plugin.can_zero && - p->plugin.can_zero (connection_get_handle (conn, 0)) == -1) - return -1; - return 1; + /* Note the special case here: the plugin's .can_zero returns a bool + * which controls only whether we call .zero; while the backend + * expects .can_zero to return a tri-state on level of support. + */ + if (p->plugin.can_zero) { + r = p->plugin.can_zero (connection_get_handle (conn, 0)); + if (r == -1) + return -1; + return r ? NBDKIT_ZERO_NATIVE : NBDKIT_ZERO_EMULATE; + } + if (p->plugin.zero || p->plugin._zero_old) + return NBDKIT_ZERO_NATIVE; + return NBDKIT_ZERO_EMULATE; } static int @@ -562,7 +566,6 @@ plugin_zero (struct backend *b, struct connection *conn, bool fua = flags & NBDKIT_FLAG_FUA; bool emulate = false; bool need_flush = false; - int can_zero = 1; /* TODO cache this per-connection? */ assert (connection_get_handle (conn, 0)); @@ -572,12 +575,8 @@ plugin_zero (struct backend *b, struct connection *conn, } if (!count) return 0; - if (p->plugin.can_zero) { - can_zero = p->plugin.can_zero (connection_get_handle (conn, 0)); - assert (can_zero != -1); - } - if (can_zero) { + if (backend_can_zero (b, conn) == NBDKIT_ZERO_NATIVE) { errno = 0; if (p->plugin.zero) r = p->plugin.zero (connection_get_handle (conn, 0), count, offset, diff --git a/server/protocol-handshake.c b/server/protocol-handshake.c index bb2cbde7..672b6571 100644 --- a/server/protocol-handshake.c +++ b/server/protocol-handshake.c @@ -64,10 +64,8 @@ protocol_compute_eflags (struct connection *conn, uint16_t *flags) fl = backend_can_zero (backend, conn); if (fl == -1) return -1; - if (fl) { + if (fl) eflags |= NBD_FLAG_SEND_WRITE_ZEROES; - conn->can_zero = true; - } fl = backend_can_trim (backend, conn); if (fl == -1) -- 2.11.4.GIT