2 * Copyright (C) 2013 Red Hat Inc.
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions are
9 * * Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
12 * * Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
16 * * Neither the name of Red Hat nor the names of its contributors may be
17 * used to endorse or promote products derived from this software without
18 * specific prior written permission.
20 * THIS SOFTWARE IS PROVIDED BY RED HAT AND CONTRIBUTORS ''AS IS'' AND
21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
22 * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
23 * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL RED HAT OR
24 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
25 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
26 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
27 * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
28 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
29 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
30 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
44 #include <sys/types.h>
48 #include "nbdkit-plugin.h"
// 64 MiB cap on the byte count of a single NBD read or write request.
// validate_request reports larger read/write requests as invalid.
52 /* Maximum read or write request that we will handle. */
53 #define MAX_REQUEST_SIZE (64 * 1024 * 1024)
// Forward declarations of file-local helpers that are defined further down
// in this file, so the connection handler can call them before their
// definitions appear.
55 static struct connection
*new_connection (int sockin
, int sockout
);
56 static void free_connection (struct connection
*conn
);
57 static int negotiate_handshake (struct connection
*conn
);
58 static int recv_request_send_reply (struct connection
*conn
);
// Serve one client on the socket pair (sockin, sockout).
//
// Visible steps, in order: allocate the connection state, open the plugin,
// run the handshake, process requests through recv_request_send_reply, and
// release the connection with free_connection.
//
// NOTE(review): the return type, braces, the request loop and the bodies of
// the error branches are missing from this extract. The two free_connection
// calls presumably belong to the normal and the error exit path - confirm
// against the complete file.
61 _handle_single_connection (int sockin
, int sockout
)
// Per-connection state for this socket pair.
64 struct connection
*conn
= new_connection (sockin
, sockout
);
// A return of -1 from plugin_open is treated as failure.
69 if (plugin_open (conn
, readonly
) == -1)
// NOTE(review): presumably tags the current thread with the plugin name for
// log output - confirm against the definition of tls_set_name.
72 tls_set_name (plugin_name ());
// The handshake must succeed before any request is read.
75 if (negotiate_handshake (conn
) == -1)
78 /* Process requests. XXX Allow these to be dispatched in parallel using
82 r
= recv_request_send_reply (conn
);
89 free_connection (conn
);
93 free_connection (conn
);
// Locked wrapper around _handle_single_connection.
//
// Calls plugin_lock_connection before and plugin_unlock_connection after
// the inner call, so the whole connection is served between the two calls.
// The result of the inner call is kept in r.
// NOTE(review): the return statement is not visible in this extract;
// presumably r is returned to the caller.
98 handle_single_connection (int sockin
, int sockout
)
102 plugin_lock_connection ();
103 r
= _handle_single_connection (sockin
, sockout
);
104 plugin_unlock_connection ();
// Allocate and initialise the state for one connection.
//
// sockin:  descriptor stored in conn->sockin.
// sockout: descriptor stored in conn->sockout.
//
// NOTE(review): the allocation-failure check and the return statement are
// missing from this extract; the declared return type suggests the new
// connection pointer is returned - confirm.
109 static struct connection
*
110 new_connection (int sockin
, int sockout
)
112 struct connection
*conn
;
// calloc zero-fills the structure, so every field that is not assigned
// below starts out as zero.
114 conn
= calloc (1, sizeof *conn
);
120 conn
->sockin
= sockin
;
121 conn
->sockout
= sockout
;
// A NULL attribute pointer requests the default mutex attributes.
122 pthread_mutex_init (&conn
->request_lock
, NULL
);
// Tear down a connection: close its descriptors and destroy its request
// lock.
//
// NOTE(review): lines are missing from this extract, so any plugin close
// call and the release of the conn allocation itself are not visible here.
128 free_connection (struct connection
*conn
)
// Negative descriptors are skipped rather than closed.
133 if (conn
->sockin
>= 0)
134 close (conn
->sockin
);
// When both directions share one descriptor it was already closed above,
// so it is not closed a second time.
135 if (conn
->sockout
>= 0 && conn
->sockin
!= conn
->sockout
)
136 close (conn
->sockout
);
138 pthread_mutex_destroy (&conn
->request_lock
);
// Build and send the old-style handshake packet for this connection.
//
// Visible steps: query the export size from the plugin and cache it in
// conn->exportsize, accumulate the export flags from the plugin capability
// queries, fill a struct old_handshake with big-endian fields, and write it
// to conn->sockout.
//
// NOTE(review): this extract is missing the return type, the declarations
// of r, fl and exportsize, the assignment of gflags, the guards around the
// flush/rotational/trim flag updates, and all return statements. Confirm
// those against the complete file before relying on this description.
146 /* XXX Note because we don't support multiple plugins or export names,
147 * we are using the old-style handshake. This will be fixed.
150 _negotiate_handshake (struct connection
*conn
)
152 struct old_handshake handshake
;
155 uint16_t gflags
, eflags
;
158 r
= plugin_get_size (conn
);
162 nbdkit_error (".get_size function returned invalid value "
// The size is cached on the connection; valid_range reads it back later.
166 exportsize
= (uint64_t) r
;
167 conn
->exportsize
= exportsize
;
170 eflags
= NBD_FLAG_HAS_FLAGS
;
172 fl
= plugin_can_write (conn
);
// Read-only is advertised when the global readonly setting is on or the
// plugin reports that it cannot write.
175 if (readonly
|| !fl
) {
176 eflags
|= NBD_FLAG_READ_ONLY
;
180 fl
= plugin_can_flush (conn
);
// Flush and FUA support are advertised together.
184 eflags
|= NBD_FLAG_SEND_FLUSH
| NBD_FLAG_SEND_FUA
;
188 fl
= plugin_is_rotational (conn
);
192 eflags
|= NBD_FLAG_ROTATIONAL
;
193 conn
->is_rotational
= 1;
196 fl
= plugin_can_trim (conn
);
200 eflags
|= NBD_FLAG_SEND_TRIM
;
204 debug ("flags: global 0x%x export 0x%x", gflags
, eflags
);
// Zero the packet first so bytes that are not assigned below are sent as 0.
206 memset (&handshake
, 0, sizeof handshake
);
207 memcpy (handshake
.nbdmagic
, "NBDMAGIC", 8);
// Multi-byte fields are converted from host to big-endian byte order.
208 handshake
.version
= htobe64 (OLD_VERSION
);
209 handshake
.exportsize
= htobe64 (exportsize
);
210 handshake
.gflags
= htobe16 (gflags
);
211 handshake
.eflags
= htobe16 (eflags
);
213 if (xwrite (conn
->sockout
, &handshake
, sizeof handshake
) == -1) {
214 nbdkit_error ("write: %m");
// Locked wrapper around _negotiate_handshake.
//
// Calls plugin_lock_request before and plugin_unlock_request after the
// inner call, and keeps the result in r.
// NOTE(review): the return statement is not visible in this extract;
// callers compare the result with -1, so presumably r is returned.
222 negotiate_handshake (struct connection
*conn
)
226 plugin_lock_request (conn
);
227 r
= _negotiate_handshake (conn
);
228 plugin_unlock_request (conn
);
// Check that a request range lies inside the export.
//
// conn:   connection whose cached exportsize is the upper bound.
// offset: start of the range in bytes.
// count:  length of the range in bytes.
//
// Evaluates to true only when count is non-zero, offset does not exceed the
// export size, and offset + count does not exceed the export size. An empty
// range is therefore rejected. The sum is computed in 64 bits because
// offset is a uint64_t.
234 valid_range (struct connection
*conn
, uint64_t offset
, uint32_t count
)
236 uint64_t exportsize
= conn
->exportsize
;
238 return count
> 0 && offset
<= exportsize
&& offset
+ count
<= exportsize
;
// Decide whether a decoded request may be executed on this connection.
//
// Checks visible in this extract:
//   - the range is checked with valid_range,
//   - a flush must carry offset 0 and count 0,
//   - an unknown command is reported,
//   - reads and writes larger than MAX_REQUEST_SIZE are refused,
//   - write, flush and trim are refused on a read-only connection,
//   - flush and trim are refused when the connection lacks the capability.
// Each refusal is reported through nbdkit_error. Returns 1 when the request
// passes every check.
//
// NOTE(review): the rest of the parameter list, the switch over cmd and the
// failure returns are missing from this extract. The caller treats a result
// of 0 as request not valid and passes a pointer to its error variable, so
// presumably the failure paths store an errno value there and return 0.
242 validate_request (struct connection
*conn
,
243 uint32_t cmd
, uint32_t flags
, uint64_t offset
, uint32_t count
,
248 /* Validate cmd, offset, count. */
253 r
= valid_range (conn
, offset
, count
);
257 /* XXX Allow writes to extend the disk? */
258 nbdkit_error ("invalid request: offset and length are out of range");
// A flush applies to the whole export, so it must not name a range.
265 if (offset
!= 0 || count
!= 0) {
266 nbdkit_error ("invalid flush request: expecting offset and length == 0");
273 nbdkit_error ("invalid request: unknown command (%" PRIu32
") ignored",
279 /* Refuse over-large read and write requests. */
280 if ((cmd
== NBD_CMD_WRITE
|| cmd
== NBD_CMD_READ
) &&
281 count
> MAX_REQUEST_SIZE
) {
282 nbdkit_error ("invalid request: data request is too large (%" PRIu32
283 " > %d)", count
, MAX_REQUEST_SIZE
);
288 /* Readonly connection? */
289 if (conn
->readonly
&&
290 (cmd
== NBD_CMD_WRITE
|| cmd
== NBD_CMD_FLUSH
||
291 cmd
== NBD_CMD_TRIM
)) {
292 nbdkit_error ("invalid request: write request on readonly connection");
// Flush requested on a connection that cannot flush.
298 if (!conn
->can_flush
&& cmd
== NBD_CMD_FLUSH
) {
299 nbdkit_error ("invalid request: flush operation not supported");
// Trim requested on a connection that cannot trim.
305 if (!conn
->can_trim
&& cmd
== NBD_CMD_TRIM
) {
306 nbdkit_error ("invalid request: trim operation not supported");
311 return 1; /* Command validates. */
// NOTE(review): this extract is missing the end of the header comment
// below, the rest of the parameter list, the case labels that select
// between the plugin calls, the tests on r, and the return statements.
// The plugin calls visible here are plugin_pread, plugin_pwrite,
// plugin_flush and plugin_trim.
314 /* This is called with the request lock held to actually execute the
315 * request (by calling the plugin). Note that the request fields have
316 * been validated already in 'validate_request' so we don't have to
317 * check them again. 'buf' is either the data to be written or the
318 * data to be returned, and points to a buffer of size 'count' bytes.
320 * Only returns -1 if there is a fatal error and the connection cannot
323 * On read/write errors, sets *error to errno (or EIO if errno is not
324 * set) and returns 0.
327 _handle_request (struct connection
*conn
,
328 uint32_t cmd
, uint32_t flags
, uint64_t offset
, uint32_t count
,
332 bool flush_after_command
;
335 /* Flush after command performed? */
336 flush_after_command
= (flags
& NBD_CMD_FLAG_FUA
) != 0;
// The FUA request is dropped when the connection cannot flush or is
// read-only.
337 if (!conn
->can_flush
|| conn
->readonly
)
338 flush_after_command
= false;
342 r
= plugin_pread (conn
, buf
, count
, offset
);
// A zero errno is reported to the client as EIO.
344 *error
= errno
? errno
: EIO
;
350 r
= plugin_pwrite (conn
, buf
, count
, offset
);
352 *error
= errno
? errno
: EIO
;
358 r
= plugin_flush (conn
);
360 *error
= errno
? errno
: EIO
;
366 r
= plugin_trim (conn
, count
, offset
);
368 *error
= errno
? errno
: EIO
;
// Honour FUA by flushing once the command itself has been issued.
377 if (flush_after_command
) {
378 r
= plugin_flush (conn
);
380 *error
= errno
? errno
: EIO
;
// Locked wrapper around _handle_request.
//
// Forwards conn, cmd, flags, offset, count, buf and error unchanged to
// _handle_request, bracketed by plugin_lock_request and
// plugin_unlock_request, and keeps the result in r.
// NOTE(review): the rest of the parameter list and the return statement
// are missing from this extract; presumably r is returned.
389 handle_request (struct connection
*conn
,
390 uint32_t cmd
, uint32_t flags
, uint64_t offset
, uint32_t count
,
396 plugin_lock_request (conn
);
397 r
= _handle_request (conn
, cmd
, flags
, offset
, count
, buf
, error
);
398 plugin_unlock_request (conn
);
// Read payload bytes of a write request from sock.
//
// sock:  descriptor to read from.
// count: number of payload bytes announced by the request.
//
// Each read asks for at most BUFSIZ bytes. A failure is reported with the
// message skipping write buffer.
//
// NOTE(review): the declaration of buf, the loop and the update of count
// are missing from this extract. Presumably the function repeats the read
// until count bytes are consumed and discards them, so the next request
// header can be read from the stream - confirm against the complete file.
404 skip_over_write_buffer (int sock
, size_t count
)
410 r
= read (sock
, buf
, count
> BUFSIZ
? BUFSIZ
: count
);
412 nbdkit_error ("skipping write buffer: %m");
// Receive one request from the client, execute it, and send the reply.
//
// Visible flow: read the fixed-size request header, check its magic,
// decode command, offset and count from big-endian, stop on a disconnect
// command, validate the request, allocate a data buffer for reads and
// writes, receive write data, run the request through handle_request, send
// the reply header, and send the data for a read.
//
// Returns 0 when the connection should be closed (client closed the socket
// or sent a disconnect command) and 1 when a command was processed.
//
// NOTE(review): the declarations of r, offset and reply, the assignment of
// flags, the tests on r and the error returns are missing from this
// extract; presumably fatal I/O errors return -1 - confirm.
422 recv_request_send_reply (struct connection
*conn
)
425 struct request request
;
427 uint32_t magic
, cmd
, flags
, count
, error
= 0;
// NOTE(review): CLEANUP_FREE presumably releases buf automatically when it
// goes out of scope; no explicit free of buf is visible in this extract.
429 CLEANUP_FREE
char *buf
= NULL
;
431 /* Read the request packet. */
432 r
= xread (conn
->sockin
, &request
, sizeof request
);
434 nbdkit_error ("read request: %m");
438 debug ("client closed input socket, closing connection");
439 return 0; /* disconnect */
442 magic
= be32toh (request
.magic
);
443 if (magic
!= NBD_REQUEST_MAGIC
) {
444 nbdkit_error ("invalid request: 'magic' field is incorrect (0x%x)", magic
);
448 cmd
= be32toh (request
.type
);
// Keep only the command bits of the type field.
450 cmd
&= NBD_CMD_MASK_COMMAND
;
452 offset
= be64toh (request
.offset
);
453 count
= be32toh (request
.count
);
455 if (cmd
== NBD_CMD_DISC
) {
456 debug ("client sent disconnect command, closing connection");
457 return 0; /* disconnect */
460 /* Validate the request. */
461 r
= validate_request (conn
, cmd
, flags
, offset
, count
, &error
);
// The payload of a rejected write is still on the socket and is consumed
// here.
464 if (r
== 0) { /* request not valid */
465 if (cmd
== NBD_CMD_WRITE
)
466 skip_over_write_buffer (conn
->sockin
, count
);
470 /* Allocate the data buffer used for either read or write requests. */
471 if (cmd
== NBD_CMD_READ
|| cmd
== NBD_CMD_WRITE
) {
472 buf
= malloc (count
);
// NOTE(review): presumably the allocation-failure path; the write payload
// is consumed here as well.
476 if (cmd
== NBD_CMD_WRITE
)
477 skip_over_write_buffer (conn
->sockin
, count
);
482 /* Receive the write data buffer. */
483 if (cmd
== NBD_CMD_WRITE
) {
484 r
= xread (conn
->sockin
, buf
, count
);
486 nbdkit_error ("read data: %m");
490 debug ("client closed input unexpectedly, closing connection");
491 return 0; /* disconnect */
495 /* Perform the request. Only this part happens inside the request lock. */
496 r
= handle_request (conn
, cmd
, flags
, offset
, count
, buf
, &error
);
500 /* Send the reply packet. */
502 reply
.magic
= htobe32 (NBD_REPLY_MAGIC
);
// The handle is echoed back exactly as received, without byte swapping.
503 reply
.handle
= request
.handle
;
504 reply
.error
= htobe32 (error
);
506 r
= xwrite (conn
->sockout
, &reply
, sizeof reply
);
508 nbdkit_error ("write reply: %m");
512 /* Send the read data buffer. */
513 if (cmd
== NBD_CMD_READ
) {
514 r
= xwrite (conn
->sockout
, buf
, count
);
516 nbdkit_error ("write data: %m");
521 return 1; /* command processed ok */