python: Make robust against traceback.format_exception returning error.
server/protocol.c

/* nbdkit
 * Copyright (C) 2013-2019 Red Hat Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are
 * met:
 *
 * * Redistributions of source code must retain the above copyright
 * notice, this list of conditions and the following disclaimer.
 *
 * * Redistributions in binary form must reproduce the above copyright
 * notice, this list of conditions and the following disclaimer in the
 * documentation and/or other materials provided with the distribution.
 *
 * * Neither the name of Red Hat nor the names of its contributors may be
 * used to endorse or promote products derived from this software without
 * specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY RED HAT AND CONTRIBUTORS ''AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
 * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
 * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL RED HAT OR
 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
 * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <config.h>

#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <stdbool.h>
#include <inttypes.h>
#include <string.h>
#include <unistd.h>
#include <errno.h>
#include <assert.h>

#include "internal.h"
#include "byte-swapping.h"
#include "minmax.h"
#include "nbd-protocol.h"
#include "protostrings.h"

static bool
validate_request (struct connection *conn,
                  uint16_t cmd, uint16_t flags, uint64_t offset, uint32_t count,
                  uint32_t *error)
{
  /* Readonly connection? */
  if (conn->eflags & NBD_FLAG_READ_ONLY &&
      (cmd == NBD_CMD_WRITE || cmd == NBD_CMD_TRIM ||
       cmd == NBD_CMD_WRITE_ZEROES)) {
    nbdkit_error ("invalid request: %s: write request on readonly connection",
                  name_of_nbd_cmd (cmd));
    *error = EROFS;
    return false;
  }

  /* Validate cmd, offset, count. */
  switch (cmd) {
  case NBD_CMD_READ:
  case NBD_CMD_CACHE:
  case NBD_CMD_WRITE:
  case NBD_CMD_TRIM:
  case NBD_CMD_WRITE_ZEROES:
  case NBD_CMD_BLOCK_STATUS:
    if (!backend_valid_range (backend, conn, offset, count)) {
      /* XXX Allow writes to extend the disk? */
      nbdkit_error ("invalid request: %s: offset and count are out of range: "
                    "offset=%" PRIu64 " count=%" PRIu32,
                    name_of_nbd_cmd (cmd), offset, count);
      *error = (cmd == NBD_CMD_WRITE ||
                cmd == NBD_CMD_WRITE_ZEROES) ? ENOSPC : EINVAL;
      return false;
    }
    break;

  case NBD_CMD_FLUSH:
    if (offset != 0 || count != 0) {
      nbdkit_error ("invalid request: %s: expecting offset and count = 0",
                    name_of_nbd_cmd (cmd));
      *error = EINVAL;
      return false;
    }
    break;

  default:
    nbdkit_error ("invalid request: unknown command (%" PRIu32 ") ignored",
                  cmd);
    *error = EINVAL;
    return false;
  }

  /* Validate flags */
  if (flags & ~(NBD_CMD_FLAG_FUA | NBD_CMD_FLAG_NO_HOLE |
                NBD_CMD_FLAG_DF | NBD_CMD_FLAG_REQ_ONE |
                NBD_CMD_FLAG_FAST_ZERO)) {
    nbdkit_error ("invalid request: unknown flag (0x%x)", flags);
    *error = EINVAL;
    return false;
  }
  if ((flags & NBD_CMD_FLAG_NO_HOLE) &&
      cmd != NBD_CMD_WRITE_ZEROES) {
    nbdkit_error ("invalid request: NO_HOLE flag needs WRITE_ZEROES request");
    *error = EINVAL;
    return false;
  }
  if ((flags & NBD_CMD_FLAG_FAST_ZERO) &&
      cmd != NBD_CMD_WRITE_ZEROES) {
    nbdkit_error ("invalid request: "
                  "FAST_ZERO flag needs WRITE_ZEROES request");
    *error = EINVAL;
    return false;
  }
  if (flags & NBD_CMD_FLAG_DF) {
    if (cmd != NBD_CMD_READ) {
      nbdkit_error ("invalid request: DF flag needs READ request");
      *error = EINVAL;
      return false;
    }
    if (!conn->structured_replies) {
      nbdkit_error ("invalid request: "
                    "%s: structured replies was not negotiated",
                    name_of_nbd_cmd (cmd));
      *error = EINVAL;
      return false;
    }
  }
  if ((flags & NBD_CMD_FLAG_REQ_ONE) &&
      cmd != NBD_CMD_BLOCK_STATUS) {
    nbdkit_error ("invalid request: REQ_ONE flag needs BLOCK_STATUS request");
    *error = EINVAL;
    return false;
  }
  if (flags & NBD_CMD_FLAG_FUA && !(conn->eflags & NBD_FLAG_SEND_FUA)) {
    nbdkit_error ("invalid request: FUA flag not supported");
    *error = EINVAL;
    return false;
  }

  /* Refuse over-large read and write requests. */
  if ((cmd == NBD_CMD_WRITE || cmd == NBD_CMD_READ) &&
      count > MAX_REQUEST_SIZE) {
    nbdkit_error ("invalid request: %s: data request is too large (%" PRIu32
                  " > %d)",
                  name_of_nbd_cmd (cmd), count, MAX_REQUEST_SIZE);
    *error = ENOMEM;
    return false;
  }

  /* Flush allowed? */
  if (cmd == NBD_CMD_FLUSH && !(conn->eflags & NBD_FLAG_SEND_FLUSH)) {
    nbdkit_error ("invalid request: %s: flush operation not supported",
                  name_of_nbd_cmd (cmd));
    *error = EINVAL;
    return false;
  }

  /* Trim allowed? */
  if (cmd == NBD_CMD_TRIM && !(conn->eflags & NBD_FLAG_SEND_TRIM)) {
    nbdkit_error ("invalid request: %s: trim operation not supported",
                  name_of_nbd_cmd (cmd));
    *error = EINVAL;
    return false;
  }

  /* Zero allowed? */
  if (cmd == NBD_CMD_WRITE_ZEROES &&
      !(conn->eflags & NBD_FLAG_SEND_WRITE_ZEROES)) {
    nbdkit_error ("invalid request: %s: write zeroes operation not supported",
                  name_of_nbd_cmd (cmd));
    *error = EINVAL;
    return false;
  }

  /* Cache allowed? */
  if (cmd == NBD_CMD_CACHE && !(conn->eflags & NBD_FLAG_SEND_CACHE)) {
    nbdkit_error ("invalid request: %s: cache operation not supported",
                  name_of_nbd_cmd (cmd));
    *error = EINVAL;
    return false;
  }

  /* Block status allowed? */
  if (cmd == NBD_CMD_BLOCK_STATUS) {
    if (!conn->structured_replies) {
      nbdkit_error ("invalid request: "
                    "%s: structured replies was not negotiated",
                    name_of_nbd_cmd (cmd));
      *error = EINVAL;
      return false;
    }
    if (!conn->meta_context_base_allocation) {
      nbdkit_error ("invalid request: "
                    "%s: base:allocation was not negotiated",
                    name_of_nbd_cmd (cmd));
      *error = EINVAL;
      return false;
    }
  }

  return true;                  /* Command validates. */
}
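
/* For example, under the rules above a client that sends
 * NBD_CMD_WRITE on a connection negotiated with NBD_FLAG_READ_ONLY
 * gets *error = EROFS, which nbd_errno() below maps to NBD_EPERM on
 * the wire.
 */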

/* This is called with the request lock held to actually execute the
 * request (by calling the plugin).  Note that the request fields have
 * been validated already in 'validate_request' so we don't have to
 * check them again.
 *
 * 'buf' is either the data to be written or the data to be returned,
 * and points to a buffer of size 'count' bytes.
 *
 * 'extents' is an empty extents list used for block status requests
 * only.
 *
 * In all cases, the return value is the system errno value that will
 * later be converted to the nbd error to send back to the client (0
 * for success).
 */
static uint32_t
handle_request (struct connection *conn,
                uint16_t cmd, uint16_t flags, uint64_t offset, uint32_t count,
                void *buf, struct nbdkit_extents *extents)
{
  uint32_t f = 0;
  int err = 0;

  /* Clear the error, so that we know if the plugin calls
   * nbdkit_set_error() or relied on errno. */
  threadlocal_set_error (0);

  switch (cmd) {
  case NBD_CMD_READ:
    if (backend_pread (backend, conn, buf, count, offset, 0, &err) == -1)
      return err;
    break;

  case NBD_CMD_WRITE:
    if (flags & NBD_CMD_FLAG_FUA)
      f |= NBDKIT_FLAG_FUA;
    if (backend_pwrite (backend, conn, buf, count, offset, f, &err) == -1)
      return err;
    break;

  case NBD_CMD_FLUSH:
    if (backend_flush (backend, conn, 0, &err) == -1)
      return err;
    break;

  case NBD_CMD_TRIM:
    if (flags & NBD_CMD_FLAG_FUA)
      f |= NBDKIT_FLAG_FUA;
    if (backend_trim (backend, conn, count, offset, f, &err) == -1)
      return err;
    break;

  case NBD_CMD_CACHE:
    if (backend_cache (backend, conn, count, offset, 0, &err) == -1)
      return err;
    break;

  case NBD_CMD_WRITE_ZEROES:
    if (!(flags & NBD_CMD_FLAG_NO_HOLE))
      f |= NBDKIT_FLAG_MAY_TRIM;
    if (flags & NBD_CMD_FLAG_FUA)
      f |= NBDKIT_FLAG_FUA;
    if (flags & NBD_CMD_FLAG_FAST_ZERO)
      f |= NBDKIT_FLAG_FAST_ZERO;
    if (backend_zero (backend, conn, count, offset, f, &err) == -1)
      return err;
    break;

  case NBD_CMD_BLOCK_STATUS:
    if (flags & NBD_CMD_FLAG_REQ_ONE)
      f |= NBDKIT_FLAG_REQ_ONE;
    if (backend_extents (backend, conn, count, offset, f,
                         extents, &err) == -1)
      return err;
    break;

  default:
    abort ();
  }

  return 0;
}
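
/* For example, an NBD_CMD_WRITE carrying NBD_CMD_FLAG_FUA reaches the
 * plugin via backend_pwrite with NBDKIT_FLAG_FUA set: the wire-level
 * NBD flag is translated into the flag constant the plugin API uses.
 */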

static int
skip_over_write_buffer (int sock, size_t count)
{
  char buf[BUFSIZ];
  ssize_t r;

  if (count > MAX_REQUEST_SIZE * 2) {
    nbdkit_error ("write request too large to skip");
    return -1;
  }

  while (count > 0) {
    r = read (sock, buf, count > BUFSIZ ? BUFSIZ : count);
    if (r == -1) {
      nbdkit_error ("skipping write buffer: %m");
      return -1;
    }
    if (r == 0) {
      nbdkit_error ("unexpected early EOF");
      errno = EBADMSG;
      return -1;
    }
    count -= r;
  }
  return 0;
}

/* Convert a system errno to an NBD_E* error code. */
static int
nbd_errno (int error, uint16_t flags)
{
  switch (error) {
  case 0:
    return NBD_SUCCESS;
  case EROFS:
  case EPERM:
    return NBD_EPERM;
  case EIO:
    return NBD_EIO;
  case ENOMEM:
    return NBD_ENOMEM;
#ifdef EDQUOT
  case EDQUOT:
#endif
  case EFBIG:
  case ENOSPC:
    return NBD_ENOSPC;
#ifdef ESHUTDOWN
  case ESHUTDOWN:
    return NBD_ESHUTDOWN;
#endif
  case ENOTSUP:
#if ENOTSUP != EOPNOTSUPP
  case EOPNOTSUPP:
#endif
    if (flags & NBD_CMD_FLAG_FAST_ZERO)
      return NBD_ENOTSUP;
    return NBD_EINVAL;
  case EOVERFLOW:
    if (flags & NBD_CMD_FLAG_DF)
      return NBD_EOVERFLOW;
    return NBD_EINVAL;
  case EINVAL:
  default:
    return NBD_EINVAL;
  }
}
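
/* A minimal sketch of the mapping above, written as assertions in the
 * style of a unit test (illustrative only, kept out of the build like
 * the #if 0 debug code further down):
 */
#if 0
assert (nbd_errno (0, 0) == NBD_SUCCESS);
assert (nbd_errno (EROFS, 0) == NBD_EPERM);
assert (nbd_errno (ENOSPC, 0) == NBD_ENOSPC);
/* ENOTSUP only maps to NBD_ENOTSUP when the client used FAST_ZERO: */
assert (nbd_errno (ENOTSUP, NBD_CMD_FLAG_FAST_ZERO) == NBD_ENOTSUP);
assert (nbd_errno (ENOTSUP, 0) == NBD_EINVAL);
#endif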

static int
send_simple_reply (struct connection *conn,
                   uint64_t handle, uint16_t cmd, uint16_t flags,
                   const char *buf, uint32_t count,
                   uint32_t error)
{
  ACQUIRE_LOCK_FOR_CURRENT_SCOPE (&conn->write_lock);
  struct nbd_simple_reply reply;
  int r;
  int f = (cmd == NBD_CMD_READ && !error) ? SEND_MORE : 0;

  reply.magic = htobe32 (NBD_SIMPLE_REPLY_MAGIC);
  reply.handle = handle;
  reply.error = htobe32 (nbd_errno (error, flags));

  r = conn->send (conn, &reply, sizeof reply, f);
  if (r == -1) {
    nbdkit_error ("write reply: %s: %m", name_of_nbd_cmd (cmd));
    return connection_set_status (conn, -1);
  }

  /* Send the read data buffer. */
  if (cmd == NBD_CMD_READ && !error) {
    r = conn->send (conn, buf, count, 0);
    if (r == -1) {
      nbdkit_error ("write data: %s: %m", name_of_nbd_cmd (cmd));
      return connection_set_status (conn, -1);
    }
  }

  return 1;                     /* command processed ok */
}
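
/* On the wire the simple reply sent above is a fixed 16 byte header
 * -- magic (4 bytes), error (4), the client's handle echoed back (8)
 * -- followed by count bytes of data only for a successful
 * NBD_CMD_READ.
 */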

static int
send_structured_reply_read (struct connection *conn,
                            uint64_t handle, uint16_t cmd,
                            const char *buf, uint32_t count, uint64_t offset)
{
  /* Once we are really using structured replies and sending data back
   * in chunks, we'll be able to grab the write lock for each chunk,
   * allowing other threads to interleave replies.  As we're not doing
   * that yet we acquire the lock for the whole function.
   */
  ACQUIRE_LOCK_FOR_CURRENT_SCOPE (&conn->write_lock);
  struct nbd_structured_reply reply;
  struct nbd_structured_reply_offset_data offset_data;
  int r;

  assert (cmd == NBD_CMD_READ);

  reply.magic = htobe32 (NBD_STRUCTURED_REPLY_MAGIC);
  reply.handle = handle;
  reply.flags = htobe16 (NBD_REPLY_FLAG_DONE);
  reply.type = htobe16 (NBD_REPLY_TYPE_OFFSET_DATA);
  reply.length = htobe32 (count + sizeof offset_data);

  r = conn->send (conn, &reply, sizeof reply, SEND_MORE);
  if (r == -1) {
    nbdkit_error ("write reply: %s: %m", name_of_nbd_cmd (cmd));
    return connection_set_status (conn, -1);
  }

  /* Send the offset + read data buffer. */
  offset_data.offset = htobe64 (offset);
  r = conn->send (conn, &offset_data, sizeof offset_data, SEND_MORE);
  if (r == -1) {
    nbdkit_error ("write data: %s: %m", name_of_nbd_cmd (cmd));
    return connection_set_status (conn, -1);
  }

  r = conn->send (conn, buf, count, 0);
  if (r == -1) {
    nbdkit_error ("write data: %s: %m", name_of_nbd_cmd (cmd));
    return connection_set_status (conn, -1);
  }

  return 1;                     /* command processed ok */
}
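
/* The reply sent above is a 20 byte structured reply header -- magic
 * (4 bytes), flags (2), type (2), handle (8), length (4) -- followed
 * by the NBD_REPLY_TYPE_OFFSET_DATA payload: an 8 byte offset and
 * then count bytes of data.
 */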

/* Convert a list of extents into NBD_REPLY_TYPE_BLOCK_STATUS blocks.
 * The rules here are very complicated.  Read the spec carefully!
 */
static struct nbd_block_descriptor *
extents_to_block_descriptors (struct nbdkit_extents *extents,
                              uint16_t flags,
                              uint32_t count, uint64_t offset,
                              size_t *nr_blocks)
{
  const bool req_one = flags & NBD_CMD_FLAG_REQ_ONE;
  const size_t nr_extents = nbdkit_extents_count (extents);
  size_t i;
  struct nbd_block_descriptor *blocks;

  /* This is checked in server/plugins.c. */
  assert (nr_extents >= 1);

  /* We may send fewer than nr_extents blocks, but never more. */
  blocks = calloc (req_one ? 1 : nr_extents,
                   sizeof (struct nbd_block_descriptor));
  if (blocks == NULL) {
    nbdkit_error ("calloc: %m");
    return NULL;
  }

  if (req_one) {
    const struct nbdkit_extent e = nbdkit_get_extent (extents, 0);

    /* Checked as a side effect of how the extent list is created. */
    assert (e.length > 0);

    *nr_blocks = 1;

    /* Must not exceed count of the original request. */
    blocks[0].length = MIN (e.length, (uint64_t) count);
    blocks[0].status_flags = e.type & 3;
  }
  else {
    uint64_t pos = offset;

    *nr_blocks = 0;
    for (i = 0; i < nr_extents; ++i) {
      const struct nbdkit_extent e = nbdkit_get_extent (extents, i);
      uint64_t length;

      if (i == 0)
        assert (e.offset == offset);

      /* Must not exceed UINT32_MAX. */
      blocks[i].length = length = MIN (e.length, UINT32_MAX);
      blocks[i].status_flags = e.type & 3;
      (*nr_blocks)++;

      pos += length;
      if (pos > offset + count) /* this must be the last block */
        break;

      /* If we reach here then we must have consumed this whole
       * extent.  This is currently true because the server only sends
       * 32 bit requests, but if we move to 64 bit requests we will
       * need to revisit this code so it can split extents into
       * multiple blocks.  XXX
       */
      assert (e.length <= length);
    }
  }

#if 0
  for (i = 0; i < *nr_blocks; ++i)
    debug ("block status: sending block %" PRIu32 " type %" PRIu32,
           blocks[i].length, blocks[i].status_flags);
#endif

  /* Convert to big endian for the protocol. */
  for (i = 0; i < *nr_blocks; ++i) {
    blocks[i].length = htobe32 (blocks[i].length);
    blocks[i].status_flags = htobe32 (blocks[i].status_flags);
  }

  return blocks;
}
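
/* Worked example: for a request with offset=0 count=8192 and extents
 * { offset=0 length=4096 type=0 }, { offset=4096 length=1G type=3 },
 * the loop above emits two descriptors, { 4096, 0 } and
 * { 1073741824, 3 }: without REQ_ONE the last descriptor may extend
 * past the requested range.  With REQ_ONE only { 4096, 0 } is sent,
 * clamped to the request count.
 */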

static int
send_structured_reply_block_status (struct connection *conn,
                                    uint64_t handle,
                                    uint16_t cmd, uint16_t flags,
                                    uint32_t count, uint64_t offset,
                                    struct nbdkit_extents *extents)
{
  ACQUIRE_LOCK_FOR_CURRENT_SCOPE (&conn->write_lock);
  struct nbd_structured_reply reply;
  CLEANUP_FREE struct nbd_block_descriptor *blocks = NULL;
  size_t nr_blocks;
  uint32_t context_id;
  size_t i;
  int r;

  assert (conn->meta_context_base_allocation);
  assert (cmd == NBD_CMD_BLOCK_STATUS);

  blocks = extents_to_block_descriptors (extents, flags, count, offset,
                                         &nr_blocks);
  if (blocks == NULL)
    return connection_set_status (conn, -1);

  reply.magic = htobe32 (NBD_STRUCTURED_REPLY_MAGIC);
  reply.handle = handle;
  reply.flags = htobe16 (NBD_REPLY_FLAG_DONE);
  reply.type = htobe16 (NBD_REPLY_TYPE_BLOCK_STATUS);
  reply.length = htobe32 (sizeof context_id +
                          nr_blocks * sizeof (struct nbd_block_descriptor));

  r = conn->send (conn, &reply, sizeof reply, SEND_MORE);
  if (r == -1) {
    nbdkit_error ("write reply: %s: %m", name_of_nbd_cmd (cmd));
    return connection_set_status (conn, -1);
  }

  /* Send the base:allocation context ID. */
  context_id = htobe32 (base_allocation_id);
  r = conn->send (conn, &context_id, sizeof context_id, SEND_MORE);
  if (r == -1) {
    nbdkit_error ("write reply: %s: %m", name_of_nbd_cmd (cmd));
    return connection_set_status (conn, -1);
  }

  /* Send each block descriptor. */
  for (i = 0; i < nr_blocks; ++i) {
    r = conn->send (conn, &blocks[i], sizeof blocks[i],
                    i == nr_blocks - 1 ? 0 : SEND_MORE);
    if (r == -1) {
      nbdkit_error ("write reply: %s: %m", name_of_nbd_cmd (cmd));
      return connection_set_status (conn, -1);
    }
  }

  return 1;                     /* command processed ok */
}

static int
send_structured_reply_error (struct connection *conn,
                             uint64_t handle, uint16_t cmd, uint16_t flags,
                             uint32_t error)
{
  ACQUIRE_LOCK_FOR_CURRENT_SCOPE (&conn->write_lock);
  struct nbd_structured_reply reply;
  struct nbd_structured_reply_error error_data;
  int r;

  reply.magic = htobe32 (NBD_STRUCTURED_REPLY_MAGIC);
  reply.handle = handle;
  reply.flags = htobe16 (NBD_REPLY_FLAG_DONE);
  reply.type = htobe16 (NBD_REPLY_TYPE_ERROR);
  reply.length = htobe32 (0 /* no human readable error */ + sizeof error_data);

  r = conn->send (conn, &reply, sizeof reply, SEND_MORE);
  if (r == -1) {
    nbdkit_error ("write error reply: %m");
    return connection_set_status (conn, -1);
  }

  /* Send the error. */
  error_data.error = htobe32 (nbd_errno (error, flags));
  error_data.len = htobe16 (0);
  r = conn->send (conn, &error_data, sizeof error_data, 0);
  if (r == -1) {
    nbdkit_error ("write data: %s: %m", name_of_nbd_cmd (cmd));
    return connection_set_status (conn, -1);
  }
  /* No human readable error message at the moment. */

  return 1;                     /* command processed ok */
}
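
/* The NBD_REPLY_TYPE_ERROR payload sent above is 6 bytes: a 32 bit
 * NBD_E* code and a 16 bit message length, always 0 here because no
 * human readable message is attached yet.
 */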

int
protocol_recv_request_send_reply (struct connection *conn)
{
  int r;
  struct nbd_request request;
  uint16_t cmd, flags;
  uint32_t magic, count, error = 0;
  uint64_t offset;
  char *buf = NULL;
  CLEANUP_EXTENTS_FREE struct nbdkit_extents *extents = NULL;

  /* Read the request packet. */
  {
    ACQUIRE_LOCK_FOR_CURRENT_SCOPE (&conn->read_lock);
    r = connection_get_status (conn);
    if (r <= 0)
      return r;
    r = conn->recv (conn, &request, sizeof request);
    if (r == -1) {
      nbdkit_error ("read request: %m");
      return connection_set_status (conn, -1);
    }
    if (r == 0) {
      debug ("client closed input socket, closing connection");
      return connection_set_status (conn, 0); /* disconnect */
    }

    magic = be32toh (request.magic);
    if (magic != NBD_REQUEST_MAGIC) {
      nbdkit_error ("invalid request: 'magic' field is incorrect (0x%x)",
                    magic);
      return connection_set_status (conn, -1);
    }

    flags = be16toh (request.flags);
    cmd = be16toh (request.type);

    offset = be64toh (request.offset);
    count = be32toh (request.count);

    if (cmd == NBD_CMD_DISC) {
      debug ("client sent %s, closing connection", name_of_nbd_cmd (cmd));
      return connection_set_status (conn, 0); /* disconnect */
    }

    /* Validate the request. */
    if (!validate_request (conn, cmd, flags, offset, count, &error)) {
      if (cmd == NBD_CMD_WRITE &&
          skip_over_write_buffer (conn->sockin, count) < 0)
        return connection_set_status (conn, -1);
      goto send_reply;
    }

    /* Get the data buffer used for either read or write requests.
     * This is a common per-thread data buffer, it must not be freed.
     */
    if (cmd == NBD_CMD_READ || cmd == NBD_CMD_WRITE) {
      buf = threadlocal_buffer ((size_t) count);
      if (buf == NULL) {
        error = ENOMEM;
        if (cmd == NBD_CMD_WRITE &&
            skip_over_write_buffer (conn->sockin, count) < 0)
          return connection_set_status (conn, -1);
        goto send_reply;
      }
    }

    /* Allocate the extents list for block status only. */
    if (cmd == NBD_CMD_BLOCK_STATUS) {
      extents = nbdkit_extents_new (offset, backend_get_size (backend, conn));
      if (extents == NULL) {
        error = ENOMEM;
        goto send_reply;
      }
    }

    /* Receive the write data buffer. */
    if (cmd == NBD_CMD_WRITE) {
      r = conn->recv (conn, buf, count);
      if (r == 0) {
        errno = EBADMSG;
        r = -1;
      }
      if (r == -1) {
        nbdkit_error ("read data: %s: %m", name_of_nbd_cmd (cmd));
        return connection_set_status (conn, -1);
      }
    }
  }

  /* Perform the request.  Only this part happens inside the request lock. */
  if (quit || !connection_get_status (conn)) {
    error = ESHUTDOWN;
  }
  else {
    lock_request (conn);
    error = handle_request (conn, cmd, flags, offset, count, buf, extents);
    assert ((int) error >= 0);
    unlock_request (conn);
  }

  /* Send the reply packet. */
 send_reply:
  if (connection_get_status (conn) < 0)
    return -1;

  if (error != 0) {
    /* Since we're about to send only the limited NBD_E* errno to the
     * client, don't lose the information about what really happened
     * on the server side.  Make sure there is a way for the operator
     * to retrieve the real error.
     */
    debug ("sending error reply: %s", strerror (error));
  }

  /* Currently we prefer to send simple replies for everything except
   * where we have to (ie. NBD_CMD_READ and NBD_CMD_BLOCK_STATUS when
   * structured_replies have been negotiated).  However this prevents
   * us from sending human-readable error messages to the client, so
   * we should reconsider this in future.
   */
  if (conn->structured_replies &&
      (cmd == NBD_CMD_READ || cmd == NBD_CMD_BLOCK_STATUS)) {
    if (!error) {
      if (cmd == NBD_CMD_READ)
        return send_structured_reply_read (conn, request.handle, cmd,
                                           buf, count, offset);
      else /* NBD_CMD_BLOCK_STATUS */
        return send_structured_reply_block_status (conn, request.handle,
                                                   cmd, flags,
                                                   count, offset,
                                                   extents);
    }
    else
      return send_structured_reply_error (conn, request.handle, cmd, flags,
                                          error);
  }
  else
    return send_simple_reply (conn, request.handle, cmd, flags, buf, count,
                              error);
}
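
/* A sketch of how a connection thread might drive this function; the
 * real loop lives elsewhere in the server (connections.c), so this is
 * an assumption for illustration, not a copy:
 */
#if 0
while (!quit && connection_get_status (conn) > 0)
  protocol_recv_request_send_reply (conn);
#endif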