server/protocol.c

   1 /* nbdkit
   2  * Copyright (C) 2013-2019 Red Hat Inc.
   3  *
   4  * Redistribution and use in source and binary forms, with or without
   5  * modification, are permitted provided that the following conditions are
   6  * met:
   7  *
   8  * * Redistributions of source code must retain the above copyright
   9  * notice, this list of conditions and the following disclaimer.
  10  *
  11  * * Redistributions in binary form must reproduce the above copyright
  12  * notice, this list of conditions and the following disclaimer in the
  13  * documentation and/or other materials provided with the distribution.
  14  *
  15  * * Neither the name of Red Hat nor the names of its contributors may be
  16  * used to endorse or promote products derived from this software without
  17  * specific prior written permission.
  18  *
  19  * THIS SOFTWARE IS PROVIDED BY RED HAT AND CONTRIBUTORS ''AS IS'' AND
  20  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
  21  * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
  22  * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL RED HAT OR
  23  * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  24  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  25  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
  26  * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
  27  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
  28  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
  29  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  30  * SUCH DAMAGE.
  31  */
  32
  33 #include <config.h>
  34
  35 #include <stdio.h>
  36 #include <stdlib.h>
  37 #include <stdint.h>
  38 #include <stdbool.h>
  39 #include <inttypes.h>
  40 #include <string.h>
  41 #include <unistd.h>
  42 #include <errno.h>
  43 #include <assert.h>
  44
  45 #include "internal.h"
  46 #include "byte-swapping.h"
  47 #include "minmax.h"
  48 #include "protocol.h"
  49
  50 static bool
  51 valid_range (struct connection *conn, uint64_t offset, uint32_t count)
  52 {
  53   uint64_t exportsize = backend_get_size (backend, conn);
  54
  55   assert (exportsize <= INT64_MAX); /* Guaranteed by negotiation phase */
  56   return count > 0 && offset <= exportsize && offset + count <= exportsize;
  57 }
  58
/* Check a decoded client request against the state of the connection.
 *
 * 'cmd', 'flags', 'offset' and 'count' are the request fields already
 * converted to host byte order by the caller.
 *
 * Returns true if the request may be executed.  Otherwise an error is
 * logged with nbdkit_error, a system errno value is stored in
 * '*error', and false is returned.  '*error' is not written on
 * success.
 *
 * The checks run in a fixed order and the first failure wins, so the
 * order determines which errno is reported when a request is invalid
 * in more than one way.
 */
static bool
validate_request (struct connection *conn,
                  uint16_t cmd, uint16_t flags, uint64_t offset, uint32_t count,
                  uint32_t *error)
{
  /* Readonly connection? */
  if (conn->readonly &&
      (cmd == NBD_CMD_WRITE || cmd == NBD_CMD_TRIM ||
       cmd == NBD_CMD_WRITE_ZEROES)) {
    nbdkit_error ("invalid request: %s: write request on readonly connection",
                  name_of_nbd_cmd (cmd));
    *error = EROFS;
    return false;
  }

  /* Validate cmd, offset, count. */
  switch (cmd) {
  /* Commands that operate on a byte range of the export. */
  case NBD_CMD_READ:
  case NBD_CMD_CACHE:
  case NBD_CMD_WRITE:
  case NBD_CMD_TRIM:
  case NBD_CMD_WRITE_ZEROES:
  case NBD_CMD_BLOCK_STATUS:
    if (!valid_range (conn, offset, count)) {
      /* XXX Allow writes to extend the disk? */
      nbdkit_error ("invalid request: %s: offset and count are out of range: "
                    "offset=%" PRIu64 " count=%" PRIu32,
                    name_of_nbd_cmd (cmd), offset, count);
      /* Out-of-range data writes report ENOSPC; everything else
       * (including trim) reports EINVAL.
       */
      *error = (cmd == NBD_CMD_WRITE ||
                cmd == NBD_CMD_WRITE_ZEROES) ? ENOSPC : EINVAL;
      return false;
    }
    break;

  /* Flush carries no range, so both fields must be zero. */
  case NBD_CMD_FLUSH:
    if (offset != 0 || count != 0) {
      nbdkit_error ("invalid request: %s: expecting offset and count = 0",
                    name_of_nbd_cmd (cmd));
      *error = EINVAL;
      return false;
    }
    break;

  default:
    nbdkit_error ("invalid request: unknown command (%" PRIu32 ") ignored",
                  cmd);
    *error = EINVAL;
    return false;
  }

  /* Validate flags */
  /* Reject any bit outside the four flags handled below. */
  if (flags & ~(NBD_CMD_FLAG_FUA | NBD_CMD_FLAG_NO_HOLE |
                NBD_CMD_FLAG_DF | NBD_CMD_FLAG_REQ_ONE)) {
    nbdkit_error ("invalid request: unknown flag (0x%x)", flags);
    *error = EINVAL;
    return false;
  }
  /* NO_HOLE is only accepted on WRITE_ZEROES. */
  if ((flags & NBD_CMD_FLAG_NO_HOLE) &&
      cmd != NBD_CMD_WRITE_ZEROES) {
    nbdkit_error ("invalid request: NO_HOLE flag needs WRITE_ZEROES request");
    *error = EINVAL;
    return false;
  }
  /* DF is only accepted on READ, and only with structured replies. */
  if (flags & NBD_CMD_FLAG_DF) {
    if (cmd != NBD_CMD_READ) {
      nbdkit_error ("invalid request: DF flag needs READ request");
      *error = EINVAL;
      return false;
    }
    if (!conn->structured_replies) {
      nbdkit_error ("invalid request: "
                    "%s: structured replies was not negotiated",
                    name_of_nbd_cmd (cmd));
      *error = EINVAL;
      return false;
    }
  }
  /* REQ_ONE is only accepted on BLOCK_STATUS. */
  if ((flags & NBD_CMD_FLAG_REQ_ONE) &&
      cmd != NBD_CMD_BLOCK_STATUS) {
    nbdkit_error ("invalid request: REQ_ONE flag needs BLOCK_STATUS request");
    *error = EINVAL;
    return false;
  }
  /* FUA is accepted on any command, but only if the connection
   * supports it.
   */
  if (!conn->can_fua && (flags & NBD_CMD_FLAG_FUA)) {
    nbdkit_error ("invalid request: FUA flag not supported");
    *error = EINVAL;
    return false;
  }

  /* Refuse over-large read and write requests. */
  /* Only READ and WRITE are limited here; the other ranged commands
   * may exceed MAX_REQUEST_SIZE.
   */
  if ((cmd == NBD_CMD_WRITE || cmd == NBD_CMD_READ) &&
      count > MAX_REQUEST_SIZE) {
    nbdkit_error ("invalid request: %s: data request is too large (%" PRIu32
                  " > %d)",
                  name_of_nbd_cmd (cmd), count, MAX_REQUEST_SIZE);
    *error = ENOMEM;
    return false;
  }

  /* Flush allowed? */
  if (!conn->can_flush && cmd == NBD_CMD_FLUSH) {
    nbdkit_error ("invalid request: %s: flush operation not supported",
                  name_of_nbd_cmd (cmd));
    *error = EINVAL;
    return false;
  }

  /* Trim allowed? */
  if (!conn->can_trim && cmd == NBD_CMD_TRIM) {
    nbdkit_error ("invalid request: %s: trim operation not supported",
                  name_of_nbd_cmd (cmd));
    *error = EINVAL;
    return false;
  }

  /* Zero allowed? */
  if (!conn->can_zero && cmd == NBD_CMD_WRITE_ZEROES) {
    nbdkit_error ("invalid request: %s: write zeroes operation not supported",
                  name_of_nbd_cmd (cmd));
    *error = EINVAL;
    return false;
  }

  /* Cache allowed? */
  if (!conn->can_cache && cmd == NBD_CMD_CACHE) {
    nbdkit_error ("invalid request: %s: cache operation not supported",
                  name_of_nbd_cmd (cmd));
    *error = EINVAL;
    return false;
  }

  /* Block status allowed? */
  /* Requires both structured replies and the base:allocation meta
   * context to have been negotiated on this connection.
   */
  if (cmd == NBD_CMD_BLOCK_STATUS) {
    if (!conn->structured_replies) {
      nbdkit_error ("invalid request: "
                    "%s: structured replies was not negotiated",
                    name_of_nbd_cmd (cmd));
      *error = EINVAL;
      return false;
    }
    if (!conn->meta_context_base_allocation) {
      nbdkit_error ("invalid request: "
                    "%s: base:allocation was not negotiated",
                    name_of_nbd_cmd (cmd));
      *error = EINVAL;
      return false;
    }
  }

  return true;                     /* Command validates. */
}
 210
 211 /* This is called with the request lock held to actually execute the
 212  * request (by calling the plugin).  Note that the request fields have
 213  * been validated already in 'validate_request' so we don't have to
 214  * check them again.
 215  *
 216  * 'buf' is either the data to be written or the data to be returned,
 217  * and points to a buffer of size 'count' bytes.
 218  *
 219  * 'extents' is an empty extents list used for block status requests
 220  * only.
 221  *
 222  * In all cases, the return value is the system errno value that will
 223  * later be converted to the nbd error to send back to the client (0
 224  * for success).
 225  */
 226 static uint32_t
 227 handle_request (struct connection *conn,
 228                 uint16_t cmd, uint16_t flags, uint64_t offset, uint32_t count,
 229                 void *buf, struct nbdkit_extents *extents)
 230 {
 231   uint32_t f = 0;
 232   bool fua = conn->can_fua && (flags & NBD_CMD_FLAG_FUA);
 233   int err = 0;
 234
 235   /* Clear the error, so that we know if the plugin calls
 236    * nbdkit_set_error() or relied on errno.  */
 237   threadlocal_set_error (0);
 238
 239   switch (cmd) {
 240   case NBD_CMD_READ:
 241     if (backend_pread (backend, conn, buf, count, offset, 0, &err) == -1)
 242       return err;
 243     break;
 244
 245   case NBD_CMD_WRITE:
 246     if (fua)
 247       f |= NBDKIT_FLAG_FUA;
 248     if (backend_pwrite (backend, conn, buf, count, offset, f, &err) == -1)
 249       return err;
 250     break;
 251
 252   case NBD_CMD_FLUSH:
 253     if (backend_flush (backend, conn, 0, &err) == -1)
 254       return err;
 255     break;
 256
 257   case NBD_CMD_TRIM:
 258     if (fua)
 259       f |= NBDKIT_FLAG_FUA;
 260     if (backend_trim (backend, conn, count, offset, f, &err) == -1)
 261       return err;
 262     break;
 263
 264   case NBD_CMD_CACHE:
 265     if (conn->emulate_cache) {
 266       static char buf[MAX_REQUEST_SIZE]; /* data sink, never read */
 267       uint32_t limit;
 268
 269       while (count) {
 270         limit = MIN (count, sizeof buf);
 271         if (backend_pread (backend, conn, buf, limit, offset, flags,
 272                            &err) == -1)
 273           return err;
 274         count -= limit;
 275       }
 276     }
 277     else if (backend_cache (backend, conn, count, offset, 0, &err) == -1)
 278       return err;
 279     break;
 280
 281   case NBD_CMD_WRITE_ZEROES:
 282     if (!(flags & NBD_CMD_FLAG_NO_HOLE))
 283       f |= NBDKIT_FLAG_MAY_TRIM;
 284     if (fua)
 285       f |= NBDKIT_FLAG_FUA;
 286     if (backend_zero (backend, conn, count, offset, f, &err) == -1)
 287       return err;
 288     break;
 289
 290   case NBD_CMD_BLOCK_STATUS:
 291     /* The other backend methods don't check can_*.  That is because
 292      * those methods are implicitly suppressed by returning eflags to
 293      * the client.  However there is no eflag for extents so we must
 294      * check it here.
 295      */
 296     if (conn->can_extents) {
 297       if (flags & NBD_CMD_FLAG_REQ_ONE)
 298         f |= NBDKIT_FLAG_REQ_ONE;
 299       if (backend_extents (backend, conn, count, offset, f,
 300                            extents, &err) == -1)
 301         return err;
 302     }
 303     else {
 304       int r;
 305
 306       /* By default it is safe assume that everything in the range is
 307        * allocated.
 308        */
 309       errno = 0;
 310       r = nbdkit_add_extent (extents, offset, count, 0 /* allocated data */);
 311       if (r == -1)
 312         return errno ? errno : EINVAL;
 313       return 0;
 314     }
 315     break;
 316
 317   default:
 318     abort ();
 319   }
 320
 321   return 0;
 322 }
 323
 324 static int
 325 skip_over_write_buffer (int sock, size_t count)
 326 {
 327   char buf[BUFSIZ];
 328   ssize_t r;
 329
 330   if (count > MAX_REQUEST_SIZE * 2) {
 331     nbdkit_error ("write request too large to skip");
 332     return -1;
 333   }
 334
 335   while (count > 0) {
 336     r = read (sock, buf, count > BUFSIZ ? BUFSIZ : count);
 337     if (r == -1) {
 338       nbdkit_error ("skipping write buffer: %m");
 339       return -1;
 340     }
 341     if (r == 0)  {
 342       nbdkit_error ("unexpected early EOF");
 343       errno = EBADMSG;
 344       return -1;
 345     }
 346     count -= r;
 347   }
 348   return 0;
 349 }
 350
 351 /* Convert a system errno to an NBD_E* error code. */
 352 static int
 353 nbd_errno (int error, bool flag_df)
 354 {
 355   switch (error) {
 356   case 0:
 357     return NBD_SUCCESS;
 358   case EROFS:
 359   case EPERM:
 360     return NBD_EPERM;
 361   case EIO:
 362     return NBD_EIO;
 363   case ENOMEM:
 364     return NBD_ENOMEM;
 365 #ifdef EDQUOT
 366   case EDQUOT:
 367 #endif
 368   case EFBIG:
 369   case ENOSPC:
 370     return NBD_ENOSPC;
 371 #ifdef ESHUTDOWN
 372   case ESHUTDOWN:
 373     return NBD_ESHUTDOWN;
 374 #endif
 375   case EOVERFLOW:
 376     if (flag_df)
 377       return NBD_EOVERFLOW;
 378     /* fallthrough */
 379   case EINVAL:
 380   default:
 381     return NBD_EINVAL;
 382   }
 383 }
 384
 385 static int
 386 send_simple_reply (struct connection *conn,
 387                    uint64_t handle, uint16_t cmd,
 388                    const char *buf, uint32_t count,
 389                    uint32_t error)
 390 {
 391   ACQUIRE_LOCK_FOR_CURRENT_SCOPE (&conn->write_lock);
 392   struct simple_reply reply;
 393   int r;
 394   int f = (cmd == NBD_CMD_READ && !error) ? SEND_MORE : 0;
 395
 396   reply.magic = htobe32 (NBD_SIMPLE_REPLY_MAGIC);
 397   reply.handle = handle;
 398   reply.error = htobe32 (nbd_errno (error, false));
 399
 400   r = conn->send (conn, &reply, sizeof reply, f);
 401   if (r == -1) {
 402     nbdkit_error ("write reply: %s: %m", name_of_nbd_cmd (cmd));
 403     return connection_set_status (conn, -1);
 404   }
 405
 406   /* Send the read data buffer. */
 407   if (cmd == NBD_CMD_READ && !error) {
 408     r = conn->send (conn, buf, count, 0);
 409     if (r == -1) {
 410       nbdkit_error ("write data: %s: %m", name_of_nbd_cmd (cmd));
 411       return connection_set_status (conn, -1);
 412     }
 413   }
 414
 415   return 1;                     /* command processed ok */
 416 }
 417
 418 static int
 419 send_structured_reply_read (struct connection *conn,
 420                             uint64_t handle, uint16_t cmd,
 421                             const char *buf, uint32_t count, uint64_t offset)
 422 {
 423   /* Once we are really using structured replies and sending data back
 424    * in chunks, we'll be able to grab the write lock for each chunk,
 425    * allowing other threads to interleave replies.  As we're not doing
 426    * that yet we acquire the lock for the whole function.
 427    */
 428   ACQUIRE_LOCK_FOR_CURRENT_SCOPE (&conn->write_lock);
 429   struct structured_reply reply;
 430   struct structured_reply_offset_data offset_data;
 431   int r;
 432
 433   assert (cmd == NBD_CMD_READ);
 434
 435   reply.magic = htobe32 (NBD_STRUCTURED_REPLY_MAGIC);
 436   reply.handle = handle;
 437   reply.flags = htobe16 (NBD_REPLY_FLAG_DONE);
 438   reply.type = htobe16 (NBD_REPLY_TYPE_OFFSET_DATA);
 439   reply.length = htobe32 (count + sizeof offset_data);
 440
 441   r = conn->send (conn, &reply, sizeof reply, SEND_MORE);
 442   if (r == -1) {
 443     nbdkit_error ("write reply: %s: %m", name_of_nbd_cmd (cmd));
 444     return connection_set_status (conn, -1);
 445   }
 446
 447   /* Send the offset + read data buffer. */
 448   offset_data.offset = htobe64 (offset);
 449   r = conn->send (conn, &offset_data, sizeof offset_data, SEND_MORE);
 450   if (r == -1) {
 451     nbdkit_error ("write data: %s: %m", name_of_nbd_cmd (cmd));
 452     return connection_set_status (conn, -1);
 453   }
 454
 455   r = conn->send (conn, buf, count, 0);
 456   if (r == -1) {
 457     nbdkit_error ("write data: %s: %m", name_of_nbd_cmd (cmd));
 458     return connection_set_status (conn, -1);
 459   }
 460
 461   return 1;                     /* command processed ok */
 462 }
 463
 464 /* Convert a list of extents into NBD_REPLY_TYPE_BLOCK_STATUS blocks.
 465  * The rules here are very complicated.  Read the spec carefully!
 466  */
 467 static struct block_descriptor *
 468 extents_to_block_descriptors (struct nbdkit_extents *extents,
 469                               uint16_t flags,
 470                               uint32_t count, uint64_t offset,
 471                               size_t *nr_blocks)
 472 {
 473   const bool req_one = flags & NBD_CMD_FLAG_REQ_ONE;
 474   const size_t nr_extents = nbdkit_extents_count (extents);
 475   size_t i;
 476   struct block_descriptor *blocks;
 477
 478   /* This is checked in server/plugins.c. */
 479   assert (nr_extents >= 1);
 480
 481   /* We may send fewer than nr_extents blocks, but never more. */
 482   blocks = calloc (req_one ? 1 : nr_extents, sizeof (struct block_descriptor));
 483   if (blocks == NULL) {
 484     nbdkit_error ("calloc: %m");
 485     return NULL;
 486   }
 487
 488   if (req_one) {
 489     const struct nbdkit_extent e = nbdkit_get_extent (extents, 0);
 490
 491     /* Checked as a side effect of how the extent list is created. */
 492     assert (e.length > 0);
 493
 494     *nr_blocks = 1;
 495
 496     /* Must not exceed count of the original request. */
 497     blocks[0].length = MIN (e.length, (uint64_t) count);
 498     blocks[0].status_flags = e.type & 3;
 499   }
 500   else {
 501     uint64_t pos = offset;
 502
 503     *nr_blocks = 0;
 504     for (i = 0; i < nr_extents; ++i) {
 505       const struct nbdkit_extent e = nbdkit_get_extent (extents, i);
 506       uint64_t length;
 507
 508       if (i == 0)
 509         assert (e.offset == offset);
 510
 511       /* Must not exceed UINT32_MAX. */
 512       blocks[i].length = length = MIN (e.length, UINT32_MAX);
 513       blocks[i].status_flags = e.type & 3;
 514       (*nr_blocks)++;
 515
 516       pos += length;
 517       if (pos > offset + count) /* this must be the last block */
 518         break;
 519
 520       /* If we reach here then we must have consumed this whole
 521        * extent.  This is currently true because the server only sends
 522        * 32 bit requests, but if we move to 64 bit requests we will
 523        * need to revisit this code so it can split extents into
 524        * multiple blocks.  XXX
 525        */
 526       assert (e.length <= length);
 527     }
 528   }
 529
 530 #if 0
 531   for (i = 0; i < *nr_blocks; ++i)
 532     nbdkit_debug ("block status: sending block %" PRIu32 " type %" PRIu32,
 533                   blocks[i].length, blocks[i].status_flags);
 534 #endif
 535
 536   /* Convert to big endian for the protocol. */
 537   for (i = 0; i < *nr_blocks; ++i) {
 538     blocks[i].length = htobe32 (blocks[i].length);
 539     blocks[i].status_flags = htobe32 (blocks[i].status_flags);
 540   }
 541
 542   return blocks;
 543 }
 544
 545 static int
 546 send_structured_reply_block_status (struct connection *conn,
 547                                     uint64_t handle,
 548                                     uint16_t cmd, uint16_t flags,
 549                                     uint32_t count, uint64_t offset,
 550                                     struct nbdkit_extents *extents)
 551 {
 552   ACQUIRE_LOCK_FOR_CURRENT_SCOPE (&conn->write_lock);
 553   struct structured_reply reply;
 554   CLEANUP_FREE struct block_descriptor *blocks = NULL;
 555   size_t nr_blocks;
 556   uint32_t context_id;
 557   size_t i;
 558   int r;
 559
 560   assert (conn->meta_context_base_allocation);
 561   assert (cmd == NBD_CMD_BLOCK_STATUS);
 562
 563   blocks = extents_to_block_descriptors (extents, flags, count, offset,
 564                                          &nr_blocks);
 565   if (blocks == NULL)
 566     return connection_set_status (conn, -1);
 567
 568   reply.magic = htobe32 (NBD_STRUCTURED_REPLY_MAGIC);
 569   reply.handle = handle;
 570   reply.flags = htobe16 (NBD_REPLY_FLAG_DONE);
 571   reply.type = htobe16 (NBD_REPLY_TYPE_BLOCK_STATUS);
 572   reply.length = htobe32 (sizeof context_id +
 573                           nr_blocks * sizeof (struct block_descriptor));
 574
 575   r = conn->send (conn, &reply, sizeof reply, SEND_MORE);
 576   if (r == -1) {
 577     nbdkit_error ("write reply: %s: %m", name_of_nbd_cmd (cmd));
 578     return connection_set_status (conn, -1);
 579   }
 580
 581   /* Send the base:allocation context ID. */
 582   context_id = htobe32 (base_allocation_id);
 583   r = conn->send (conn, &context_id, sizeof context_id, SEND_MORE);
 584   if (r == -1) {
 585     nbdkit_error ("write reply: %s: %m", name_of_nbd_cmd (cmd));
 586     return connection_set_status (conn, -1);
 587   }
 588
 589   /* Send each block descriptor. */
 590   for (i = 0; i < nr_blocks; ++i) {
 591     r = conn->send (conn, &blocks[i], sizeof blocks[i],
 592                     i == nr_blocks - 1 ? 0 : SEND_MORE);
 593     if (r == -1) {
 594       nbdkit_error ("write reply: %s: %m", name_of_nbd_cmd (cmd));
 595       return connection_set_status (conn, -1);
 596     }
 597   }
 598
 599   return 1;                     /* command processed ok */
 600 }
 601
 602 static int
 603 send_structured_reply_error (struct connection *conn,
 604                              uint64_t handle, uint16_t cmd, uint16_t flags,
 605                              uint32_t error)
 606 {
 607   ACQUIRE_LOCK_FOR_CURRENT_SCOPE (&conn->write_lock);
 608   struct structured_reply reply;
 609   struct structured_reply_error error_data;
 610   int r;
 611
 612   reply.magic = htobe32 (NBD_STRUCTURED_REPLY_MAGIC);
 613   reply.handle = handle;
 614   reply.flags = htobe16 (NBD_REPLY_FLAG_DONE);
 615   reply.type = htobe16 (NBD_REPLY_TYPE_ERROR);
 616   reply.length = htobe32 (0 /* no human readable error */ + sizeof error_data);
 617
 618   r = conn->send (conn, &reply, sizeof reply, SEND_MORE);
 619   if (r == -1) {
 620     nbdkit_error ("write error reply: %m");
 621     return connection_set_status (conn, -1);
 622   }
 623
 624   /* Send the error. */
 625   error_data.error = htobe32 (nbd_errno (error, flags & NBD_CMD_FLAG_DF));
 626   error_data.len = htobe16 (0);
 627   r = conn->send (conn, &error_data, sizeof error_data, 0);
 628   if (r == -1) {
 629     nbdkit_error ("write data: %s: %m", name_of_nbd_cmd (cmd));
 630     return connection_set_status (conn, -1);
 631   }
 632   /* No human readable error message at the moment. */
 633
 634   return 1;                     /* command processed ok */
 635 }
 636
/* Receive one request from the client, validate it, execute it and
 * send the reply.
 *
 * The function has three phases:
 *
 *  1. Under the connection's read lock: read and decode the request
 *     header, validate it, obtain buffers and read any write payload.
 *  2. Under the request lock: execute the request with
 *     'handle_request'.
 *  3. Send the reply; each send_* helper takes the write lock itself.
 *
 * Return value: if the connection status is already <= 0 on entry
 * that status is returned.  Fatal receive or protocol errors return
 * the result of connection_set_status (conn, -1); a client disconnect
 * returns the result of connection_set_status (conn, 0).  If the
 * status is negative when the reply is due, -1 is returned.  Otherwise
 * the result of the send_* helper is returned (1 when the reply was
 * sent).
 */
int
protocol_recv_request_send_reply (struct connection *conn)
{
  int r;
  struct request request;
  uint16_t cmd, flags;
  uint32_t magic, count, error = 0;
  uint64_t offset;
  char *buf = NULL;
  CLEANUP_EXTENTS_FREE struct nbdkit_extents *extents = NULL;

  /* Read the request packet. */
  {
    /* The read lock covers this whole block, including the 'goto
     * send_reply' exits out of it.
     */
    ACQUIRE_LOCK_FOR_CURRENT_SCOPE (&conn->read_lock);
    r = connection_get_status (conn);
    if (r <= 0)
      return r;
    r = conn->recv (conn, &request, sizeof request);
    if (r == -1) {
      nbdkit_error ("read request: %m");
      return connection_set_status (conn, -1);
    }
    if (r == 0) {
      debug ("client closed input socket, closing connection");
      return connection_set_status (conn, 0); /* disconnect */
    }

    magic = be32toh (request.magic);
    if (magic != NBD_REQUEST_MAGIC) {
      nbdkit_error ("invalid request: 'magic' field is incorrect (0x%x)",
                    magic);
      return connection_set_status (conn, -1);
    }

    /* Decode the remaining header fields to host byte order.  The
     * handle is deliberately left as received; it is echoed back
     * unchanged in the reply.
     */
    flags = be16toh (request.flags);
    cmd = be16toh (request.type);

    offset = be64toh (request.offset);
    count = be32toh (request.count);

    if (cmd == NBD_CMD_DISC) {
      debug ("client sent %s, closing connection", name_of_nbd_cmd (cmd));
      return connection_set_status (conn, 0); /* disconnect */
    }

    /* Validate the request. */
    if (!validate_request (conn, cmd, flags, offset, count, &error)) {
      /* A rejected write still has its payload in flight; discard it
       * so the next request header can be read.
       *
       * NOTE(review): this reads from conn->sockin directly, whereas
       * the rest of this function receives through conn->recv --
       * confirm that is correct for every transport.
       */
      if (cmd == NBD_CMD_WRITE &&
          skip_over_write_buffer (conn->sockin, count) < 0)
        return connection_set_status (conn, -1);
      goto send_reply;
    }

    /* Get the data buffer used for either read or write requests.
     * This is a common per-thread data buffer, it must not be freed.
     */
    if (cmd == NBD_CMD_READ || cmd == NBD_CMD_WRITE) {
      buf = threadlocal_buffer ((size_t) count);
      if (buf == NULL) {
        error = ENOMEM;
        /* As above, the write payload must still be discarded. */
        if (cmd == NBD_CMD_WRITE &&
            skip_over_write_buffer (conn->sockin, count) < 0)
          return connection_set_status (conn, -1);
        goto send_reply;
      }
    }

    /* Allocate the extents list for block status only. */
    if (cmd == NBD_CMD_BLOCK_STATUS) {
      extents = nbdkit_extents_new (offset, backend_get_size (backend, conn));
      if (extents == NULL) {
        error = ENOMEM;
        goto send_reply;
      }
    }

    /* Receive the write data buffer. */
    if (cmd == NBD_CMD_WRITE) {
      r = conn->recv (conn, buf, count);
      /* EOF in the middle of a request is treated as an error. */
      if (r == 0) {
        errno = EBADMSG;
        r = -1;
      }
      if (r == -1) {
        nbdkit_error ("read data: %s: %m", name_of_nbd_cmd (cmd));
        return connection_set_status (conn, -1);
      }
    }
  }

  /* Perform the request.  Only this part happens inside the request lock. */
  /* The request is not executed if 'quit' is set or the connection
   * status is zero; the client is sent ESHUTDOWN instead.
   */
  if (quit || !connection_get_status (conn)) {
    error = ESHUTDOWN;
  }
  else {
    lock_request (conn);
    error = handle_request (conn, cmd, flags, offset, count, buf, extents);
    assert ((int) error >= 0);
    unlock_request (conn);
  }

  /* Send the reply packet. */
 send_reply:
  /* Do not attempt to reply if the connection status is negative. */
  if (connection_get_status (conn) < 0)
    return -1;

  if (error != 0) {
    /* Since we're about to send only the limited NBD_E* errno to the
     * client, don't lose the information about what really happened
     * on the server side.  Make sure there is a way for the operator
     * to retrieve the real error.
     */
    debug ("sending error reply: %s", strerror (error));
  }

  /* Currently we prefer to send simple replies for everything except
   * where we have to (ie. NBD_CMD_READ and NBD_CMD_BLOCK_STATUS when
   * structured_replies have been negotiated).  However this prevents
   * us from sending human-readable error messages to the client, so
   * we should reconsider this in future.
   */
  if (conn->structured_replies &&
      (cmd == NBD_CMD_READ || cmd == NBD_CMD_BLOCK_STATUS)) {
    if (!error) {
      if (cmd == NBD_CMD_READ)
        return send_structured_reply_read (conn, request.handle, cmd,
                                           buf, count, offset);
      else /* NBD_CMD_BLOCK_STATUS */
        return send_structured_reply_block_status (conn, request.handle,
                                                   cmd, flags,
                                                   count, offset,
                                                   extents);
    }
    else
      return send_structured_reply_error (conn, request.handle, cmd, flags,
                                          error);
  }
  else
    return send_simple_reply (conn, request.handle, cmd, buf, count, error);
}