slirp: Generalizing and neutralizing ARP code
[qemu/ar7.git] / nbd / server.c
blob1ec79cf411c0e2cf453bd9629988eb0087fde54a
/*
 * Copyright (C) 2005 Anthony Liguori <anthony@codemonkey.ws>
 *
 * Network Block Device Server Side
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; under version 2 of the License.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, see <http://www.gnu.org/licenses/>.
 */
19 #include "nbd-internal.h"
21 static int system_errno_to_nbd_errno(int err)
23 switch (err) {
24 case 0:
25 return NBD_SUCCESS;
26 case EPERM:
27 return NBD_EPERM;
28 case EIO:
29 return NBD_EIO;
30 case ENOMEM:
31 return NBD_ENOMEM;
32 #ifdef EDQUOT
33 case EDQUOT:
34 #endif
35 case EFBIG:
36 case ENOSPC:
37 return NBD_ENOSPC;
38 case EINVAL:
39 default:
40 return NBD_EINVAL;
44 /* Definitions for opaque data types */
46 typedef struct NBDRequest NBDRequest;
48 struct NBDRequest {
49 QSIMPLEQ_ENTRY(NBDRequest) entry;
50 NBDClient *client;
51 uint8_t *data;
54 struct NBDExport {
55 int refcount;
56 void (*close)(NBDExport *exp);
58 BlockBackend *blk;
59 char *name;
60 off_t dev_offset;
61 off_t size;
62 uint32_t nbdflags;
63 QTAILQ_HEAD(, NBDClient) clients;
64 QTAILQ_ENTRY(NBDExport) next;
66 AioContext *ctx;
68 Notifier eject_notifier;
71 static QTAILQ_HEAD(, NBDExport) exports = QTAILQ_HEAD_INITIALIZER(exports);
73 struct NBDClient {
74 int refcount;
75 void (*close)(NBDClient *client);
77 NBDExport *exp;
78 int sock;
80 Coroutine *recv_coroutine;
82 CoMutex send_lock;
83 Coroutine *send_coroutine;
85 bool can_read;
87 QTAILQ_ENTRY(NBDClient) next;
88 int nb_requests;
89 bool closing;
92 /* That's all folks */
94 static void nbd_set_handlers(NBDClient *client);
95 static void nbd_unset_handlers(NBDClient *client);
96 static void nbd_update_can_read(NBDClient *client);
98 static void nbd_negotiate_continue(void *opaque)
100 qemu_coroutine_enter(opaque, NULL);
103 static ssize_t nbd_negotiate_read(int fd, void *buffer, size_t size)
105 ssize_t ret;
107 assert(qemu_in_coroutine());
108 /* Negotiation are always in main loop. */
109 qemu_set_fd_handler(fd, nbd_negotiate_continue, NULL,
110 qemu_coroutine_self());
111 ret = read_sync(fd, buffer, size);
112 qemu_set_fd_handler(fd, NULL, NULL, NULL);
113 return ret;
117 static ssize_t nbd_negotiate_write(int fd, void *buffer, size_t size)
119 ssize_t ret;
121 assert(qemu_in_coroutine());
122 /* Negotiation are always in main loop. */
123 qemu_set_fd_handler(fd, NULL, nbd_negotiate_continue,
124 qemu_coroutine_self());
125 ret = write_sync(fd, buffer, size);
126 qemu_set_fd_handler(fd, NULL, NULL, NULL);
127 return ret;
/*
 * Read and discard @size bytes from @fd during negotiation.
 *
 * The data is consumed in chunks of at most 64 KiB.
 *
 * Returns @size on success, or a negative errno value on failure.  A
 * short read is reported as -EIO: every other caller in this file treats
 * a short count from nbd_negotiate_read() as a failure, and carrying on
 * here would never make progress if the peer has gone away (the loop
 * previously subtracted the short count, possibly zero, and retried
 * forever).
 */
static ssize_t nbd_negotiate_drop_sync(int fd, size_t size)
{
    ssize_t ret, dropped = size;
    uint8_t *buffer = g_malloc(MIN(65536, size));

    while (size > 0) {
        size_t chunk = MIN(65536, size);

        ret = nbd_negotiate_read(fd, buffer, chunk);
        if (ret < 0) {
            dropped = ret;
            break;
        }
        if ((size_t)ret != chunk) {
            /* Short read: the stream ended before the payload did. */
            dropped = -EIO;
            break;
        }

        size -= chunk;
    }

    g_free(buffer);
    return dropped;
}
/* Basic flow for negotiation

   Server         Client
   Negotiate

   or

   Server         Client
   Negotiate #1
                  Option
   Negotiate #2

   ----

   followed by

   Server         Client
                  Request
   Response
                  Request
   Response
                  ...
   ...
                  Request (type == 2)

*/
177 static int nbd_negotiate_send_rep(int csock, uint32_t type, uint32_t opt)
179 uint64_t magic;
180 uint32_t len;
182 magic = cpu_to_be64(NBD_REP_MAGIC);
183 if (nbd_negotiate_write(csock, &magic, sizeof(magic)) != sizeof(magic)) {
184 LOG("write failed (rep magic)");
185 return -EINVAL;
187 opt = cpu_to_be32(opt);
188 if (nbd_negotiate_write(csock, &opt, sizeof(opt)) != sizeof(opt)) {
189 LOG("write failed (rep opt)");
190 return -EINVAL;
192 type = cpu_to_be32(type);
193 if (nbd_negotiate_write(csock, &type, sizeof(type)) != sizeof(type)) {
194 LOG("write failed (rep type)");
195 return -EINVAL;
197 len = cpu_to_be32(0);
198 if (nbd_negotiate_write(csock, &len, sizeof(len)) != sizeof(len)) {
199 LOG("write failed (rep data length)");
200 return -EINVAL;
202 return 0;
205 static int nbd_negotiate_send_rep_list(int csock, NBDExport *exp)
207 uint64_t magic, name_len;
208 uint32_t opt, type, len;
210 name_len = strlen(exp->name);
211 magic = cpu_to_be64(NBD_REP_MAGIC);
212 if (nbd_negotiate_write(csock, &magic, sizeof(magic)) != sizeof(magic)) {
213 LOG("write failed (magic)");
214 return -EINVAL;
216 opt = cpu_to_be32(NBD_OPT_LIST);
217 if (nbd_negotiate_write(csock, &opt, sizeof(opt)) != sizeof(opt)) {
218 LOG("write failed (opt)");
219 return -EINVAL;
221 type = cpu_to_be32(NBD_REP_SERVER);
222 if (nbd_negotiate_write(csock, &type, sizeof(type)) != sizeof(type)) {
223 LOG("write failed (reply type)");
224 return -EINVAL;
226 len = cpu_to_be32(name_len + sizeof(len));
227 if (nbd_negotiate_write(csock, &len, sizeof(len)) != sizeof(len)) {
228 LOG("write failed (length)");
229 return -EINVAL;
231 len = cpu_to_be32(name_len);
232 if (nbd_negotiate_write(csock, &len, sizeof(len)) != sizeof(len)) {
233 LOG("write failed (length)");
234 return -EINVAL;
236 if (nbd_negotiate_write(csock, exp->name, name_len) != name_len) {
237 LOG("write failed (buffer)");
238 return -EINVAL;
240 return 0;
243 static int nbd_negotiate_handle_list(NBDClient *client, uint32_t length)
245 int csock;
246 NBDExport *exp;
248 csock = client->sock;
249 if (length) {
250 if (nbd_negotiate_drop_sync(csock, length) != length) {
251 return -EIO;
253 return nbd_negotiate_send_rep(csock, NBD_REP_ERR_INVALID, NBD_OPT_LIST);
256 /* For each export, send a NBD_REP_SERVER reply. */
257 QTAILQ_FOREACH(exp, &exports, next) {
258 if (nbd_negotiate_send_rep_list(csock, exp)) {
259 return -EINVAL;
262 /* Finish with a NBD_REP_ACK. */
263 return nbd_negotiate_send_rep(csock, NBD_REP_ACK, NBD_OPT_LIST);
266 static int nbd_negotiate_handle_export_name(NBDClient *client, uint32_t length)
268 int rc = -EINVAL, csock = client->sock;
269 char name[256];
271 /* Client sends:
272 [20 .. xx] export name (length bytes)
274 TRACE("Checking length");
275 if (length > 255) {
276 LOG("Bad length received");
277 goto fail;
279 if (nbd_negotiate_read(csock, name, length) != length) {
280 LOG("read failed");
281 goto fail;
283 name[length] = '\0';
285 client->exp = nbd_export_find(name);
286 if (!client->exp) {
287 LOG("export not found");
288 goto fail;
291 QTAILQ_INSERT_TAIL(&client->exp->clients, client, next);
292 nbd_export_get(client->exp);
293 rc = 0;
294 fail:
295 return rc;
298 static int nbd_negotiate_options(NBDClient *client)
300 int csock = client->sock;
301 uint32_t flags;
303 /* Client sends:
304 [ 0 .. 3] client flags
306 [ 0 .. 7] NBD_OPTS_MAGIC
307 [ 8 .. 11] NBD option
308 [12 .. 15] Data length
309 ... Rest of request
311 [ 0 .. 7] NBD_OPTS_MAGIC
312 [ 8 .. 11] Second NBD option
313 [12 .. 15] Data length
314 ... Rest of request
317 if (nbd_negotiate_read(csock, &flags, sizeof(flags)) != sizeof(flags)) {
318 LOG("read failed");
319 return -EIO;
321 TRACE("Checking client flags");
322 be32_to_cpus(&flags);
323 if (flags != 0 && flags != NBD_FLAG_C_FIXED_NEWSTYLE) {
324 LOG("Bad client flags received");
325 return -EIO;
328 while (1) {
329 int ret;
330 uint32_t tmp, length;
331 uint64_t magic;
333 if (nbd_negotiate_read(csock, &magic, sizeof(magic)) != sizeof(magic)) {
334 LOG("read failed");
335 return -EINVAL;
337 TRACE("Checking opts magic");
338 if (magic != be64_to_cpu(NBD_OPTS_MAGIC)) {
339 LOG("Bad magic received");
340 return -EINVAL;
343 if (nbd_negotiate_read(csock, &tmp, sizeof(tmp)) != sizeof(tmp)) {
344 LOG("read failed");
345 return -EINVAL;
348 if (nbd_negotiate_read(csock, &length,
349 sizeof(length)) != sizeof(length)) {
350 LOG("read failed");
351 return -EINVAL;
353 length = be32_to_cpu(length);
355 TRACE("Checking option");
356 switch (be32_to_cpu(tmp)) {
357 case NBD_OPT_LIST:
358 ret = nbd_negotiate_handle_list(client, length);
359 if (ret < 0) {
360 return ret;
362 break;
364 case NBD_OPT_ABORT:
365 return -EINVAL;
367 case NBD_OPT_EXPORT_NAME:
368 return nbd_negotiate_handle_export_name(client, length);
370 default:
371 tmp = be32_to_cpu(tmp);
372 LOG("Unsupported option 0x%x", tmp);
373 nbd_negotiate_send_rep(client->sock, NBD_REP_ERR_UNSUP, tmp);
374 return -EINVAL;
379 typedef struct {
380 NBDClient *client;
381 Coroutine *co;
382 } NBDClientNewData;
384 static coroutine_fn int nbd_negotiate(NBDClientNewData *data)
386 NBDClient *client = data->client;
387 int csock = client->sock;
388 char buf[8 + 8 + 8 + 128];
389 int rc;
390 const int myflags = (NBD_FLAG_HAS_FLAGS | NBD_FLAG_SEND_TRIM |
391 NBD_FLAG_SEND_FLUSH | NBD_FLAG_SEND_FUA);
393 /* Negotiation header without options:
394 [ 0 .. 7] passwd ("NBDMAGIC")
395 [ 8 .. 15] magic (NBD_CLIENT_MAGIC)
396 [16 .. 23] size
397 [24 .. 25] server flags (0)
398 [26 .. 27] export flags
399 [28 .. 151] reserved (0)
401 Negotiation header with options, part 1:
402 [ 0 .. 7] passwd ("NBDMAGIC")
403 [ 8 .. 15] magic (NBD_OPTS_MAGIC)
404 [16 .. 17] server flags (0)
406 part 2 (after options are sent):
407 [18 .. 25] size
408 [26 .. 27] export flags
409 [28 .. 151] reserved (0)
412 rc = -EINVAL;
414 TRACE("Beginning negotiation.");
415 memset(buf, 0, sizeof(buf));
416 memcpy(buf, "NBDMAGIC", 8);
417 if (client->exp) {
418 assert ((client->exp->nbdflags & ~65535) == 0);
419 cpu_to_be64w((uint64_t*)(buf + 8), NBD_CLIENT_MAGIC);
420 cpu_to_be64w((uint64_t*)(buf + 16), client->exp->size);
421 cpu_to_be16w((uint16_t*)(buf + 26), client->exp->nbdflags | myflags);
422 } else {
423 cpu_to_be64w((uint64_t*)(buf + 8), NBD_OPTS_MAGIC);
424 cpu_to_be16w((uint16_t *)(buf + 16), NBD_FLAG_FIXED_NEWSTYLE);
427 if (client->exp) {
428 if (nbd_negotiate_write(csock, buf, sizeof(buf)) != sizeof(buf)) {
429 LOG("write failed");
430 goto fail;
432 } else {
433 if (nbd_negotiate_write(csock, buf, 18) != 18) {
434 LOG("write failed");
435 goto fail;
437 rc = nbd_negotiate_options(client);
438 if (rc != 0) {
439 LOG("option negotiation failed");
440 goto fail;
443 assert ((client->exp->nbdflags & ~65535) == 0);
444 cpu_to_be64w((uint64_t*)(buf + 18), client->exp->size);
445 cpu_to_be16w((uint16_t*)(buf + 26), client->exp->nbdflags | myflags);
446 if (nbd_negotiate_write(csock, buf + 18,
447 sizeof(buf) - 18) != sizeof(buf) - 18) {
448 LOG("write failed");
449 goto fail;
453 TRACE("Negotiation succeeded.");
454 rc = 0;
455 fail:
456 return rc;
459 #ifdef __linux__
461 int nbd_disconnect(int fd)
463 ioctl(fd, NBD_CLEAR_QUE);
464 ioctl(fd, NBD_DISCONNECT);
465 ioctl(fd, NBD_CLEAR_SOCK);
466 return 0;
469 #else
471 int nbd_disconnect(int fd)
473 return -ENOTSUP;
475 #endif
477 static ssize_t nbd_receive_request(int csock, struct nbd_request *request)
479 uint8_t buf[NBD_REQUEST_SIZE];
480 uint32_t magic;
481 ssize_t ret;
483 ret = read_sync(csock, buf, sizeof(buf));
484 if (ret < 0) {
485 return ret;
488 if (ret != sizeof(buf)) {
489 LOG("read failed");
490 return -EINVAL;
493 /* Request
494 [ 0 .. 3] magic (NBD_REQUEST_MAGIC)
495 [ 4 .. 7] type (0 == READ, 1 == WRITE)
496 [ 8 .. 15] handle
497 [16 .. 23] from
498 [24 .. 27] len
501 magic = be32_to_cpup((uint32_t*)buf);
502 request->type = be32_to_cpup((uint32_t*)(buf + 4));
503 request->handle = be64_to_cpup((uint64_t*)(buf + 8));
504 request->from = be64_to_cpup((uint64_t*)(buf + 16));
505 request->len = be32_to_cpup((uint32_t*)(buf + 24));
507 TRACE("Got request: "
508 "{ magic = 0x%x, .type = %d, from = %" PRIu64" , len = %u }",
509 magic, request->type, request->from, request->len);
511 if (magic != NBD_REQUEST_MAGIC) {
512 LOG("invalid magic (got 0x%x)", magic);
513 return -EINVAL;
515 return 0;
518 static ssize_t nbd_send_reply(int csock, struct nbd_reply *reply)
520 uint8_t buf[NBD_REPLY_SIZE];
521 ssize_t ret;
523 reply->error = system_errno_to_nbd_errno(reply->error);
525 /* Reply
526 [ 0 .. 3] magic (NBD_REPLY_MAGIC)
527 [ 4 .. 7] error (0 == no error)
528 [ 7 .. 15] handle
530 cpu_to_be32w((uint32_t*)buf, NBD_REPLY_MAGIC);
531 cpu_to_be32w((uint32_t*)(buf + 4), reply->error);
532 cpu_to_be64w((uint64_t*)(buf + 8), reply->handle);
534 TRACE("Sending response to client");
536 ret = write_sync(csock, buf, sizeof(buf));
537 if (ret < 0) {
538 return ret;
541 if (ret != sizeof(buf)) {
542 LOG("writing to socket failed");
543 return -EINVAL;
545 return 0;
/* Upper bound on the number of requests in flight for a single client. */
#define MAX_NBD_REQUESTS 16
550 void nbd_client_get(NBDClient *client)
552 client->refcount++;
555 void nbd_client_put(NBDClient *client)
557 if (--client->refcount == 0) {
558 /* The last reference should be dropped by client->close,
559 * which is called by client_close.
561 assert(client->closing);
563 nbd_unset_handlers(client);
564 close(client->sock);
565 client->sock = -1;
566 if (client->exp) {
567 QTAILQ_REMOVE(&client->exp->clients, client, next);
568 nbd_export_put(client->exp);
570 g_free(client);
574 static void client_close(NBDClient *client)
576 if (client->closing) {
577 return;
580 client->closing = true;
582 /* Force requests to finish. They will drop their own references,
583 * then we'll close the socket and free the NBDClient.
585 shutdown(client->sock, 2);
587 /* Also tell the client, so that they release their reference. */
588 if (client->close) {
589 client->close(client);
593 static NBDRequest *nbd_request_get(NBDClient *client)
595 NBDRequest *req;
597 assert(client->nb_requests <= MAX_NBD_REQUESTS - 1);
598 client->nb_requests++;
599 nbd_update_can_read(client);
601 req = g_new0(NBDRequest, 1);
602 nbd_client_get(client);
603 req->client = client;
604 return req;
607 static void nbd_request_put(NBDRequest *req)
609 NBDClient *client = req->client;
611 if (req->data) {
612 qemu_vfree(req->data);
614 g_free(req);
616 client->nb_requests--;
617 nbd_update_can_read(client);
618 nbd_client_put(client);
621 static void blk_aio_attached(AioContext *ctx, void *opaque)
623 NBDExport *exp = opaque;
624 NBDClient *client;
626 TRACE("Export %s: Attaching clients to AIO context %p\n", exp->name, ctx);
628 exp->ctx = ctx;
630 QTAILQ_FOREACH(client, &exp->clients, next) {
631 nbd_set_handlers(client);
635 static void blk_aio_detach(void *opaque)
637 NBDExport *exp = opaque;
638 NBDClient *client;
640 TRACE("Export %s: Detaching clients from AIO context %p\n", exp->name, exp->ctx);
642 QTAILQ_FOREACH(client, &exp->clients, next) {
643 nbd_unset_handlers(client);
646 exp->ctx = NULL;
649 static void nbd_eject_notifier(Notifier *n, void *data)
651 NBDExport *exp = container_of(n, NBDExport, eject_notifier);
652 nbd_export_close(exp);
655 NBDExport *nbd_export_new(BlockBackend *blk, off_t dev_offset, off_t size,
656 uint32_t nbdflags, void (*close)(NBDExport *),
657 Error **errp)
659 NBDExport *exp = g_malloc0(sizeof(NBDExport));
660 exp->refcount = 1;
661 QTAILQ_INIT(&exp->clients);
662 exp->blk = blk;
663 exp->dev_offset = dev_offset;
664 exp->nbdflags = nbdflags;
665 exp->size = size < 0 ? blk_getlength(blk) : size;
666 if (exp->size < 0) {
667 error_setg_errno(errp, -exp->size,
668 "Failed to determine the NBD export's length");
669 goto fail;
671 exp->size -= exp->size % BDRV_SECTOR_SIZE;
673 exp->close = close;
674 exp->ctx = blk_get_aio_context(blk);
675 blk_ref(blk);
676 blk_add_aio_context_notifier(blk, blk_aio_attached, blk_aio_detach, exp);
678 exp->eject_notifier.notify = nbd_eject_notifier;
679 blk_add_remove_bs_notifier(blk, &exp->eject_notifier);
682 * NBD exports are used for non-shared storage migration. Make sure
683 * that BDRV_O_INACTIVE is cleared and the image is ready for write
684 * access since the export could be available before migration handover.
686 aio_context_acquire(exp->ctx);
687 blk_invalidate_cache(blk, NULL);
688 aio_context_release(exp->ctx);
689 return exp;
691 fail:
692 g_free(exp);
693 return NULL;
696 NBDExport *nbd_export_find(const char *name)
698 NBDExport *exp;
699 QTAILQ_FOREACH(exp, &exports, next) {
700 if (strcmp(name, exp->name) == 0) {
701 return exp;
705 return NULL;
708 void nbd_export_set_name(NBDExport *exp, const char *name)
710 if (exp->name == name) {
711 return;
714 nbd_export_get(exp);
715 if (exp->name != NULL) {
716 g_free(exp->name);
717 exp->name = NULL;
718 QTAILQ_REMOVE(&exports, exp, next);
719 nbd_export_put(exp);
721 if (name != NULL) {
722 nbd_export_get(exp);
723 exp->name = g_strdup(name);
724 QTAILQ_INSERT_TAIL(&exports, exp, next);
726 nbd_export_put(exp);
729 void nbd_export_close(NBDExport *exp)
731 NBDClient *client, *next;
733 nbd_export_get(exp);
734 QTAILQ_FOREACH_SAFE(client, &exp->clients, next, next) {
735 client_close(client);
737 nbd_export_set_name(exp, NULL);
738 nbd_export_put(exp);
741 void nbd_export_get(NBDExport *exp)
743 assert(exp->refcount > 0);
744 exp->refcount++;
747 void nbd_export_put(NBDExport *exp)
749 assert(exp->refcount > 0);
750 if (exp->refcount == 1) {
751 nbd_export_close(exp);
754 if (--exp->refcount == 0) {
755 assert(exp->name == NULL);
757 if (exp->close) {
758 exp->close(exp);
761 if (exp->blk) {
762 notifier_remove(&exp->eject_notifier);
763 blk_remove_aio_context_notifier(exp->blk, blk_aio_attached,
764 blk_aio_detach, exp);
765 blk_unref(exp->blk);
766 exp->blk = NULL;
769 g_free(exp);
773 BlockBackend *nbd_export_get_blockdev(NBDExport *exp)
775 return exp->blk;
778 void nbd_export_close_all(void)
780 NBDExport *exp, *next;
782 QTAILQ_FOREACH_SAFE(exp, &exports, next, next) {
783 nbd_export_close(exp);
787 static ssize_t nbd_co_send_reply(NBDRequest *req, struct nbd_reply *reply,
788 int len)
790 NBDClient *client = req->client;
791 int csock = client->sock;
792 ssize_t rc, ret;
794 qemu_co_mutex_lock(&client->send_lock);
795 client->send_coroutine = qemu_coroutine_self();
796 nbd_set_handlers(client);
798 if (!len) {
799 rc = nbd_send_reply(csock, reply);
800 } else {
801 socket_set_cork(csock, 1);
802 rc = nbd_send_reply(csock, reply);
803 if (rc >= 0) {
804 ret = qemu_co_send(csock, req->data, len);
805 if (ret != len) {
806 rc = -EIO;
809 socket_set_cork(csock, 0);
812 client->send_coroutine = NULL;
813 nbd_set_handlers(client);
814 qemu_co_mutex_unlock(&client->send_lock);
815 return rc;
818 static ssize_t nbd_co_receive_request(NBDRequest *req, struct nbd_request *request)
820 NBDClient *client = req->client;
821 int csock = client->sock;
822 uint32_t command;
823 ssize_t rc;
825 client->recv_coroutine = qemu_coroutine_self();
826 nbd_update_can_read(client);
828 rc = nbd_receive_request(csock, request);
829 if (rc < 0) {
830 if (rc != -EAGAIN) {
831 rc = -EIO;
833 goto out;
836 if ((request->from + request->len) < request->from) {
837 LOG("integer overflow detected! "
838 "you're probably being attacked");
839 rc = -EINVAL;
840 goto out;
843 TRACE("Decoding type");
845 command = request->type & NBD_CMD_MASK_COMMAND;
846 if (command == NBD_CMD_READ || command == NBD_CMD_WRITE) {
847 if (request->len > NBD_MAX_BUFFER_SIZE) {
848 LOG("len (%u) is larger than max len (%u)",
849 request->len, NBD_MAX_BUFFER_SIZE);
850 rc = -EINVAL;
851 goto out;
854 req->data = blk_try_blockalign(client->exp->blk, request->len);
855 if (req->data == NULL) {
856 rc = -ENOMEM;
857 goto out;
860 if (command == NBD_CMD_WRITE) {
861 TRACE("Reading %u byte(s)", request->len);
863 if (qemu_co_recv(csock, req->data, request->len) != request->len) {
864 LOG("reading from socket failed");
865 rc = -EIO;
866 goto out;
869 rc = 0;
871 out:
872 client->recv_coroutine = NULL;
873 nbd_update_can_read(client);
875 return rc;
878 static void nbd_trip(void *opaque)
880 NBDClient *client = opaque;
881 NBDExport *exp = client->exp;
882 NBDRequest *req;
883 struct nbd_request request;
884 struct nbd_reply reply;
885 ssize_t ret;
886 uint32_t command;
888 TRACE("Reading request.");
889 if (client->closing) {
890 return;
893 req = nbd_request_get(client);
894 ret = nbd_co_receive_request(req, &request);
895 if (ret == -EAGAIN) {
896 goto done;
898 if (ret == -EIO) {
899 goto out;
902 reply.handle = request.handle;
903 reply.error = 0;
905 if (ret < 0) {
906 reply.error = -ret;
907 goto error_reply;
909 command = request.type & NBD_CMD_MASK_COMMAND;
910 if (command != NBD_CMD_DISC && (request.from + request.len) > exp->size) {
911 LOG("From: %" PRIu64 ", Len: %u, Size: %" PRIu64
912 ", Offset: %" PRIu64 "\n",
913 request.from, request.len,
914 (uint64_t)exp->size, (uint64_t)exp->dev_offset);
915 LOG("requested operation past EOF--bad client?");
916 goto invalid_request;
919 if (client->closing) {
921 * The client may be closed when we are blocked in
922 * nbd_co_receive_request()
924 goto done;
927 switch (command) {
928 case NBD_CMD_READ:
929 TRACE("Request type is READ");
931 if (request.type & NBD_CMD_FLAG_FUA) {
932 ret = blk_co_flush(exp->blk);
933 if (ret < 0) {
934 LOG("flush failed");
935 reply.error = -ret;
936 goto error_reply;
940 ret = blk_read(exp->blk,
941 (request.from + exp->dev_offset) / BDRV_SECTOR_SIZE,
942 req->data, request.len / BDRV_SECTOR_SIZE);
943 if (ret < 0) {
944 LOG("reading from file failed");
945 reply.error = -ret;
946 goto error_reply;
949 TRACE("Read %u byte(s)", request.len);
950 if (nbd_co_send_reply(req, &reply, request.len) < 0)
951 goto out;
952 break;
953 case NBD_CMD_WRITE:
954 TRACE("Request type is WRITE");
956 if (exp->nbdflags & NBD_FLAG_READ_ONLY) {
957 TRACE("Server is read-only, return error");
958 reply.error = EROFS;
959 goto error_reply;
962 TRACE("Writing to device");
964 ret = blk_write(exp->blk,
965 (request.from + exp->dev_offset) / BDRV_SECTOR_SIZE,
966 req->data, request.len / BDRV_SECTOR_SIZE);
967 if (ret < 0) {
968 LOG("writing to file failed");
969 reply.error = -ret;
970 goto error_reply;
973 if (request.type & NBD_CMD_FLAG_FUA) {
974 ret = blk_co_flush(exp->blk);
975 if (ret < 0) {
976 LOG("flush failed");
977 reply.error = -ret;
978 goto error_reply;
982 if (nbd_co_send_reply(req, &reply, 0) < 0) {
983 goto out;
985 break;
986 case NBD_CMD_DISC:
987 TRACE("Request type is DISCONNECT");
988 errno = 0;
989 goto out;
990 case NBD_CMD_FLUSH:
991 TRACE("Request type is FLUSH");
993 ret = blk_co_flush(exp->blk);
994 if (ret < 0) {
995 LOG("flush failed");
996 reply.error = -ret;
998 if (nbd_co_send_reply(req, &reply, 0) < 0) {
999 goto out;
1001 break;
1002 case NBD_CMD_TRIM:
1003 TRACE("Request type is TRIM");
1004 ret = blk_co_discard(exp->blk, (request.from + exp->dev_offset)
1005 / BDRV_SECTOR_SIZE,
1006 request.len / BDRV_SECTOR_SIZE);
1007 if (ret < 0) {
1008 LOG("discard failed");
1009 reply.error = -ret;
1011 if (nbd_co_send_reply(req, &reply, 0) < 0) {
1012 goto out;
1014 break;
1015 default:
1016 LOG("invalid request type (%u) received", request.type);
1017 invalid_request:
1018 reply.error = EINVAL;
1019 error_reply:
1020 if (nbd_co_send_reply(req, &reply, 0) < 0) {
1021 goto out;
1023 break;
1026 TRACE("Request/Reply complete");
1028 done:
1029 nbd_request_put(req);
1030 return;
1032 out:
1033 nbd_request_put(req);
1034 client_close(client);
1037 static void nbd_read(void *opaque)
1039 NBDClient *client = opaque;
1041 if (client->recv_coroutine) {
1042 qemu_coroutine_enter(client->recv_coroutine, NULL);
1043 } else {
1044 qemu_coroutine_enter(qemu_coroutine_create(nbd_trip), client);
1048 static void nbd_restart_write(void *opaque)
1050 NBDClient *client = opaque;
1052 qemu_coroutine_enter(client->send_coroutine, NULL);
1055 static void nbd_set_handlers(NBDClient *client)
1057 if (client->exp && client->exp->ctx) {
1058 aio_set_fd_handler(client->exp->ctx, client->sock,
1059 true,
1060 client->can_read ? nbd_read : NULL,
1061 client->send_coroutine ? nbd_restart_write : NULL,
1062 client);
1066 static void nbd_unset_handlers(NBDClient *client)
1068 if (client->exp && client->exp->ctx) {
1069 aio_set_fd_handler(client->exp->ctx, client->sock,
1070 true, NULL, NULL, NULL);
1074 static void nbd_update_can_read(NBDClient *client)
1076 bool can_read = client->recv_coroutine ||
1077 client->nb_requests < MAX_NBD_REQUESTS;
1079 if (can_read != client->can_read) {
1080 client->can_read = can_read;
1081 nbd_set_handlers(client);
1083 /* There is no need to invoke aio_notify(), since aio_set_fd_handler()
1084 * in nbd_set_handlers() will have taken care of that */
1088 static coroutine_fn void nbd_co_client_start(void *opaque)
1090 NBDClientNewData *data = opaque;
1091 NBDClient *client = data->client;
1092 NBDExport *exp = client->exp;
1094 if (exp) {
1095 nbd_export_get(exp);
1097 if (nbd_negotiate(data)) {
1098 client_close(client);
1099 goto out;
1101 qemu_co_mutex_init(&client->send_lock);
1102 nbd_set_handlers(client);
1104 if (exp) {
1105 QTAILQ_INSERT_TAIL(&exp->clients, client, next);
1107 out:
1108 g_free(data);
1111 void nbd_client_new(NBDExport *exp, int csock, void (*close_fn)(NBDClient *))
1113 NBDClient *client;
1114 NBDClientNewData *data = g_new(NBDClientNewData, 1);
1116 client = g_malloc0(sizeof(NBDClient));
1117 client->refcount = 1;
1118 client->exp = exp;
1119 client->sock = csock;
1120 client->can_read = true;
1121 client->close = close_fn;
1123 data->client = client;
1124 data->co = qemu_coroutine_create(nbd_co_client_start);
1125 qemu_coroutine_enter(data->co, data);