target-microblaze: dec_barrel: Use bool instead of unsigned int
[qemu/ar7.git] / nbd / server.c
blob8a70c054a6b85c613dd54b489e07feb77c336420
1 /*
2 * Copyright (C) 2016 Red Hat, Inc.
3 * Copyright (C) 2005 Anthony Liguori <anthony@codemonkey.ws>
5 * Network Block Device Server Side
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; under version 2 of the License.
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, see <http://www.gnu.org/licenses/>.
20 #include "qemu/osdep.h"
21 #include "qapi/error.h"
22 #include "nbd-internal.h"
24 static int system_errno_to_nbd_errno(int err)
26 switch (err) {
27 case 0:
28 return NBD_SUCCESS;
29 case EPERM:
30 case EROFS:
31 return NBD_EPERM;
32 case EIO:
33 return NBD_EIO;
34 case ENOMEM:
35 return NBD_ENOMEM;
36 #ifdef EDQUOT
37 case EDQUOT:
38 #endif
39 case EFBIG:
40 case ENOSPC:
41 return NBD_ENOSPC;
42 case ESHUTDOWN:
43 return NBD_ESHUTDOWN;
44 case EINVAL:
45 default:
46 return NBD_EINVAL;
50 /* Definitions for opaque data types */
52 typedef struct NBDRequestData NBDRequestData;
54 struct NBDRequestData {
55 QSIMPLEQ_ENTRY(NBDRequestData) entry;
56 NBDClient *client;
57 uint8_t *data;
58 bool complete;
61 struct NBDExport {
62 int refcount;
63 void (*close)(NBDExport *exp);
65 BlockBackend *blk;
66 char *name;
67 char *description;
68 off_t dev_offset;
69 off_t size;
70 uint16_t nbdflags;
71 QTAILQ_HEAD(, NBDClient) clients;
72 QTAILQ_ENTRY(NBDExport) next;
74 AioContext *ctx;
76 BlockBackend *eject_notifier_blk;
77 Notifier eject_notifier;
80 static QTAILQ_HEAD(, NBDExport) exports = QTAILQ_HEAD_INITIALIZER(exports);
82 struct NBDClient {
83 int refcount;
84 void (*close_fn)(NBDClient *client, bool negotiated);
86 bool no_zeroes;
87 NBDExport *exp;
88 QCryptoTLSCreds *tlscreds;
89 char *tlsaclname;
90 QIOChannelSocket *sioc; /* The underlying data channel */
91 QIOChannel *ioc; /* The current I/O channel which may differ (eg TLS) */
93 Coroutine *recv_coroutine;
95 CoMutex send_lock;
96 Coroutine *send_coroutine;
98 QTAILQ_ENTRY(NBDClient) next;
99 int nb_requests;
100 bool closing;
103 /* That's all folks */
105 static void nbd_client_receive_next_request(NBDClient *client);
/* Basic flow for negotiation

   Server         Client
   Negotiate

   or

   Server         Client
   Negotiate #1
                  Option
   Negotiate #2

   ----

   followed by

   Server         Client
                  Request
   Response
                  Request
   Response
                  ...
   ...
                  Request (type == 2)

*/
134 /* Send a reply header, including length, but no payload.
135 * Return -errno on error, 0 on success. */
136 static int nbd_negotiate_send_rep_len(QIOChannel *ioc, uint32_t type,
137 uint32_t opt, uint32_t len)
139 uint64_t magic;
141 TRACE("Reply opt=%" PRIx32 " type=%" PRIx32 " len=%" PRIu32,
142 type, opt, len);
144 magic = cpu_to_be64(NBD_REP_MAGIC);
145 if (nbd_write(ioc, &magic, sizeof(magic), NULL) < 0) {
146 LOG("write failed (rep magic)");
147 return -EINVAL;
149 opt = cpu_to_be32(opt);
150 if (nbd_write(ioc, &opt, sizeof(opt), NULL) < 0) {
151 LOG("write failed (rep opt)");
152 return -EINVAL;
154 type = cpu_to_be32(type);
155 if (nbd_write(ioc, &type, sizeof(type), NULL) < 0) {
156 LOG("write failed (rep type)");
157 return -EINVAL;
159 len = cpu_to_be32(len);
160 if (nbd_write(ioc, &len, sizeof(len), NULL) < 0) {
161 LOG("write failed (rep data length)");
162 return -EINVAL;
164 return 0;
167 /* Send a reply header with default 0 length.
168 * Return -errno on error, 0 on success. */
169 static int nbd_negotiate_send_rep(QIOChannel *ioc, uint32_t type, uint32_t opt)
171 return nbd_negotiate_send_rep_len(ioc, type, opt, 0);
174 /* Send an error reply.
175 * Return -errno on error, 0 on success. */
176 static int GCC_FMT_ATTR(4, 5)
177 nbd_negotiate_send_rep_err(QIOChannel *ioc, uint32_t type,
178 uint32_t opt, const char *fmt, ...)
180 va_list va;
181 char *msg;
182 int ret;
183 size_t len;
185 va_start(va, fmt);
186 msg = g_strdup_vprintf(fmt, va);
187 va_end(va);
188 len = strlen(msg);
189 assert(len < 4096);
190 TRACE("sending error message \"%s\"", msg);
191 ret = nbd_negotiate_send_rep_len(ioc, type, opt, len);
192 if (ret < 0) {
193 goto out;
195 if (nbd_write(ioc, msg, len, NULL) < 0) {
196 LOG("write failed (error message)");
197 ret = -EIO;
198 } else {
199 ret = 0;
201 out:
202 g_free(msg);
203 return ret;
206 /* Send a single NBD_REP_SERVER reply to NBD_OPT_LIST, including payload.
207 * Return -errno on error, 0 on success. */
208 static int nbd_negotiate_send_rep_list(QIOChannel *ioc, NBDExport *exp)
210 size_t name_len, desc_len;
211 uint32_t len;
212 const char *name = exp->name ? exp->name : "";
213 const char *desc = exp->description ? exp->description : "";
214 int ret;
216 TRACE("Advertising export name '%s' description '%s'", name, desc);
217 name_len = strlen(name);
218 desc_len = strlen(desc);
219 len = name_len + desc_len + sizeof(len);
220 ret = nbd_negotiate_send_rep_len(ioc, NBD_REP_SERVER, NBD_OPT_LIST, len);
221 if (ret < 0) {
222 return ret;
225 len = cpu_to_be32(name_len);
226 if (nbd_write(ioc, &len, sizeof(len), NULL) < 0) {
227 LOG("write failed (name length)");
228 return -EINVAL;
230 if (nbd_write(ioc, name, name_len, NULL) < 0) {
231 LOG("write failed (name buffer)");
232 return -EINVAL;
234 if (nbd_write(ioc, desc, desc_len, NULL) < 0) {
235 LOG("write failed (description buffer)");
236 return -EINVAL;
238 return 0;
241 /* Process the NBD_OPT_LIST command, with a potential series of replies.
242 * Return -errno on error, 0 on success. */
243 static int nbd_negotiate_handle_list(NBDClient *client, uint32_t length)
245 NBDExport *exp;
247 if (length) {
248 if (nbd_drop(client->ioc, length, NULL) < 0) {
249 return -EIO;
251 return nbd_negotiate_send_rep_err(client->ioc,
252 NBD_REP_ERR_INVALID, NBD_OPT_LIST,
253 "OPT_LIST should not have length");
256 /* For each export, send a NBD_REP_SERVER reply. */
257 QTAILQ_FOREACH(exp, &exports, next) {
258 if (nbd_negotiate_send_rep_list(client->ioc, exp)) {
259 return -EINVAL;
262 /* Finish with a NBD_REP_ACK. */
263 return nbd_negotiate_send_rep(client->ioc, NBD_REP_ACK, NBD_OPT_LIST);
266 static int nbd_negotiate_handle_export_name(NBDClient *client, uint32_t length)
268 char name[NBD_MAX_NAME_SIZE + 1];
270 /* Client sends:
271 [20 .. xx] export name (length bytes)
273 TRACE("Checking length");
274 if (length >= sizeof(name)) {
275 LOG("Bad length received");
276 return -EINVAL;
278 if (nbd_read(client->ioc, name, length, NULL) < 0) {
279 LOG("read failed");
280 return -EINVAL;
282 name[length] = '\0';
284 TRACE("Client requested export '%s'", name);
286 client->exp = nbd_export_find(name);
287 if (!client->exp) {
288 LOG("export not found");
289 return -EINVAL;
292 QTAILQ_INSERT_TAIL(&client->exp->clients, client, next);
293 nbd_export_get(client->exp);
295 return 0;
298 /* Handle NBD_OPT_STARTTLS. Return NULL to drop connection, or else the
299 * new channel for all further (now-encrypted) communication. */
300 static QIOChannel *nbd_negotiate_handle_starttls(NBDClient *client,
301 uint32_t length)
303 QIOChannel *ioc;
304 QIOChannelTLS *tioc;
305 struct NBDTLSHandshakeData data = { 0 };
307 TRACE("Setting up TLS");
308 ioc = client->ioc;
309 if (length) {
310 if (nbd_drop(ioc, length, NULL) < 0) {
311 return NULL;
313 nbd_negotiate_send_rep_err(ioc, NBD_REP_ERR_INVALID, NBD_OPT_STARTTLS,
314 "OPT_STARTTLS should not have length");
315 return NULL;
318 if (nbd_negotiate_send_rep(client->ioc, NBD_REP_ACK,
319 NBD_OPT_STARTTLS) < 0) {
320 return NULL;
323 tioc = qio_channel_tls_new_server(ioc,
324 client->tlscreds,
325 client->tlsaclname,
326 NULL);
327 if (!tioc) {
328 return NULL;
331 qio_channel_set_name(QIO_CHANNEL(tioc), "nbd-server-tls");
332 TRACE("Starting TLS handshake");
333 data.loop = g_main_loop_new(g_main_context_default(), FALSE);
334 qio_channel_tls_handshake(tioc,
335 nbd_tls_handshake,
336 &data,
337 NULL);
339 if (!data.complete) {
340 g_main_loop_run(data.loop);
342 g_main_loop_unref(data.loop);
343 if (data.error) {
344 object_unref(OBJECT(tioc));
345 error_free(data.error);
346 return NULL;
349 return QIO_CHANNEL(tioc);
353 /* Process all NBD_OPT_* client option commands.
354 * Return -errno on error, 0 on success. */
355 static int nbd_negotiate_options(NBDClient *client)
357 uint32_t flags;
358 bool fixedNewstyle = false;
360 /* Client sends:
361 [ 0 .. 3] client flags
363 [ 0 .. 7] NBD_OPTS_MAGIC
364 [ 8 .. 11] NBD option
365 [12 .. 15] Data length
366 ... Rest of request
368 [ 0 .. 7] NBD_OPTS_MAGIC
369 [ 8 .. 11] Second NBD option
370 [12 .. 15] Data length
371 ... Rest of request
374 if (nbd_read(client->ioc, &flags, sizeof(flags), NULL) < 0) {
375 LOG("read failed");
376 return -EIO;
378 TRACE("Checking client flags");
379 be32_to_cpus(&flags);
380 if (flags & NBD_FLAG_C_FIXED_NEWSTYLE) {
381 TRACE("Client supports fixed newstyle handshake");
382 fixedNewstyle = true;
383 flags &= ~NBD_FLAG_C_FIXED_NEWSTYLE;
385 if (flags & NBD_FLAG_C_NO_ZEROES) {
386 TRACE("Client supports no zeroes at handshake end");
387 client->no_zeroes = true;
388 flags &= ~NBD_FLAG_C_NO_ZEROES;
390 if (flags != 0) {
391 TRACE("Unknown client flags 0x%" PRIx32 " received", flags);
392 return -EIO;
395 while (1) {
396 int ret;
397 uint32_t clientflags, length;
398 uint64_t magic;
400 if (nbd_read(client->ioc, &magic, sizeof(magic), NULL) < 0) {
401 LOG("read failed");
402 return -EINVAL;
404 TRACE("Checking opts magic");
405 if (magic != be64_to_cpu(NBD_OPTS_MAGIC)) {
406 LOG("Bad magic received");
407 return -EINVAL;
410 if (nbd_read(client->ioc, &clientflags,
411 sizeof(clientflags), NULL) < 0)
413 LOG("read failed");
414 return -EINVAL;
416 clientflags = be32_to_cpu(clientflags);
418 if (nbd_read(client->ioc, &length, sizeof(length), NULL) < 0) {
419 LOG("read failed");
420 return -EINVAL;
422 length = be32_to_cpu(length);
424 TRACE("Checking option 0x%" PRIx32, clientflags);
425 if (client->tlscreds &&
426 client->ioc == (QIOChannel *)client->sioc) {
427 QIOChannel *tioc;
428 if (!fixedNewstyle) {
429 TRACE("Unsupported option 0x%" PRIx32, clientflags);
430 return -EINVAL;
432 switch (clientflags) {
433 case NBD_OPT_STARTTLS:
434 tioc = nbd_negotiate_handle_starttls(client, length);
435 if (!tioc) {
436 return -EIO;
438 object_unref(OBJECT(client->ioc));
439 client->ioc = QIO_CHANNEL(tioc);
440 break;
442 case NBD_OPT_EXPORT_NAME:
443 /* No way to return an error to client, so drop connection */
444 TRACE("Option 0x%x not permitted before TLS", clientflags);
445 return -EINVAL;
447 default:
448 if (nbd_drop(client->ioc, length, NULL) < 0) {
449 return -EIO;
451 ret = nbd_negotiate_send_rep_err(client->ioc,
452 NBD_REP_ERR_TLS_REQD,
453 clientflags,
454 "Option 0x%" PRIx32
455 "not permitted before TLS",
456 clientflags);
457 if (ret < 0) {
458 return ret;
460 /* Let the client keep trying, unless they asked to quit */
461 if (clientflags == NBD_OPT_ABORT) {
462 return -EINVAL;
464 break;
466 } else if (fixedNewstyle) {
467 switch (clientflags) {
468 case NBD_OPT_LIST:
469 ret = nbd_negotiate_handle_list(client, length);
470 if (ret < 0) {
471 return ret;
473 break;
475 case NBD_OPT_ABORT:
476 /* NBD spec says we must try to reply before
477 * disconnecting, but that we must also tolerate
478 * guests that don't wait for our reply. */
479 nbd_negotiate_send_rep(client->ioc, NBD_REP_ACK, clientflags);
480 return -EINVAL;
482 case NBD_OPT_EXPORT_NAME:
483 return nbd_negotiate_handle_export_name(client, length);
485 case NBD_OPT_STARTTLS:
486 if (nbd_drop(client->ioc, length, NULL) < 0) {
487 return -EIO;
489 if (client->tlscreds) {
490 ret = nbd_negotiate_send_rep_err(client->ioc,
491 NBD_REP_ERR_INVALID,
492 clientflags,
493 "TLS already enabled");
494 } else {
495 ret = nbd_negotiate_send_rep_err(client->ioc,
496 NBD_REP_ERR_POLICY,
497 clientflags,
498 "TLS not configured");
500 if (ret < 0) {
501 return ret;
503 break;
504 default:
505 if (nbd_drop(client->ioc, length, NULL) < 0) {
506 return -EIO;
508 ret = nbd_negotiate_send_rep_err(client->ioc,
509 NBD_REP_ERR_UNSUP,
510 clientflags,
511 "Unsupported option 0x%"
512 PRIx32,
513 clientflags);
514 if (ret < 0) {
515 return ret;
517 break;
519 } else {
521 * If broken new-style we should drop the connection
522 * for anything except NBD_OPT_EXPORT_NAME
524 switch (clientflags) {
525 case NBD_OPT_EXPORT_NAME:
526 return nbd_negotiate_handle_export_name(client, length);
528 default:
529 TRACE("Unsupported option 0x%" PRIx32, clientflags);
530 return -EINVAL;
536 static coroutine_fn int nbd_negotiate(NBDClient *client)
538 char buf[8 + 8 + 8 + 128];
539 int ret;
540 const uint16_t myflags = (NBD_FLAG_HAS_FLAGS | NBD_FLAG_SEND_TRIM |
541 NBD_FLAG_SEND_FLUSH | NBD_FLAG_SEND_FUA |
542 NBD_FLAG_SEND_WRITE_ZEROES);
543 bool oldStyle;
544 size_t len;
546 /* Old style negotiation header without options
547 [ 0 .. 7] passwd ("NBDMAGIC")
548 [ 8 .. 15] magic (NBD_CLIENT_MAGIC)
549 [16 .. 23] size
550 [24 .. 25] server flags (0)
551 [26 .. 27] export flags
552 [28 .. 151] reserved (0)
554 New style negotiation header with options
555 [ 0 .. 7] passwd ("NBDMAGIC")
556 [ 8 .. 15] magic (NBD_OPTS_MAGIC)
557 [16 .. 17] server flags (0)
558 ....options sent....
559 [18 .. 25] size
560 [26 .. 27] export flags
561 [28 .. 151] reserved (0, omit if no_zeroes)
564 qio_channel_set_blocking(client->ioc, false, NULL);
566 TRACE("Beginning negotiation.");
567 memset(buf, 0, sizeof(buf));
568 memcpy(buf, "NBDMAGIC", 8);
570 oldStyle = client->exp != NULL && !client->tlscreds;
571 if (oldStyle) {
572 TRACE("advertising size %" PRIu64 " and flags %x",
573 client->exp->size, client->exp->nbdflags | myflags);
574 stq_be_p(buf + 8, NBD_CLIENT_MAGIC);
575 stq_be_p(buf + 16, client->exp->size);
576 stw_be_p(buf + 26, client->exp->nbdflags | myflags);
577 } else {
578 stq_be_p(buf + 8, NBD_OPTS_MAGIC);
579 stw_be_p(buf + 16, NBD_FLAG_FIXED_NEWSTYLE | NBD_FLAG_NO_ZEROES);
582 if (oldStyle) {
583 if (client->tlscreds) {
584 TRACE("TLS cannot be enabled with oldstyle protocol");
585 return -EINVAL;
587 if (nbd_write(client->ioc, buf, sizeof(buf), NULL) < 0) {
588 LOG("write failed");
589 return -EINVAL;
591 } else {
592 if (nbd_write(client->ioc, buf, 18, NULL) < 0) {
593 LOG("write failed");
594 return -EINVAL;
596 ret = nbd_negotiate_options(client);
597 if (ret != 0) {
598 LOG("option negotiation failed");
599 return ret;
602 TRACE("advertising size %" PRIu64 " and flags %x",
603 client->exp->size, client->exp->nbdflags | myflags);
604 stq_be_p(buf + 18, client->exp->size);
605 stw_be_p(buf + 26, client->exp->nbdflags | myflags);
606 len = client->no_zeroes ? 10 : sizeof(buf) - 18;
607 ret = nbd_write(client->ioc, buf + 18, len, NULL);
608 if (ret < 0) {
609 LOG("write failed");
610 return ret;
614 TRACE("Negotiation succeeded.");
616 return 0;
619 static int nbd_receive_request(QIOChannel *ioc, NBDRequest *request)
621 uint8_t buf[NBD_REQUEST_SIZE];
622 uint32_t magic;
623 int ret;
625 ret = nbd_read(ioc, buf, sizeof(buf), NULL);
626 if (ret < 0) {
627 return ret;
630 /* Request
631 [ 0 .. 3] magic (NBD_REQUEST_MAGIC)
632 [ 4 .. 5] flags (NBD_CMD_FLAG_FUA, ...)
633 [ 6 .. 7] type (NBD_CMD_READ, ...)
634 [ 8 .. 15] handle
635 [16 .. 23] from
636 [24 .. 27] len
639 magic = ldl_be_p(buf);
640 request->flags = lduw_be_p(buf + 4);
641 request->type = lduw_be_p(buf + 6);
642 request->handle = ldq_be_p(buf + 8);
643 request->from = ldq_be_p(buf + 16);
644 request->len = ldl_be_p(buf + 24);
646 TRACE("Got request: { magic = 0x%" PRIx32 ", .flags = %" PRIx16
647 ", .type = %" PRIx16 ", from = %" PRIu64 ", len = %" PRIu32 " }",
648 magic, request->flags, request->type, request->from, request->len);
650 if (magic != NBD_REQUEST_MAGIC) {
651 LOG("invalid magic (got 0x%" PRIx32 ")", magic);
652 return -EINVAL;
654 return 0;
657 static int nbd_send_reply(QIOChannel *ioc, NBDReply *reply)
659 uint8_t buf[NBD_REPLY_SIZE];
661 reply->error = system_errno_to_nbd_errno(reply->error);
663 TRACE("Sending response to client: { .error = %" PRId32
664 ", handle = %" PRIu64 " }",
665 reply->error, reply->handle);
667 /* Reply
668 [ 0 .. 3] magic (NBD_REPLY_MAGIC)
669 [ 4 .. 7] error (0 == no error)
670 [ 7 .. 15] handle
672 stl_be_p(buf, NBD_REPLY_MAGIC);
673 stl_be_p(buf + 4, reply->error);
674 stq_be_p(buf + 8, reply->handle);
676 return nbd_write(ioc, buf, sizeof(buf), NULL);
679 #define MAX_NBD_REQUESTS 16
681 void nbd_client_get(NBDClient *client)
683 client->refcount++;
686 void nbd_client_put(NBDClient *client)
688 if (--client->refcount == 0) {
689 /* The last reference should be dropped by client->close,
690 * which is called by client_close.
692 assert(client->closing);
694 qio_channel_detach_aio_context(client->ioc);
695 object_unref(OBJECT(client->sioc));
696 object_unref(OBJECT(client->ioc));
697 if (client->tlscreds) {
698 object_unref(OBJECT(client->tlscreds));
700 g_free(client->tlsaclname);
701 if (client->exp) {
702 QTAILQ_REMOVE(&client->exp->clients, client, next);
703 nbd_export_put(client->exp);
705 g_free(client);
709 static void client_close(NBDClient *client, bool negotiated)
711 if (client->closing) {
712 return;
715 client->closing = true;
717 /* Force requests to finish. They will drop their own references,
718 * then we'll close the socket and free the NBDClient.
720 qio_channel_shutdown(client->ioc, QIO_CHANNEL_SHUTDOWN_BOTH,
721 NULL);
723 /* Also tell the client, so that they release their reference. */
724 if (client->close_fn) {
725 client->close_fn(client, negotiated);
729 static NBDRequestData *nbd_request_get(NBDClient *client)
731 NBDRequestData *req;
733 assert(client->nb_requests <= MAX_NBD_REQUESTS - 1);
734 client->nb_requests++;
736 req = g_new0(NBDRequestData, 1);
737 nbd_client_get(client);
738 req->client = client;
739 return req;
742 static void nbd_request_put(NBDRequestData *req)
744 NBDClient *client = req->client;
746 if (req->data) {
747 qemu_vfree(req->data);
749 g_free(req);
751 client->nb_requests--;
752 nbd_client_receive_next_request(client);
754 nbd_client_put(client);
757 static void blk_aio_attached(AioContext *ctx, void *opaque)
759 NBDExport *exp = opaque;
760 NBDClient *client;
762 TRACE("Export %s: Attaching clients to AIO context %p\n", exp->name, ctx);
764 exp->ctx = ctx;
766 QTAILQ_FOREACH(client, &exp->clients, next) {
767 qio_channel_attach_aio_context(client->ioc, ctx);
768 if (client->recv_coroutine) {
769 aio_co_schedule(ctx, client->recv_coroutine);
771 if (client->send_coroutine) {
772 aio_co_schedule(ctx, client->send_coroutine);
777 static void blk_aio_detach(void *opaque)
779 NBDExport *exp = opaque;
780 NBDClient *client;
782 TRACE("Export %s: Detaching clients from AIO context %p\n", exp->name, exp->ctx);
784 QTAILQ_FOREACH(client, &exp->clients, next) {
785 qio_channel_detach_aio_context(client->ioc);
788 exp->ctx = NULL;
791 static void nbd_eject_notifier(Notifier *n, void *data)
793 NBDExport *exp = container_of(n, NBDExport, eject_notifier);
794 nbd_export_close(exp);
797 NBDExport *nbd_export_new(BlockDriverState *bs, off_t dev_offset, off_t size,
798 uint16_t nbdflags, void (*close)(NBDExport *),
799 bool writethrough, BlockBackend *on_eject_blk,
800 Error **errp)
802 BlockBackend *blk;
803 NBDExport *exp = g_malloc0(sizeof(NBDExport));
804 uint64_t perm;
805 int ret;
807 /* Don't allow resize while the NBD server is running, otherwise we don't
808 * care what happens with the node. */
809 perm = BLK_PERM_CONSISTENT_READ;
810 if ((nbdflags & NBD_FLAG_READ_ONLY) == 0) {
811 perm |= BLK_PERM_WRITE;
813 blk = blk_new(perm, BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE_UNCHANGED |
814 BLK_PERM_WRITE | BLK_PERM_GRAPH_MOD);
815 ret = blk_insert_bs(blk, bs, errp);
816 if (ret < 0) {
817 goto fail;
819 blk_set_enable_write_cache(blk, !writethrough);
821 exp->refcount = 1;
822 QTAILQ_INIT(&exp->clients);
823 exp->blk = blk;
824 exp->dev_offset = dev_offset;
825 exp->nbdflags = nbdflags;
826 exp->size = size < 0 ? blk_getlength(blk) : size;
827 if (exp->size < 0) {
828 error_setg_errno(errp, -exp->size,
829 "Failed to determine the NBD export's length");
830 goto fail;
832 exp->size -= exp->size % BDRV_SECTOR_SIZE;
834 exp->close = close;
835 exp->ctx = blk_get_aio_context(blk);
836 blk_add_aio_context_notifier(blk, blk_aio_attached, blk_aio_detach, exp);
838 if (on_eject_blk) {
839 blk_ref(on_eject_blk);
840 exp->eject_notifier_blk = on_eject_blk;
841 exp->eject_notifier.notify = nbd_eject_notifier;
842 blk_add_remove_bs_notifier(on_eject_blk, &exp->eject_notifier);
846 * NBD exports are used for non-shared storage migration. Make sure
847 * that BDRV_O_INACTIVE is cleared and the image is ready for write
848 * access since the export could be available before migration handover.
850 aio_context_acquire(exp->ctx);
851 blk_invalidate_cache(blk, NULL);
852 aio_context_release(exp->ctx);
853 return exp;
855 fail:
856 blk_unref(blk);
857 g_free(exp);
858 return NULL;
861 NBDExport *nbd_export_find(const char *name)
863 NBDExport *exp;
864 QTAILQ_FOREACH(exp, &exports, next) {
865 if (strcmp(name, exp->name) == 0) {
866 return exp;
870 return NULL;
873 void nbd_export_set_name(NBDExport *exp, const char *name)
875 if (exp->name == name) {
876 return;
879 nbd_export_get(exp);
880 if (exp->name != NULL) {
881 g_free(exp->name);
882 exp->name = NULL;
883 QTAILQ_REMOVE(&exports, exp, next);
884 nbd_export_put(exp);
886 if (name != NULL) {
887 nbd_export_get(exp);
888 exp->name = g_strdup(name);
889 QTAILQ_INSERT_TAIL(&exports, exp, next);
891 nbd_export_put(exp);
894 void nbd_export_set_description(NBDExport *exp, const char *description)
896 g_free(exp->description);
897 exp->description = g_strdup(description);
900 void nbd_export_close(NBDExport *exp)
902 NBDClient *client, *next;
904 nbd_export_get(exp);
905 QTAILQ_FOREACH_SAFE(client, &exp->clients, next, next) {
906 client_close(client, true);
908 nbd_export_set_name(exp, NULL);
909 nbd_export_set_description(exp, NULL);
910 nbd_export_put(exp);
913 void nbd_export_get(NBDExport *exp)
915 assert(exp->refcount > 0);
916 exp->refcount++;
919 void nbd_export_put(NBDExport *exp)
921 assert(exp->refcount > 0);
922 if (exp->refcount == 1) {
923 nbd_export_close(exp);
926 if (--exp->refcount == 0) {
927 assert(exp->name == NULL);
928 assert(exp->description == NULL);
930 if (exp->close) {
931 exp->close(exp);
934 if (exp->blk) {
935 if (exp->eject_notifier_blk) {
936 notifier_remove(&exp->eject_notifier);
937 blk_unref(exp->eject_notifier_blk);
939 blk_remove_aio_context_notifier(exp->blk, blk_aio_attached,
940 blk_aio_detach, exp);
941 blk_unref(exp->blk);
942 exp->blk = NULL;
945 g_free(exp);
949 BlockBackend *nbd_export_get_blockdev(NBDExport *exp)
951 return exp->blk;
954 void nbd_export_close_all(void)
956 NBDExport *exp, *next;
958 QTAILQ_FOREACH_SAFE(exp, &exports, next, next) {
959 nbd_export_close(exp);
963 static int nbd_co_send_reply(NBDRequestData *req, NBDReply *reply, int len)
965 NBDClient *client = req->client;
966 int ret;
968 g_assert(qemu_in_coroutine());
969 qemu_co_mutex_lock(&client->send_lock);
970 client->send_coroutine = qemu_coroutine_self();
972 if (!len) {
973 ret = nbd_send_reply(client->ioc, reply);
974 } else {
975 qio_channel_set_cork(client->ioc, true);
976 ret = nbd_send_reply(client->ioc, reply);
977 if (ret == 0) {
978 ret = nbd_write(client->ioc, req->data, len, NULL);
979 if (ret < 0) {
980 ret = -EIO;
983 qio_channel_set_cork(client->ioc, false);
986 client->send_coroutine = NULL;
987 qemu_co_mutex_unlock(&client->send_lock);
988 return ret;
991 /* nbd_co_receive_request
992 * Collect a client request. Return 0 if request looks valid, -EIO to drop
993 * connection right away, and any other negative value to report an error to
994 * the client (although the caller may still need to disconnect after reporting
995 * the error).
997 static int nbd_co_receive_request(NBDRequestData *req, NBDRequest *request)
999 NBDClient *client = req->client;
1001 g_assert(qemu_in_coroutine());
1002 assert(client->recv_coroutine == qemu_coroutine_self());
1003 if (nbd_receive_request(client->ioc, request) < 0) {
1004 return -EIO;
1007 TRACE("Decoding type");
1009 if (request->type != NBD_CMD_WRITE) {
1010 /* No payload, we are ready to read the next request. */
1011 req->complete = true;
1014 if (request->type == NBD_CMD_DISC) {
1015 /* Special case: we're going to disconnect without a reply,
1016 * whether or not flags, from, or len are bogus */
1017 TRACE("Request type is DISCONNECT");
1018 return -EIO;
1021 /* Check for sanity in the parameters, part 1. Defer as many
1022 * checks as possible until after reading any NBD_CMD_WRITE
1023 * payload, so we can try and keep the connection alive. */
1024 if ((request->from + request->len) < request->from) {
1025 LOG("integer overflow detected, you're probably being attacked");
1026 return -EINVAL;
1029 if (request->type == NBD_CMD_READ || request->type == NBD_CMD_WRITE) {
1030 if (request->len > NBD_MAX_BUFFER_SIZE) {
1031 LOG("len (%" PRIu32" ) is larger than max len (%u)",
1032 request->len, NBD_MAX_BUFFER_SIZE);
1033 return -EINVAL;
1036 req->data = blk_try_blockalign(client->exp->blk, request->len);
1037 if (req->data == NULL) {
1038 return -ENOMEM;
1041 if (request->type == NBD_CMD_WRITE) {
1042 TRACE("Reading %" PRIu32 " byte(s)", request->len);
1044 if (nbd_read(client->ioc, req->data, request->len, NULL) < 0) {
1045 LOG("reading from socket failed");
1046 return -EIO;
1048 req->complete = true;
1051 /* Sanity checks, part 2. */
1052 if (request->from + request->len > client->exp->size) {
1053 LOG("operation past EOF; From: %" PRIu64 ", Len: %" PRIu32
1054 ", Size: %" PRIu64, request->from, request->len,
1055 (uint64_t)client->exp->size);
1056 return request->type == NBD_CMD_WRITE ? -ENOSPC : -EINVAL;
1058 if (request->flags & ~(NBD_CMD_FLAG_FUA | NBD_CMD_FLAG_NO_HOLE)) {
1059 LOG("unsupported flags (got 0x%x)", request->flags);
1060 return -EINVAL;
1062 if (request->type != NBD_CMD_WRITE_ZEROES &&
1063 (request->flags & NBD_CMD_FLAG_NO_HOLE)) {
1064 LOG("unexpected flags (got 0x%x)", request->flags);
1065 return -EINVAL;
1068 return 0;
1071 /* Owns a reference to the NBDClient passed as opaque. */
1072 static coroutine_fn void nbd_trip(void *opaque)
1074 NBDClient *client = opaque;
1075 NBDExport *exp = client->exp;
1076 NBDRequestData *req;
1077 NBDRequest request = { 0 }; /* GCC thinks it can be used uninitialized */
1078 NBDReply reply;
1079 int ret;
1080 int flags;
1081 int reply_data_len = 0;
1083 TRACE("Reading request.");
1084 if (client->closing) {
1085 nbd_client_put(client);
1086 return;
1089 req = nbd_request_get(client);
1090 ret = nbd_co_receive_request(req, &request);
1091 client->recv_coroutine = NULL;
1092 nbd_client_receive_next_request(client);
1093 if (ret == -EIO) {
1094 goto disconnect;
1097 reply.handle = request.handle;
1098 reply.error = 0;
1100 if (ret < 0) {
1101 reply.error = -ret;
1102 goto reply;
1105 if (client->closing) {
1107 * The client may be closed when we are blocked in
1108 * nbd_co_receive_request()
1110 goto done;
1113 switch (request.type) {
1114 case NBD_CMD_READ:
1115 TRACE("Request type is READ");
1117 /* XXX: NBD Protocol only documents use of FUA with WRITE */
1118 if (request.flags & NBD_CMD_FLAG_FUA) {
1119 ret = blk_co_flush(exp->blk);
1120 if (ret < 0) {
1121 LOG("flush failed");
1122 reply.error = -ret;
1123 break;
1127 ret = blk_pread(exp->blk, request.from + exp->dev_offset,
1128 req->data, request.len);
1129 if (ret < 0) {
1130 LOG("reading from file failed");
1131 reply.error = -ret;
1132 break;
1135 reply_data_len = request.len;
1136 TRACE("Read %" PRIu32" byte(s)", request.len);
1138 break;
1139 case NBD_CMD_WRITE:
1140 TRACE("Request type is WRITE");
1142 if (exp->nbdflags & NBD_FLAG_READ_ONLY) {
1143 TRACE("Server is read-only, return error");
1144 reply.error = EROFS;
1145 break;
1148 TRACE("Writing to device");
1150 flags = 0;
1151 if (request.flags & NBD_CMD_FLAG_FUA) {
1152 flags |= BDRV_REQ_FUA;
1154 ret = blk_pwrite(exp->blk, request.from + exp->dev_offset,
1155 req->data, request.len, flags);
1156 if (ret < 0) {
1157 LOG("writing to file failed");
1158 reply.error = -ret;
1161 break;
1162 case NBD_CMD_WRITE_ZEROES:
1163 TRACE("Request type is WRITE_ZEROES");
1165 if (exp->nbdflags & NBD_FLAG_READ_ONLY) {
1166 TRACE("Server is read-only, return error");
1167 reply.error = EROFS;
1168 break;
1171 TRACE("Writing to device");
1173 flags = 0;
1174 if (request.flags & NBD_CMD_FLAG_FUA) {
1175 flags |= BDRV_REQ_FUA;
1177 if (!(request.flags & NBD_CMD_FLAG_NO_HOLE)) {
1178 flags |= BDRV_REQ_MAY_UNMAP;
1180 ret = blk_pwrite_zeroes(exp->blk, request.from + exp->dev_offset,
1181 request.len, flags);
1182 if (ret < 0) {
1183 LOG("writing to file failed");
1184 reply.error = -ret;
1187 break;
1188 case NBD_CMD_DISC:
1189 /* unreachable, thanks to special case in nbd_co_receive_request() */
1190 abort();
1192 case NBD_CMD_FLUSH:
1193 TRACE("Request type is FLUSH");
1195 ret = blk_co_flush(exp->blk);
1196 if (ret < 0) {
1197 LOG("flush failed");
1198 reply.error = -ret;
1201 break;
1202 case NBD_CMD_TRIM:
1203 TRACE("Request type is TRIM");
1204 ret = blk_co_pdiscard(exp->blk, request.from + exp->dev_offset,
1205 request.len);
1206 if (ret < 0) {
1207 LOG("discard failed");
1208 reply.error = -ret;
1211 break;
1212 default:
1213 LOG("invalid request type (%" PRIu32 ") received", request.type);
1214 reply.error = EINVAL;
1217 reply:
1218 /* We must disconnect after NBD_CMD_WRITE if we did not
1219 * read the payload.
1221 if (nbd_co_send_reply(req, &reply, reply_data_len) < 0 || !req->complete) {
1222 goto disconnect;
1225 TRACE("Request/Reply complete");
1227 done:
1228 nbd_request_put(req);
1229 nbd_client_put(client);
1230 return;
1232 disconnect:
1233 nbd_request_put(req);
1234 client_close(client, true);
1235 nbd_client_put(client);
1238 static void nbd_client_receive_next_request(NBDClient *client)
1240 if (!client->recv_coroutine && client->nb_requests < MAX_NBD_REQUESTS) {
1241 nbd_client_get(client);
1242 client->recv_coroutine = qemu_coroutine_create(nbd_trip, client);
1243 aio_co_schedule(client->exp->ctx, client->recv_coroutine);
1247 static coroutine_fn void nbd_co_client_start(void *opaque)
1249 NBDClient *client = opaque;
1250 NBDExport *exp = client->exp;
1252 if (exp) {
1253 nbd_export_get(exp);
1254 QTAILQ_INSERT_TAIL(&exp->clients, client, next);
1256 qemu_co_mutex_init(&client->send_lock);
1258 if (nbd_negotiate(client)) {
1259 client_close(client, false);
1260 return;
1263 nbd_client_receive_next_request(client);
1267 * Create a new client listener on the given export @exp, using the
1268 * given channel @sioc. Begin servicing it in a coroutine. When the
1269 * connection closes, call @close_fn with an indication of whether the
1270 * client completed negotiation.
1272 void nbd_client_new(NBDExport *exp,
1273 QIOChannelSocket *sioc,
1274 QCryptoTLSCreds *tlscreds,
1275 const char *tlsaclname,
1276 void (*close_fn)(NBDClient *, bool))
1278 NBDClient *client;
1279 Coroutine *co;
1281 client = g_malloc0(sizeof(NBDClient));
1282 client->refcount = 1;
1283 client->exp = exp;
1284 client->tlscreds = tlscreds;
1285 if (tlscreds) {
1286 object_ref(OBJECT(client->tlscreds));
1288 client->tlsaclname = g_strdup(tlsaclname);
1289 client->sioc = sioc;
1290 object_ref(OBJECT(client->sioc));
1291 client->ioc = QIO_CHANNEL(sioc);
1292 object_ref(OBJECT(client->ioc));
1293 client->close_fn = close_fn;
1295 co = qemu_coroutine_create(nbd_co_client_start, client);
1296 qemu_coroutine_enter(co);