ipmi: add a realize function to the device class
[qemu/ar7.git] / block / iscsi.c
blob128ea79c13f3c98809cb4201b5ad6825422a85ba
1 /*
2 * QEMU Block driver for iSCSI images
4 * Copyright (c) 2010-2011 Ronnie Sahlberg <ronniesahlberg@gmail.com>
5 * Copyright (c) 2012-2015 Peter Lieven <pl@kamp.de>
7 * Permission is hereby granted, free of charge, to any person obtaining a copy
8 * of this software and associated documentation files (the "Software"), to deal
9 * in the Software without restriction, including without limitation the rights
10 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
11 * copies of the Software, and to permit persons to whom the Software is
12 * furnished to do so, subject to the following conditions:
14 * The above copyright notice and this permission notice shall be included in
15 * all copies or substantial portions of the Software.
17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
20 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
23 * THE SOFTWARE.
26 #include "qemu/osdep.h"
28 #include <poll.h>
29 #include <math.h>
30 #include <arpa/inet.h>
31 #include "qemu-common.h"
32 #include "qemu/config-file.h"
33 #include "qemu/error-report.h"
34 #include "qemu/bitops.h"
35 #include "qemu/bitmap.h"
36 #include "block/block_int.h"
37 #include "block/scsi.h"
38 #include "qemu/iov.h"
39 #include "sysemu/sysemu.h"
40 #include "qmp-commands.h"
41 #include "qapi/qmp/qstring.h"
42 #include "crypto/secret.h"
44 #include <iscsi/iscsi.h>
45 #include <iscsi/scsi-lowlevel.h>
47 #ifdef __linux__
48 #include <scsi/sg.h>
49 #include <block/scsi.h>
50 #endif
52 typedef struct IscsiLun {
53 struct iscsi_context *iscsi;
54 AioContext *aio_context;
55 int lun;
56 enum scsi_inquiry_peripheral_device_type type;
57 int block_size;
58 uint64_t num_blocks;
59 int events;
60 QEMUTimer *nop_timer;
61 QEMUTimer *event_timer;
62 struct scsi_inquiry_logical_block_provisioning lbp;
63 struct scsi_inquiry_block_limits bl;
64 unsigned char *zeroblock;
65 unsigned long *allocationmap;
66 int cluster_sectors;
67 bool use_16_for_rw;
68 bool write_protected;
69 bool lbpme;
70 bool lbprz;
71 bool dpofua;
72 bool has_write_same;
73 bool force_next_flush;
74 bool request_timed_out;
75 } IscsiLun;
77 typedef struct IscsiTask {
78 int status;
79 int complete;
80 int retries;
81 int do_retry;
82 struct scsi_task *task;
83 Coroutine *co;
84 QEMUBH *bh;
85 IscsiLun *iscsilun;
86 QEMUTimer retry_timer;
87 bool force_next_flush;
88 int err_code;
89 } IscsiTask;
91 typedef struct IscsiAIOCB {
92 BlockAIOCB common;
93 QEMUIOVector *qiov;
94 QEMUBH *bh;
95 IscsiLun *iscsilun;
96 struct scsi_task *task;
97 uint8_t *buf;
98 int status;
99 int64_t sector_num;
100 int nb_sectors;
101 int ret;
102 #ifdef __linux__
103 sg_io_hdr_t *ioh;
104 #endif
105 } IscsiAIOCB;
/* libiscsi uses time_t so it's enough to process events every second */
108 #define EVENT_INTERVAL 1000
109 #define NOP_INTERVAL 5000
110 #define MAX_NOP_FAILURES 3
111 #define ISCSI_CMD_RETRIES ARRAY_SIZE(iscsi_retry_times)
112 static const unsigned iscsi_retry_times[] = {8, 32, 128, 512, 2048, 8192, 32768};
114 /* this threshold is a trade-off knob to choose between
115 * the potential additional overhead of an extra GET_LBA_STATUS request
116 * vs. unnecessarily reading a lot of zero sectors over the wire.
117 * If a read request is greater or equal than ISCSI_CHECKALLOC_THRES
118 * sectors we check the allocation status of the area covered by the
119 * request first if the allocationmap indicates that the area might be
120 * unallocated. */
121 #define ISCSI_CHECKALLOC_THRES 64
123 static void
124 iscsi_bh_cb(void *p)
126 IscsiAIOCB *acb = p;
128 qemu_bh_delete(acb->bh);
130 g_free(acb->buf);
131 acb->buf = NULL;
133 acb->common.cb(acb->common.opaque, acb->status);
135 if (acb->task != NULL) {
136 scsi_free_scsi_task(acb->task);
137 acb->task = NULL;
140 qemu_aio_unref(acb);
143 static void
144 iscsi_schedule_bh(IscsiAIOCB *acb)
146 if (acb->bh) {
147 return;
149 acb->bh = aio_bh_new(acb->iscsilun->aio_context, iscsi_bh_cb, acb);
150 qemu_bh_schedule(acb->bh);
153 static void iscsi_co_generic_bh_cb(void *opaque)
155 struct IscsiTask *iTask = opaque;
156 iTask->complete = 1;
157 qemu_bh_delete(iTask->bh);
158 qemu_coroutine_enter(iTask->co, NULL);
161 static void iscsi_retry_timer_expired(void *opaque)
163 struct IscsiTask *iTask = opaque;
164 iTask->complete = 1;
165 if (iTask->co) {
166 qemu_coroutine_enter(iTask->co, NULL);
170 static inline unsigned exp_random(double mean)
172 return -mean * log((double)rand() / RAND_MAX);
175 /* SCSI_SENSE_ASCQ_INVALID_FIELD_IN_PARAMETER_LIST was introduced in
176 * libiscsi 1.10.0, together with other constants we need. Use it as
177 * a hint that we have to define them ourselves if needed, to keep the
178 * minimum required libiscsi version at 1.9.0. We use an ASCQ macro for
179 * the test because SCSI_STATUS_* is an enum.
181 * To guard against future changes where SCSI_SENSE_ASCQ_* also becomes
182 * an enum, check against the LIBISCSI_API_VERSION macro, which was
183 * introduced in 1.11.0. If it is present, there is no need to define
184 * anything.
186 #if !defined(SCSI_SENSE_ASCQ_INVALID_FIELD_IN_PARAMETER_LIST) && \
187 !defined(LIBISCSI_API_VERSION)
188 #define SCSI_STATUS_TASK_SET_FULL 0x28
189 #define SCSI_STATUS_TIMEOUT 0x0f000002
190 #define SCSI_SENSE_ASCQ_INVALID_FIELD_IN_PARAMETER_LIST 0x2600
191 #define SCSI_SENSE_ASCQ_PARAMETER_LIST_LENGTH_ERROR 0x1a00
192 #endif
194 static int iscsi_translate_sense(struct scsi_sense *sense)
196 int ret;
198 switch (sense->key) {
199 case SCSI_SENSE_NOT_READY:
200 return -EBUSY;
201 case SCSI_SENSE_DATA_PROTECTION:
202 return -EACCES;
203 case SCSI_SENSE_COMMAND_ABORTED:
204 return -ECANCELED;
205 case SCSI_SENSE_ILLEGAL_REQUEST:
206 /* Parse ASCQ */
207 break;
208 default:
209 return -EIO;
211 switch (sense->ascq) {
212 case SCSI_SENSE_ASCQ_PARAMETER_LIST_LENGTH_ERROR:
213 case SCSI_SENSE_ASCQ_INVALID_OPERATION_CODE:
214 case SCSI_SENSE_ASCQ_INVALID_FIELD_IN_CDB:
215 case SCSI_SENSE_ASCQ_INVALID_FIELD_IN_PARAMETER_LIST:
216 ret = -EINVAL;
217 break;
218 case SCSI_SENSE_ASCQ_LBA_OUT_OF_RANGE:
219 ret = -ENOSPC;
220 break;
221 case SCSI_SENSE_ASCQ_LOGICAL_UNIT_NOT_SUPPORTED:
222 ret = -ENOTSUP;
223 break;
224 case SCSI_SENSE_ASCQ_MEDIUM_NOT_PRESENT:
225 case SCSI_SENSE_ASCQ_MEDIUM_NOT_PRESENT_TRAY_CLOSED:
226 case SCSI_SENSE_ASCQ_MEDIUM_NOT_PRESENT_TRAY_OPEN:
227 ret = -ENOMEDIUM;
228 break;
229 case SCSI_SENSE_ASCQ_WRITE_PROTECTED:
230 ret = -EACCES;
231 break;
232 default:
233 ret = -EIO;
234 break;
236 return ret;
239 static void
240 iscsi_co_generic_cb(struct iscsi_context *iscsi, int status,
241 void *command_data, void *opaque)
243 struct IscsiTask *iTask = opaque;
244 struct scsi_task *task = command_data;
246 iTask->status = status;
247 iTask->do_retry = 0;
248 iTask->task = task;
250 if (status != SCSI_STATUS_GOOD) {
251 if (iTask->retries++ < ISCSI_CMD_RETRIES) {
252 if (status == SCSI_STATUS_CHECK_CONDITION
253 && task->sense.key == SCSI_SENSE_UNIT_ATTENTION) {
254 error_report("iSCSI CheckCondition: %s",
255 iscsi_get_error(iscsi));
256 iTask->do_retry = 1;
257 goto out;
259 if (status == SCSI_STATUS_BUSY ||
260 status == SCSI_STATUS_TIMEOUT ||
261 status == SCSI_STATUS_TASK_SET_FULL) {
262 unsigned retry_time =
263 exp_random(iscsi_retry_times[iTask->retries - 1]);
264 if (status == SCSI_STATUS_TIMEOUT) {
265 /* make sure the request is rescheduled AFTER the
266 * reconnect is initiated */
267 retry_time = EVENT_INTERVAL * 2;
268 iTask->iscsilun->request_timed_out = true;
270 error_report("iSCSI Busy/TaskSetFull/TimeOut"
271 " (retry #%u in %u ms): %s",
272 iTask->retries, retry_time,
273 iscsi_get_error(iscsi));
274 aio_timer_init(iTask->iscsilun->aio_context,
275 &iTask->retry_timer, QEMU_CLOCK_REALTIME,
276 SCALE_MS, iscsi_retry_timer_expired, iTask);
277 timer_mod(&iTask->retry_timer,
278 qemu_clock_get_ms(QEMU_CLOCK_REALTIME) + retry_time);
279 iTask->do_retry = 1;
280 return;
283 iTask->err_code = iscsi_translate_sense(&task->sense);
284 error_report("iSCSI Failure: %s", iscsi_get_error(iscsi));
285 } else {
286 iTask->iscsilun->force_next_flush |= iTask->force_next_flush;
289 out:
290 if (iTask->co) {
291 iTask->bh = aio_bh_new(iTask->iscsilun->aio_context,
292 iscsi_co_generic_bh_cb, iTask);
293 qemu_bh_schedule(iTask->bh);
294 } else {
295 iTask->complete = 1;
299 static void iscsi_co_init_iscsitask(IscsiLun *iscsilun, struct IscsiTask *iTask)
301 *iTask = (struct IscsiTask) {
302 .co = qemu_coroutine_self(),
303 .iscsilun = iscsilun,
307 static void
308 iscsi_abort_task_cb(struct iscsi_context *iscsi, int status, void *command_data,
309 void *private_data)
311 IscsiAIOCB *acb = private_data;
313 acb->status = -ECANCELED;
314 iscsi_schedule_bh(acb);
317 static void
318 iscsi_aio_cancel(BlockAIOCB *blockacb)
320 IscsiAIOCB *acb = (IscsiAIOCB *)blockacb;
321 IscsiLun *iscsilun = acb->iscsilun;
323 if (acb->status != -EINPROGRESS) {
324 return;
327 /* send a task mgmt call to the target to cancel the task on the target */
328 iscsi_task_mgmt_abort_task_async(iscsilun->iscsi, acb->task,
329 iscsi_abort_task_cb, acb);
333 static const AIOCBInfo iscsi_aiocb_info = {
334 .aiocb_size = sizeof(IscsiAIOCB),
335 .cancel_async = iscsi_aio_cancel,
339 static void iscsi_process_read(void *arg);
340 static void iscsi_process_write(void *arg);
342 static void
343 iscsi_set_events(IscsiLun *iscsilun)
345 struct iscsi_context *iscsi = iscsilun->iscsi;
346 int ev = iscsi_which_events(iscsi);
348 if (ev != iscsilun->events) {
349 aio_set_fd_handler(iscsilun->aio_context, iscsi_get_fd(iscsi),
350 false,
351 (ev & POLLIN) ? iscsi_process_read : NULL,
352 (ev & POLLOUT) ? iscsi_process_write : NULL,
353 iscsilun);
354 iscsilun->events = ev;
358 static void iscsi_timed_check_events(void *opaque)
360 IscsiLun *iscsilun = opaque;
362 /* check for timed out requests */
363 iscsi_service(iscsilun->iscsi, 0);
365 if (iscsilun->request_timed_out) {
366 iscsilun->request_timed_out = false;
367 iscsi_reconnect(iscsilun->iscsi);
370 /* newer versions of libiscsi may return zero events. Ensure we are able
371 * to return to service once this situation changes. */
372 iscsi_set_events(iscsilun);
374 timer_mod(iscsilun->event_timer,
375 qemu_clock_get_ms(QEMU_CLOCK_REALTIME) + EVENT_INTERVAL);
378 static void
379 iscsi_process_read(void *arg)
381 IscsiLun *iscsilun = arg;
382 struct iscsi_context *iscsi = iscsilun->iscsi;
384 iscsi_service(iscsi, POLLIN);
385 iscsi_set_events(iscsilun);
388 static void
389 iscsi_process_write(void *arg)
391 IscsiLun *iscsilun = arg;
392 struct iscsi_context *iscsi = iscsilun->iscsi;
394 iscsi_service(iscsi, POLLOUT);
395 iscsi_set_events(iscsilun);
398 static int64_t sector_lun2qemu(int64_t sector, IscsiLun *iscsilun)
400 return sector * iscsilun->block_size / BDRV_SECTOR_SIZE;
403 static int64_t sector_qemu2lun(int64_t sector, IscsiLun *iscsilun)
405 return sector * BDRV_SECTOR_SIZE / iscsilun->block_size;
408 static bool is_request_lun_aligned(int64_t sector_num, int nb_sectors,
409 IscsiLun *iscsilun)
411 if ((sector_num * BDRV_SECTOR_SIZE) % iscsilun->block_size ||
412 (nb_sectors * BDRV_SECTOR_SIZE) % iscsilun->block_size) {
413 error_report("iSCSI misaligned request: "
414 "iscsilun->block_size %u, sector_num %" PRIi64
415 ", nb_sectors %d",
416 iscsilun->block_size, sector_num, nb_sectors);
417 return 0;
419 return 1;
422 static unsigned long *iscsi_allocationmap_init(IscsiLun *iscsilun)
424 return bitmap_try_new(DIV_ROUND_UP(sector_lun2qemu(iscsilun->num_blocks,
425 iscsilun),
426 iscsilun->cluster_sectors));
429 static void iscsi_allocationmap_set(IscsiLun *iscsilun, int64_t sector_num,
430 int nb_sectors)
432 if (iscsilun->allocationmap == NULL) {
433 return;
435 bitmap_set(iscsilun->allocationmap,
436 sector_num / iscsilun->cluster_sectors,
437 DIV_ROUND_UP(nb_sectors, iscsilun->cluster_sectors));
440 static void iscsi_allocationmap_clear(IscsiLun *iscsilun, int64_t sector_num,
441 int nb_sectors)
443 int64_t cluster_num, nb_clusters;
444 if (iscsilun->allocationmap == NULL) {
445 return;
447 cluster_num = DIV_ROUND_UP(sector_num, iscsilun->cluster_sectors);
448 nb_clusters = (sector_num + nb_sectors) / iscsilun->cluster_sectors
449 - cluster_num;
450 if (nb_clusters > 0) {
451 bitmap_clear(iscsilun->allocationmap, cluster_num, nb_clusters);
455 static int coroutine_fn iscsi_co_writev(BlockDriverState *bs,
456 int64_t sector_num, int nb_sectors,
457 QEMUIOVector *iov)
459 IscsiLun *iscsilun = bs->opaque;
460 struct IscsiTask iTask;
461 uint64_t lba;
462 uint32_t num_sectors;
463 int fua;
465 if (!is_request_lun_aligned(sector_num, nb_sectors, iscsilun)) {
466 return -EINVAL;
469 if (bs->bl.max_transfer_length && nb_sectors > bs->bl.max_transfer_length) {
470 error_report("iSCSI Error: Write of %d sectors exceeds max_xfer_len "
471 "of %d sectors", nb_sectors, bs->bl.max_transfer_length);
472 return -EINVAL;
475 lba = sector_qemu2lun(sector_num, iscsilun);
476 num_sectors = sector_qemu2lun(nb_sectors, iscsilun);
477 iscsi_co_init_iscsitask(iscsilun, &iTask);
478 retry:
479 fua = iscsilun->dpofua && !bs->enable_write_cache;
480 iTask.force_next_flush = !fua;
481 if (iscsilun->use_16_for_rw) {
482 iTask.task = iscsi_write16_task(iscsilun->iscsi, iscsilun->lun, lba,
483 NULL, num_sectors * iscsilun->block_size,
484 iscsilun->block_size, 0, 0, fua, 0, 0,
485 iscsi_co_generic_cb, &iTask);
486 } else {
487 iTask.task = iscsi_write10_task(iscsilun->iscsi, iscsilun->lun, lba,
488 NULL, num_sectors * iscsilun->block_size,
489 iscsilun->block_size, 0, 0, fua, 0, 0,
490 iscsi_co_generic_cb, &iTask);
492 if (iTask.task == NULL) {
493 return -ENOMEM;
495 scsi_task_set_iov_out(iTask.task, (struct scsi_iovec *) iov->iov,
496 iov->niov);
497 while (!iTask.complete) {
498 iscsi_set_events(iscsilun);
499 qemu_coroutine_yield();
502 if (iTask.task != NULL) {
503 scsi_free_scsi_task(iTask.task);
504 iTask.task = NULL;
507 if (iTask.do_retry) {
508 iTask.complete = 0;
509 goto retry;
512 if (iTask.status != SCSI_STATUS_GOOD) {
513 return iTask.err_code;
516 iscsi_allocationmap_set(iscsilun, sector_num, nb_sectors);
518 return 0;
522 static bool iscsi_allocationmap_is_allocated(IscsiLun *iscsilun,
523 int64_t sector_num, int nb_sectors)
525 unsigned long size;
526 if (iscsilun->allocationmap == NULL) {
527 return true;
529 size = DIV_ROUND_UP(sector_num + nb_sectors, iscsilun->cluster_sectors);
530 return !(find_next_bit(iscsilun->allocationmap, size,
531 sector_num / iscsilun->cluster_sectors) == size);
534 static int64_t coroutine_fn iscsi_co_get_block_status(BlockDriverState *bs,
535 int64_t sector_num,
536 int nb_sectors, int *pnum,
537 BlockDriverState **file)
539 IscsiLun *iscsilun = bs->opaque;
540 struct scsi_get_lba_status *lbas = NULL;
541 struct scsi_lba_status_descriptor *lbasd = NULL;
542 struct IscsiTask iTask;
543 int64_t ret;
545 iscsi_co_init_iscsitask(iscsilun, &iTask);
547 if (!is_request_lun_aligned(sector_num, nb_sectors, iscsilun)) {
548 ret = -EINVAL;
549 goto out;
552 /* default to all sectors allocated */
553 ret = BDRV_BLOCK_DATA;
554 ret |= (sector_num << BDRV_SECTOR_BITS) | BDRV_BLOCK_OFFSET_VALID;
555 *pnum = nb_sectors;
557 /* LUN does not support logical block provisioning */
558 if (!iscsilun->lbpme) {
559 goto out;
562 retry:
563 if (iscsi_get_lba_status_task(iscsilun->iscsi, iscsilun->lun,
564 sector_qemu2lun(sector_num, iscsilun),
565 8 + 16, iscsi_co_generic_cb,
566 &iTask) == NULL) {
567 ret = -ENOMEM;
568 goto out;
571 while (!iTask.complete) {
572 iscsi_set_events(iscsilun);
573 qemu_coroutine_yield();
576 if (iTask.do_retry) {
577 if (iTask.task != NULL) {
578 scsi_free_scsi_task(iTask.task);
579 iTask.task = NULL;
581 iTask.complete = 0;
582 goto retry;
585 if (iTask.status != SCSI_STATUS_GOOD) {
586 /* in case the get_lba_status_callout fails (i.e.
587 * because the device is busy or the cmd is not
588 * supported) we pretend all blocks are allocated
589 * for backwards compatibility */
590 goto out;
593 lbas = scsi_datain_unmarshall(iTask.task);
594 if (lbas == NULL) {
595 ret = -EIO;
596 goto out;
599 lbasd = &lbas->descriptors[0];
601 if (sector_qemu2lun(sector_num, iscsilun) != lbasd->lba) {
602 ret = -EIO;
603 goto out;
606 *pnum = sector_lun2qemu(lbasd->num_blocks, iscsilun);
608 if (lbasd->provisioning == SCSI_PROVISIONING_TYPE_DEALLOCATED ||
609 lbasd->provisioning == SCSI_PROVISIONING_TYPE_ANCHORED) {
610 ret &= ~BDRV_BLOCK_DATA;
611 if (iscsilun->lbprz) {
612 ret |= BDRV_BLOCK_ZERO;
616 if (ret & BDRV_BLOCK_ZERO) {
617 iscsi_allocationmap_clear(iscsilun, sector_num, *pnum);
618 } else {
619 iscsi_allocationmap_set(iscsilun, sector_num, *pnum);
622 if (*pnum > nb_sectors) {
623 *pnum = nb_sectors;
625 out:
626 if (iTask.task != NULL) {
627 scsi_free_scsi_task(iTask.task);
629 if (ret > 0 && ret & BDRV_BLOCK_OFFSET_VALID) {
630 *file = bs;
632 return ret;
635 static int coroutine_fn iscsi_co_readv(BlockDriverState *bs,
636 int64_t sector_num, int nb_sectors,
637 QEMUIOVector *iov)
639 IscsiLun *iscsilun = bs->opaque;
640 struct IscsiTask iTask;
641 uint64_t lba;
642 uint32_t num_sectors;
644 if (!is_request_lun_aligned(sector_num, nb_sectors, iscsilun)) {
645 return -EINVAL;
648 if (bs->bl.max_transfer_length && nb_sectors > bs->bl.max_transfer_length) {
649 error_report("iSCSI Error: Read of %d sectors exceeds max_xfer_len "
650 "of %d sectors", nb_sectors, bs->bl.max_transfer_length);
651 return -EINVAL;
654 if (iscsilun->lbprz && nb_sectors >= ISCSI_CHECKALLOC_THRES &&
655 !iscsi_allocationmap_is_allocated(iscsilun, sector_num, nb_sectors)) {
656 int64_t ret;
657 int pnum;
658 BlockDriverState *file;
659 ret = iscsi_co_get_block_status(bs, sector_num, INT_MAX, &pnum, &file);
660 if (ret < 0) {
661 return ret;
663 if (ret & BDRV_BLOCK_ZERO && pnum >= nb_sectors) {
664 qemu_iovec_memset(iov, 0, 0x00, iov->size);
665 return 0;
669 lba = sector_qemu2lun(sector_num, iscsilun);
670 num_sectors = sector_qemu2lun(nb_sectors, iscsilun);
672 iscsi_co_init_iscsitask(iscsilun, &iTask);
673 retry:
674 if (iscsilun->use_16_for_rw) {
675 iTask.task = iscsi_read16_task(iscsilun->iscsi, iscsilun->lun, lba,
676 num_sectors * iscsilun->block_size,
677 iscsilun->block_size, 0, 0, 0, 0, 0,
678 iscsi_co_generic_cb, &iTask);
679 } else {
680 iTask.task = iscsi_read10_task(iscsilun->iscsi, iscsilun->lun, lba,
681 num_sectors * iscsilun->block_size,
682 iscsilun->block_size,
683 0, 0, 0, 0, 0,
684 iscsi_co_generic_cb, &iTask);
686 if (iTask.task == NULL) {
687 return -ENOMEM;
689 scsi_task_set_iov_in(iTask.task, (struct scsi_iovec *) iov->iov, iov->niov);
691 while (!iTask.complete) {
692 iscsi_set_events(iscsilun);
693 qemu_coroutine_yield();
696 if (iTask.task != NULL) {
697 scsi_free_scsi_task(iTask.task);
698 iTask.task = NULL;
701 if (iTask.do_retry) {
702 iTask.complete = 0;
703 goto retry;
706 if (iTask.status != SCSI_STATUS_GOOD) {
707 return iTask.err_code;
710 return 0;
713 static int coroutine_fn iscsi_co_flush(BlockDriverState *bs)
715 IscsiLun *iscsilun = bs->opaque;
716 struct IscsiTask iTask;
718 if (!iscsilun->force_next_flush) {
719 return 0;
721 iscsilun->force_next_flush = false;
723 iscsi_co_init_iscsitask(iscsilun, &iTask);
724 retry:
725 if (iscsi_synchronizecache10_task(iscsilun->iscsi, iscsilun->lun, 0, 0, 0,
726 0, iscsi_co_generic_cb, &iTask) == NULL) {
727 return -ENOMEM;
730 while (!iTask.complete) {
731 iscsi_set_events(iscsilun);
732 qemu_coroutine_yield();
735 if (iTask.task != NULL) {
736 scsi_free_scsi_task(iTask.task);
737 iTask.task = NULL;
740 if (iTask.do_retry) {
741 iTask.complete = 0;
742 goto retry;
745 if (iTask.status != SCSI_STATUS_GOOD) {
746 return iTask.err_code;
749 return 0;
752 #ifdef __linux__
753 static void
754 iscsi_aio_ioctl_cb(struct iscsi_context *iscsi, int status,
755 void *command_data, void *opaque)
757 IscsiAIOCB *acb = opaque;
759 g_free(acb->buf);
760 acb->buf = NULL;
762 acb->status = 0;
763 if (status < 0) {
764 error_report("Failed to ioctl(SG_IO) to iSCSI lun. %s",
765 iscsi_get_error(iscsi));
766 acb->status = iscsi_translate_sense(&acb->task->sense);
769 acb->ioh->driver_status = 0;
770 acb->ioh->host_status = 0;
771 acb->ioh->resid = 0;
773 #define SG_ERR_DRIVER_SENSE 0x08
775 if (status == SCSI_STATUS_CHECK_CONDITION && acb->task->datain.size >= 2) {
776 int ss;
778 acb->ioh->driver_status |= SG_ERR_DRIVER_SENSE;
780 acb->ioh->sb_len_wr = acb->task->datain.size - 2;
781 ss = (acb->ioh->mx_sb_len >= acb->ioh->sb_len_wr) ?
782 acb->ioh->mx_sb_len : acb->ioh->sb_len_wr;
783 memcpy(acb->ioh->sbp, &acb->task->datain.data[2], ss);
786 iscsi_schedule_bh(acb);
789 static void iscsi_ioctl_bh_completion(void *opaque)
791 IscsiAIOCB *acb = opaque;
793 qemu_bh_delete(acb->bh);
794 acb->common.cb(acb->common.opaque, acb->ret);
795 qemu_aio_unref(acb);
798 static void iscsi_ioctl_handle_emulated(IscsiAIOCB *acb, int req, void *buf)
800 BlockDriverState *bs = acb->common.bs;
801 IscsiLun *iscsilun = bs->opaque;
802 int ret = 0;
804 switch (req) {
805 case SG_GET_VERSION_NUM:
806 *(int *)buf = 30000;
807 break;
808 case SG_GET_SCSI_ID:
809 ((struct sg_scsi_id *)buf)->scsi_type = iscsilun->type;
810 break;
811 default:
812 ret = -EINVAL;
814 assert(!acb->bh);
815 acb->bh = aio_bh_new(bdrv_get_aio_context(bs),
816 iscsi_ioctl_bh_completion, acb);
817 acb->ret = ret;
818 qemu_bh_schedule(acb->bh);
821 static BlockAIOCB *iscsi_aio_ioctl(BlockDriverState *bs,
822 unsigned long int req, void *buf,
823 BlockCompletionFunc *cb, void *opaque)
825 IscsiLun *iscsilun = bs->opaque;
826 struct iscsi_context *iscsi = iscsilun->iscsi;
827 struct iscsi_data data;
828 IscsiAIOCB *acb;
830 acb = qemu_aio_get(&iscsi_aiocb_info, bs, cb, opaque);
832 acb->iscsilun = iscsilun;
833 acb->bh = NULL;
834 acb->status = -EINPROGRESS;
835 acb->buf = NULL;
836 acb->ioh = buf;
838 if (req != SG_IO) {
839 iscsi_ioctl_handle_emulated(acb, req, buf);
840 return &acb->common;
843 acb->task = malloc(sizeof(struct scsi_task));
844 if (acb->task == NULL) {
845 error_report("iSCSI: Failed to allocate task for scsi command. %s",
846 iscsi_get_error(iscsi));
847 qemu_aio_unref(acb);
848 return NULL;
850 memset(acb->task, 0, sizeof(struct scsi_task));
852 switch (acb->ioh->dxfer_direction) {
853 case SG_DXFER_TO_DEV:
854 acb->task->xfer_dir = SCSI_XFER_WRITE;
855 break;
856 case SG_DXFER_FROM_DEV:
857 acb->task->xfer_dir = SCSI_XFER_READ;
858 break;
859 default:
860 acb->task->xfer_dir = SCSI_XFER_NONE;
861 break;
864 acb->task->cdb_size = acb->ioh->cmd_len;
865 memcpy(&acb->task->cdb[0], acb->ioh->cmdp, acb->ioh->cmd_len);
866 acb->task->expxferlen = acb->ioh->dxfer_len;
868 data.size = 0;
869 if (acb->task->xfer_dir == SCSI_XFER_WRITE) {
870 if (acb->ioh->iovec_count == 0) {
871 data.data = acb->ioh->dxferp;
872 data.size = acb->ioh->dxfer_len;
873 } else {
874 scsi_task_set_iov_out(acb->task,
875 (struct scsi_iovec *) acb->ioh->dxferp,
876 acb->ioh->iovec_count);
880 if (iscsi_scsi_command_async(iscsi, iscsilun->lun, acb->task,
881 iscsi_aio_ioctl_cb,
882 (data.size > 0) ? &data : NULL,
883 acb) != 0) {
884 scsi_free_scsi_task(acb->task);
885 qemu_aio_unref(acb);
886 return NULL;
889 /* tell libiscsi to read straight into the buffer we got from ioctl */
890 if (acb->task->xfer_dir == SCSI_XFER_READ) {
891 if (acb->ioh->iovec_count == 0) {
892 scsi_task_add_data_in_buffer(acb->task,
893 acb->ioh->dxfer_len,
894 acb->ioh->dxferp);
895 } else {
896 scsi_task_set_iov_in(acb->task,
897 (struct scsi_iovec *) acb->ioh->dxferp,
898 acb->ioh->iovec_count);
902 iscsi_set_events(iscsilun);
904 return &acb->common;
907 #endif
909 static int64_t
910 iscsi_getlength(BlockDriverState *bs)
912 IscsiLun *iscsilun = bs->opaque;
913 int64_t len;
915 len = iscsilun->num_blocks;
916 len *= iscsilun->block_size;
918 return len;
921 static int
922 coroutine_fn iscsi_co_discard(BlockDriverState *bs, int64_t sector_num,
923 int nb_sectors)
925 IscsiLun *iscsilun = bs->opaque;
926 struct IscsiTask iTask;
927 struct unmap_list list;
929 if (!is_request_lun_aligned(sector_num, nb_sectors, iscsilun)) {
930 return -EINVAL;
933 if (!iscsilun->lbp.lbpu) {
934 /* UNMAP is not supported by the target */
935 return 0;
938 list.lba = sector_qemu2lun(sector_num, iscsilun);
939 list.num = sector_qemu2lun(nb_sectors, iscsilun);
941 iscsi_co_init_iscsitask(iscsilun, &iTask);
942 retry:
943 if (iscsi_unmap_task(iscsilun->iscsi, iscsilun->lun, 0, 0, &list, 1,
944 iscsi_co_generic_cb, &iTask) == NULL) {
945 return -ENOMEM;
948 while (!iTask.complete) {
949 iscsi_set_events(iscsilun);
950 qemu_coroutine_yield();
953 if (iTask.task != NULL) {
954 scsi_free_scsi_task(iTask.task);
955 iTask.task = NULL;
958 if (iTask.do_retry) {
959 iTask.complete = 0;
960 goto retry;
963 if (iTask.status == SCSI_STATUS_CHECK_CONDITION) {
964 /* the target might fail with a check condition if it
965 is not happy with the alignment of the UNMAP request
966 we silently fail in this case */
967 return 0;
970 if (iTask.status != SCSI_STATUS_GOOD) {
971 return iTask.err_code;
974 iscsi_allocationmap_clear(iscsilun, sector_num, nb_sectors);
976 return 0;
979 static int
980 coroutine_fn iscsi_co_write_zeroes(BlockDriverState *bs, int64_t sector_num,
981 int nb_sectors, BdrvRequestFlags flags)
983 IscsiLun *iscsilun = bs->opaque;
984 struct IscsiTask iTask;
985 uint64_t lba;
986 uint32_t nb_blocks;
987 bool use_16_for_ws = iscsilun->use_16_for_rw;
989 if (!is_request_lun_aligned(sector_num, nb_sectors, iscsilun)) {
990 return -EINVAL;
993 if (flags & BDRV_REQ_MAY_UNMAP) {
994 if (!use_16_for_ws && !iscsilun->lbp.lbpws10) {
995 /* WRITESAME10 with UNMAP is unsupported try WRITESAME16 */
996 use_16_for_ws = true;
998 if (use_16_for_ws && !iscsilun->lbp.lbpws) {
999 /* WRITESAME16 with UNMAP is not supported by the target,
1000 * fall back and try WRITESAME10/16 without UNMAP */
1001 flags &= ~BDRV_REQ_MAY_UNMAP;
1002 use_16_for_ws = iscsilun->use_16_for_rw;
1006 if (!(flags & BDRV_REQ_MAY_UNMAP) && !iscsilun->has_write_same) {
1007 /* WRITESAME without UNMAP is not supported by the target */
1008 return -ENOTSUP;
1011 lba = sector_qemu2lun(sector_num, iscsilun);
1012 nb_blocks = sector_qemu2lun(nb_sectors, iscsilun);
1014 if (iscsilun->zeroblock == NULL) {
1015 iscsilun->zeroblock = g_try_malloc0(iscsilun->block_size);
1016 if (iscsilun->zeroblock == NULL) {
1017 return -ENOMEM;
1021 iscsi_co_init_iscsitask(iscsilun, &iTask);
1022 iTask.force_next_flush = true;
1023 retry:
1024 if (use_16_for_ws) {
1025 iTask.task = iscsi_writesame16_task(iscsilun->iscsi, iscsilun->lun, lba,
1026 iscsilun->zeroblock, iscsilun->block_size,
1027 nb_blocks, 0, !!(flags & BDRV_REQ_MAY_UNMAP),
1028 0, 0, iscsi_co_generic_cb, &iTask);
1029 } else {
1030 iTask.task = iscsi_writesame10_task(iscsilun->iscsi, iscsilun->lun, lba,
1031 iscsilun->zeroblock, iscsilun->block_size,
1032 nb_blocks, 0, !!(flags & BDRV_REQ_MAY_UNMAP),
1033 0, 0, iscsi_co_generic_cb, &iTask);
1035 if (iTask.task == NULL) {
1036 return -ENOMEM;
1039 while (!iTask.complete) {
1040 iscsi_set_events(iscsilun);
1041 qemu_coroutine_yield();
1044 if (iTask.status == SCSI_STATUS_CHECK_CONDITION &&
1045 iTask.task->sense.key == SCSI_SENSE_ILLEGAL_REQUEST &&
1046 (iTask.task->sense.ascq == SCSI_SENSE_ASCQ_INVALID_OPERATION_CODE ||
1047 iTask.task->sense.ascq == SCSI_SENSE_ASCQ_INVALID_FIELD_IN_CDB)) {
1048 /* WRITE SAME is not supported by the target */
1049 iscsilun->has_write_same = false;
1050 scsi_free_scsi_task(iTask.task);
1051 return -ENOTSUP;
1054 if (iTask.task != NULL) {
1055 scsi_free_scsi_task(iTask.task);
1056 iTask.task = NULL;
1059 if (iTask.do_retry) {
1060 iTask.complete = 0;
1061 goto retry;
1064 if (iTask.status != SCSI_STATUS_GOOD) {
1065 return iTask.err_code;
1068 if (flags & BDRV_REQ_MAY_UNMAP) {
1069 iscsi_allocationmap_clear(iscsilun, sector_num, nb_sectors);
1070 } else {
1071 iscsi_allocationmap_set(iscsilun, sector_num, nb_sectors);
1074 return 0;
1077 static void parse_chap(struct iscsi_context *iscsi, const char *target,
1078 Error **errp)
1080 QemuOptsList *list;
1081 QemuOpts *opts;
1082 const char *user = NULL;
1083 const char *password = NULL;
1084 const char *secretid;
1085 char *secret = NULL;
1087 list = qemu_find_opts("iscsi");
1088 if (!list) {
1089 return;
1092 opts = qemu_opts_find(list, target);
1093 if (opts == NULL) {
1094 opts = QTAILQ_FIRST(&list->head);
1095 if (!opts) {
1096 return;
1100 user = qemu_opt_get(opts, "user");
1101 if (!user) {
1102 return;
1105 secretid = qemu_opt_get(opts, "password-secret");
1106 password = qemu_opt_get(opts, "password");
1107 if (secretid && password) {
1108 error_setg(errp, "'password' and 'password-secret' properties are "
1109 "mutually exclusive");
1110 return;
1112 if (secretid) {
1113 secret = qcrypto_secret_lookup_as_utf8(secretid, errp);
1114 if (!secret) {
1115 return;
1117 password = secret;
1118 } else if (!password) {
1119 error_setg(errp, "CHAP username specified but no password was given");
1120 return;
1123 if (iscsi_set_initiator_username_pwd(iscsi, user, password)) {
1124 error_setg(errp, "Failed to set initiator username and password");
1127 g_free(secret);
1130 static void parse_header_digest(struct iscsi_context *iscsi, const char *target,
1131 Error **errp)
1133 QemuOptsList *list;
1134 QemuOpts *opts;
1135 const char *digest = NULL;
1137 list = qemu_find_opts("iscsi");
1138 if (!list) {
1139 return;
1142 opts = qemu_opts_find(list, target);
1143 if (opts == NULL) {
1144 opts = QTAILQ_FIRST(&list->head);
1145 if (!opts) {
1146 return;
1150 digest = qemu_opt_get(opts, "header-digest");
1151 if (!digest) {
1152 return;
1155 if (!strcmp(digest, "CRC32C")) {
1156 iscsi_set_header_digest(iscsi, ISCSI_HEADER_DIGEST_CRC32C);
1157 } else if (!strcmp(digest, "NONE")) {
1158 iscsi_set_header_digest(iscsi, ISCSI_HEADER_DIGEST_NONE);
1159 } else if (!strcmp(digest, "CRC32C-NONE")) {
1160 iscsi_set_header_digest(iscsi, ISCSI_HEADER_DIGEST_CRC32C_NONE);
1161 } else if (!strcmp(digest, "NONE-CRC32C")) {
1162 iscsi_set_header_digest(iscsi, ISCSI_HEADER_DIGEST_NONE_CRC32C);
1163 } else {
1164 error_setg(errp, "Invalid header-digest setting : %s", digest);
1168 static char *parse_initiator_name(const char *target)
1170 QemuOptsList *list;
1171 QemuOpts *opts;
1172 const char *name;
1173 char *iscsi_name;
1174 UuidInfo *uuid_info;
1176 list = qemu_find_opts("iscsi");
1177 if (list) {
1178 opts = qemu_opts_find(list, target);
1179 if (!opts) {
1180 opts = QTAILQ_FIRST(&list->head);
1182 if (opts) {
1183 name = qemu_opt_get(opts, "initiator-name");
1184 if (name) {
1185 return g_strdup(name);
1190 uuid_info = qmp_query_uuid(NULL);
1191 if (strcmp(uuid_info->UUID, UUID_NONE) == 0) {
1192 name = qemu_get_vm_name();
1193 } else {
1194 name = uuid_info->UUID;
1196 iscsi_name = g_strdup_printf("iqn.2008-11.org.linux-kvm%s%s",
1197 name ? ":" : "", name ? name : "");
1198 qapi_free_UuidInfo(uuid_info);
1199 return iscsi_name;
1202 static int parse_timeout(const char *target)
1204 QemuOptsList *list;
1205 QemuOpts *opts;
1206 const char *timeout;
1208 list = qemu_find_opts("iscsi");
1209 if (list) {
1210 opts = qemu_opts_find(list, target);
1211 if (!opts) {
1212 opts = QTAILQ_FIRST(&list->head);
1214 if (opts) {
1215 timeout = qemu_opt_get(opts, "timeout");
1216 if (timeout) {
1217 return atoi(timeout);
1222 return 0;
1225 static void iscsi_nop_timed_event(void *opaque)
1227 IscsiLun *iscsilun = opaque;
1229 if (iscsi_get_nops_in_flight(iscsilun->iscsi) >= MAX_NOP_FAILURES) {
1230 error_report("iSCSI: NOP timeout. Reconnecting...");
1231 iscsilun->request_timed_out = true;
1232 } else if (iscsi_nop_out_async(iscsilun->iscsi, NULL, NULL, 0, NULL) != 0) {
1233 error_report("iSCSI: failed to sent NOP-Out. Disabling NOP messages.");
1234 return;
1237 timer_mod(iscsilun->nop_timer, qemu_clock_get_ms(QEMU_CLOCK_REALTIME) + NOP_INTERVAL);
1238 iscsi_set_events(iscsilun);
1241 static void iscsi_readcapacity_sync(IscsiLun *iscsilun, Error **errp)
1243 struct scsi_task *task = NULL;
1244 struct scsi_readcapacity10 *rc10 = NULL;
1245 struct scsi_readcapacity16 *rc16 = NULL;
1246 int retries = ISCSI_CMD_RETRIES;
1248 do {
1249 if (task != NULL) {
1250 scsi_free_scsi_task(task);
1251 task = NULL;
1254 switch (iscsilun->type) {
1255 case TYPE_DISK:
1256 task = iscsi_readcapacity16_sync(iscsilun->iscsi, iscsilun->lun);
1257 if (task != NULL && task->status == SCSI_STATUS_GOOD) {
1258 rc16 = scsi_datain_unmarshall(task);
1259 if (rc16 == NULL) {
1260 error_setg(errp, "iSCSI: Failed to unmarshall readcapacity16 data.");
1261 } else {
1262 iscsilun->block_size = rc16->block_length;
1263 iscsilun->num_blocks = rc16->returned_lba + 1;
1264 iscsilun->lbpme = !!rc16->lbpme;
1265 iscsilun->lbprz = !!rc16->lbprz;
1266 iscsilun->use_16_for_rw = (rc16->returned_lba > 0xffffffff);
1268 break;
1270 if (task != NULL && task->status == SCSI_STATUS_CHECK_CONDITION
1271 && task->sense.key == SCSI_SENSE_UNIT_ATTENTION) {
1272 break;
1274 /* Fall through and try READ CAPACITY(10) instead. */
1275 case TYPE_ROM:
1276 task = iscsi_readcapacity10_sync(iscsilun->iscsi, iscsilun->lun, 0, 0);
1277 if (task != NULL && task->status == SCSI_STATUS_GOOD) {
1278 rc10 = scsi_datain_unmarshall(task);
1279 if (rc10 == NULL) {
1280 error_setg(errp, "iSCSI: Failed to unmarshall readcapacity10 data.");
1281 } else {
1282 iscsilun->block_size = rc10->block_size;
1283 if (rc10->lba == 0) {
1284 /* blank disk loaded */
1285 iscsilun->num_blocks = 0;
1286 } else {
1287 iscsilun->num_blocks = rc10->lba + 1;
1291 break;
1292 default:
1293 return;
1295 } while (task != NULL && task->status == SCSI_STATUS_CHECK_CONDITION
1296 && task->sense.key == SCSI_SENSE_UNIT_ATTENTION
1297 && retries-- > 0);
1299 if (task == NULL || task->status != SCSI_STATUS_GOOD) {
1300 error_setg(errp, "iSCSI: failed to send readcapacity10/16 command");
1301 } else if (!iscsilun->block_size ||
1302 iscsilun->block_size % BDRV_SECTOR_SIZE) {
1303 error_setg(errp, "iSCSI: the target returned an invalid "
1304 "block size of %d.", iscsilun->block_size);
1306 if (task) {
1307 scsi_free_scsi_task(task);
1311 /* TODO Convert to fine grained options */
1312 static QemuOptsList runtime_opts = {
1313 .name = "iscsi",
1314 .head = QTAILQ_HEAD_INITIALIZER(runtime_opts.head),
1315 .desc = {
1317 .name = "filename",
1318 .type = QEMU_OPT_STRING,
1319 .help = "URL to the iscsi image",
1321 { /* end of list */ }
1325 static struct scsi_task *iscsi_do_inquiry(struct iscsi_context *iscsi, int lun,
1326 int evpd, int pc, void **inq, Error **errp)
1328 int full_size;
1329 struct scsi_task *task = NULL;
1330 task = iscsi_inquiry_sync(iscsi, lun, evpd, pc, 64);
1331 if (task == NULL || task->status != SCSI_STATUS_GOOD) {
1332 goto fail;
1334 full_size = scsi_datain_getfullsize(task);
1335 if (full_size > task->datain.size) {
1336 scsi_free_scsi_task(task);
1338 /* we need more data for the full list */
1339 task = iscsi_inquiry_sync(iscsi, lun, evpd, pc, full_size);
1340 if (task == NULL || task->status != SCSI_STATUS_GOOD) {
1341 goto fail;
1345 *inq = scsi_datain_unmarshall(task);
1346 if (*inq == NULL) {
1347 error_setg(errp, "iSCSI: failed to unmarshall inquiry datain blob");
1348 goto fail_with_err;
1351 return task;
1353 fail:
1354 error_setg(errp, "iSCSI: Inquiry command failed : %s",
1355 iscsi_get_error(iscsi));
1356 fail_with_err:
1357 if (task != NULL) {
1358 scsi_free_scsi_task(task);
1360 return NULL;
1363 static void iscsi_detach_aio_context(BlockDriverState *bs)
1365 IscsiLun *iscsilun = bs->opaque;
1367 aio_set_fd_handler(iscsilun->aio_context, iscsi_get_fd(iscsilun->iscsi),
1368 false, NULL, NULL, NULL);
1369 iscsilun->events = 0;
1371 if (iscsilun->nop_timer) {
1372 timer_del(iscsilun->nop_timer);
1373 timer_free(iscsilun->nop_timer);
1374 iscsilun->nop_timer = NULL;
1376 if (iscsilun->event_timer) {
1377 timer_del(iscsilun->event_timer);
1378 timer_free(iscsilun->event_timer);
1379 iscsilun->event_timer = NULL;
1383 static void iscsi_attach_aio_context(BlockDriverState *bs,
1384 AioContext *new_context)
1386 IscsiLun *iscsilun = bs->opaque;
1388 iscsilun->aio_context = new_context;
1389 iscsi_set_events(iscsilun);
1391 /* Set up a timer for sending out iSCSI NOPs */
1392 iscsilun->nop_timer = aio_timer_new(iscsilun->aio_context,
1393 QEMU_CLOCK_REALTIME, SCALE_MS,
1394 iscsi_nop_timed_event, iscsilun);
1395 timer_mod(iscsilun->nop_timer,
1396 qemu_clock_get_ms(QEMU_CLOCK_REALTIME) + NOP_INTERVAL);
1398 /* Set up a timer for periodic calls to iscsi_set_events and to
1399 * scan for command timeout */
1400 iscsilun->event_timer = aio_timer_new(iscsilun->aio_context,
1401 QEMU_CLOCK_REALTIME, SCALE_MS,
1402 iscsi_timed_check_events, iscsilun);
1403 timer_mod(iscsilun->event_timer,
1404 qemu_clock_get_ms(QEMU_CLOCK_REALTIME) + EVENT_INTERVAL);
1407 static void iscsi_modesense_sync(IscsiLun *iscsilun)
1409 struct scsi_task *task;
1410 struct scsi_mode_sense *ms = NULL;
1411 iscsilun->write_protected = false;
1412 iscsilun->dpofua = false;
1414 task = iscsi_modesense6_sync(iscsilun->iscsi, iscsilun->lun,
1415 1, SCSI_MODESENSE_PC_CURRENT,
1416 0x3F, 0, 255);
1417 if (task == NULL) {
1418 error_report("iSCSI: Failed to send MODE_SENSE(6) command: %s",
1419 iscsi_get_error(iscsilun->iscsi));
1420 goto out;
1423 if (task->status != SCSI_STATUS_GOOD) {
1424 error_report("iSCSI: Failed MODE_SENSE(6), LUN assumed writable");
1425 goto out;
1427 ms = scsi_datain_unmarshall(task);
1428 if (!ms) {
1429 error_report("iSCSI: Failed to unmarshall MODE_SENSE(6) data: %s",
1430 iscsi_get_error(iscsilun->iscsi));
1431 goto out;
1433 iscsilun->write_protected = ms->device_specific_parameter & 0x80;
1434 iscsilun->dpofua = ms->device_specific_parameter & 0x10;
1436 out:
1437 if (task) {
1438 scsi_free_scsi_task(task);
1443 * We support iscsi url's on the form
1444 * iscsi://[<username>%<password>@]<host>[:<port>]/<targetname>/<lun>
1446 static int iscsi_open(BlockDriverState *bs, QDict *options, int flags,
1447 Error **errp)
1449 IscsiLun *iscsilun = bs->opaque;
1450 struct iscsi_context *iscsi = NULL;
1451 struct iscsi_url *iscsi_url = NULL;
1452 struct scsi_task *task = NULL;
1453 struct scsi_inquiry_standard *inq = NULL;
1454 struct scsi_inquiry_supported_pages *inq_vpd;
1455 char *initiator_name = NULL;
1456 QemuOpts *opts;
1457 Error *local_err = NULL;
1458 const char *filename;
1459 int i, ret = 0, timeout = 0;
1461 opts = qemu_opts_create(&runtime_opts, NULL, 0, &error_abort);
1462 qemu_opts_absorb_qdict(opts, options, &local_err);
1463 if (local_err) {
1464 error_propagate(errp, local_err);
1465 ret = -EINVAL;
1466 goto out;
1469 filename = qemu_opt_get(opts, "filename");
1471 iscsi_url = iscsi_parse_full_url(iscsi, filename);
1472 if (iscsi_url == NULL) {
1473 error_setg(errp, "Failed to parse URL : %s", filename);
1474 ret = -EINVAL;
1475 goto out;
1478 memset(iscsilun, 0, sizeof(IscsiLun));
1480 initiator_name = parse_initiator_name(iscsi_url->target);
1482 iscsi = iscsi_create_context(initiator_name);
1483 if (iscsi == NULL) {
1484 error_setg(errp, "iSCSI: Failed to create iSCSI context.");
1485 ret = -ENOMEM;
1486 goto out;
1489 if (iscsi_set_targetname(iscsi, iscsi_url->target)) {
1490 error_setg(errp, "iSCSI: Failed to set target name.");
1491 ret = -EINVAL;
1492 goto out;
1495 if (iscsi_url->user[0] != '\0') {
1496 ret = iscsi_set_initiator_username_pwd(iscsi, iscsi_url->user,
1497 iscsi_url->passwd);
1498 if (ret != 0) {
1499 error_setg(errp, "Failed to set initiator username and password");
1500 ret = -EINVAL;
1501 goto out;
1505 /* check if we got CHAP username/password via the options */
1506 parse_chap(iscsi, iscsi_url->target, &local_err);
1507 if (local_err != NULL) {
1508 error_propagate(errp, local_err);
1509 ret = -EINVAL;
1510 goto out;
1513 if (iscsi_set_session_type(iscsi, ISCSI_SESSION_NORMAL) != 0) {
1514 error_setg(errp, "iSCSI: Failed to set session type to normal.");
1515 ret = -EINVAL;
1516 goto out;
1519 iscsi_set_header_digest(iscsi, ISCSI_HEADER_DIGEST_NONE_CRC32C);
1521 /* check if we got HEADER_DIGEST via the options */
1522 parse_header_digest(iscsi, iscsi_url->target, &local_err);
1523 if (local_err != NULL) {
1524 error_propagate(errp, local_err);
1525 ret = -EINVAL;
1526 goto out;
1529 /* timeout handling is broken in libiscsi before 1.15.0 */
1530 timeout = parse_timeout(iscsi_url->target);
1531 #if defined(LIBISCSI_API_VERSION) && LIBISCSI_API_VERSION >= 20150621
1532 iscsi_set_timeout(iscsi, timeout);
1533 #else
1534 if (timeout) {
1535 error_report("iSCSI: ignoring timeout value for libiscsi <1.15.0");
1537 #endif
1539 if (iscsi_full_connect_sync(iscsi, iscsi_url->portal, iscsi_url->lun) != 0) {
1540 error_setg(errp, "iSCSI: Failed to connect to LUN : %s",
1541 iscsi_get_error(iscsi));
1542 ret = -EINVAL;
1543 goto out;
1546 iscsilun->iscsi = iscsi;
1547 iscsilun->aio_context = bdrv_get_aio_context(bs);
1548 iscsilun->lun = iscsi_url->lun;
1549 iscsilun->has_write_same = true;
1551 task = iscsi_do_inquiry(iscsilun->iscsi, iscsilun->lun, 0, 0,
1552 (void **) &inq, errp);
1553 if (task == NULL) {
1554 ret = -EINVAL;
1555 goto out;
1557 iscsilun->type = inq->periperal_device_type;
1558 scsi_free_scsi_task(task);
1559 task = NULL;
1561 iscsi_modesense_sync(iscsilun);
1563 /* Check the write protect flag of the LUN if we want to write */
1564 if (iscsilun->type == TYPE_DISK && (flags & BDRV_O_RDWR) &&
1565 iscsilun->write_protected) {
1566 error_setg(errp, "Cannot open a write protected LUN as read-write");
1567 ret = -EACCES;
1568 goto out;
1571 iscsi_readcapacity_sync(iscsilun, &local_err);
1572 if (local_err != NULL) {
1573 error_propagate(errp, local_err);
1574 ret = -EINVAL;
1575 goto out;
1577 bs->total_sectors = sector_lun2qemu(iscsilun->num_blocks, iscsilun);
1578 bs->request_alignment = iscsilun->block_size;
1580 /* We don't have any emulation for devices other than disks and CD-ROMs, so
1581 * this must be sg ioctl compatible. We force it to be sg, otherwise qemu
1582 * will try to read from the device to guess the image format.
1584 if (iscsilun->type != TYPE_DISK && iscsilun->type != TYPE_ROM) {
1585 bs->sg = 1;
1588 task = iscsi_do_inquiry(iscsilun->iscsi, iscsilun->lun, 1,
1589 SCSI_INQUIRY_PAGECODE_SUPPORTED_VPD_PAGES,
1590 (void **) &inq_vpd, errp);
1591 if (task == NULL) {
1592 ret = -EINVAL;
1593 goto out;
1595 for (i = 0; i < inq_vpd->num_pages; i++) {
1596 struct scsi_task *inq_task;
1597 struct scsi_inquiry_logical_block_provisioning *inq_lbp;
1598 struct scsi_inquiry_block_limits *inq_bl;
1599 switch (inq_vpd->pages[i]) {
1600 case SCSI_INQUIRY_PAGECODE_LOGICAL_BLOCK_PROVISIONING:
1601 inq_task = iscsi_do_inquiry(iscsilun->iscsi, iscsilun->lun, 1,
1602 SCSI_INQUIRY_PAGECODE_LOGICAL_BLOCK_PROVISIONING,
1603 (void **) &inq_lbp, errp);
1604 if (inq_task == NULL) {
1605 ret = -EINVAL;
1606 goto out;
1608 memcpy(&iscsilun->lbp, inq_lbp,
1609 sizeof(struct scsi_inquiry_logical_block_provisioning));
1610 scsi_free_scsi_task(inq_task);
1611 break;
1612 case SCSI_INQUIRY_PAGECODE_BLOCK_LIMITS:
1613 inq_task = iscsi_do_inquiry(iscsilun->iscsi, iscsilun->lun, 1,
1614 SCSI_INQUIRY_PAGECODE_BLOCK_LIMITS,
1615 (void **) &inq_bl, errp);
1616 if (inq_task == NULL) {
1617 ret = -EINVAL;
1618 goto out;
1620 memcpy(&iscsilun->bl, inq_bl,
1621 sizeof(struct scsi_inquiry_block_limits));
1622 scsi_free_scsi_task(inq_task);
1623 break;
1624 default:
1625 break;
1628 scsi_free_scsi_task(task);
1629 task = NULL;
1631 iscsi_attach_aio_context(bs, iscsilun->aio_context);
1633 /* Guess the internal cluster (page) size of the iscsi target by the means
1634 * of opt_unmap_gran. Transfer the unmap granularity only if it has a
1635 * reasonable size */
1636 if (iscsilun->bl.opt_unmap_gran * iscsilun->block_size >= 4 * 1024 &&
1637 iscsilun->bl.opt_unmap_gran * iscsilun->block_size <= 16 * 1024 * 1024) {
1638 iscsilun->cluster_sectors = (iscsilun->bl.opt_unmap_gran *
1639 iscsilun->block_size) >> BDRV_SECTOR_BITS;
1640 if (iscsilun->lbprz) {
1641 iscsilun->allocationmap = iscsi_allocationmap_init(iscsilun);
1642 if (iscsilun->allocationmap == NULL) {
1643 ret = -ENOMEM;
1648 out:
1649 qemu_opts_del(opts);
1650 g_free(initiator_name);
1651 if (iscsi_url != NULL) {
1652 iscsi_destroy_url(iscsi_url);
1654 if (task != NULL) {
1655 scsi_free_scsi_task(task);
1658 if (ret) {
1659 if (iscsi != NULL) {
1660 if (iscsi_is_logged_in(iscsi)) {
1661 iscsi_logout_sync(iscsi);
1663 iscsi_destroy_context(iscsi);
1665 memset(iscsilun, 0, sizeof(IscsiLun));
1667 return ret;
1670 static void iscsi_close(BlockDriverState *bs)
1672 IscsiLun *iscsilun = bs->opaque;
1673 struct iscsi_context *iscsi = iscsilun->iscsi;
1675 iscsi_detach_aio_context(bs);
1676 if (iscsi_is_logged_in(iscsi)) {
1677 iscsi_logout_sync(iscsi);
1679 iscsi_destroy_context(iscsi);
1680 g_free(iscsilun->zeroblock);
1681 g_free(iscsilun->allocationmap);
1682 memset(iscsilun, 0, sizeof(IscsiLun));
1685 static int sector_limits_lun2qemu(int64_t sector, IscsiLun *iscsilun)
1687 return MIN(sector_lun2qemu(sector, iscsilun), INT_MAX / 2 + 1);
1690 static void iscsi_refresh_limits(BlockDriverState *bs, Error **errp)
1692 /* We don't actually refresh here, but just return data queried in
1693 * iscsi_open(): iscsi targets don't change their limits. */
1695 IscsiLun *iscsilun = bs->opaque;
1696 uint32_t max_xfer_len = iscsilun->use_16_for_rw ? 0xffffffff : 0xffff;
1698 if (iscsilun->bl.max_xfer_len) {
1699 max_xfer_len = MIN(max_xfer_len, iscsilun->bl.max_xfer_len);
1702 bs->bl.max_transfer_length = sector_limits_lun2qemu(max_xfer_len, iscsilun);
1704 if (iscsilun->lbp.lbpu) {
1705 if (iscsilun->bl.max_unmap < 0xffffffff) {
1706 bs->bl.max_discard =
1707 sector_limits_lun2qemu(iscsilun->bl.max_unmap, iscsilun);
1709 bs->bl.discard_alignment =
1710 sector_limits_lun2qemu(iscsilun->bl.opt_unmap_gran, iscsilun);
1713 if (iscsilun->bl.max_ws_len < 0xffffffff) {
1714 bs->bl.max_write_zeroes =
1715 sector_limits_lun2qemu(iscsilun->bl.max_ws_len, iscsilun);
1717 if (iscsilun->lbp.lbpws) {
1718 bs->bl.write_zeroes_alignment =
1719 sector_limits_lun2qemu(iscsilun->bl.opt_unmap_gran, iscsilun);
1721 bs->bl.opt_transfer_length =
1722 sector_limits_lun2qemu(iscsilun->bl.opt_xfer_len, iscsilun);
1725 /* Note that this will not re-establish a connection with an iSCSI target - it
1726 * is effectively a NOP. */
1727 static int iscsi_reopen_prepare(BDRVReopenState *state,
1728 BlockReopenQueue *queue, Error **errp)
1730 IscsiLun *iscsilun = state->bs->opaque;
1732 if (state->flags & BDRV_O_RDWR && iscsilun->write_protected) {
1733 error_setg(errp, "Cannot open a write protected LUN as read-write");
1734 return -EACCES;
1736 return 0;
1739 static int iscsi_truncate(BlockDriverState *bs, int64_t offset)
1741 IscsiLun *iscsilun = bs->opaque;
1742 Error *local_err = NULL;
1744 if (iscsilun->type != TYPE_DISK) {
1745 return -ENOTSUP;
1748 iscsi_readcapacity_sync(iscsilun, &local_err);
1749 if (local_err != NULL) {
1750 error_free(local_err);
1751 return -EIO;
1754 if (offset > iscsi_getlength(bs)) {
1755 return -EINVAL;
1758 if (iscsilun->allocationmap != NULL) {
1759 g_free(iscsilun->allocationmap);
1760 iscsilun->allocationmap = iscsi_allocationmap_init(iscsilun);
1763 return 0;
1766 static int iscsi_create(const char *filename, QemuOpts *opts, Error **errp)
1768 int ret = 0;
1769 int64_t total_size = 0;
1770 BlockDriverState *bs;
1771 IscsiLun *iscsilun = NULL;
1772 QDict *bs_options;
1774 bs = bdrv_new();
1776 /* Read out options */
1777 total_size = DIV_ROUND_UP(qemu_opt_get_size_del(opts, BLOCK_OPT_SIZE, 0),
1778 BDRV_SECTOR_SIZE);
1779 bs->opaque = g_new0(struct IscsiLun, 1);
1780 iscsilun = bs->opaque;
1782 bs_options = qdict_new();
1783 qdict_put(bs_options, "filename", qstring_from_str(filename));
1784 ret = iscsi_open(bs, bs_options, 0, NULL);
1785 QDECREF(bs_options);
1787 if (ret != 0) {
1788 goto out;
1790 iscsi_detach_aio_context(bs);
1791 if (iscsilun->type != TYPE_DISK) {
1792 ret = -ENODEV;
1793 goto out;
1795 if (bs->total_sectors < total_size) {
1796 ret = -ENOSPC;
1797 goto out;
1800 ret = 0;
1801 out:
1802 if (iscsilun->iscsi != NULL) {
1803 iscsi_destroy_context(iscsilun->iscsi);
1805 g_free(bs->opaque);
1806 bs->opaque = NULL;
1807 bdrv_unref(bs);
1808 return ret;
1811 static int iscsi_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
1813 IscsiLun *iscsilun = bs->opaque;
1814 bdi->unallocated_blocks_are_zero = iscsilun->lbprz;
1815 bdi->can_write_zeroes_with_unmap = iscsilun->lbprz && iscsilun->lbp.lbpws;
1816 bdi->cluster_size = iscsilun->cluster_sectors * BDRV_SECTOR_SIZE;
1817 return 0;
1820 static QemuOptsList iscsi_create_opts = {
1821 .name = "iscsi-create-opts",
1822 .head = QTAILQ_HEAD_INITIALIZER(iscsi_create_opts.head),
1823 .desc = {
1825 .name = BLOCK_OPT_SIZE,
1826 .type = QEMU_OPT_SIZE,
1827 .help = "Virtual disk size"
1829 { /* end of list */ }
1833 static BlockDriver bdrv_iscsi = {
1834 .format_name = "iscsi",
1835 .protocol_name = "iscsi",
1837 .instance_size = sizeof(IscsiLun),
1838 .bdrv_needs_filename = true,
1839 .bdrv_file_open = iscsi_open,
1840 .bdrv_close = iscsi_close,
1841 .bdrv_create = iscsi_create,
1842 .create_opts = &iscsi_create_opts,
1843 .bdrv_reopen_prepare = iscsi_reopen_prepare,
1845 .bdrv_getlength = iscsi_getlength,
1846 .bdrv_get_info = iscsi_get_info,
1847 .bdrv_truncate = iscsi_truncate,
1848 .bdrv_refresh_limits = iscsi_refresh_limits,
1850 .bdrv_co_get_block_status = iscsi_co_get_block_status,
1851 .bdrv_co_discard = iscsi_co_discard,
1852 .bdrv_co_write_zeroes = iscsi_co_write_zeroes,
1853 .bdrv_co_readv = iscsi_co_readv,
1854 .bdrv_co_writev = iscsi_co_writev,
1855 .bdrv_co_flush_to_disk = iscsi_co_flush,
1857 #ifdef __linux__
1858 .bdrv_aio_ioctl = iscsi_aio_ioctl,
1859 #endif
1861 .bdrv_detach_aio_context = iscsi_detach_aio_context,
1862 .bdrv_attach_aio_context = iscsi_attach_aio_context,
1865 static QemuOptsList qemu_iscsi_opts = {
1866 .name = "iscsi",
1867 .head = QTAILQ_HEAD_INITIALIZER(qemu_iscsi_opts.head),
1868 .desc = {
1870 .name = "user",
1871 .type = QEMU_OPT_STRING,
1872 .help = "username for CHAP authentication to target",
1874 .name = "password",
1875 .type = QEMU_OPT_STRING,
1876 .help = "password for CHAP authentication to target",
1878 .name = "password-secret",
1879 .type = QEMU_OPT_STRING,
1880 .help = "ID of the secret providing password for CHAP "
1881 "authentication to target",
1883 .name = "header-digest",
1884 .type = QEMU_OPT_STRING,
1885 .help = "HeaderDigest setting. "
1886 "{CRC32C|CRC32C-NONE|NONE-CRC32C|NONE}",
1888 .name = "initiator-name",
1889 .type = QEMU_OPT_STRING,
1890 .help = "Initiator iqn name to use when connecting",
1892 .name = "timeout",
1893 .type = QEMU_OPT_NUMBER,
1894 .help = "Request timeout in seconds (default 0 = no timeout)",
1896 { /* end of list */ }
1900 static void iscsi_block_init(void)
1902 bdrv_register(&bdrv_iscsi);
1903 qemu_add_opts(&qemu_iscsi_opts);
1906 block_init(iscsi_block_init);