1 /* BlockDriver implementation for "raw" format driver
3 * Copyright (C) 2010-2016 Red Hat, Inc.
4 * Copyright (C) 2010, Blue Swirl <blauwirbel@gmail.com>
5 * Copyright (C) 2009, Anthony Liguori <aliguori@us.ibm.com>
8 * Laszlo Ersek <lersek@redhat.com>
10 * Permission is hereby granted, free of charge, to any person obtaining a copy
11 * of this software and associated documentation files (the "Software"), to
12 * deal in the Software without restriction, including without limitation the
13 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
14 * sell copies of the Software, and to permit persons to whom the Software is
15 * furnished to do so, subject to the following conditions:
17 * The above copyright notice and this permission notice shall be included in
18 * all copies or substantial portions of the Software.
20 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
21 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
23 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
24 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
25 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
29 #include "qemu/osdep.h"
30 #include "block/block-io.h"
31 #include "block/block_int.h"
32 #include "qapi/error.h"
33 #include "qemu/module.h"
34 #include "qemu/option.h"
35 #include "qemu/memalign.h"
37 typedef struct BDRVRawState
{
/* NULL-terminated list of option names, referenced by bdrv_raw.mutable_opts. */
static const char *const mutable_opts[] = {
    "offset",
    "size",
    NULL,
};
/*
 * Runtime option list for the raw driver; raw_read_options() creates its
 * QemuOpts from this list.  The two visible entries are size-typed options
 * described as the start offset of the image within the disk and the
 * virtual disk size.
 *
 * NOTE(review): the entries' .name fields and the list terminator are not
 * visible in this copy; raw_read_options() looks the values up as "offset"
 * and "size" - confirm the names match.
 */
45 static QemuOptsList raw_runtime_opts
= {
47 .head
= QTAILQ_HEAD_INITIALIZER(raw_runtime_opts
.head
),
51 .type
= QEMU_OPT_SIZE
,
52 .help
= "offset in the disk where the image starts",
56 .type
= QEMU_OPT_SIZE
,
57 .help
= "virtual disk size",
/*
 * Creation-time option list, named "raw-create-opts" and referenced by
 * bdrv_raw.create_opts.  The visible entry is BLOCK_OPT_SIZE, a size-typed
 * option giving the virtual disk size.
 *
 * NOTE(review): the end of the initializer is not visible in this copy.
 */
63 static QemuOptsList raw_create_opts
= {
64 .name
= "raw-create-opts",
65 .head
= QTAILQ_HEAD_INITIALIZER(raw_create_opts
.head
),
68 .name
= BLOCK_OPT_SIZE
,
69 .type
= QEMU_OPT_SIZE
,
70 .help
= "Virtual disk size"
/*
 * Parse the raw driver's runtime options out of @options.
 *
 * A QemuOpts is created from raw_runtime_opts and @options is absorbed into
 * it.  The "offset" and "size" values are then read, each defaulting to 0,
 * into *@offset and *@size, and *@has_size records whether a "size" option
 * was present at all.
 *
 * NOTE(review): the return statements and any release of the QemuOpts are
 * not visible in this copy of the function.
 */
76 static int raw_read_options(QDict
*options
, uint64_t *offset
, bool *has_size
,
77 uint64_t *size
, Error
**errp
)
79 QemuOpts
*opts
= NULL
;
82 opts
= qemu_opts_create(&raw_runtime_opts
, NULL
, 0, &error_abort
);
83 if (!qemu_opts_absorb_qdict(opts
, options
, errp
)) {
88 *offset
= qemu_opt_get_size(opts
, "offset", 0);
89 *has_size
= qemu_opt_find(opts
, "size");
90 *size
= qemu_opt_get_size(opts
, "size", 0);
98 static int GRAPH_RDLOCK
99 raw_apply_options(BlockDriverState
*bs
, BDRVRawState
*s
, uint64_t offset
,
100 bool has_size
, uint64_t size
, Error
**errp
)
102 int64_t real_size
= 0;
104 real_size
= bdrv_getlength(bs
->file
->bs
);
106 error_setg_errno(errp
, -real_size
, "Could not get image size");
110 /* Check size and offset */
111 if (offset
> real_size
) {
112 error_setg(errp
, "Offset (%" PRIu64
") cannot be greater than "
113 "size of the containing file (%" PRId64
")",
114 s
->offset
, real_size
);
118 if (has_size
&& (real_size
- offset
) < size
) {
119 error_setg(errp
, "The sum of offset (%" PRIu64
") and size "
120 "(%" PRIu64
") has to be smaller or equal to the "
121 " actual size of the containing file (%" PRId64
")",
122 s
->offset
, s
->size
, real_size
);
126 /* Make sure size is multiple of BDRV_SECTOR_SIZE to prevent rounding
127 * up and leaking out of the specified area. */
128 if (has_size
&& !QEMU_IS_ALIGNED(size
, BDRV_SECTOR_SIZE
)) {
129 error_setg(errp
, "Specified size is not multiple of %llu",
135 s
->has_size
= has_size
;
136 s
->size
= has_size
? size
: real_size
- offset
;
/*
 * Reopen "prepare" callback, installed as bdrv_raw.bdrv_reopen_prepare.
 *
 * Allocates a zero-filled BDRVRawState into reopen_state->opaque, parses the
 * new options with raw_read_options() and validates/applies them to that
 * staged state with raw_apply_options().  The staged state is later
 * consumed by raw_reopen_commit() or released by raw_reopen_abort().
 *
 * NOTE(review): declarations of 'ret'/'has_size', the error checks between
 * the two calls and the final return are not visible in this copy.
 */
141 static int raw_reopen_prepare(BDRVReopenState
*reopen_state
,
142 BlockReopenQueue
*queue
, Error
**errp
)
145 uint64_t offset
, size
;
149 GRAPH_RDLOCK_GUARD_MAINLOOP();
151 assert(reopen_state
!= NULL
);
152 assert(reopen_state
->bs
!= NULL
);
154 reopen_state
->opaque
= g_new0(BDRVRawState
, 1);
156 ret
= raw_read_options(reopen_state
->options
, &offset
, &has_size
, &size
,
162 ret
= raw_apply_options(reopen_state
->bs
, reopen_state
->opaque
,
163 offset
, has_size
, size
, errp
);
171 static void raw_reopen_commit(BDRVReopenState
*state
)
173 BDRVRawState
*new_s
= state
->opaque
;
174 BDRVRawState
*s
= state
->bs
->opaque
;
176 memcpy(s
, new_s
, sizeof(BDRVRawState
));
178 g_free(state
->opaque
);
179 state
->opaque
= NULL
;
182 static void raw_reopen_abort(BDRVReopenState
*state
)
184 g_free(state
->opaque
);
185 state
->opaque
= NULL
;
/*
 * Translate a request offset on the raw node into an offset on its child.
 *
 * When a size limit is set (s->has_size), a request that starts beyond
 * s->size or extends past it is refused: -ENOSPC for writes, -EINVAL for
 * reads.  *@offset is also compared against INT64_MAX - s->offset before
 * s->offset is added to it in place.
 *
 * NOTE(review): the statement guarded by the INT64_MAX comparison and the
 * success return are not visible in this copy.
 */
188 /* Check and adjust the offset, against 'offset' and 'size' options. */
189 static inline int raw_adjust_offset(BlockDriverState
*bs
, int64_t *offset
,
190 int64_t bytes
, bool is_write
)
192 BDRVRawState
*s
= bs
->opaque
;
194 if (s
->has_size
&& (*offset
> s
->size
|| bytes
> (s
->size
- *offset
))) {
195 /* There's not enough space for the write, or the read request is
196 * out-of-range. Don't read/write anything to prevent leaking out of
197 * the size specified in options. */
198 return is_write
? -ENOSPC
: -EINVAL
;
201 if (*offset
> INT64_MAX
- s
->offset
) {
204 *offset
+= s
->offset
;
/*
 * Read callback: translate @offset with raw_adjust_offset() (as a read),
 * emit the BLKDBG_READ_AIO debug event on the child and forward the request
 * to bs->file with the same @bytes, @qiov and @flags.
 *
 * NOTE(review): the handling of raw_adjust_offset()'s result is not visible
 * in this copy.
 */
209 static int coroutine_fn GRAPH_RDLOCK
210 raw_co_preadv(BlockDriverState
*bs
, int64_t offset
, int64_t bytes
,
211 QEMUIOVector
*qiov
, BdrvRequestFlags flags
)
215 ret
= raw_adjust_offset(bs
, &offset
, bytes
, false);
220 BLKDBG_CO_EVENT(bs
->file
, BLKDBG_READ_AIO
);
221 return bdrv_co_preadv(bs
->file
, offset
, bytes
, qiov
, flags
);
/*
 * Write callback.
 *
 * For a node whose format was probed, a non-empty write that touches the
 * probe area (offset < BLOCK_PROBE_BUF_SIZE) is inspected first: the first
 * 512 bytes of @qiov are copied into a freshly allocated aligned buffer,
 * bdrv_probe_all() is run on that copy and the detected driver is compared
 * with bs->drv.  The request is then rebuilt in local_qiov from the checked
 * copy plus the remainder of @qiov, and BDRV_REQ_REGISTERED_BUF is cleared
 * from @flags.
 *
 * Afterwards the offset is translated with raw_adjust_offset() (as a
 * write), the BLKDBG_WRITE_AIO event is emitted and the data is written to
 * bs->file.  local_qiov is destroyed again if it was used.
 *
 * NOTE(review): the error paths, the action taken when the probed driver
 * differs, the reassignment of 'qiov' and the release of 'buf' are not
 * visible in this copy.
 */
224 static int coroutine_fn GRAPH_RDLOCK
225 raw_co_pwritev(BlockDriverState
*bs
, int64_t offset
, int64_t bytes
,
226 QEMUIOVector
*qiov
, BdrvRequestFlags flags
)
230 QEMUIOVector local_qiov
;
233 if (bs
->probed
&& offset
< BLOCK_PROBE_BUF_SIZE
&& bytes
) {
234 /* Handling partial writes would be a pain - so we just
235 * require that guests have 512-byte request alignment if
236 * probing occurred */
237 QEMU_BUILD_BUG_ON(BLOCK_PROBE_BUF_SIZE
!= 512);
238 QEMU_BUILD_BUG_ON(BDRV_SECTOR_SIZE
!= 512);
239 assert(offset
== 0 && bytes
>= BLOCK_PROBE_BUF_SIZE
);
241 buf
= qemu_try_blockalign(bs
->file
->bs
, 512);
247 ret
= qemu_iovec_to_buf(qiov
, 0, buf
, 512);
253 drv
= bdrv_probe_all(buf
, 512, NULL
);
254 if (drv
!= bs
->drv
) {
259 /* Use the checked buffer, a malicious guest might be overwriting its
260 * original buffer in the background. */
261 qemu_iovec_init(&local_qiov
, qiov
->niov
+ 1);
262 qemu_iovec_add(&local_qiov
, buf
, 512);
263 qemu_iovec_concat(&local_qiov
, qiov
, 512, qiov
->size
- 512);
266 flags
&= ~BDRV_REQ_REGISTERED_BUF
;
269 ret
= raw_adjust_offset(bs
, &offset
, bytes
, true);
274 BLKDBG_CO_EVENT(bs
->file
, BLKDBG_WRITE_AIO
);
275 ret
= bdrv_co_pwritev(bs
->file
, offset
, bytes
, qiov
, flags
);
278 if (qiov
== &local_qiov
) {
279 qemu_iovec_destroy(&local_qiov
);
/*
 * Block-status callback: report that the data lives in the child node.
 *
 * *@file is set to bs->file->bs, *@map to @offset shifted by s->offset, and
 * the result is BDRV_BLOCK_RAW | BDRV_BLOCK_OFFSET_VALID.
 *
 * NOTE(review): no assignment to *@pnum is visible in this copy - confirm
 * against the full file.
 */
285 static int coroutine_fn GRAPH_RDLOCK
286 raw_co_block_status(BlockDriverState
*bs
, bool want_zero
, int64_t offset
,
287 int64_t bytes
, int64_t *pnum
, int64_t *map
,
288 BlockDriverState
**file
)
290 BDRVRawState
*s
= bs
->opaque
;
292 *file
= bs
->file
->bs
;
293 *map
= offset
+ s
->offset
;
294 return BDRV_BLOCK_RAW
| BDRV_BLOCK_OFFSET_VALID
;
/*
 * Write-zeroes callback: translate @offset with raw_adjust_offset() (as a
 * write) and forward the request to bs->file with the same @bytes/@flags.
 *
 * NOTE(review): the handling of raw_adjust_offset()'s result is not visible
 * in this copy.
 */
297 static int coroutine_fn GRAPH_RDLOCK
298 raw_co_pwrite_zeroes(BlockDriverState
*bs
, int64_t offset
, int64_t bytes
,
299 BdrvRequestFlags flags
)
303 ret
= raw_adjust_offset(bs
, &offset
, bytes
, true);
307 return bdrv_co_pwrite_zeroes(bs
->file
, offset
, bytes
, flags
);
/*
 * Discard callback: translate @offset with raw_adjust_offset() (treated as
 * a write) and forward the discard to bs->file.
 *
 * NOTE(review): the handling of raw_adjust_offset()'s result is not visible
 * in this copy.
 */
310 static int coroutine_fn GRAPH_RDLOCK
311 raw_co_pdiscard(BlockDriverState
*bs
, int64_t offset
, int64_t bytes
)
315 ret
= raw_adjust_offset(bs
, &offset
, bytes
, true);
319 return bdrv_co_pdiscard(bs
->file
, offset
, bytes
);
322 static int coroutine_fn GRAPH_RDLOCK
323 raw_co_zone_report(BlockDriverState
*bs
, int64_t offset
,
324 unsigned int *nr_zones
,
325 BlockZoneDescriptor
*zones
)
327 return bdrv_co_zone_report(bs
->file
->bs
, offset
, nr_zones
, zones
);
330 static int coroutine_fn GRAPH_RDLOCK
331 raw_co_zone_mgmt(BlockDriverState
*bs
, BlockZoneOp op
,
332 int64_t offset
, int64_t len
)
334 return bdrv_co_zone_mgmt(bs
->file
->bs
, op
, offset
, len
);
337 static int coroutine_fn GRAPH_RDLOCK
338 raw_co_zone_append(BlockDriverState
*bs
,int64_t *offset
, QEMUIOVector
*qiov
,
339 BdrvRequestFlags flags
)
341 return bdrv_co_zone_append(bs
->file
->bs
, offset
, qiov
, flags
);
/*
 * Length callback: query the child's current length and refresh s->size
 * from it.
 *
 * The child length is compared against s->offset.  Two updates of s->size
 * are visible: one clamps the existing size to at most len - s->offset, the
 * other sets it to exactly len - s->offset.
 *
 * NOTE(review): the conditions selecting between the two updates, the
 * error handling and the returned value are not visible in this copy.
 */
344 static int64_t coroutine_fn GRAPH_RDLOCK
345 raw_co_getlength(BlockDriverState
*bs
)
348 BDRVRawState
*s
= bs
->opaque
;
350 /* Update size. It should not change unless the file was externally
352 len
= bdrv_co_getlength(bs
->file
->bs
);
357 if (len
< s
->offset
) {
361 /* Try to honour the size */
362 s
->size
= MIN(s
->size
, len
- s
->offset
);
364 s
->size
= len
- s
->offset
;
/*
 * Measure callback: compute the space a raw image would need.
 *
 * Two sources for the required size are visible: the length of @in_bs
 * (with "Unable to get image size" reported on failure) and the
 * BLOCK_OPT_SIZE option from @opts, rounded up.  The result is returned in
 * a newly allocated BlockMeasureInfo whose 'required' and 'fully_allocated'
 * fields are both set to that size.
 *
 * NOTE(review): the rest of the signature, the condition choosing between
 * the two sources and the rounding granularity are not visible in this
 * copy.
 */
371 static BlockMeasureInfo
*raw_measure(QemuOpts
*opts
, BlockDriverState
*in_bs
,
374 BlockMeasureInfo
*info
;
378 required
= bdrv_getlength(in_bs
);
380 error_setg_errno(errp
, -required
, "Unable to get image size");
384 required
= ROUND_UP(qemu_opt_get_size_del(opts
, BLOCK_OPT_SIZE
, 0),
388 info
= g_new0(BlockMeasureInfo
, 1);
389 info
->required
= required
;
391 /* Unallocated sectors count towards the file size in raw images */
392 info
->fully_allocated
= info
->required
;
396 static int coroutine_fn GRAPH_RDLOCK
397 raw_co_get_info(BlockDriverState
*bs
, BlockDriverInfo
*bdi
)
399 return bdrv_co_get_info(bs
->file
->bs
, bdi
);
/*
 * Refresh-limits callback: inherit has_variable_length from the child node
 * and set the request alignment to BDRV_SECTOR_SIZE.
 *
 * NOTE(review): per the comment below, the alignment is meant for probed
 * images; the condition guarding that assignment is not visible in this
 * copy.
 */
402 static void GRAPH_RDLOCK
raw_refresh_limits(BlockDriverState
*bs
, Error
**errp
)
404 bs
->bl
.has_variable_length
= bs
->file
->bs
->bl
.has_variable_length
;
407 /* To make it easier to protect the first sector, any probed
408 * image is restricted to read-modify-write on sub-sector
410 bs
->bl
.request_alignment
= BDRV_SECTOR_SIZE
;
/*
 * Truncate callback.
 *
 * Two rejections are visible: "Cannot resize fixed-size raw disks", and
 * "Disk size too large for the chosen offset" when @offset exceeds
 * INT64_MAX - s->offset.  Otherwise the request is forwarded to bs->file
 * with the same @exact, @prealloc and @flags.
 *
 * NOTE(review): the condition for the first rejection, the returned error
 * codes and any adjustment of @offset by s->offset before forwarding are
 * not visible in this copy.
 */
414 static int coroutine_fn GRAPH_RDLOCK
415 raw_co_truncate(BlockDriverState
*bs
, int64_t offset
, bool exact
,
416 PreallocMode prealloc
, BdrvRequestFlags flags
, Error
**errp
)
418 BDRVRawState
*s
= bs
->opaque
;
421 error_setg(errp
, "Cannot resize fixed-size raw disks");
425 if (INT64_MAX
- offset
< s
->offset
) {
426 error_setg(errp
, "Disk size too large for the chosen offset");
432 return bdrv_co_truncate(bs
->file
, offset
, exact
, prealloc
, flags
, errp
);
435 static void coroutine_fn GRAPH_RDLOCK
436 raw_co_eject(BlockDriverState
*bs
, bool eject_flag
)
438 bdrv_co_eject(bs
->file
->bs
, eject_flag
);
441 static void coroutine_fn GRAPH_RDLOCK
442 raw_co_lock_medium(BlockDriverState
*bs
, bool locked
)
444 bdrv_co_lock_medium(bs
->file
->bs
, locked
);
/*
 * ioctl callback: nodes configured with an offset or an explicit size are
 * special-cased; otherwise @req and @buf are forwarded to the node behind
 * bs->file.
 *
 * NOTE(review): what happens in the offset/size case (presumably an error
 * return) is not visible in this copy.
 */
447 static int coroutine_fn GRAPH_RDLOCK
448 raw_co_ioctl(BlockDriverState
*bs
, unsigned long int req
, void *buf
)
450 BDRVRawState
*s
= bs
->opaque
;
451 if (s
->offset
|| s
->has_size
) {
454 return bdrv_co_ioctl(bs
->file
->bs
, req
, buf
);
457 static int GRAPH_RDLOCK
raw_has_zero_init(BlockDriverState
*bs
)
459 return bdrv_has_zero_init(bs
->file
->bs
);
462 static int coroutine_fn GRAPH_UNLOCKED
463 raw_co_create_opts(BlockDriver
*drv
, const char *filename
,
464 QemuOpts
*opts
, Error
**errp
)
466 return bdrv_co_create_file(filename
, opts
, errp
);
/*
 * Open callback for the raw driver.
 *
 * Visible steps, in order:
 *  - parse "offset"/"size" with raw_read_options();
 *  - pick the role of the "file" child: DATA | PRIMARY when an offset or a
 *    size limit is in effect, FILTERED | PRIMARY otherwise;
 *  - open the "file" child with that role;
 *  - inherit bs->sg from the child and restrict the supported write, zero
 *    and truncate flags to what the child supports (plus
 *    BDRV_REQ_WRITE_UNCHANGED for writes and zeroes);
 *  - for a probed, writable image, refresh the child's filename and print a
 *    warning about format probing;
 *  - validate and store the options with raw_apply_options();
 *  - reject offset/size on SCSI generic devices.
 *
 * NOTE(review): the rest of the signature, the error checks after each step
 * and the return statements are not visible in this copy.
 */
469 static int raw_open(BlockDriverState
*bs
, QDict
*options
, int flags
,
472 BDRVRawState
*s
= bs
->opaque
;
474 uint64_t offset
, size
;
475 BdrvChildRole file_role
;
480 ret
= raw_read_options(options
, &offset
, &has_size
, &size
, errp
);
486 * Without offset and a size limit, this driver behaves very much
487 * like a filter. With any such limit, it does not.
489 if (offset
|| has_size
) {
490 file_role
= BDRV_CHILD_DATA
| BDRV_CHILD_PRIMARY
;
492 file_role
= BDRV_CHILD_FILTERED
| BDRV_CHILD_PRIMARY
;
495 bdrv_open_child(NULL
, options
, "file", bs
, &child_of_bds
,
496 file_role
, false, errp
);
498 GRAPH_RDLOCK_GUARD_MAINLOOP();
503 bs
->sg
= bdrv_is_sg(bs
->file
->bs
);
504 bs
->supported_write_flags
= BDRV_REQ_WRITE_UNCHANGED
|
505 (BDRV_REQ_FUA
& bs
->file
->bs
->supported_write_flags
);
506 bs
->supported_zero_flags
= BDRV_REQ_WRITE_UNCHANGED
|
507 ((BDRV_REQ_FUA
| BDRV_REQ_MAY_UNMAP
| BDRV_REQ_NO_FALLBACK
) &
508 bs
->file
->bs
->supported_zero_flags
);
509 bs
->supported_truncate_flags
= bs
->file
->bs
->supported_truncate_flags
&
512 if (bs
->probed
&& !bdrv_is_read_only(bs
)) {
513 bdrv_refresh_filename(bs
->file
->bs
);
515 "WARNING: Image format was not specified for '%s' and probing "
517 " Automatically detecting the format is dangerous for "
518 "raw images, write operations on block 0 will be restricted.\n"
519 " Specify the 'raw' format explicitly to remove the "
521 bs
->file
->bs
->filename
);
524 ret
= raw_apply_options(bs
, s
, offset
, has_size
, size
, errp
);
529 if (bdrv_is_sg(bs
) && (s
->offset
|| s
->has_size
)) {
530 error_setg(errp
, "Cannot use offset/size with SCSI generic devices");
/*
 * Format probe callback, installed as bdrv_raw.bdrv_probe.
 *
 * NOTE(review): the return statement is not visible in this copy; per the
 * comment below it is expected to be the smallest positive score.
 */
537 static int raw_probe(const uint8_t *buf
, int buf_size
, const char *filename
)
539 /* smallest possible positive score so that raw is used if and only if no
540 * other block driver works
/*
 * Block-size probe callback: ask the node behind bs->file to fill @bsz,
 * then check that s->offset is aligned to the larger of the reported
 * logical and physical block sizes.
 *
 * NOTE(review): the handling of the probe result, the outcome of a failed
 * alignment check and the success return are not visible in this copy.
 */
545 static int GRAPH_RDLOCK
546 raw_probe_blocksizes(BlockDriverState
*bs
, BlockSizes
*bsz
)
548 BDRVRawState
*s
= bs
->opaque
;
551 ret
= bdrv_probe_blocksizes(bs
->file
->bs
, bsz
);
556 if (!QEMU_IS_ALIGNED(s
->offset
, MAX(bsz
->log
, bsz
->phys
))) {
/*
 * Geometry probe callback: nodes configured with an offset or an explicit
 * size are special-cased; otherwise @geo is filled by the node behind
 * bs->file.
 *
 * NOTE(review): what happens in the offset/size case (presumably an error
 * return) is not visible in this copy.
 */
563 static int GRAPH_RDLOCK
564 raw_probe_geometry(BlockDriverState
*bs
, HDGeometry
*geo
)
566 BDRVRawState
*s
= bs
->opaque
;
567 if (s
->offset
|| s
->has_size
) {
570 return bdrv_probe_geometry(bs
->file
->bs
, geo
);
/*
 * Copy-range "from" callback: translate @src_offset with
 * raw_adjust_offset() (as a read) and continue the copy from bs->file.
 * Note that bs->file, not @src, is passed on as the source child;
 * @dst/@dst_offset are passed through unchanged.
 *
 * NOTE(review): the handling of raw_adjust_offset()'s result is not visible
 * in this copy.
 */
573 static int coroutine_fn GRAPH_RDLOCK
574 raw_co_copy_range_from(BlockDriverState
*bs
,
575 BdrvChild
*src
, int64_t src_offset
,
576 BdrvChild
*dst
, int64_t dst_offset
,
577 int64_t bytes
, BdrvRequestFlags read_flags
,
578 BdrvRequestFlags write_flags
)
582 ret
= raw_adjust_offset(bs
, &src_offset
, bytes
, false);
586 return bdrv_co_copy_range_from(bs
->file
, src_offset
, dst
, dst_offset
,
587 bytes
, read_flags
, write_flags
);
/*
 * Copy-range "to" callback: translate @dst_offset with raw_adjust_offset()
 * (as a write) and continue the copy into bs->file.  Note that bs->file,
 * not @dst, is passed on as the destination child; @src/@src_offset are
 * passed through unchanged.
 *
 * NOTE(review): the handling of raw_adjust_offset()'s result is not visible
 * in this copy.
 */
590 static int coroutine_fn GRAPH_RDLOCK
591 raw_co_copy_range_to(BlockDriverState
*bs
,
592 BdrvChild
*src
, int64_t src_offset
,
593 BdrvChild
*dst
, int64_t dst_offset
,
594 int64_t bytes
, BdrvRequestFlags read_flags
,
595 BdrvRequestFlags write_flags
)
599 ret
= raw_adjust_offset(bs
, &dst_offset
, bytes
, true);
603 return bdrv_co_copy_range_to(src
, src_offset
, bs
->file
, dst_offset
, bytes
,
604 read_flags
, write_flags
);
607 static const char *const raw_strong_runtime_opts
[] = {
614 static void GRAPH_RDLOCK
raw_cancel_in_flight(BlockDriverState
*bs
)
616 bdrv_cancel_in_flight(bs
->file
->bs
);
/*
 * Child-permission callback.
 *
 * Starts from bdrv_default_perms() and then narrows the result: WRITE and
 * RESIZE are removed from *@nperm and re-added only if the parent itself
 * requested them in @parent_perm.  *@nshared is left as computed by
 * bdrv_default_perms().
 *
 * NOTE(review): the declaration of the 'role' parameter used below is not
 * visible in this copy of the signature.
 */
619 static void raw_child_perm(BlockDriverState
*bs
, BdrvChild
*c
,
621 BlockReopenQueue
*reopen_queue
,
622 uint64_t parent_perm
, uint64_t parent_shared
,
623 uint64_t *nperm
, uint64_t *nshared
)
625 bdrv_default_perms(bs
, c
, role
, reopen_queue
, parent_perm
,
626 parent_shared
, nperm
, nshared
);
629 * bdrv_default_perms() may add WRITE and/or RESIZE (see comment in
630 * bdrv_default_perms_for_storage() for an explanation) but we only need
631 * them if they are in parent_perm. Drop WRITE and RESIZE whenever possible
632 * to avoid permission conflicts.
634 *nperm
&= ~(BLK_PERM_WRITE
| BLK_PERM_RESIZE
);
635 *nperm
|= parent_perm
& (BLK_PERM_WRITE
| BLK_PERM_RESIZE
);
/*
 * Driver table for the "raw" format.  It plugs the callbacks defined in
 * this file into the BlockDriver interface, sizes the per-node state as
 * BDRVRawState and declares support for zoned children.  The table is
 * registered by bdrv_raw_init().
 */
638 BlockDriver bdrv_raw
= {
639 .format_name
= "raw",
640 .instance_size
= sizeof(BDRVRawState
),
641 .supports_zoned_children
= true,
642 .bdrv_probe
= &raw_probe
,
643 .bdrv_reopen_prepare
= &raw_reopen_prepare
,
644 .bdrv_reopen_commit
= &raw_reopen_commit
,
645 .bdrv_reopen_abort
= &raw_reopen_abort
,
646 .bdrv_open
= &raw_open
,
647 .bdrv_child_perm
= raw_child_perm
,
648 .bdrv_co_create_opts
= &raw_co_create_opts
,
649 .bdrv_co_preadv
= &raw_co_preadv
,
650 .bdrv_co_pwritev
= &raw_co_pwritev
,
651 .bdrv_co_pwrite_zeroes
= &raw_co_pwrite_zeroes
,
652 .bdrv_co_pdiscard
= &raw_co_pdiscard
,
653 .bdrv_co_zone_report
= &raw_co_zone_report
,
654 .bdrv_co_zone_mgmt
= &raw_co_zone_mgmt
,
655 .bdrv_co_zone_append
= &raw_co_zone_append
,
656 .bdrv_co_block_status
= &raw_co_block_status
,
657 .bdrv_co_copy_range_from
= &raw_co_copy_range_from
,
658 .bdrv_co_copy_range_to
= &raw_co_copy_range_to
,
659 .bdrv_co_truncate
= &raw_co_truncate
,
660 .bdrv_co_getlength
= &raw_co_getlength
,
662 .bdrv_measure
= &raw_measure
,
663 .bdrv_co_get_info
= &raw_co_get_info
,
664 .bdrv_refresh_limits
= &raw_refresh_limits
,
665 .bdrv_probe_blocksizes
= &raw_probe_blocksizes
,
666 .bdrv_probe_geometry
= &raw_probe_geometry
,
667 .bdrv_co_eject
= &raw_co_eject
,
668 .bdrv_co_lock_medium
= &raw_co_lock_medium
,
669 .bdrv_co_ioctl
= &raw_co_ioctl
,
670 .create_opts
= &raw_create_opts
,
671 .bdrv_has_zero_init
= &raw_has_zero_init
,
672 .strong_runtime_opts
= raw_strong_runtime_opts
,
673 .mutable_opts
= mutable_opts
,
674 .bdrv_cancel_in_flight
= raw_cancel_in_flight
,
677 static void bdrv_raw_init(void)
679 bdrv_register(&bdrv_raw
);
682 block_init(bdrv_raw_init
);