/*
 * L2/refcount table cache for the QCOW2 format
 *
 * Copyright (c) 2010 Kevin Wolf <kwolf@redhat.com>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */

#include "block/block_int.h"
#include "qemu-common.h"
#include "qcow2.h"
#include "trace.h"

typedef struct Qcow2CachedTable {
    void    *table;
    int64_t  offset;
    bool     dirty;
    int      cache_hits;
    int      ref;
} Qcow2CachedTable;

struct Qcow2Cache {
    Qcow2CachedTable   *entries;
    struct Qcow2Cache  *depends;
    int                 size;
    bool                depends_on_flush;
};
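
/* Create a cache that can hold num_tables tables, each of cluster size.
 * Returns the new cache, or NULL if a table allocation fails. */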
Qcow2Cache *qcow2_cache_create(BlockDriverState *bs, int num_tables)
{
    BDRVQcowState *s = bs->opaque;
    Qcow2Cache *c;
    int i;

    c = g_new0(Qcow2Cache, 1);
    c->size = num_tables;
    c->entries = g_try_new0(Qcow2CachedTable, num_tables);
    if (!c->entries) {
        goto fail;
    }

    for (i = 0; i < c->size; i++) {
        c->entries[i].table = qemu_try_blockalign(bs->file, s->cluster_size);
        if (c->entries[i].table == NULL) {
            goto fail;
        }
    }

    return c;

fail:
    if (c->entries) {
        for (i = 0; i < c->size; i++) {
            qemu_vfree(c->entries[i].table);
        }
    }
    g_free(c->entries);
    g_free(c);
    return NULL;
}
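
/* Free all cached tables and the cache itself. No entry may still be
 * referenced when this is called. */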
int qcow2_cache_destroy(BlockDriverState *bs, Qcow2Cache *c)
{
    int i;

    for (i = 0; i < c->size; i++) {
        assert(c->entries[i].ref == 0);
        qemu_vfree(c->entries[i].table);
    }

    g_free(c->entries);
    g_free(c);

    return 0;
}
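
/* Flush the cache that c depends on and clear the dependency, so that c's own
 * entries may be written back afterwards. */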
static int qcow2_cache_flush_dependency(BlockDriverState *bs, Qcow2Cache *c)
{
    int ret;

    ret = qcow2_cache_flush(bs, c->depends);
    if (ret < 0) {
        return ret;
    }

    c->depends = NULL;
    c->depends_on_flush = false;

    return 0;
}
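
/* Write a single dirty cache entry back to the image file. Any dependency is
 * flushed first and the write is checked against metadata overlaps. */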
static int qcow2_cache_entry_flush(BlockDriverState *bs, Qcow2Cache *c, int i)
{
    BDRVQcowState *s = bs->opaque;
    int ret = 0;

    if (!c->entries[i].dirty || !c->entries[i].offset) {
        return 0;
    }

    trace_qcow2_cache_entry_flush(qemu_coroutine_self(),
                                  c == s->l2_table_cache, i);

    if (c->depends) {
        ret = qcow2_cache_flush_dependency(bs, c);
    } else if (c->depends_on_flush) {
        ret = bdrv_flush(bs->file);
        if (ret >= 0) {
            c->depends_on_flush = false;
        }
    }

    if (ret < 0) {
        return ret;
    }

    if (c == s->refcount_block_cache) {
        ret = qcow2_pre_write_overlap_check(bs, QCOW2_OL_REFCOUNT_BLOCK,
                c->entries[i].offset, s->cluster_size);
    } else if (c == s->l2_table_cache) {
        ret = qcow2_pre_write_overlap_check(bs, QCOW2_OL_ACTIVE_L2,
                c->entries[i].offset, s->cluster_size);
    } else {
        ret = qcow2_pre_write_overlap_check(bs, 0,
                c->entries[i].offset, s->cluster_size);
    }

    if (ret < 0) {
        return ret;
    }

    if (c == s->refcount_block_cache) {
        BLKDBG_EVENT(bs->file, BLKDBG_REFBLOCK_UPDATE_PART);
    } else if (c == s->l2_table_cache) {
        BLKDBG_EVENT(bs->file, BLKDBG_L2_UPDATE);
    }

    ret = bdrv_pwrite(bs->file, c->entries[i].offset, c->entries[i].table,
        s->cluster_size);
    if (ret < 0) {
        return ret;
    }

    c->entries[i].dirty = false;

    return 0;
}
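
/* Write back all dirty entries of the cache and flush the underlying file.
 * Returns 0 on success or a negative error code. */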
int qcow2_cache_flush(BlockDriverState *bs, Qcow2Cache *c)
{
    BDRVQcowState *s = bs->opaque;
    int result = 0;
    int ret;
    int i;

    trace_qcow2_cache_flush(qemu_coroutine_self(), c == s->l2_table_cache);

    for (i = 0; i < c->size; i++) {
        ret = qcow2_cache_entry_flush(bs, c, i);
        if (ret < 0 && result != -ENOSPC) {
            result = ret;
        }
    }

    if (result == 0) {
        ret = bdrv_flush(bs->file);
        if (ret < 0) {
            result = ret;
        }
    }

    return result;
}
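
/* Make sure that the dependency cache is flushed before any entry of c is
 * written back. An existing, different dependency is flushed first. */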
int qcow2_cache_set_dependency(BlockDriverState *bs, Qcow2Cache *c,
    Qcow2Cache *dependency)
{
    int ret;

    if (dependency->depends) {
        ret = qcow2_cache_flush_dependency(bs, dependency);
        if (ret < 0) {
            return ret;
        }
    }

    if (c->depends && (c->depends != dependency)) {
        ret = qcow2_cache_flush_dependency(bs, c);
        if (ret < 0) {
            return ret;
        }
    }

    c->depends = dependency;
    return 0;
}
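
/* Require a flush of the image file before any entry of c is written back. */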
void qcow2_cache_depends_on_flush(Qcow2Cache *c)
{
    c->depends_on_flush = true;
}
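
/* Write back all dirty entries and then invalidate the whole cache. */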
int qcow2_cache_empty(BlockDriverState *bs, Qcow2Cache *c)
{
    int ret, i;

    ret = qcow2_cache_flush(bs, c);
    if (ret < 0) {
        return ret;
    }

    for (i = 0; i < c->size; i++) {
        assert(c->entries[i].ref == 0);
        c->entries[i].offset = 0;
        c->entries[i].cache_hits = 0;
    }

    return 0;
}
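
/* Pick the entry to evict: the unreferenced entry with the fewest cache hits.
 * Hit counters are halved on every pass so that recent hits weigh more. */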
static int qcow2_cache_find_entry_to_replace(Qcow2Cache *c)
{
    int i;
    int min_count = INT_MAX;
    int min_index = -1;

    for (i = 0; i < c->size; i++) {
        if (c->entries[i].ref) {
            continue;
        }

        if (c->entries[i].cache_hits < min_count) {
            min_index = i;
            min_count = c->entries[i].cache_hits;
        }

        /* Give newer hits priority */
        /* TODO Check how to optimize the replacement strategy */
        c->entries[i].cache_hits /= 2;
    }

    if (min_index == -1) {
        /* This can't happen in current synchronous code, but leave the check
         * here as a reminder for whoever starts using AIO with the cache */
        abort();
    }
    return min_index;
}
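
/* Look up the table at the given offset. On a miss, the least used entry is
 * flushed and reused, optionally reading the table from disk. The returned
 * table is referenced and must be released with qcow2_cache_put(). */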
static int qcow2_cache_do_get(BlockDriverState *bs, Qcow2Cache *c,
    uint64_t offset, void **table, bool read_from_disk)
{
    BDRVQcowState *s = bs->opaque;
    int i;
    int ret;

    trace_qcow2_cache_get(qemu_coroutine_self(), c == s->l2_table_cache,
                          offset, read_from_disk);

    /* Check if the table is already cached */
    for (i = 0; i < c->size; i++) {
        if (c->entries[i].offset == offset) {
            goto found;
        }
    }

    /* If not, write a table back and replace it */
    i = qcow2_cache_find_entry_to_replace(c);
    trace_qcow2_cache_get_replace_entry(qemu_coroutine_self(),
                                        c == s->l2_table_cache, i);
    if (i < 0) {
        return i;
    }

    ret = qcow2_cache_entry_flush(bs, c, i);
    if (ret < 0) {
        return ret;
    }

    trace_qcow2_cache_get_read(qemu_coroutine_self(),
                               c == s->l2_table_cache, i);
    c->entries[i].offset = 0;
    if (read_from_disk) {
        if (c == s->l2_table_cache) {
            BLKDBG_EVENT(bs->file, BLKDBG_L2_LOAD);
        }

        ret = bdrv_pread(bs->file, offset, c->entries[i].table,
            s->cluster_size);
        if (ret < 0) {
            return ret;
        }
    }

    /* Give the table some hits for the start so that it won't be replaced
     * immediately. The number 32 is completely arbitrary. */
    c->entries[i].cache_hits = 32;
    c->entries[i].offset = offset;

    /* And return the right table */
found:
    c->entries[i].cache_hits++;
    c->entries[i].ref++;
    *table = c->entries[i].table;

    trace_qcow2_cache_get_done(qemu_coroutine_self(),
                               c == s->l2_table_cache, i);

    return 0;
}
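
/* Get the table at the given offset, reading it from disk if it is not cached
 * yet. Release it with qcow2_cache_put() when done.
 *
 * A rough sketch of typical caller code (the l2_offset/l2_table names are
 * illustrative, not taken from this file):
 *
 *     uint64_t *l2_table;
 *     ret = qcow2_cache_get(bs, s->l2_table_cache, l2_offset,
 *                           (void **) &l2_table);
 *     if (ret < 0) {
 *         return ret;
 *     }
 *     ...read or modify the table entries...
 *     qcow2_cache_entry_mark_dirty(s->l2_table_cache, l2_table);
 *     ret = qcow2_cache_put(bs, s->l2_table_cache, (void **) &l2_table);
 */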
int qcow2_cache_get(BlockDriverState *bs, Qcow2Cache *c, uint64_t offset,
    void **table)
{
    return qcow2_cache_do_get(bs, c, offset, table, true);
}
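
/* Get a table for the given offset without reading it from disk, e.g. for a
 * table that is about to be initialised and completely overwritten. */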
int qcow2_cache_get_empty(BlockDriverState *bs, Qcow2Cache *c, uint64_t offset,
    void **table)
{
    return qcow2_cache_do_get(bs, c, offset, table, false);
}
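
/* Drop the reference that qcow2_cache_get()/qcow2_cache_get_empty() took on a
 * table and clear the caller's pointer. */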
int qcow2_cache_put(BlockDriverState *bs, Qcow2Cache *c, void **table)
{
    int i;

    for (i = 0; i < c->size; i++) {
        if (c->entries[i].table == *table) {
            goto found;
        }
    }
    return -ENOENT;

found:
    c->entries[i].ref--;
    *table = NULL;

    assert(c->entries[i].ref >= 0);
    return 0;
}
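
/* Mark a cached table dirty so that the next flush writes it back to the
 * image file. */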
void qcow2_cache_entry_mark_dirty(Qcow2Cache *c, void *table)
{
    int i;

    for (i = 0; i < c->size; i++) {
        if (c->entries[i].table == table) {
            goto found;
        }
    }
    abort();

found:
    c->entries[i].dirty = true;
}