2 * L2/refcount table cache for the QCOW2 format
4 * Copyright (c) 2010 Kevin Wolf <kwolf@redhat.com>
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to deal
8 * in the Software without restriction, including without limitation the rights
9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 * copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
25 /* Needed for CONFIG_MADVISE */
26 #include "config-host.h"
28 #if defined(CONFIG_MADVISE) || defined(CONFIG_POSIX_MADVISE)
32 #include "block/block_int.h"
33 #include "qemu-common.h"
34 #include "qemu/osdep.h"
38 typedef struct Qcow2CachedTable
{
46 Qcow2CachedTable
*entries
;
47 struct Qcow2Cache
*depends
;
49 bool depends_on_flush
;
52 uint64_t cache_clean_lru_counter
;
55 static inline void *qcow2_cache_get_table_addr(BlockDriverState
*bs
,
56 Qcow2Cache
*c
, int table
)
58 BDRVQcow2State
*s
= bs
->opaque
;
59 return (uint8_t *) c
->table_array
+ (size_t) table
* s
->cluster_size
;
62 static inline int qcow2_cache_get_table_idx(BlockDriverState
*bs
,
63 Qcow2Cache
*c
, void *table
)
65 BDRVQcow2State
*s
= bs
->opaque
;
66 ptrdiff_t table_offset
= (uint8_t *) table
- (uint8_t *) c
->table_array
;
67 int idx
= table_offset
/ s
->cluster_size
;
68 assert(idx
>= 0 && idx
< c
->size
&& table_offset
% s
->cluster_size
== 0);
72 static void qcow2_cache_table_release(BlockDriverState
*bs
, Qcow2Cache
*c
,
73 int i
, int num_tables
)
75 #if QEMU_MADV_DONTNEED != QEMU_MADV_INVALID
76 BDRVQcow2State
*s
= bs
->opaque
;
77 void *t
= qcow2_cache_get_table_addr(bs
, c
, i
);
78 int align
= getpagesize();
79 size_t mem_size
= (size_t) s
->cluster_size
* num_tables
;
80 size_t offset
= QEMU_ALIGN_UP((uintptr_t) t
, align
) - (uintptr_t) t
;
81 size_t length
= QEMU_ALIGN_DOWN(mem_size
- offset
, align
);
83 qemu_madvise((uint8_t *) t
+ offset
, length
, QEMU_MADV_DONTNEED
);
88 static inline bool can_clean_entry(Qcow2Cache
*c
, int i
)
90 Qcow2CachedTable
*t
= &c
->entries
[i
];
91 return t
->ref
== 0 && !t
->dirty
&& t
->offset
!= 0 &&
92 t
->lru_counter
<= c
->cache_clean_lru_counter
;
95 void qcow2_cache_clean_unused(BlockDriverState
*bs
, Qcow2Cache
*c
)
101 /* Skip the entries that we don't need to clean */
102 while (i
< c
->size
&& !can_clean_entry(c
, i
)) {
106 /* And count how many we can clean in a row */
107 while (i
< c
->size
&& can_clean_entry(c
, i
)) {
108 c
->entries
[i
].offset
= 0;
109 c
->entries
[i
].lru_counter
= 0;
115 qcow2_cache_table_release(bs
, c
, i
- to_clean
, to_clean
);
119 c
->cache_clean_lru_counter
= c
->lru_counter
;
122 Qcow2Cache
*qcow2_cache_create(BlockDriverState
*bs
, int num_tables
)
124 BDRVQcow2State
*s
= bs
->opaque
;
127 c
= g_new0(Qcow2Cache
, 1);
128 c
->size
= num_tables
;
129 c
->entries
= g_try_new0(Qcow2CachedTable
, num_tables
);
130 c
->table_array
= qemu_try_blockalign(bs
->file
->bs
,
131 (size_t) num_tables
* s
->cluster_size
);
133 if (!c
->entries
|| !c
->table_array
) {
134 qemu_vfree(c
->table_array
);
143 int qcow2_cache_destroy(BlockDriverState
*bs
, Qcow2Cache
*c
)
147 for (i
= 0; i
< c
->size
; i
++) {
148 assert(c
->entries
[i
].ref
== 0);
151 qemu_vfree(c
->table_array
);
158 static int qcow2_cache_flush_dependency(BlockDriverState
*bs
, Qcow2Cache
*c
)
162 ret
= qcow2_cache_flush(bs
, c
->depends
);
168 c
->depends_on_flush
= false;
173 static int qcow2_cache_entry_flush(BlockDriverState
*bs
, Qcow2Cache
*c
, int i
)
175 BDRVQcow2State
*s
= bs
->opaque
;
178 if (!c
->entries
[i
].dirty
|| !c
->entries
[i
].offset
) {
182 trace_qcow2_cache_entry_flush(qemu_coroutine_self(),
183 c
== s
->l2_table_cache
, i
);
186 ret
= qcow2_cache_flush_dependency(bs
, c
);
187 } else if (c
->depends_on_flush
) {
188 ret
= bdrv_flush(bs
->file
->bs
);
190 c
->depends_on_flush
= false;
198 if (c
== s
->refcount_block_cache
) {
199 ret
= qcow2_pre_write_overlap_check(bs
, QCOW2_OL_REFCOUNT_BLOCK
,
200 c
->entries
[i
].offset
, s
->cluster_size
);
201 } else if (c
== s
->l2_table_cache
) {
202 ret
= qcow2_pre_write_overlap_check(bs
, QCOW2_OL_ACTIVE_L2
,
203 c
->entries
[i
].offset
, s
->cluster_size
);
205 ret
= qcow2_pre_write_overlap_check(bs
, 0,
206 c
->entries
[i
].offset
, s
->cluster_size
);
213 if (c
== s
->refcount_block_cache
) {
214 BLKDBG_EVENT(bs
->file
, BLKDBG_REFBLOCK_UPDATE_PART
);
215 } else if (c
== s
->l2_table_cache
) {
216 BLKDBG_EVENT(bs
->file
, BLKDBG_L2_UPDATE
);
219 ret
= bdrv_pwrite(bs
->file
->bs
, c
->entries
[i
].offset
,
220 qcow2_cache_get_table_addr(bs
, c
, i
), s
->cluster_size
);
225 c
->entries
[i
].dirty
= false;
230 int qcow2_cache_flush(BlockDriverState
*bs
, Qcow2Cache
*c
)
232 BDRVQcow2State
*s
= bs
->opaque
;
237 trace_qcow2_cache_flush(qemu_coroutine_self(), c
== s
->l2_table_cache
);
239 for (i
= 0; i
< c
->size
; i
++) {
240 ret
= qcow2_cache_entry_flush(bs
, c
, i
);
241 if (ret
< 0 && result
!= -ENOSPC
) {
247 ret
= bdrv_flush(bs
->file
->bs
);
256 int qcow2_cache_set_dependency(BlockDriverState
*bs
, Qcow2Cache
*c
,
257 Qcow2Cache
*dependency
)
261 if (dependency
->depends
) {
262 ret
= qcow2_cache_flush_dependency(bs
, dependency
);
268 if (c
->depends
&& (c
->depends
!= dependency
)) {
269 ret
= qcow2_cache_flush_dependency(bs
, c
);
275 c
->depends
= dependency
;
279 void qcow2_cache_depends_on_flush(Qcow2Cache
*c
)
281 c
->depends_on_flush
= true;
284 int qcow2_cache_empty(BlockDriverState
*bs
, Qcow2Cache
*c
)
288 ret
= qcow2_cache_flush(bs
, c
);
293 for (i
= 0; i
< c
->size
; i
++) {
294 assert(c
->entries
[i
].ref
== 0);
295 c
->entries
[i
].offset
= 0;
296 c
->entries
[i
].lru_counter
= 0;
299 qcow2_cache_table_release(bs
, c
, 0, c
->size
);
306 static int qcow2_cache_do_get(BlockDriverState
*bs
, Qcow2Cache
*c
,
307 uint64_t offset
, void **table
, bool read_from_disk
)
309 BDRVQcow2State
*s
= bs
->opaque
;
313 uint64_t min_lru_counter
= UINT64_MAX
;
314 int min_lru_index
= -1;
316 trace_qcow2_cache_get(qemu_coroutine_self(), c
== s
->l2_table_cache
,
317 offset
, read_from_disk
);
319 /* Check if the table is already cached */
320 i
= lookup_index
= (offset
/ s
->cluster_size
* 4) % c
->size
;
322 const Qcow2CachedTable
*t
= &c
->entries
[i
];
323 if (t
->offset
== offset
) {
326 if (t
->ref
== 0 && t
->lru_counter
< min_lru_counter
) {
327 min_lru_counter
= t
->lru_counter
;
330 if (++i
== c
->size
) {
333 } while (i
!= lookup_index
);
335 if (min_lru_index
== -1) {
336 /* This can't happen in current synchronous code, but leave the check
337 * here as a reminder for whoever starts using AIO with the cache */
341 /* Cache miss: write a table back and replace it */
343 trace_qcow2_cache_get_replace_entry(qemu_coroutine_self(),
344 c
== s
->l2_table_cache
, i
);
346 ret
= qcow2_cache_entry_flush(bs
, c
, i
);
351 trace_qcow2_cache_get_read(qemu_coroutine_self(),
352 c
== s
->l2_table_cache
, i
);
353 c
->entries
[i
].offset
= 0;
354 if (read_from_disk
) {
355 if (c
== s
->l2_table_cache
) {
356 BLKDBG_EVENT(bs
->file
, BLKDBG_L2_LOAD
);
359 ret
= bdrv_pread(bs
->file
->bs
, offset
,
360 qcow2_cache_get_table_addr(bs
, c
, i
),
367 c
->entries
[i
].offset
= offset
;
369 /* And return the right table */
372 *table
= qcow2_cache_get_table_addr(bs
, c
, i
);
374 trace_qcow2_cache_get_done(qemu_coroutine_self(),
375 c
== s
->l2_table_cache
, i
);
380 int qcow2_cache_get(BlockDriverState
*bs
, Qcow2Cache
*c
, uint64_t offset
,
383 return qcow2_cache_do_get(bs
, c
, offset
, table
, true);
386 int qcow2_cache_get_empty(BlockDriverState
*bs
, Qcow2Cache
*c
, uint64_t offset
,
389 return qcow2_cache_do_get(bs
, c
, offset
, table
, false);
392 void qcow2_cache_put(BlockDriverState
*bs
, Qcow2Cache
*c
, void **table
)
394 int i
= qcow2_cache_get_table_idx(bs
, c
, *table
);
399 if (c
->entries
[i
].ref
== 0) {
400 c
->entries
[i
].lru_counter
= ++c
->lru_counter
;
403 assert(c
->entries
[i
].ref
>= 0);
406 void qcow2_cache_entry_mark_dirty(BlockDriverState
*bs
, Qcow2Cache
*c
,
409 int i
= qcow2_cache_get_table_idx(bs
, c
, table
);
410 assert(c
->entries
[i
].offset
!= 0);
411 c
->entries
[i
].dirty
= true;