/*
 * L2/refcount table cache for the QCOW2 format
 *
 * Copyright (c) 2010 Kevin Wolf <kwolf@redhat.com>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */
#include "block/block_int.h"
#include "qemu-common.h"
#include "qcow2.h"
#include "trace.h"
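/*
 * A Qcow2Cache holds a fixed number of tables (L2 tables or refcount
 * blocks). The table contents live in one contiguous, cluster-aligned
 * allocation (table_array); the entries array keeps the per-table
 * metadata: offset in the image file, dirty flag, reference count and
 * LRU counter.
 */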
typedef struct Qcow2CachedTable {
    int64_t  offset;
    bool     dirty;
    uint64_t lru_counter;
    int      ref;
} Qcow2CachedTable;

struct Qcow2Cache {
    Qcow2CachedTable       *entries;
    struct Qcow2Cache      *depends;
    int                     size;
    bool                    depends_on_flush;
    void                   *table_array;
    uint64_t                lru_counter;
};
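
/* Return the address of cache slot @table inside the contiguous table_array */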
static inline void *qcow2_cache_get_table_addr(BlockDriverState *bs,
                                               Qcow2Cache *c, int table)
{
    BDRVQcowState *s = bs->opaque;
    return (uint8_t *) c->table_array + (size_t) table * s->cluster_size;
}
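
/* Map a table pointer handed out by the cache back to its slot index */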
static inline int qcow2_cache_get_table_idx(BlockDriverState *bs,
                                            Qcow2Cache *c, void *table)
{
    BDRVQcowState *s = bs->opaque;
    ptrdiff_t table_offset = (uint8_t *) table - (uint8_t *) c->table_array;
    int idx = table_offset / s->cluster_size;
    assert(idx >= 0 && idx < c->size && table_offset % s->cluster_size == 0);
    return idx;
}
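
/*
 * Allocate a cache with room for @num_tables tables of cluster_size bytes
 * each; returns NULL if either the entry metadata or the table array
 * cannot be allocated.
 */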
Qcow2Cache *qcow2_cache_create(BlockDriverState *bs, int num_tables)
{
    BDRVQcowState *s = bs->opaque;
    Qcow2Cache *c;

    c = g_new0(Qcow2Cache, 1);
    c->size = num_tables;
    c->entries = g_try_new0(Qcow2CachedTable, num_tables);
    c->table_array = qemu_try_blockalign(bs->file,
                                         (size_t) num_tables * s->cluster_size);

    if (!c->entries || !c->table_array) {
        qemu_vfree(c->table_array);
        g_free(c->entries);
        g_free(c);
        c = NULL;
    }

    return c;
}
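
/*
 * Free the cache. All entries must be unreferenced; nothing is written
 * back here, so callers are expected to have flushed dirty tables first.
 */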
int qcow2_cache_destroy(BlockDriverState *bs, Qcow2Cache *c)
{
    int i;

    for (i = 0; i < c->size; i++) {
        assert(c->entries[i].ref == 0);
    }

    qemu_vfree(c->table_array);
    g_free(c->entries);
    g_free(c);

    return 0;
}
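
/* Flush the cache that @c depends on, then drop the dependency */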
static int qcow2_cache_flush_dependency(BlockDriverState *bs, Qcow2Cache *c)
{
    int ret;

    ret = qcow2_cache_flush(bs, c->depends);
    if (ret < 0) {
        return ret;
    }

    c->depends = NULL;
    c->depends_on_flush = false;

    return 0;
}
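
/*
 * Write one cache entry back to the image file. Dependencies and metadata
 * overlap checks are handled before the write; clean or unused entries are
 * skipped.
 */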
static int qcow2_cache_entry_flush(BlockDriverState *bs, Qcow2Cache *c, int i)
{
    BDRVQcowState *s = bs->opaque;
    int ret = 0;

    if (!c->entries[i].dirty || !c->entries[i].offset) {
        return 0;
    }

    trace_qcow2_cache_entry_flush(qemu_coroutine_self(),
                                  c == s->l2_table_cache, i);

    if (c->depends) {
        ret = qcow2_cache_flush_dependency(bs, c);
    } else if (c->depends_on_flush) {
        ret = bdrv_flush(bs->file);
        if (ret >= 0) {
            c->depends_on_flush = false;
        }
    }

    if (ret < 0) {
        return ret;
    }

    if (c == s->refcount_block_cache) {
        ret = qcow2_pre_write_overlap_check(bs, QCOW2_OL_REFCOUNT_BLOCK,
                c->entries[i].offset, s->cluster_size);
    } else if (c == s->l2_table_cache) {
        ret = qcow2_pre_write_overlap_check(bs, QCOW2_OL_ACTIVE_L2,
                c->entries[i].offset, s->cluster_size);
    } else {
        ret = qcow2_pre_write_overlap_check(bs, 0,
                c->entries[i].offset, s->cluster_size);
    }

    if (ret < 0) {
        return ret;
    }

    if (c == s->refcount_block_cache) {
        BLKDBG_EVENT(bs->file, BLKDBG_REFBLOCK_UPDATE_PART);
    } else if (c == s->l2_table_cache) {
        BLKDBG_EVENT(bs->file, BLKDBG_L2_UPDATE);
    }

    ret = bdrv_pwrite(bs->file, c->entries[i].offset,
                      qcow2_cache_get_table_addr(bs, c, i), s->cluster_size);
    if (ret < 0) {
        return ret;
    }

    c->entries[i].dirty = false;

    return 0;
}
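
/*
 * Write back all dirty entries and flush the underlying file. On failure
 * an error code is returned, with -ENOSPC taking precedence over other
 * errors.
 */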
int qcow2_cache_flush(BlockDriverState *bs, Qcow2Cache *c)
{
    BDRVQcowState *s = bs->opaque;
    int result = 0;
    int ret;
    int i;

    trace_qcow2_cache_flush(qemu_coroutine_self(), c == s->l2_table_cache);

    for (i = 0; i < c->size; i++) {
        ret = qcow2_cache_entry_flush(bs, c, i);
        if (ret < 0 && result != -ENOSPC) {
            result = ret;
        }
    }

    if (result == 0) {
        ret = bdrv_flush(bs->file);
        if (ret < 0) {
            result = ret;
        }
    }

    return result;
}
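
/*
 * Record that entries of @c must not be written back before @dependency
 * has been flushed. Only one dependency is tracked at a time, so any
 * conflicting dependency is flushed first.
 */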
int qcow2_cache_set_dependency(BlockDriverState *bs, Qcow2Cache *c,
                               Qcow2Cache *dependency)
{
    int ret;

    if (dependency->depends) {
        ret = qcow2_cache_flush_dependency(bs, dependency);
        if (ret < 0) {
            return ret;
        }
    }

    if (c->depends && (c->depends != dependency)) {
        ret = qcow2_cache_flush_dependency(bs, c);
        if (ret < 0) {
            return ret;
        }
    }

    c->depends = dependency;
    return 0;
}
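
/* Require a flush of the image file before any entry of @c is written back */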
void qcow2_cache_depends_on_flush(Qcow2Cache *c)
{
    c->depends_on_flush = true;
}
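
/* Flush the cache and invalidate all entries; no entry may still be referenced */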
int qcow2_cache_empty(BlockDriverState *bs, Qcow2Cache *c)
{
    int ret, i;

    ret = qcow2_cache_flush(bs, c);
    if (ret < 0) {
        return ret;
    }

    for (i = 0; i < c->size; i++) {
        assert(c->entries[i].ref == 0);
        c->entries[i].offset = 0;
        c->entries[i].lru_counter = 0;
    }

    c->lru_counter = 0;

    return 0;
}
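
/*
 * Look up the table at @offset, optionally reading it from disk on a miss.
 * The lookup starts at a slot derived from @offset; on a miss, the least
 * recently used unreferenced entry is written back and reused.
 */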
static int qcow2_cache_do_get(BlockDriverState *bs, Qcow2Cache *c,
                              uint64_t offset, void **table,
                              bool read_from_disk)
{
    BDRVQcowState *s = bs->opaque;
    int i;
    int ret;
    int lookup_index;
    uint64_t min_lru_counter = UINT64_MAX;
    int min_lru_index = -1;

    trace_qcow2_cache_get(qemu_coroutine_self(), c == s->l2_table_cache,
                          offset, read_from_disk);

    /* Check if the table is already cached */
    i = lookup_index = (offset / s->cluster_size * 4) % c->size;
    do {
        const Qcow2CachedTable *t = &c->entries[i];
        if (t->offset == offset) {
            goto found;
        }
        if (t->ref == 0 && t->lru_counter < min_lru_counter) {
            min_lru_counter = t->lru_counter;
            min_lru_index = i;
        }
        if (++i == c->size) {
            i = 0;
        }
    } while (i != lookup_index);

    if (min_lru_index == -1) {
        /* This can't happen in current synchronous code, but leave the check
         * here as a reminder for whoever starts using AIO with the cache */
        abort();
    }

    /* Cache miss: write a table back and replace it */
    i = min_lru_index;
    trace_qcow2_cache_get_replace_entry(qemu_coroutine_self(),
                                        c == s->l2_table_cache, i);

    ret = qcow2_cache_entry_flush(bs, c, i);
    if (ret < 0) {
        return ret;
    }

    trace_qcow2_cache_get_read(qemu_coroutine_self(),
                               c == s->l2_table_cache, i);
    c->entries[i].offset = 0;
    if (read_from_disk) {
        if (c == s->l2_table_cache) {
            BLKDBG_EVENT(bs->file, BLKDBG_L2_LOAD);
        }

        ret = bdrv_pread(bs->file, offset, qcow2_cache_get_table_addr(bs, c, i),
                         s->cluster_size);
        if (ret < 0) {
            return ret;
        }
    }

    c->entries[i].offset = offset;

    /* And return the right table */
found:
    c->entries[i].ref++;
    *table = qcow2_cache_get_table_addr(bs, c, i);

    trace_qcow2_cache_get_done(qemu_coroutine_self(),
                               c == s->l2_table_cache, i);

    return 0;
}
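
/* Get a table and read its contents from the image file if not cached */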
int qcow2_cache_get(BlockDriverState *bs, Qcow2Cache *c, uint64_t offset,
                    void **table)
{
    return qcow2_cache_do_get(bs, c, offset, table, true);
}
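
/*
 * Get a table slot without reading its contents from disk, typically
 * because the caller is going to initialise the table completely.
 */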
int qcow2_cache_get_empty(BlockDriverState *bs, Qcow2Cache *c, uint64_t offset,
                          void **table)
{
    return qcow2_cache_do_get(bs, c, offset, table, false);
}
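
/*
 * Release a reference obtained with qcow2_cache_get(). The LRU counter is
 * only updated once the last reference to the entry is dropped.
 */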
void qcow2_cache_put(BlockDriverState *bs, Qcow2Cache *c, void **table)
{
    int i = qcow2_cache_get_table_idx(bs, c, *table);

    c->entries[i].ref--;
    *table = NULL;

    if (c->entries[i].ref == 0) {
        c->entries[i].lru_counter = ++c->lru_counter;
    }

    assert(c->entries[i].ref >= 0);
}
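
/* Mark a cached table dirty so that the next flush writes it back */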
void qcow2_cache_entry_mark_dirty(BlockDriverState *bs, Qcow2Cache *c,
                                  void *table)
{
    int i = qcow2_cache_get_table_idx(bs, c, table);
    assert(c->entries[i].offset != 0);
    c->entries[i].dirty = true;
}