qcow2: add option to clean unused cache entries after some time
block/qcow2-cache.c

/*
 * L2/refcount table cache for the QCOW2 format
 *
 * Copyright (c) 2010 Kevin Wolf <kwolf@redhat.com>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */

/* Needed for CONFIG_MADVISE */
#include "config-host.h"

#if defined(CONFIG_MADVISE) || defined(CONFIG_POSIX_MADVISE)
#include <sys/mman.h>
#endif

#include "block/block_int.h"
#include "qemu-common.h"
#include "qemu/osdep.h"
#include "qcow2.h"
#include "trace.h"

typedef struct Qcow2CachedTable {
    int64_t  offset;
    bool     dirty;
    uint64_t lru_counter;
    int      ref;
} Qcow2CachedTable;

struct Qcow2Cache {
    Qcow2CachedTable       *entries;
    struct Qcow2Cache      *depends;
    int                     size;
    bool                    depends_on_flush;
    void                   *table_array;
    uint64_t                lru_counter;
    uint64_t                cache_clean_lru_counter;
};

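/*
 * All cached tables live in one contiguous buffer (table_array, allocated
 * with qemu_try_blockalign()), one cluster-sized table per cache entry.
 * The two helpers below convert between a table index and its address in
 * that buffer.
 */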
static inline void *qcow2_cache_get_table_addr(BlockDriverState *bs,
                    Qcow2Cache *c, int table)
{
    BDRVQcowState *s = bs->opaque;
    return (uint8_t *) c->table_array + (size_t) table * s->cluster_size;
}

static inline int qcow2_cache_get_table_idx(BlockDriverState *bs,
                  Qcow2Cache *c, void *table)
{
    BDRVQcowState *s = bs->opaque;
    ptrdiff_t table_offset = (uint8_t *) table - (uint8_t *) c->table_array;
    int idx = table_offset / s->cluster_size;
    assert(idx >= 0 && idx < c->size && table_offset % s->cluster_size == 0);
    return idx;
}

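/*
 * Hint to the host that the pages backing the given tables are no longer
 * needed, so their memory can be reclaimed.  Only whole pages inside the
 * affected range are passed to qemu_madvise(); this becomes a no-op when
 * MADV_DONTNEED is not available on the platform.
 */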
static void qcow2_cache_table_release(BlockDriverState *bs, Qcow2Cache *c,
                                      int i, int num_tables)
{
#if QEMU_MADV_DONTNEED != QEMU_MADV_INVALID
    BDRVQcowState *s = bs->opaque;
    void *t = qcow2_cache_get_table_addr(bs, c, i);
    int align = getpagesize();
    size_t mem_size = (size_t) s->cluster_size * num_tables;
    size_t offset = QEMU_ALIGN_UP((uintptr_t) t, align) - (uintptr_t) t;
    size_t length = QEMU_ALIGN_DOWN(mem_size - offset, align);
    if (length > 0) {
        qemu_madvise((uint8_t *) t + offset, length, QEMU_MADV_DONTNEED);
    }
#endif
}

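/*
 * Cache cleaning (the feature named in the commit subject above): an entry
 * can be dropped once it is valid, unreferenced, not dirty, and has not been
 * used since the previous qcow2_cache_clean_unused() call.  Presumably a
 * periodic caller (a timer or similar, not part of this file) invokes
 * qcow2_cache_clean_unused() so that long-idle tables are returned to the
 * host.
 */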
static inline bool can_clean_entry(Qcow2Cache *c, int i)
{
    Qcow2CachedTable *t = &c->entries[i];
    return t->ref == 0 && !t->dirty && t->offset != 0 &&
        t->lru_counter <= c->cache_clean_lru_counter;
}

void qcow2_cache_clean_unused(BlockDriverState *bs, Qcow2Cache *c)
{
    int i = 0;
    while (i < c->size) {
        int to_clean = 0;

        /* Skip the entries that we don't need to clean */
        while (i < c->size && !can_clean_entry(c, i)) {
            i++;
        }

        /* And count how many we can clean in a row */
        while (i < c->size && can_clean_entry(c, i)) {
            c->entries[i].offset = 0;
            c->entries[i].lru_counter = 0;
            i++;
            to_clean++;
        }

        if (to_clean > 0) {
            qcow2_cache_table_release(bs, c, i - to_clean, to_clean);
        }
    }

    c->cache_clean_lru_counter = c->lru_counter;
}

Qcow2Cache *qcow2_cache_create(BlockDriverState *bs, int num_tables)
{
    BDRVQcowState *s = bs->opaque;
    Qcow2Cache *c;

    c = g_new0(Qcow2Cache, 1);
    c->size = num_tables;
    c->entries = g_try_new0(Qcow2CachedTable, num_tables);
    c->table_array = qemu_try_blockalign(bs->file,
                                         (size_t) num_tables * s->cluster_size);

    if (!c->entries || !c->table_array) {
        qemu_vfree(c->table_array);
        g_free(c->entries);
        g_free(c);
        c = NULL;
    }

    return c;
}

int qcow2_cache_destroy(BlockDriverState *bs, Qcow2Cache *c)
{
    int i;

    for (i = 0; i < c->size; i++) {
        assert(c->entries[i].ref == 0);
    }

    qemu_vfree(c->table_array);
    g_free(c->entries);
    g_free(c);

    return 0;
}

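/*
 * A cache can be made to depend on another one (see
 * qcow2_cache_set_dependency() below): before any entry of this cache is
 * written out, the cache it depends on is flushed first, so that metadata
 * updates reach the image file in a safe order.
 */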
static int qcow2_cache_flush_dependency(BlockDriverState *bs, Qcow2Cache *c)
{
    int ret;

    ret = qcow2_cache_flush(bs, c->depends);
    if (ret < 0) {
        return ret;
    }

    c->depends = NULL;
    c->depends_on_flush = false;

    return 0;
}

static int qcow2_cache_entry_flush(BlockDriverState *bs, Qcow2Cache *c, int i)
{
    BDRVQcowState *s = bs->opaque;
    int ret = 0;

    if (!c->entries[i].dirty || !c->entries[i].offset) {
        return 0;
    }

    trace_qcow2_cache_entry_flush(qemu_coroutine_self(),
                                  c == s->l2_table_cache, i);

    if (c->depends) {
        ret = qcow2_cache_flush_dependency(bs, c);
    } else if (c->depends_on_flush) {
        ret = bdrv_flush(bs->file);
        if (ret >= 0) {
            c->depends_on_flush = false;
        }
    }

    if (ret < 0) {
        return ret;
    }

    if (c == s->refcount_block_cache) {
        ret = qcow2_pre_write_overlap_check(bs, QCOW2_OL_REFCOUNT_BLOCK,
                c->entries[i].offset, s->cluster_size);
    } else if (c == s->l2_table_cache) {
        ret = qcow2_pre_write_overlap_check(bs, QCOW2_OL_ACTIVE_L2,
                c->entries[i].offset, s->cluster_size);
    } else {
        ret = qcow2_pre_write_overlap_check(bs, 0,
                c->entries[i].offset, s->cluster_size);
    }

    if (ret < 0) {
        return ret;
    }

    if (c == s->refcount_block_cache) {
        BLKDBG_EVENT(bs->file, BLKDBG_REFBLOCK_UPDATE_PART);
    } else if (c == s->l2_table_cache) {
        BLKDBG_EVENT(bs->file, BLKDBG_L2_UPDATE);
    }

    ret = bdrv_pwrite(bs->file, c->entries[i].offset,
                      qcow2_cache_get_table_addr(bs, c, i), s->cluster_size);
    if (ret < 0) {
        return ret;
    }

    c->entries[i].dirty = false;

    return 0;
}

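/*
 * Write back every dirty entry and then flush the underlying file.  If
 * several entries fail, -ENOSPC is reported in preference to other errors,
 * presumably so that callers can special-case out-of-space conditions.
 */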
int qcow2_cache_flush(BlockDriverState *bs, Qcow2Cache *c)
{
    BDRVQcowState *s = bs->opaque;
    int result = 0;
    int ret;
    int i;

    trace_qcow2_cache_flush(qemu_coroutine_self(), c == s->l2_table_cache);

    for (i = 0; i < c->size; i++) {
        ret = qcow2_cache_entry_flush(bs, c, i);
        if (ret < 0 && result != -ENOSPC) {
            result = ret;
        }
    }

    if (result == 0) {
        ret = bdrv_flush(bs->file);
        if (ret < 0) {
            result = ret;
        }
    }

    return result;
}

int qcow2_cache_set_dependency(BlockDriverState *bs, Qcow2Cache *c,
    Qcow2Cache *dependency)
{
    int ret;

    if (dependency->depends) {
        ret = qcow2_cache_flush_dependency(bs, dependency);
        if (ret < 0) {
            return ret;
        }
    }

    if (c->depends && (c->depends != dependency)) {
        ret = qcow2_cache_flush_dependency(bs, c);
        if (ret < 0) {
            return ret;
        }
    }

    c->depends = dependency;
    return 0;
}

void qcow2_cache_depends_on_flush(Qcow2Cache *c)
{
    c->depends_on_flush = true;
}

int qcow2_cache_empty(BlockDriverState *bs, Qcow2Cache *c)
{
    int ret, i;

    ret = qcow2_cache_flush(bs, c);
    if (ret < 0) {
        return ret;
    }

    for (i = 0; i < c->size; i++) {
        assert(c->entries[i].ref == 0);
        c->entries[i].offset = 0;
        c->entries[i].lru_counter = 0;
    }

    qcow2_cache_table_release(bs, c, 0, c->size);

    c->lru_counter = 0;

    return 0;
}

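/*
 * Table lookup: the search starts at a position derived from the table
 * offset (a cheap hash) and scans the whole array from there.  On a miss,
 * the unreferenced entry with the lowest lru_counter is written back (if
 * dirty) and reused, optionally reading the new table from disk.
 */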
static int qcow2_cache_do_get(BlockDriverState *bs, Qcow2Cache *c,
    uint64_t offset, void **table, bool read_from_disk)
{
    BDRVQcowState *s = bs->opaque;
    int i;
    int ret;
    int lookup_index;
    uint64_t min_lru_counter = UINT64_MAX;
    int min_lru_index = -1;

    trace_qcow2_cache_get(qemu_coroutine_self(), c == s->l2_table_cache,
                          offset, read_from_disk);

    /* Check if the table is already cached */
    i = lookup_index = (offset / s->cluster_size * 4) % c->size;
    do {
        const Qcow2CachedTable *t = &c->entries[i];
        if (t->offset == offset) {
            goto found;
        }
        if (t->ref == 0 && t->lru_counter < min_lru_counter) {
            min_lru_counter = t->lru_counter;
            min_lru_index = i;
        }
        if (++i == c->size) {
            i = 0;
        }
    } while (i != lookup_index);

    if (min_lru_index == -1) {
        /* This can't happen in current synchronous code, but leave the check
         * here as a reminder for whoever starts using AIO with the cache */
        abort();
    }

    /* Cache miss: write a table back and replace it */
    i = min_lru_index;
    trace_qcow2_cache_get_replace_entry(qemu_coroutine_self(),
                                        c == s->l2_table_cache, i);
    ret = qcow2_cache_entry_flush(bs, c, i);
    if (ret < 0) {
        return ret;
    }

    trace_qcow2_cache_get_read(qemu_coroutine_self(),
                               c == s->l2_table_cache, i);
    c->entries[i].offset = 0;
    if (read_from_disk) {
        if (c == s->l2_table_cache) {
            BLKDBG_EVENT(bs->file, BLKDBG_L2_LOAD);
        }

        ret = bdrv_pread(bs->file, offset, qcow2_cache_get_table_addr(bs, c, i),
                         s->cluster_size);
        if (ret < 0) {
            return ret;
        }
    }

    c->entries[i].offset = offset;

    /* And return the right table */
found:
    c->entries[i].ref++;
    *table = qcow2_cache_get_table_addr(bs, c, i);

    trace_qcow2_cache_get_done(qemu_coroutine_self(),
                               c == s->l2_table_cache, i);

    return 0;
}

int qcow2_cache_get(BlockDriverState *bs, Qcow2Cache *c, uint64_t offset,
    void **table)
{
    return qcow2_cache_do_get(bs, c, offset, table, true);
}

int qcow2_cache_get_empty(BlockDriverState *bs, Qcow2Cache *c, uint64_t offset,
    void **table)
{
    return qcow2_cache_do_get(bs, c, offset, table, false);
}

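/*
 * Dropping a reference: when the last user releases a table it gets a fresh
 * lru_counter stamp, so recently used tables are the last ones to be evicted
 * by qcow2_cache_do_get() or dropped by qcow2_cache_clean_unused().
 */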
void qcow2_cache_put(BlockDriverState *bs, Qcow2Cache *c, void **table)
{
    int i = qcow2_cache_get_table_idx(bs, c, *table);

    c->entries[i].ref--;
    *table = NULL;

    if (c->entries[i].ref == 0) {
        c->entries[i].lru_counter = ++c->lru_counter;
    }

    assert(c->entries[i].ref >= 0);
}

void qcow2_cache_entry_mark_dirty(BlockDriverState *bs, Qcow2Cache *c,
     void *table)
{
    int i = qcow2_cache_get_table_idx(bs, c, table);
    assert(c->entries[i].offset != 0);
    c->entries[i].dirty = true;
}