 * Copyright (c) 2003-2008 Fabrice Bellard
 * Copyright (c) 2011-2015 Red Hat Inc
 *
 * Authors:
 *  Juan Quintela <quintela@redhat.com>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */
#include "qemu/osdep.h"
#include "qemu/cutils.h"
#include "qemu/bitops.h"
#include "qemu/bitmap.h"
#include "qemu/main-loop.h"
#include "migration.h"
#include "migration/register.h"
#include "migration/misc.h"
#include "qemu-file.h"
#include "postcopy-ram.h"
#include "page_cache.h"
#include "qemu/error-report.h"
#include "qapi/error.h"
#include "qapi/qapi-events-migration.h"
#include "qapi/qmp/qerror.h"
#include "exec/ram_addr.h"
#include "exec/target_page.h"
#include "qemu/rcu_queue.h"
#include "migration/colo.h"
#include "sysemu/sysemu.h"
#include "qemu/uuid.h"
/***********************************************************/
/* ram save/restore */

/* RAM_SAVE_FLAG_ZERO used to be named RAM_SAVE_FLAG_COMPRESS; it
 * worked for pages that were filled with the same char.  We switched
 * it to only search for the zero value.  And to avoid confusion with
 * RAM_SAVE_FLAG_COMPRESS_PAGE just rename it.
 */

#define RAM_SAVE_FLAG_FULL     0x01 /* Obsolete, not used anymore */
#define RAM_SAVE_FLAG_ZERO     0x02
#define RAM_SAVE_FLAG_MEM_SIZE 0x04
#define RAM_SAVE_FLAG_PAGE     0x08
#define RAM_SAVE_FLAG_EOS      0x10
#define RAM_SAVE_FLAG_CONTINUE 0x20
#define RAM_SAVE_FLAG_XBZRLE   0x40
/* 0x80 is reserved in migration.h; start with 0x100 next */
#define RAM_SAVE_FLAG_COMPRESS_PAGE    0x100
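
/*
 * Illustrative note, not part of the original file: these flags are OR'ed
 * into the low bits of the target-page-aligned offset that save_page_header()
 * writes, so the low bits are free to carry them.  A receiver typically
 * splits the header word apart like this:
 *
 *     uint64_t header = qemu_get_be64(f);
 *     uint64_t flags  = header & ~TARGET_PAGE_MASK;   (flag bits)
 *     ram_addr_t addr = header & TARGET_PAGE_MASK;    (page address)
 */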
static inline bool is_zero_range(uint8_t *p, uint64_t size)
{
    return buffer_is_zero(p, size);
}

XBZRLECacheStats xbzrle_counters;

/* struct contains XBZRLE cache and a static page
   used by the compression */
static struct {
    /* buffer used for XBZRLE encoding */
    uint8_t *encoded_buf;
    /* buffer for storing page content */
    uint8_t *current_buf;
    /* Cache for XBZRLE, Protected by lock. */
    PageCache *cache;
    QemuMutex lock;
    /* it will store a page full of zeros */
    uint8_t *zero_target_page;
    /* buffer used for XBZRLE decoding */
    uint8_t *decoded_buf;
} XBZRLE;

static void XBZRLE_cache_lock(void)
{
    if (migrate_use_xbzrle())
        qemu_mutex_lock(&XBZRLE.lock);
}

static void XBZRLE_cache_unlock(void)
{
    if (migrate_use_xbzrle())
        qemu_mutex_unlock(&XBZRLE.lock);
}
/**
 * xbzrle_cache_resize: resize the xbzrle cache
 *
 * This function is called from qmp_migrate_set_cache_size in main
 * thread, possibly while a migration is in progress. A running
 * migration may be using the cache and might finish during this call,
 * hence changes to the cache are protected by XBZRLE.lock().
 *
 * Returns 0 for success or -1 for error
 *
 * @new_size: new cache size
 * @errp: set *errp if the check failed, with reason
 */
int xbzrle_cache_resize(int64_t new_size, Error **errp)
{
    PageCache *new_cache;
    int64_t ret = 0;

    /* Check for truncation */
    if (new_size != (size_t)new_size) {
        error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "cache size",
                   "exceeding address space");
        return -1;
    }

    if (new_size == migrate_xbzrle_cache_size()) {
        /* nothing to do */
        return 0;
    }

    XBZRLE_cache_lock();

    if (XBZRLE.cache != NULL) {
        new_cache = cache_init(new_size, TARGET_PAGE_SIZE, errp);
        if (!new_cache) {
            ret = -1;
            goto out;
        }

        cache_fini(XBZRLE.cache);
        XBZRLE.cache = new_cache;
    }
out:
    XBZRLE_cache_unlock();
    return ret;
}
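
/*
 * Illustrative note, not part of the original file: this resize path is
 * normally reached via QMP, e.g.
 *
 *     { "execute": "migrate-set-cache-size",
 *       "arguments": { "value": 536870912 } }
 *
 * which ends up calling xbzrle_cache_resize(512 * 1024 * 1024, &err)
 * under the XBZRLE lock while a migration may still be running.
 */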
/* Should be holding either ram_list.mutex, or the RCU lock. */
#define RAMBLOCK_FOREACH_MIGRATABLE(block)             \
    RAMBLOCK_FOREACH(block)                            \
        if (!qemu_ram_is_migratable(block)) {} else

static void ramblock_recv_map_init(void)
{
    RAMBlock *rb;

    RAMBLOCK_FOREACH_MIGRATABLE(rb) {
        assert(!rb->receivedmap);
        rb->receivedmap = bitmap_new(rb->max_length >> qemu_target_page_bits());
    }
}
int ramblock_recv_bitmap_test(RAMBlock *rb, void *host_addr)
{
    return test_bit(ramblock_recv_bitmap_offset(host_addr, rb),
                    rb->receivedmap);
}

bool ramblock_recv_bitmap_test_byte_offset(RAMBlock *rb, uint64_t byte_offset)
{
    return test_bit(byte_offset >> TARGET_PAGE_BITS, rb->receivedmap);
}

void ramblock_recv_bitmap_set(RAMBlock *rb, void *host_addr)
{
    set_bit_atomic(ramblock_recv_bitmap_offset(host_addr, rb), rb->receivedmap);
}

void ramblock_recv_bitmap_set_range(RAMBlock *rb, void *host_addr,
                                    size_t nr)
{
    bitmap_set_atomic(rb->receivedmap,
                      ramblock_recv_bitmap_offset(host_addr, rb),
                      nr);
}

#define  RAMBLOCK_RECV_BITMAP_ENDING  (0x0123456789abcdefULL)
/*
 * Format: bitmap_size (8 bytes) + whole_bitmap (N bytes).
 *
 * Returns >0 if success with sent bytes, or <0 if error.
 */
int64_t ramblock_recv_bitmap_send(QEMUFile *file,
                                  const char *block_name)
{
    RAMBlock *block = qemu_ram_block_by_name(block_name);
    unsigned long *le_bitmap, nbits;
    uint64_t size;

    if (!block) {
        error_report("%s: invalid block name: %s", __func__, block_name);
        return -1;
    }

    nbits = block->used_length >> TARGET_PAGE_BITS;

    /*
     * Make sure the tmp bitmap buffer is big enough, e.g., on 32bit
     * machines we may need 4 more bytes for padding (see below
     * comment). So extend it a bit beforehand.
     */
    le_bitmap = bitmap_new(nbits + BITS_PER_LONG);

    /*
     * Always use little endian when sending the bitmap. This is
     * required when the source and destination VMs are not using the
     * same endianness. (Note: big endian won't work.)
     */
    bitmap_to_le(le_bitmap, block->receivedmap, nbits);

    /* Size of the bitmap, in bytes */
    size = nbits / 8;

    /*
     * size is always aligned to 8 bytes for 64bit machines, but it
     * may not be true for 32bit machines. We need this padding to
     * make sure the migration can survive even between 32bit and
     * 64bit machines.
     */
    size = ROUND_UP(size, 8);

    qemu_put_be64(file, size);
    qemu_put_buffer(file, (const uint8_t *)le_bitmap, size);
    /*
     * Mark as an end, in case the middle part is screwed up due to
     * some "mysterious" reason.
     */
    qemu_put_be64(file, RAMBLOCK_RECV_BITMAP_ENDING);
    qemu_fflush(file);

    g_free(le_bitmap);

    if (qemu_file_get_error(file)) {
        return qemu_file_get_error(file);
    }

    return size + sizeof(size);
}
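
/*
 * Illustrative note, not part of the original file: the stream produced for
 * one block above is laid out roughly as
 *
 *     be64  size                         (bitmap length in bytes, 8-aligned)
 *     u8    le_bitmap[size]              (receivedmap, little-endian layout)
 *     be64  RAMBLOCK_RECV_BITMAP_ENDING
 *
 * and the loading side is expected to verify both the size and the ending
 * marker before trusting the bitmap contents.
 */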
/*
 * An outstanding page request, on the source, having been received
 * and queued
 */
struct RAMSrcPageRequest {
    RAMBlock *rb;
    hwaddr    offset;
    hwaddr    len;

    QSIMPLEQ_ENTRY(RAMSrcPageRequest) next_req;
};

/* State of RAM for migration */
struct RAMState {
    /* QEMUFile used for this migration */
    QEMUFile *f;
    /* Last block that we have visited searching for dirty pages */
    RAMBlock *last_seen_block;
    /* Last block from where we have sent data */
    RAMBlock *last_sent_block;
    /* Last dirty target page we have sent */
    ram_addr_t last_page;
    /* last ram version we have seen */
    uint32_t last_version;
    /* We are in the first round */
    bool ram_bulk_stage;
    /* How many times we have dirty too many pages */
    int dirty_rate_high_cnt;
    /* these variables are used for bitmap sync */
    /* last time we did a full bitmap_sync */
    int64_t time_last_bitmap_sync;
    /* bytes transferred at start_time */
    uint64_t bytes_xfer_prev;
    /* number of dirty pages since start_time */
    uint64_t num_dirty_pages_period;
    /* xbzrle misses since the beginning of the period */
    uint64_t xbzrle_cache_miss_prev;
    /* number of iterations at the beginning of period */
    uint64_t iterations_prev;
    /* Iterations since start */
    uint64_t iterations;
    /* number of dirty bits in the bitmap */
    uint64_t migration_dirty_pages;
    /* protects modification of the bitmap */
    QemuMutex bitmap_mutex;
    /* The RAMBlock used in the last src_page_requests */
    RAMBlock *last_req_rb;
    /* Queue of outstanding page requests from the destination */
    QemuMutex src_page_req_mutex;
    QSIMPLEQ_HEAD(src_page_requests, RAMSrcPageRequest) src_page_requests;
};
typedef struct RAMState RAMState;

static RAMState *ram_state;

uint64_t ram_bytes_remaining(void)
{
    return ram_state ? (ram_state->migration_dirty_pages * TARGET_PAGE_SIZE) :
                       0;
}

MigrationStats ram_counters;

/* used by the search for pages to send */
struct PageSearchStatus {
    /* Current block being searched */
    RAMBlock    *block;
    /* Current page to search from */
    unsigned long page;
    /* Set once we wrap around */
    bool         complete_round;
};
typedef struct PageSearchStatus PageSearchStatus;
struct CompressParam {
    bool done;
    bool quit;
    QEMUFile *file;
    QemuMutex mutex;
    QemuCond cond;
    RAMBlock *block;
    ram_addr_t offset;

    /* internally used fields */
    z_stream stream;
    uint8_t *originbuf;
};
typedef struct CompressParam CompressParam;

struct DecompressParam {
    bool done;
    bool quit;
    QemuMutex mutex;
    QemuCond cond;
    void *des;
    uint8_t *compbuf;
    int len;
    z_stream stream;
};
typedef struct DecompressParam DecompressParam;

static CompressParam *comp_param;
static QemuThread *compress_threads;
/* comp_done_cond is used to wake up the migration thread when
 * one of the compression threads has finished the compression.
 * comp_done_lock is used to co-work with comp_done_cond.
 */
static QemuMutex comp_done_lock;
static QemuCond comp_done_cond;
/* The empty QEMUFileOps will be used by file in CompressParam */
static const QEMUFileOps empty_ops = { };

static QEMUFile *decomp_file;
static DecompressParam *decomp_param;
static QemuThread *decompress_threads;
static QemuMutex decomp_done_lock;
static QemuCond decomp_done_cond;
static int do_compress_ram_page(QEMUFile *f, z_stream *stream, RAMBlock *block,
                                ram_addr_t offset, uint8_t *source_buf);

static void *do_data_compress(void *opaque)
{
    CompressParam *param = opaque;
    RAMBlock *block;
    ram_addr_t offset;

    qemu_mutex_lock(&param->mutex);
    while (!param->quit) {
        if (param->block) {
            block = param->block;
            offset = param->offset;
            param->block = NULL;
            qemu_mutex_unlock(&param->mutex);

            do_compress_ram_page(param->file, &param->stream, block, offset,
                                 param->originbuf);

            qemu_mutex_lock(&comp_done_lock);
            param->done = true;
            qemu_cond_signal(&comp_done_cond);
            qemu_mutex_unlock(&comp_done_lock);

            qemu_mutex_lock(&param->mutex);
        } else {
            qemu_cond_wait(&param->cond, &param->mutex);
        }
    }
    qemu_mutex_unlock(&param->mutex);

    return NULL;
}
static inline void terminate_compression_threads(void)
{
    int idx, thread_count;

    thread_count = migrate_compress_threads();

    for (idx = 0; idx < thread_count; idx++) {
        qemu_mutex_lock(&comp_param[idx].mutex);
        comp_param[idx].quit = true;
        qemu_cond_signal(&comp_param[idx].cond);
        qemu_mutex_unlock(&comp_param[idx].mutex);
    }
}
static void compress_threads_save_cleanup(void)
{
    int i, thread_count;

    if (!migrate_use_compression()) {
        return;
    }
    terminate_compression_threads();
    thread_count = migrate_compress_threads();
    for (i = 0; i < thread_count; i++) {
        /*
         * we use it as an indicator which shows if the thread is
         * properly init'd or not
         */
        if (!comp_param[i].file) {
            break;
        }
        qemu_thread_join(compress_threads + i);
        qemu_mutex_destroy(&comp_param[i].mutex);
        qemu_cond_destroy(&comp_param[i].cond);
        deflateEnd(&comp_param[i].stream);
        g_free(comp_param[i].originbuf);
        qemu_fclose(comp_param[i].file);
        comp_param[i].file = NULL;
    }
    qemu_mutex_destroy(&comp_done_lock);
    qemu_cond_destroy(&comp_done_cond);
    g_free(compress_threads);
    g_free(comp_param);
    compress_threads = NULL;
    comp_param = NULL;
}
static int compress_threads_save_setup(void)
{
    int i, thread_count;

    if (!migrate_use_compression()) {
        return 0;
    }
    thread_count = migrate_compress_threads();
    compress_threads = g_new0(QemuThread, thread_count);
    comp_param = g_new0(CompressParam, thread_count);
    qemu_cond_init(&comp_done_cond);
    qemu_mutex_init(&comp_done_lock);
    for (i = 0; i < thread_count; i++) {
        comp_param[i].originbuf = g_try_malloc(TARGET_PAGE_SIZE);
        if (!comp_param[i].originbuf) {
            goto exit;
        }

        if (deflateInit(&comp_param[i].stream,
                        migrate_compress_level()) != Z_OK) {
            g_free(comp_param[i].originbuf);
            goto exit;
        }

        /* comp_param[i].file is just used as a dummy buffer to save data,
         * set its ops to empty.
         */
        comp_param[i].file = qemu_fopen_ops(NULL, &empty_ops);
        comp_param[i].done = true;
        comp_param[i].quit = false;
        qemu_mutex_init(&comp_param[i].mutex);
        qemu_cond_init(&comp_param[i].cond);
        qemu_thread_create(compress_threads + i, "compress",
                           do_data_compress, comp_param + i,
                           QEMU_THREAD_JOINABLE);
    }
    return 0;

exit:
    compress_threads_save_cleanup();
    return -1;
}
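
/*
 * Illustrative note, not part of the original file: setup and cleanup are
 * meant to be paired around a migration that has compression enabled,
 * roughly:
 *
 *     if (compress_threads_save_setup() != 0) {
 *         return -1;                        (setup already cleaned up)
 *     }
 *     ...migrate RAM, handing pages to idle workers via comp_param[]...
 *     compress_threads_save_cleanup();      (joins and frees the workers)
 *
 * comp_param[i].file doubles as the "is this slot initialised?" marker that
 * the cleanup loop above checks.
 */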
#define MULTIFD_MAGIC 0x11223344U
#define MULTIFD_VERSION 1

typedef struct {
    uint32_t magic;
    uint32_t version;
    unsigned char uuid[16]; /* QemuUUID */
    uint8_t id;
} __attribute__((packed)) MultiFDInit_t;
/* these fields are not changed once the thread is created */
522 /* channel thread name */
524 /* channel thread id */
526 /* communication channel */
528 /* sem where to wait for more work */
530 /* this mutex protects the following parameters */
532 /* is this channel thread running */
534 /* should this thread finish */
/* these fields are not changed once the thread is created */
542 /* channel thread name */
544 /* channel thread id */
546 /* communication channel */
548 /* sem where to wait for more work */
550 /* this mutex protects the following parameters */
552 /* is this channel thread running */
554 /* should this thread finish */
558 static int multifd_send_initial_packet(MultiFDSendParams
*p
, Error
**errp
)
563 msg
.magic
= cpu_to_be32(MULTIFD_MAGIC
);
564 msg
.version
= cpu_to_be32(MULTIFD_VERSION
);
566 memcpy(msg
.uuid
, &qemu_uuid
.data
, sizeof(msg
.uuid
));
568 ret
= qio_channel_write_all(p
->c
, (char *)&msg
, sizeof(msg
), errp
);
575 static int multifd_recv_initial_packet(QIOChannel
*c
, Error
**errp
)
580 ret
= qio_channel_read_all(c
, (char *)&msg
, sizeof(msg
), errp
);
585 be32_to_cpus(&msg
.magic
);
586 be32_to_cpus(&msg
.version
);
588 if (msg
.magic
!= MULTIFD_MAGIC
) {
589 error_setg(errp
, "multifd: received packet magic %x "
590 "expected %x", msg
.magic
, MULTIFD_MAGIC
);
594 if (msg
.version
!= MULTIFD_VERSION
) {
595 error_setg(errp
, "multifd: received packet version %d "
596 "expected %d", msg
.version
, MULTIFD_VERSION
);
600 if (memcmp(msg
.uuid
, &qemu_uuid
, sizeof(qemu_uuid
))) {
601 char *uuid
= qemu_uuid_unparse_strdup(&qemu_uuid
);
602 char *msg_uuid
= qemu_uuid_unparse_strdup((const QemuUUID
*)msg
.uuid
);
604 error_setg(errp
, "multifd: received uuid '%s' and expected "
605 "uuid '%s' for channel %hhd", msg_uuid
, uuid
, msg
.id
);
611 if (msg
.id
> migrate_multifd_channels()) {
        error_setg(errp, "multifd: received channel id %d is bigger than "
                   "number of channels %d", msg.id, migrate_multifd_channels());
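
/*
 * Illustrative note, not part of the original file: the initial packet
 * exchanged on every multifd channel is the packed MultiFDInit_t above,
 * i.e. on the wire:
 *
 *     be32 magic      (MULTIFD_MAGIC, 0x11223344)
 *     be32 version    (MULTIFD_VERSION, 1)
 *     u8   uuid[16]   (QemuUUID of the source)
 *     u8   id         (channel number)
 *
 * The receiver rejects a channel whose magic, version or uuid does not
 * match, which keeps a stray connection from being mistaken for a
 * migration channel.
 */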
621 MultiFDSendParams
*params
;
622 /* number of created threads */
624 } *multifd_send_state
;
626 static void multifd_send_terminate_threads(Error
*err
)
631 MigrationState
*s
= migrate_get_current();
632 migrate_set_error(s
, err
);
633 if (s
->state
== MIGRATION_STATUS_SETUP
||
634 s
->state
== MIGRATION_STATUS_PRE_SWITCHOVER
||
635 s
->state
== MIGRATION_STATUS_DEVICE
||
636 s
->state
== MIGRATION_STATUS_ACTIVE
) {
637 migrate_set_state(&s
->state
, s
->state
,
638 MIGRATION_STATUS_FAILED
);
642 for (i
= 0; i
< migrate_multifd_channels(); i
++) {
643 MultiFDSendParams
*p
= &multifd_send_state
->params
[i
];
645 qemu_mutex_lock(&p
->mutex
);
647 qemu_sem_post(&p
->sem
);
648 qemu_mutex_unlock(&p
->mutex
);
652 int multifd_save_cleanup(Error
**errp
)
657 if (!migrate_use_multifd()) {
660 multifd_send_terminate_threads(NULL
);
661 for (i
= 0; i
< migrate_multifd_channels(); i
++) {
662 MultiFDSendParams
*p
= &multifd_send_state
->params
[i
];
665 qemu_thread_join(&p
->thread
);
667 socket_send_channel_destroy(p
->c
);
669 qemu_mutex_destroy(&p
->mutex
);
670 qemu_sem_destroy(&p
->sem
);
674 g_free(multifd_send_state
->params
);
675 multifd_send_state
->params
= NULL
;
676 g_free(multifd_send_state
);
677 multifd_send_state
= NULL
;
681 static void *multifd_send_thread(void *opaque
)
683 MultiFDSendParams
*p
= opaque
;
684 Error
*local_err
= NULL
;
686 if (multifd_send_initial_packet(p
, &local_err
) < 0) {
691 qemu_mutex_lock(&p
->mutex
);
693 qemu_mutex_unlock(&p
->mutex
);
696 qemu_mutex_unlock(&p
->mutex
);
697 qemu_sem_wait(&p
->sem
);
702 multifd_send_terminate_threads(local_err
);
705 qemu_mutex_lock(&p
->mutex
);
707 qemu_mutex_unlock(&p
->mutex
);
712 static void multifd_new_send_channel_async(QIOTask
*task
, gpointer opaque
)
714 MultiFDSendParams
*p
= opaque
;
715 QIOChannel
*sioc
= QIO_CHANNEL(qio_task_get_source(task
));
716 Error
*local_err
= NULL
;
718 if (qio_task_propagate_error(task
, &local_err
)) {
719 if (multifd_save_cleanup(&local_err
) != 0) {
720 migrate_set_error(migrate_get_current(), local_err
);
723 p
->c
= QIO_CHANNEL(sioc
);
724 qio_channel_set_delay(p
->c
, false);
726 qemu_thread_create(&p
->thread
, p
->name
, multifd_send_thread
, p
,
727 QEMU_THREAD_JOINABLE
);
729 atomic_inc(&multifd_send_state
->count
);
733 int multifd_save_setup(void)
738 if (!migrate_use_multifd()) {
741 thread_count
= migrate_multifd_channels();
742 multifd_send_state
= g_malloc0(sizeof(*multifd_send_state
));
743 multifd_send_state
->params
= g_new0(MultiFDSendParams
, thread_count
);
744 atomic_set(&multifd_send_state
->count
, 0);
745 for (i
= 0; i
< thread_count
; i
++) {
746 MultiFDSendParams
*p
= &multifd_send_state
->params
[i
];
748 qemu_mutex_init(&p
->mutex
);
749 qemu_sem_init(&p
->sem
, 0);
752 p
->name
= g_strdup_printf("multifdsend_%d", i
);
753 socket_send_channel_create(multifd_new_send_channel_async
, p
);
759 MultiFDRecvParams
*params
;
760 /* number of created threads */
762 } *multifd_recv_state
;
764 static void multifd_recv_terminate_threads(Error
*err
)
769 MigrationState
*s
= migrate_get_current();
770 migrate_set_error(s
, err
);
771 if (s
->state
== MIGRATION_STATUS_SETUP
||
772 s
->state
== MIGRATION_STATUS_ACTIVE
) {
773 migrate_set_state(&s
->state
, s
->state
,
774 MIGRATION_STATUS_FAILED
);
778 for (i
= 0; i
< migrate_multifd_channels(); i
++) {
779 MultiFDRecvParams
*p
= &multifd_recv_state
->params
[i
];
781 qemu_mutex_lock(&p
->mutex
);
783 qemu_sem_post(&p
->sem
);
784 qemu_mutex_unlock(&p
->mutex
);
788 int multifd_load_cleanup(Error
**errp
)
793 if (!migrate_use_multifd()) {
796 multifd_recv_terminate_threads(NULL
);
797 for (i
= 0; i
< migrate_multifd_channels(); i
++) {
798 MultiFDRecvParams
*p
= &multifd_recv_state
->params
[i
];
801 qemu_thread_join(&p
->thread
);
803 object_unref(OBJECT(p
->c
));
805 qemu_mutex_destroy(&p
->mutex
);
806 qemu_sem_destroy(&p
->sem
);
810 g_free(multifd_recv_state
->params
);
811 multifd_recv_state
->params
= NULL
;
812 g_free(multifd_recv_state
);
813 multifd_recv_state
= NULL
;
818 static void *multifd_recv_thread(void *opaque
)
820 MultiFDRecvParams
*p
= opaque
;
823 qemu_mutex_lock(&p
->mutex
);
825 qemu_mutex_unlock(&p
->mutex
);
828 qemu_mutex_unlock(&p
->mutex
);
829 qemu_sem_wait(&p
->sem
);
832 qemu_mutex_lock(&p
->mutex
);
834 qemu_mutex_unlock(&p
->mutex
);
839 int multifd_load_setup(void)
844 if (!migrate_use_multifd()) {
847 thread_count
= migrate_multifd_channels();
848 multifd_recv_state
= g_malloc0(sizeof(*multifd_recv_state
));
849 multifd_recv_state
->params
= g_new0(MultiFDRecvParams
, thread_count
);
850 atomic_set(&multifd_recv_state
->count
, 0);
851 for (i
= 0; i
< thread_count
; i
++) {
852 MultiFDRecvParams
*p
= &multifd_recv_state
->params
[i
];
854 qemu_mutex_init(&p
->mutex
);
855 qemu_sem_init(&p
->sem
, 0);
858 p
->name
= g_strdup_printf("multifdrecv_%d", i
);
863 bool multifd_recv_all_channels_created(void)
865 int thread_count
= migrate_multifd_channels();
867 if (!migrate_use_multifd()) {
871 return thread_count
== atomic_read(&multifd_recv_state
->count
);
874 void multifd_recv_new_channel(QIOChannel
*ioc
)
876 MultiFDRecvParams
*p
;
877 Error
*local_err
= NULL
;
880 id
= multifd_recv_initial_packet(ioc
, &local_err
);
882 multifd_recv_terminate_threads(local_err
);
886 p
= &multifd_recv_state
->params
[id
];
888 error_setg(&local_err
, "multifd: received id '%d' already setup'",
890 multifd_recv_terminate_threads(local_err
);
894 object_ref(OBJECT(ioc
));
897 qemu_thread_create(&p
->thread
, p
->name
, multifd_recv_thread
, p
,
898 QEMU_THREAD_JOINABLE
);
899 atomic_inc(&multifd_recv_state
->count
);
900 if (multifd_recv_state
->count
== migrate_multifd_channels()) {
901 migration_incoming_process();
906 * save_page_header: write page header to wire
908 * If this is the 1st block, it also writes the block identification
910 * Returns the number of bytes written
912 * @f: QEMUFile where to send the data
913 * @block: block that contains the page we want to send
914 * @offset: offset inside the block for the page
915 * in the lower bits, it contains flags
917 static size_t save_page_header(RAMState
*rs
, QEMUFile
*f
, RAMBlock
*block
,
922 if (block
== rs
->last_sent_block
) {
923 offset
|= RAM_SAVE_FLAG_CONTINUE
;
925 qemu_put_be64(f
, offset
);
928 if (!(offset
& RAM_SAVE_FLAG_CONTINUE
)) {
929 len
= strlen(block
->idstr
);
930 qemu_put_byte(f
, len
);
931 qemu_put_buffer(f
, (uint8_t *)block
->idstr
, len
);
933 rs
->last_sent_block
= block
;
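
/*
 * Illustrative note, not part of the original file: the header produced
 * above is a single be64 word holding "offset | flags"; only when
 * RAM_SAVE_FLAG_CONTINUE is absent is it followed by the block name:
 *
 *     be64 offset|flags
 *     [ u8 len, u8 idstr[len] ]      (only for the first page of a block)
 *
 * so the return value is 8 bytes, or 8 + 1 + strlen(idstr) bytes.
 */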
 * mig_throttle_guest_down: throttle down the guest
941 * Reduce amount of guest cpu execution to hopefully slow down memory
942 * writes. If guest dirty memory rate is reduced below the rate at
943 * which we can transfer pages to the destination then we should be
944 * able to complete migration. Some workloads dirty memory way too
945 * fast and will not effectively converge, even with auto-converge.
947 static void mig_throttle_guest_down(void)
949 MigrationState
*s
= migrate_get_current();
950 uint64_t pct_initial
= s
->parameters
.cpu_throttle_initial
;
951 uint64_t pct_icrement
= s
->parameters
.cpu_throttle_increment
;
953 /* We have not started throttling yet. Let's start it. */
954 if (!cpu_throttle_active()) {
955 cpu_throttle_set(pct_initial
);
957 /* Throttling already on, just increase the rate */
958 cpu_throttle_set(cpu_throttle_get_percentage() + pct_icrement
);
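
/*
 * Illustrative note, not part of the original file, using the usual default
 * parameter values: with cpu-throttle-initial=20 and
 * cpu-throttle-increment=10, the first call throttles the guest to 20%, and
 * each further call while migration keeps failing to converge raises it to
 * 30%, 40%, ... up to the limit enforced by cpu_throttle_set().
 */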
963 * xbzrle_cache_zero_page: insert a zero page in the XBZRLE cache
965 * @rs: current RAM state
966 * @current_addr: address for the zero page
968 * Update the xbzrle cache to reflect a page that's been sent as all 0.
969 * The important thing is that a stale (not-yet-0'd) page be replaced
971 * As a bonus, if the page wasn't in the cache it gets added so that
972 * when a small write is made into the 0'd page it gets XBZRLE sent.
974 static void xbzrle_cache_zero_page(RAMState
*rs
, ram_addr_t current_addr
)
976 if (rs
->ram_bulk_stage
|| !migrate_use_xbzrle()) {
980 /* We don't care if this fails to allocate a new cache page
981 * as long as it updated an old one */
982 cache_insert(XBZRLE
.cache
, current_addr
, XBZRLE
.zero_target_page
,
983 ram_counters
.dirty_sync_count
);
986 #define ENCODING_FLAG_XBZRLE 0x1
989 * save_xbzrle_page: compress and send current page
991 * Returns: 1 means that we wrote the page
992 * 0 means that page is identical to the one already sent
993 * -1 means that xbzrle would be longer than normal
995 * @rs: current RAM state
996 * @current_data: pointer to the address of the page contents
997 * @current_addr: addr of the page
998 * @block: block that contains the page we want to send
999 * @offset: offset inside the block for the page
1000 * @last_stage: if we are at the completion stage
1002 static int save_xbzrle_page(RAMState
*rs
, uint8_t **current_data
,
1003 ram_addr_t current_addr
, RAMBlock
*block
,
1004 ram_addr_t offset
, bool last_stage
)
1006 int encoded_len
= 0, bytes_xbzrle
;
1007 uint8_t *prev_cached_page
;
1009 if (!cache_is_cached(XBZRLE
.cache
, current_addr
,
1010 ram_counters
.dirty_sync_count
)) {
1011 xbzrle_counters
.cache_miss
++;
1013 if (cache_insert(XBZRLE
.cache
, current_addr
, *current_data
,
1014 ram_counters
.dirty_sync_count
) == -1) {
1017 /* update *current_data when the page has been
1018 inserted into cache */
1019 *current_data
= get_cached_data(XBZRLE
.cache
, current_addr
);
1025 prev_cached_page
= get_cached_data(XBZRLE
.cache
, current_addr
);
1027 /* save current buffer into memory */
1028 memcpy(XBZRLE
.current_buf
, *current_data
, TARGET_PAGE_SIZE
);
1030 /* XBZRLE encoding (if there is no overflow) */
1031 encoded_len
= xbzrle_encode_buffer(prev_cached_page
, XBZRLE
.current_buf
,
1032 TARGET_PAGE_SIZE
, XBZRLE
.encoded_buf
,
1034 if (encoded_len
== 0) {
1035 trace_save_xbzrle_page_skipping();
1037 } else if (encoded_len
== -1) {
1038 trace_save_xbzrle_page_overflow();
1039 xbzrle_counters
.overflow
++;
1040 /* update data in the cache */
1042 memcpy(prev_cached_page
, *current_data
, TARGET_PAGE_SIZE
);
1043 *current_data
= prev_cached_page
;
1048 /* we need to update the data in the cache, in order to get the same data */
1050 memcpy(prev_cached_page
, XBZRLE
.current_buf
, TARGET_PAGE_SIZE
);
1053 /* Send XBZRLE based compressed page */
1054 bytes_xbzrle
= save_page_header(rs
, rs
->f
, block
,
1055 offset
| RAM_SAVE_FLAG_XBZRLE
);
1056 qemu_put_byte(rs
->f
, ENCODING_FLAG_XBZRLE
);
1057 qemu_put_be16(rs
->f
, encoded_len
);
1058 qemu_put_buffer(rs
->f
, XBZRLE
.encoded_buf
, encoded_len
);
1059 bytes_xbzrle
+= encoded_len
+ 1 + 2;
1060 xbzrle_counters
.pages
++;
1061 xbzrle_counters
.bytes
+= bytes_xbzrle
;
1062 ram_counters
.transferred
+= bytes_xbzrle
;
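
/*
 * Illustrative note, not part of the original file: an XBZRLE page ends up
 * on the wire as
 *
 *     save_page_header(..., offset | RAM_SAVE_FLAG_XBZRLE)
 *     u8   ENCODING_FLAG_XBZRLE                    (+1 byte)
 *     be16 encoded_len                             (+2 bytes)
 *     u8   encoded_buf[encoded_len]
 *
 * which is exactly what the "bytes_xbzrle += encoded_len + 1 + 2"
 * accounting above reflects.
 */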
1068 * migration_bitmap_find_dirty: find the next dirty page from start
1070 * Called with rcu_read_lock() to protect migration_bitmap
1072 * Returns the byte offset within memory region of the start of a dirty page
1074 * @rs: current RAM state
1075 * @rb: RAMBlock where to search for dirty pages
1076 * @start: page where we start the search
1079 unsigned long migration_bitmap_find_dirty(RAMState
*rs
, RAMBlock
*rb
,
1080 unsigned long start
)
1082 unsigned long size
= rb
->used_length
>> TARGET_PAGE_BITS
;
1083 unsigned long *bitmap
= rb
->bmap
;
1086 if (!qemu_ram_is_migratable(rb
)) {
1090 if (rs
->ram_bulk_stage
&& start
> 0) {
1093 next
= find_next_bit(bitmap
, size
, start
);
1099 static inline bool migration_bitmap_clear_dirty(RAMState
*rs
,
1105 ret
= test_and_clear_bit(page
, rb
->bmap
);
1108 rs
->migration_dirty_pages
--;
1113 static void migration_bitmap_sync_range(RAMState
*rs
, RAMBlock
*rb
,
1114 ram_addr_t start
, ram_addr_t length
)
1116 rs
->migration_dirty_pages
+=
1117 cpu_physical_memory_sync_dirty_bitmap(rb
, start
, length
,
1118 &rs
->num_dirty_pages_period
);
1122 * ram_pagesize_summary: calculate all the pagesizes of a VM
1124 * Returns a summary bitmap of the page sizes of all RAMBlocks
1126 * For VMs with just normal pages this is equivalent to the host page
1127 * size. If it's got some huge pages then it's the OR of all the
1128 * different page sizes.
1130 uint64_t ram_pagesize_summary(void)
1133 uint64_t summary
= 0;
1135 RAMBLOCK_FOREACH_MIGRATABLE(block
) {
1136 summary
|= block
->page_size
;
1142 static void migration_bitmap_sync(RAMState
*rs
)
1146 uint64_t bytes_xfer_now
;
1148 ram_counters
.dirty_sync_count
++;
1150 if (!rs
->time_last_bitmap_sync
) {
1151 rs
->time_last_bitmap_sync
= qemu_clock_get_ms(QEMU_CLOCK_REALTIME
);
1154 trace_migration_bitmap_sync_start();
1155 memory_global_dirty_log_sync();
1157 qemu_mutex_lock(&rs
->bitmap_mutex
);
1159 RAMBLOCK_FOREACH_MIGRATABLE(block
) {
1160 migration_bitmap_sync_range(rs
, block
, 0, block
->used_length
);
1163 qemu_mutex_unlock(&rs
->bitmap_mutex
);
1165 trace_migration_bitmap_sync_end(rs
->num_dirty_pages_period
);
1167 end_time
= qemu_clock_get_ms(QEMU_CLOCK_REALTIME
);
/* more than 1 second = 1000 milliseconds */
1170 if (end_time
> rs
->time_last_bitmap_sync
+ 1000) {
1171 /* calculate period counters */
1172 ram_counters
.dirty_pages_rate
= rs
->num_dirty_pages_period
* 1000
1173 / (end_time
- rs
->time_last_bitmap_sync
);
1174 bytes_xfer_now
= ram_counters
.transferred
;
1176 /* During block migration the auto-converge logic incorrectly detects
1177 * that ram migration makes no progress. Avoid this by disabling the
1178 * throttling logic during the bulk phase of block migration. */
1179 if (migrate_auto_converge() && !blk_mig_bulk_active()) {
1180 /* The following detection logic can be refined later. For now:
1181 Check to see if the dirtied bytes is 50% more than the approx.
1182 amount of bytes that just got transferred since the last time we
1183 were in this routine. If that happens twice, start or increase
1186 if ((rs
->num_dirty_pages_period
* TARGET_PAGE_SIZE
>
1187 (bytes_xfer_now
- rs
->bytes_xfer_prev
) / 2) &&
1188 (++rs
->dirty_rate_high_cnt
>= 2)) {
1189 trace_migration_throttle();
1190 rs
->dirty_rate_high_cnt
= 0;
1191 mig_throttle_guest_down();
1195 if (migrate_use_xbzrle()) {
1196 if (rs
->iterations_prev
!= rs
->iterations
) {
1197 xbzrle_counters
.cache_miss_rate
=
1198 (double)(xbzrle_counters
.cache_miss
-
1199 rs
->xbzrle_cache_miss_prev
) /
1200 (rs
->iterations
- rs
->iterations_prev
);
1202 rs
->iterations_prev
= rs
->iterations
;
1203 rs
->xbzrle_cache_miss_prev
= xbzrle_counters
.cache_miss
;
1206 /* reset period counters */
1207 rs
->time_last_bitmap_sync
= end_time
;
1208 rs
->num_dirty_pages_period
= 0;
1209 rs
->bytes_xfer_prev
= bytes_xfer_now
;
1211 if (migrate_use_events()) {
1212 qapi_event_send_migration_pass(ram_counters
.dirty_sync_count
, NULL
);
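
/*
 * Illustrative note, not part of the original file: the auto-converge
 * trigger above compares bytes dirtied against bytes sent during the last
 * period, e.g. with a 4KiB target page:
 *
 *     dirtied = num_dirty_pages_period * TARGET_PAGE_SIZE   (say 300 MB)
 *     sent    = bytes_xfer_now - bytes_xfer_prev            (say 100 MB)
 *
 * 300 MB > 100 MB / 2, so dirty_rate_high_cnt is bumped; two such periods
 * in a row call mig_throttle_guest_down().
 */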
1217 * save_zero_page: send the zero page to the stream
1219 * Returns the number of pages written.
1221 * @rs: current RAM state
1222 * @block: block that contains the page we want to send
1223 * @offset: offset inside the block for the page
1225 static int save_zero_page(RAMState
*rs
, RAMBlock
*block
, ram_addr_t offset
)
1227 uint8_t *p
= block
->host
+ offset
;
1230 if (is_zero_range(p
, TARGET_PAGE_SIZE
)) {
1231 ram_counters
.duplicate
++;
1232 ram_counters
.transferred
+=
1233 save_page_header(rs
, rs
->f
, block
, offset
| RAM_SAVE_FLAG_ZERO
);
1234 qemu_put_byte(rs
->f
, 0);
1235 ram_counters
.transferred
+= 1;
1242 static void ram_release_pages(const char *rbname
, uint64_t offset
, int pages
)
1244 if (!migrate_release_ram() || !migration_in_postcopy()) {
1248 ram_discard_range(rbname
, offset
, pages
<< TARGET_PAGE_BITS
);
1252 * @pages: the number of pages written by the control path,
1254 * > 0 - number of pages written
1256 * Return true if the pages has been saved, otherwise false is returned.
1258 static bool control_save_page(RAMState
*rs
, RAMBlock
*block
, ram_addr_t offset
,
1261 uint64_t bytes_xmit
= 0;
1265 ret
= ram_control_save_page(rs
->f
, block
->offset
, offset
, TARGET_PAGE_SIZE
,
1267 if (ret
== RAM_SAVE_CONTROL_NOT_SUPP
) {
1272 ram_counters
.transferred
+= bytes_xmit
;
1276 if (ret
== RAM_SAVE_CONTROL_DELAYED
) {
1280 if (bytes_xmit
> 0) {
1281 ram_counters
.normal
++;
1282 } else if (bytes_xmit
== 0) {
1283 ram_counters
.duplicate
++;
1290 * directly send the page to the stream
1292 * Returns the number of pages written.
1294 * @rs: current RAM state
1295 * @block: block that contains the page we want to send
1296 * @offset: offset inside the block for the page
1297 * @buf: the page to be sent
 * @async: send the page asynchronously
1300 static int save_normal_page(RAMState
*rs
, RAMBlock
*block
, ram_addr_t offset
,
1301 uint8_t *buf
, bool async
)
1303 ram_counters
.transferred
+= save_page_header(rs
, rs
->f
, block
,
1304 offset
| RAM_SAVE_FLAG_PAGE
);
1306 qemu_put_buffer_async(rs
->f
, buf
, TARGET_PAGE_SIZE
,
1307 migrate_release_ram() &
1308 migration_in_postcopy());
1310 qemu_put_buffer(rs
->f
, buf
, TARGET_PAGE_SIZE
);
1312 ram_counters
.transferred
+= TARGET_PAGE_SIZE
;
1313 ram_counters
.normal
++;
1318 * ram_save_page: send the given page to the stream
1320 * Returns the number of pages written.
1322 * >=0 - Number of pages written - this might legally be 0
1323 * if xbzrle noticed the page was the same.
1325 * @rs: current RAM state
1326 * @block: block that contains the page we want to send
1327 * @offset: offset inside the block for the page
1328 * @last_stage: if we are at the completion stage
1330 static int ram_save_page(RAMState
*rs
, PageSearchStatus
*pss
, bool last_stage
)
1334 bool send_async
= true;
1335 RAMBlock
*block
= pss
->block
;
1336 ram_addr_t offset
= pss
->page
<< TARGET_PAGE_BITS
;
1337 ram_addr_t current_addr
= block
->offset
+ offset
;
1339 p
= block
->host
+ offset
;
1340 trace_ram_save_page(block
->idstr
, (uint64_t)offset
, p
);
1342 XBZRLE_cache_lock();
1343 if (!rs
->ram_bulk_stage
&& !migration_in_postcopy() &&
1344 migrate_use_xbzrle()) {
1345 pages
= save_xbzrle_page(rs
, &p
, current_addr
, block
,
1346 offset
, last_stage
);
1348 /* Can't send this cached data async, since the cache page
1349 * might get updated before it gets to the wire
1355 /* XBZRLE overflow or normal page */
1357 pages
= save_normal_page(rs
, block
, offset
, p
, send_async
);
1360 XBZRLE_cache_unlock();
1365 static int do_compress_ram_page(QEMUFile
*f
, z_stream
*stream
, RAMBlock
*block
,
1366 ram_addr_t offset
, uint8_t *source_buf
)
1368 RAMState
*rs
= ram_state
;
1369 int bytes_sent
, blen
;
1370 uint8_t *p
= block
->host
+ (offset
& TARGET_PAGE_MASK
);
1372 bytes_sent
= save_page_header(rs
, f
, block
, offset
|
1373 RAM_SAVE_FLAG_COMPRESS_PAGE
);
 * copy it to an internal buffer to avoid it being modified by the VM
 * so that we can catch the error during compression and
1380 memcpy(source_buf
, p
, TARGET_PAGE_SIZE
);
1381 blen
= qemu_put_compression_data(f
, stream
, source_buf
, TARGET_PAGE_SIZE
);
1384 qemu_file_set_error(migrate_get_current()->to_dst_file
, blen
);
1385 error_report("compressed data failed!");
1388 ram_release_pages(block
->idstr
, offset
& TARGET_PAGE_MASK
, 1);
1394 static void flush_compressed_data(RAMState
*rs
)
1396 int idx
, len
, thread_count
;
1398 if (!migrate_use_compression()) {
1401 thread_count
= migrate_compress_threads();
1403 qemu_mutex_lock(&comp_done_lock
);
1404 for (idx
= 0; idx
< thread_count
; idx
++) {
1405 while (!comp_param
[idx
].done
) {
1406 qemu_cond_wait(&comp_done_cond
, &comp_done_lock
);
1409 qemu_mutex_unlock(&comp_done_lock
);
1411 for (idx
= 0; idx
< thread_count
; idx
++) {
1412 qemu_mutex_lock(&comp_param
[idx
].mutex
);
1413 if (!comp_param
[idx
].quit
) {
1414 len
= qemu_put_qemu_file(rs
->f
, comp_param
[idx
].file
);
1415 ram_counters
.transferred
+= len
;
1417 qemu_mutex_unlock(&comp_param
[idx
].mutex
);
1421 static inline void set_compress_params(CompressParam
*param
, RAMBlock
*block
,
1424 param
->block
= block
;
1425 param
->offset
= offset
;
1428 static int compress_page_with_multi_thread(RAMState
*rs
, RAMBlock
*block
,
1431 int idx
, thread_count
, bytes_xmit
= -1, pages
= -1;
1433 thread_count
= migrate_compress_threads();
1434 qemu_mutex_lock(&comp_done_lock
);
1436 for (idx
= 0; idx
< thread_count
; idx
++) {
1437 if (comp_param
[idx
].done
) {
1438 comp_param
[idx
].done
= false;
1439 bytes_xmit
= qemu_put_qemu_file(rs
->f
, comp_param
[idx
].file
);
1440 qemu_mutex_lock(&comp_param
[idx
].mutex
);
1441 set_compress_params(&comp_param
[idx
], block
, offset
);
1442 qemu_cond_signal(&comp_param
[idx
].cond
);
1443 qemu_mutex_unlock(&comp_param
[idx
].mutex
);
1445 ram_counters
.normal
++;
1446 ram_counters
.transferred
+= bytes_xmit
;
1453 qemu_cond_wait(&comp_done_cond
, &comp_done_lock
);
1456 qemu_mutex_unlock(&comp_done_lock
);
1462 * find_dirty_block: find the next dirty page and update any state
1463 * associated with the search process.
1465 * Returns if a page is found
1467 * @rs: current RAM state
1468 * @pss: data about the state of the current dirty page scan
1469 * @again: set to false if the search has scanned the whole of RAM
1471 static bool find_dirty_block(RAMState
*rs
, PageSearchStatus
*pss
, bool *again
)
1473 pss
->page
= migration_bitmap_find_dirty(rs
, pss
->block
, pss
->page
);
1474 if (pss
->complete_round
&& pss
->block
== rs
->last_seen_block
&&
1475 pss
->page
>= rs
->last_page
) {
1477 * We've been once around the RAM and haven't found anything.
1483 if ((pss
->page
<< TARGET_PAGE_BITS
) >= pss
->block
->used_length
) {
1484 /* Didn't find anything in this RAM Block */
1486 pss
->block
= QLIST_NEXT_RCU(pss
->block
, next
);
1488 /* Hit the end of the list */
1489 pss
->block
= QLIST_FIRST_RCU(&ram_list
.blocks
);
1490 /* Flag that we've looped */
1491 pss
->complete_round
= true;
1492 rs
->ram_bulk_stage
= false;
1493 if (migrate_use_xbzrle()) {
1494 /* If xbzrle is on, stop using the data compression at this
1495 * point. In theory, xbzrle can do better than compression.
1497 flush_compressed_data(rs
);
1500 /* Didn't find anything this time, but try again on the new block */
1504 /* Can go around again, but... */
1506 /* We've found something so probably don't need to */
1512 * unqueue_page: gets a page of the queue
1514 * Helper for 'get_queued_page' - gets a page off the queue
1516 * Returns the block of the page (or NULL if none available)
1518 * @rs: current RAM state
1519 * @offset: used to return the offset within the RAMBlock
1521 static RAMBlock
*unqueue_page(RAMState
*rs
, ram_addr_t
*offset
)
1523 RAMBlock
*block
= NULL
;
1525 qemu_mutex_lock(&rs
->src_page_req_mutex
);
1526 if (!QSIMPLEQ_EMPTY(&rs
->src_page_requests
)) {
1527 struct RAMSrcPageRequest
*entry
=
1528 QSIMPLEQ_FIRST(&rs
->src_page_requests
);
1530 *offset
= entry
->offset
;
1532 if (entry
->len
> TARGET_PAGE_SIZE
) {
1533 entry
->len
-= TARGET_PAGE_SIZE
;
1534 entry
->offset
+= TARGET_PAGE_SIZE
;
1536 memory_region_unref(block
->mr
);
1537 QSIMPLEQ_REMOVE_HEAD(&rs
->src_page_requests
, next_req
);
1541 qemu_mutex_unlock(&rs
->src_page_req_mutex
);
 * get_queued_page: unqueue a page from the postcopy requests
1549 * Skips pages that are already sent (!dirty)
1551 * Returns if a queued page is found
1553 * @rs: current RAM state
1554 * @pss: data about the state of the current dirty page scan
1556 static bool get_queued_page(RAMState
*rs
, PageSearchStatus
*pss
)
1563 block
= unqueue_page(rs
, &offset
);
1565 * We're sending this page, and since it's postcopy nothing else
1566 * will dirty it, and we must make sure it doesn't get sent again
1567 * even if this queue request was received after the background
1568 * search already sent it.
1573 page
= offset
>> TARGET_PAGE_BITS
;
1574 dirty
= test_bit(page
, block
->bmap
);
1576 trace_get_queued_page_not_dirty(block
->idstr
, (uint64_t)offset
,
1577 page
, test_bit(page
, block
->unsentmap
));
1579 trace_get_queued_page(block
->idstr
, (uint64_t)offset
, page
);
1583 } while (block
&& !dirty
);
1587 * As soon as we start servicing pages out of order, then we have
1588 * to kill the bulk stage, since the bulk stage assumes
1589 * in (migration_bitmap_find_and_reset_dirty) that every page is
1590 * dirty, that's no longer true.
1592 rs
->ram_bulk_stage
= false;
1595 * We want the background search to continue from the queued page
1596 * since the guest is likely to want other pages near to the page
1597 * it just requested.
1600 pss
->page
= offset
>> TARGET_PAGE_BITS
;
1607 * migration_page_queue_free: drop any remaining pages in the ram
 * It should be empty at the end anyway, but in error cases there may
 * be some left.  In case any page is left, we drop it.
1614 static void migration_page_queue_free(RAMState
*rs
)
1616 struct RAMSrcPageRequest
*mspr
, *next_mspr
;
1617 /* This queue generally should be empty - but in the case of a failed
1618 * migration might have some droppings in.
1621 QSIMPLEQ_FOREACH_SAFE(mspr
, &rs
->src_page_requests
, next_req
, next_mspr
) {
1622 memory_region_unref(mspr
->rb
->mr
);
1623 QSIMPLEQ_REMOVE_HEAD(&rs
->src_page_requests
, next_req
);
1630 * ram_save_queue_pages: queue the page for transmission
1632 * A request from postcopy destination for example.
1634 * Returns zero on success or negative on error
 * @rbname: Name of the RAMBlock of the request. NULL means the
 * same as the last one.
1638 * @start: starting address from the start of the RAMBlock
1639 * @len: length (in bytes) to send
1641 int ram_save_queue_pages(const char *rbname
, ram_addr_t start
, ram_addr_t len
)
1644 RAMState
*rs
= ram_state
;
1646 ram_counters
.postcopy_requests
++;
1649 /* Reuse last RAMBlock */
1650 ramblock
= rs
->last_req_rb
;
1654 * Shouldn't happen, we can't reuse the last RAMBlock if
1655 * it's the 1st request.
1657 error_report("ram_save_queue_pages no previous block");
1661 ramblock
= qemu_ram_block_by_name(rbname
);
1664 /* We shouldn't be asked for a non-existent RAMBlock */
1665 error_report("ram_save_queue_pages no block '%s'", rbname
);
1668 rs
->last_req_rb
= ramblock
;
1670 trace_ram_save_queue_pages(ramblock
->idstr
, start
, len
);
1671 if (start
+len
> ramblock
->used_length
) {
1672 error_report("%s request overrun start=" RAM_ADDR_FMT
" len="
1673 RAM_ADDR_FMT
" blocklen=" RAM_ADDR_FMT
,
1674 __func__
, start
, len
, ramblock
->used_length
);
1678 struct RAMSrcPageRequest
*new_entry
=
1679 g_malloc0(sizeof(struct RAMSrcPageRequest
));
1680 new_entry
->rb
= ramblock
;
1681 new_entry
->offset
= start
;
1682 new_entry
->len
= len
;
1684 memory_region_ref(ramblock
->mr
);
1685 qemu_mutex_lock(&rs
->src_page_req_mutex
);
1686 QSIMPLEQ_INSERT_TAIL(&rs
->src_page_requests
, new_entry
, next_req
);
1687 qemu_mutex_unlock(&rs
->src_page_req_mutex
);
1697 static bool save_page_use_compression(RAMState
*rs
)
1699 if (!migrate_use_compression()) {
1704 * If xbzrle is on, stop using the data compression after first
1705 * round of migration even if compression is enabled. In theory,
1706 * xbzrle can do better than compression.
1708 if (rs
->ram_bulk_stage
|| !migrate_use_xbzrle()) {
1716 * ram_save_target_page: save one target page
1718 * Returns the number of pages written
1720 * @rs: current RAM state
1721 * @pss: data about the page we want to send
1722 * @last_stage: if we are at the completion stage
1724 static int ram_save_target_page(RAMState
*rs
, PageSearchStatus
*pss
,
1727 RAMBlock
*block
= pss
->block
;
1728 ram_addr_t offset
= pss
->page
<< TARGET_PAGE_BITS
;
1731 if (control_save_page(rs
, block
, offset
, &res
)) {
1736 * When starting the process of a new block, the first page of
1737 * the block should be sent out before other pages in the same
1738 * block, and all the pages in last block should have been sent
1739 * out, keeping this order is important, because the 'cont' flag
1740 * is used to avoid resending the block name.
1742 if (block
!= rs
->last_sent_block
&& save_page_use_compression(rs
)) {
1743 flush_compressed_data(rs
);
1746 res
= save_zero_page(rs
, block
, offset
);
1748 /* Must let xbzrle know, otherwise a previous (now 0'd) cached
1749 * page would be stale
1751 if (!save_page_use_compression(rs
)) {
1752 XBZRLE_cache_lock();
1753 xbzrle_cache_zero_page(rs
, block
->offset
+ offset
);
1754 XBZRLE_cache_unlock();
1756 ram_release_pages(block
->idstr
, offset
, res
);
1761 * Make sure the first page is sent out before other pages.
1763 * we post it as normal page as compression will take much
1766 if (block
== rs
->last_sent_block
&& save_page_use_compression(rs
)) {
1767 return compress_page_with_multi_thread(rs
, block
, offset
);
1770 return ram_save_page(rs
, pss
, last_stage
);
1774 * ram_save_host_page: save a whole host page
1776 * Starting at *offset send pages up to the end of the current host
1777 * page. It's valid for the initial offset to point into the middle of
1778 * a host page in which case the remainder of the hostpage is sent.
1779 * Only dirty target pages are sent. Note that the host page size may
1780 * be a huge page for this block.
1781 * The saving stops at the boundary of the used_length of the block
1782 * if the RAMBlock isn't a multiple of the host page size.
1784 * Returns the number of pages written or negative on error
1786 * @rs: current RAM state
1787 * @ms: current migration state
1788 * @pss: data about the page we want to send
1789 * @last_stage: if we are at the completion stage
1791 static int ram_save_host_page(RAMState
*rs
, PageSearchStatus
*pss
,
1794 int tmppages
, pages
= 0;
1795 size_t pagesize_bits
=
1796 qemu_ram_pagesize(pss
->block
) >> TARGET_PAGE_BITS
;
1798 if (!qemu_ram_is_migratable(pss
->block
)) {
1799 error_report("block %s should not be migrated !", pss
->block
->idstr
);
/* Check if the page is dirty and if so send it */
1805 if (!migration_bitmap_clear_dirty(rs
, pss
->block
, pss
->page
)) {
1810 tmppages
= ram_save_target_page(rs
, pss
, last_stage
);
1816 if (pss
->block
->unsentmap
) {
1817 clear_bit(pss
->page
, pss
->block
->unsentmap
);
1821 } while ((pss
->page
& (pagesize_bits
- 1)) &&
1822 offset_in_ramblock(pss
->block
, pss
->page
<< TARGET_PAGE_BITS
));
1824 /* The offset we leave with is the last one we looked at */
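
/*
 * Illustrative note, not part of the original file: with 2MiB hugepages and
 * 4KiB target pages, pagesize_bits is 512, so the loop above keeps calling
 * ram_save_target_page() until pss->page crosses a 512-page boundary (or
 * runs off used_length), i.e. a whole host page is sent in one go.
 */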
1830 * ram_find_and_save_block: finds a dirty page and sends it to f
1832 * Called within an RCU critical section.
1834 * Returns the number of pages written where zero means no dirty pages
1836 * @rs: current RAM state
1837 * @last_stage: if we are at the completion stage
1839 * On systems where host-page-size > target-page-size it will send all the
1840 * pages in a host page that are dirty.
1843 static int ram_find_and_save_block(RAMState
*rs
, bool last_stage
)
1845 PageSearchStatus pss
;
1849 /* No dirty page as there is zero RAM */
1850 if (!ram_bytes_total()) {
1854 pss
.block
= rs
->last_seen_block
;
1855 pss
.page
= rs
->last_page
;
1856 pss
.complete_round
= false;
1859 pss
.block
= QLIST_FIRST_RCU(&ram_list
.blocks
);
1864 found
= get_queued_page(rs
, &pss
);
1867 /* priority queue empty, so just search for something dirty */
1868 found
= find_dirty_block(rs
, &pss
, &again
);
1872 pages
= ram_save_host_page(rs
, &pss
, last_stage
);
1874 } while (!pages
&& again
);
1876 rs
->last_seen_block
= pss
.block
;
1877 rs
->last_page
= pss
.page
;
1882 void acct_update_position(QEMUFile
*f
, size_t size
, bool zero
)
1884 uint64_t pages
= size
/ TARGET_PAGE_SIZE
;
1887 ram_counters
.duplicate
+= pages
;
1889 ram_counters
.normal
+= pages
;
1890 ram_counters
.transferred
+= size
;
1891 qemu_update_position(f
, size
);
1895 uint64_t ram_bytes_total(void)
1901 RAMBLOCK_FOREACH_MIGRATABLE(block
) {
1902 total
+= block
->used_length
;
1908 static void xbzrle_load_setup(void)
1910 XBZRLE
.decoded_buf
= g_malloc(TARGET_PAGE_SIZE
);
1913 static void xbzrle_load_cleanup(void)
1915 g_free(XBZRLE
.decoded_buf
);
1916 XBZRLE
.decoded_buf
= NULL
;
1919 static void ram_state_cleanup(RAMState
**rsp
)
1922 migration_page_queue_free(*rsp
);
1923 qemu_mutex_destroy(&(*rsp
)->bitmap_mutex
);
1924 qemu_mutex_destroy(&(*rsp
)->src_page_req_mutex
);
1930 static void xbzrle_cleanup(void)
1932 XBZRLE_cache_lock();
1934 cache_fini(XBZRLE
.cache
);
1935 g_free(XBZRLE
.encoded_buf
);
1936 g_free(XBZRLE
.current_buf
);
1937 g_free(XBZRLE
.zero_target_page
);
1938 XBZRLE
.cache
= NULL
;
1939 XBZRLE
.encoded_buf
= NULL
;
1940 XBZRLE
.current_buf
= NULL
;
1941 XBZRLE
.zero_target_page
= NULL
;
1943 XBZRLE_cache_unlock();
1946 static void ram_save_cleanup(void *opaque
)
1948 RAMState
**rsp
= opaque
;
/* caller has held the iothread lock or is in a bh, so there is
 * no writing race against this migration_bitmap
1954 memory_global_dirty_log_stop();
1956 RAMBLOCK_FOREACH_MIGRATABLE(block
) {
1957 g_free(block
->bmap
);
1959 g_free(block
->unsentmap
);
1960 block
->unsentmap
= NULL
;
1964 compress_threads_save_cleanup();
1965 ram_state_cleanup(rsp
);
1968 static void ram_state_reset(RAMState
*rs
)
1970 rs
->last_seen_block
= NULL
;
1971 rs
->last_sent_block
= NULL
;
1973 rs
->last_version
= ram_list
.version
;
1974 rs
->ram_bulk_stage
= true;
1977 #define MAX_WAIT 50 /* ms, half buffered_file limit */
1980 * 'expected' is the value you expect the bitmap mostly to be full
1981 * of; it won't bother printing lines that are all this value.
1982 * If 'todump' is null the migration bitmap is dumped.
1984 void ram_debug_dump_bitmap(unsigned long *todump
, bool expected
,
1985 unsigned long pages
)
1988 int64_t linelen
= 128;
1991 for (cur
= 0; cur
< pages
; cur
+= linelen
) {
1995 * Last line; catch the case where the line length
1996 * is longer than remaining ram
1998 if (cur
+ linelen
> pages
) {
1999 linelen
= pages
- cur
;
2001 for (curb
= 0; curb
< linelen
; curb
++) {
2002 bool thisbit
= test_bit(cur
+ curb
, todump
);
2003 linebuf
[curb
] = thisbit
? '1' : '.';
2004 found
= found
|| (thisbit
!= expected
);
2007 linebuf
[curb
] = '\0';
2008 fprintf(stderr
, "0x%08" PRIx64
" : %s\n", cur
, linebuf
);
2013 /* **** functions for postcopy ***** */
2015 void ram_postcopy_migrated_memory_release(MigrationState
*ms
)
2017 struct RAMBlock
*block
;
2019 RAMBLOCK_FOREACH_MIGRATABLE(block
) {
2020 unsigned long *bitmap
= block
->bmap
;
2021 unsigned long range
= block
->used_length
>> TARGET_PAGE_BITS
;
2022 unsigned long run_start
= find_next_zero_bit(bitmap
, range
, 0);
2024 while (run_start
< range
) {
2025 unsigned long run_end
= find_next_bit(bitmap
, range
, run_start
+ 1);
2026 ram_discard_range(block
->idstr
, run_start
<< TARGET_PAGE_BITS
,
2027 (run_end
- run_start
) << TARGET_PAGE_BITS
);
2028 run_start
= find_next_zero_bit(bitmap
, range
, run_end
+ 1);
2034 * postcopy_send_discard_bm_ram: discard a RAMBlock
2036 * Returns zero on success
2038 * Callback from postcopy_each_ram_send_discard for each RAMBlock
2039 * Note: At this point the 'unsentmap' is the processed bitmap combined
2040 * with the dirtymap; so a '1' means it's either dirty or unsent.
2042 * @ms: current migration state
2043 * @pds: state for postcopy
2044 * @start: RAMBlock starting page
2045 * @length: RAMBlock size
2047 static int postcopy_send_discard_bm_ram(MigrationState
*ms
,
2048 PostcopyDiscardState
*pds
,
2051 unsigned long end
= block
->used_length
>> TARGET_PAGE_BITS
;
2052 unsigned long current
;
2053 unsigned long *unsentmap
= block
->unsentmap
;
2055 for (current
= 0; current
< end
; ) {
2056 unsigned long one
= find_next_bit(unsentmap
, end
, current
);
2059 unsigned long zero
= find_next_zero_bit(unsentmap
, end
, one
+ 1);
2060 unsigned long discard_length
;
2063 discard_length
= end
- one
;
2065 discard_length
= zero
- one
;
2067 if (discard_length
) {
2068 postcopy_discard_send_range(ms
, pds
, one
, discard_length
);
2070 current
= one
+ discard_length
;
2080 * postcopy_each_ram_send_discard: discard all RAMBlocks
2082 * Returns 0 for success or negative for error
2084 * Utility for the outgoing postcopy code.
2085 * Calls postcopy_send_discard_bm_ram for each RAMBlock
2086 * passing it bitmap indexes and name.
2087 * (qemu_ram_foreach_block ends up passing unscaled lengths
2088 * which would mean postcopy code would have to deal with target page)
2090 * @ms: current migration state
2092 static int postcopy_each_ram_send_discard(MigrationState
*ms
)
2094 struct RAMBlock
*block
;
2097 RAMBLOCK_FOREACH_MIGRATABLE(block
) {
2098 PostcopyDiscardState
*pds
=
2099 postcopy_discard_send_init(ms
, block
->idstr
);
2102 * Postcopy sends chunks of bitmap over the wire, but it
2103 * just needs indexes at this point, avoids it having
2104 * target page specific code.
2106 ret
= postcopy_send_discard_bm_ram(ms
, pds
, block
);
2107 postcopy_discard_send_finish(ms
, pds
);
 * postcopy_chunk_hostpages_pass: canonicalize bitmap in hostpages
2119 * Helper for postcopy_chunk_hostpages; it's called twice to
2120 * canonicalize the two bitmaps, that are similar, but one is
2123 * Postcopy requires that all target pages in a hostpage are dirty or
2124 * clean, not a mix. This function canonicalizes the bitmaps.
2126 * @ms: current migration state
2127 * @unsent_pass: if true we need to canonicalize partially unsent host pages
2128 * otherwise we need to canonicalize partially dirty host pages
2129 * @block: block that contains the page we want to canonicalize
2130 * @pds: state for postcopy
2132 static void postcopy_chunk_hostpages_pass(MigrationState
*ms
, bool unsent_pass
,
2134 PostcopyDiscardState
*pds
)
2136 RAMState
*rs
= ram_state
;
2137 unsigned long *bitmap
= block
->bmap
;
2138 unsigned long *unsentmap
= block
->unsentmap
;
2139 unsigned int host_ratio
= block
->page_size
/ TARGET_PAGE_SIZE
;
2140 unsigned long pages
= block
->used_length
>> TARGET_PAGE_BITS
;
2141 unsigned long run_start
;
2143 if (block
->page_size
== TARGET_PAGE_SIZE
) {
2144 /* Easy case - TPS==HPS for a non-huge page RAMBlock */
2149 /* Find a sent page */
2150 run_start
= find_next_zero_bit(unsentmap
, pages
, 0);
2152 /* Find a dirty page */
2153 run_start
= find_next_bit(bitmap
, pages
, 0);
2156 while (run_start
< pages
) {
2157 bool do_fixup
= false;
2158 unsigned long fixup_start_addr
;
2159 unsigned long host_offset
;
2162 * If the start of this run of pages is in the middle of a host
2163 * page, then we need to fixup this host page.
2165 host_offset
= run_start
% host_ratio
;
2168 run_start
-= host_offset
;
2169 fixup_start_addr
= run_start
;
2170 /* For the next pass */
2171 run_start
= run_start
+ host_ratio
;
2173 /* Find the end of this run */
2174 unsigned long run_end
;
2176 run_end
= find_next_bit(unsentmap
, pages
, run_start
+ 1);
2178 run_end
= find_next_zero_bit(bitmap
, pages
, run_start
+ 1);
2181 * If the end isn't at the start of a host page, then the
2182 * run doesn't finish at the end of a host page
2183 * and we need to discard.
2185 host_offset
= run_end
% host_ratio
;
2188 fixup_start_addr
= run_end
- host_offset
;
2190 * This host page has gone, the next loop iteration starts
2191 * from after the fixup
2193 run_start
= fixup_start_addr
+ host_ratio
;
2196 * No discards on this iteration, next loop starts from
2197 * next sent/dirty page
2199 run_start
= run_end
+ 1;
2206 /* Tell the destination to discard this page */
2207 if (unsent_pass
|| !test_bit(fixup_start_addr
, unsentmap
)) {
2208 /* For the unsent_pass we:
2209 * discard partially sent pages
2210 * For the !unsent_pass (dirty) we:
2211 * discard partially dirty pages that were sent
2212 * (any partially sent pages were already discarded
2213 * by the previous unsent_pass)
2215 postcopy_discard_send_range(ms
, pds
, fixup_start_addr
,
2219 /* Clean up the bitmap */
2220 for (page
= fixup_start_addr
;
2221 page
< fixup_start_addr
+ host_ratio
; page
++) {
2222 /* All pages in this host page are now not sent */
2223 set_bit(page
, unsentmap
);
2226 * Remark them as dirty, updating the count for any pages
2227 * that weren't previously dirty.
2229 rs
->migration_dirty_pages
+= !test_and_set_bit(page
, bitmap
);
2234 /* Find the next sent page for the next iteration */
2235 run_start
= find_next_zero_bit(unsentmap
, pages
, run_start
);
2237 /* Find the next dirty page for the next iteration */
2238 run_start
= find_next_bit(bitmap
, pages
, run_start
);
 * postcopy_chunk_hostpages: discard any partially sent host page
2246 * Utility for the outgoing postcopy code.
2248 * Discard any partially sent host-page size chunks, mark any partially
2249 * dirty host-page size chunks as all dirty. In this case the host-page
2250 * is the host-page for the particular RAMBlock, i.e. it might be a huge page
2252 * Returns zero on success
2254 * @ms: current migration state
2255 * @block: block we want to work with
2257 static int postcopy_chunk_hostpages(MigrationState
*ms
, RAMBlock
*block
)
2259 PostcopyDiscardState
*pds
=
2260 postcopy_discard_send_init(ms
, block
->idstr
);
2262 /* First pass: Discard all partially sent host pages */
2263 postcopy_chunk_hostpages_pass(ms
, true, block
, pds
);
2265 * Second pass: Ensure that all partially dirty host pages are made
2268 postcopy_chunk_hostpages_pass(ms
, false, block
, pds
);
2270 postcopy_discard_send_finish(ms
, pds
);

/**
 * ram_postcopy_send_discard_bitmap: transmit the discard bitmap
 *
 * Returns zero on success
 *
 * Transmit the set of pages to be discarded after precopy to the target;
 * these are pages that:
 *     a) Have been previously transmitted but are now dirty again
 *     b) Pages that have never been transmitted, this ensures that
 *        any pages on the destination that have been mapped by background
 *        tasks get discarded (transparent huge pages are the specific concern)
 * Hopefully this is pretty sparse
 *
 * @ms: current migration state
 */
int ram_postcopy_send_discard_bitmap(MigrationState *ms)
{
    RAMState *rs = ram_state;
    RAMBlock *block;
    int ret;

    rcu_read_lock();

    /* This should be our last sync, the src is now paused */
    migration_bitmap_sync(rs);

    /* Easiest way to make sure we don't resume in the middle of a host-page */
    rs->last_seen_block = NULL;
    rs->last_sent_block = NULL;
    rs->last_page = 0;

    RAMBLOCK_FOREACH_MIGRATABLE(block) {
        unsigned long pages = block->used_length >> TARGET_PAGE_BITS;
        unsigned long *bitmap = block->bmap;
        unsigned long *unsentmap = block->unsentmap;

        if (!unsentmap) {
            /* We don't have a safe way to resize the sentmap, so
             * if the bitmap was resized it will be NULL at this
             * point.
             */
            error_report("migration ram resized during precopy phase");
            rcu_read_unlock();
            return -EINVAL;
        }
        /* Deal with TPS != HPS and huge pages */
        ret = postcopy_chunk_hostpages(ms, block);
        if (ret) {
            rcu_read_unlock();
            return ret;
        }

        /*
         * Update the unsentmap to be unsentmap = unsentmap | dirty
         */
        bitmap_or(unsentmap, unsentmap, bitmap, pages);
#ifdef DEBUG_POSTCOPY
        ram_debug_dump_bitmap(unsentmap, true, pages);
#endif
    }
    trace_ram_postcopy_send_discard_bitmap();

    ret = postcopy_each_ram_send_discard(ms);
    rcu_read_unlock();

    return ret;
}

/**
 * ram_discard_range: discard dirtied pages at the beginning of postcopy
 *
 * Returns zero on success
 *
 * @rbname: name of the RAMBlock of the request. NULL means the
 *          same as the last one.
 * @start: RAMBlock starting page
 * @length: RAMBlock size
 */
int ram_discard_range(const char *rbname, uint64_t start, size_t length)
{
    int ret = -1;

    trace_ram_discard_range(rbname, start, length);

    rcu_read_lock();
    RAMBlock *rb = qemu_ram_block_by_name(rbname);

    if (!rb) {
        error_report("ram_discard_range: Failed to find block '%s'", rbname);
        goto err;
    }

    bitmap_clear(rb->receivedmap, start >> qemu_target_page_bits(),
                 length >> qemu_target_page_bits());
    ret = ram_block_discard_range(rb, start, length);

err:
    rcu_read_unlock();

    return ret;
}
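
/*
 * Minimal usage sketch (hypothetical values, not a call site from this
 * file): discarding the first megabyte of a block named "pc.ram" at the
 * start of postcopy could look like
 *
 *     if (ram_discard_range("pc.ram", 0, 1 * 1024 * 1024)) {
 *         error_report("failed to discard range");
 *     }
 *
 * A zero return means the range was dropped from the host mapping (via
 * ram_block_discard_range) and cleared in the receivedmap bookkeeping above.
 */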

/*
 * For every allocation, we will try not to crash the VM if the
 * allocation fails.
 */
static int xbzrle_init(void)
{
    Error *local_err = NULL;

    if (!migrate_use_xbzrle()) {
        return 0;
    }

    XBZRLE_cache_lock();

    XBZRLE.zero_target_page = g_try_malloc0(TARGET_PAGE_SIZE);
    if (!XBZRLE.zero_target_page) {
        error_report("%s: Error allocating zero page", __func__);
        goto err_out;
    }

    XBZRLE.cache = cache_init(migrate_xbzrle_cache_size(),
                              TARGET_PAGE_SIZE, &local_err);
    if (!XBZRLE.cache) {
        error_report_err(local_err);
        goto free_zero_page;
    }

    XBZRLE.encoded_buf = g_try_malloc0(TARGET_PAGE_SIZE);
    if (!XBZRLE.encoded_buf) {
        error_report("%s: Error allocating encoded_buf", __func__);
        goto free_cache;
    }

    XBZRLE.current_buf = g_try_malloc(TARGET_PAGE_SIZE);
    if (!XBZRLE.current_buf) {
        error_report("%s: Error allocating current_buf", __func__);
        goto free_encoded_buf;
    }

    /* We are all good */
    XBZRLE_cache_unlock();
    return 0;

free_encoded_buf:
    g_free(XBZRLE.encoded_buf);
    XBZRLE.encoded_buf = NULL;
free_cache:
    cache_fini(XBZRLE.cache);
    XBZRLE.cache = NULL;
free_zero_page:
    g_free(XBZRLE.zero_target_page);
    XBZRLE.zero_target_page = NULL;
err_out:
    XBZRLE_cache_unlock();
    return -ENOMEM;
}

static int ram_state_init(RAMState **rsp)
{
    *rsp = g_try_new0(RAMState, 1);

    if (!*rsp) {
        error_report("%s: Init ramstate fail", __func__);
        return -1;
    }

    qemu_mutex_init(&(*rsp)->bitmap_mutex);
    qemu_mutex_init(&(*rsp)->src_page_req_mutex);
    QSIMPLEQ_INIT(&(*rsp)->src_page_requests);

    /*
     * Count the total number of pages used by ram blocks not including any
     * gaps due to alignment or unplugs.
     */
    (*rsp)->migration_dirty_pages = ram_bytes_total() >> TARGET_PAGE_BITS;

    ram_state_reset(*rsp);

    return 0;
}

static void ram_list_init_bitmaps(void)
{
    RAMBlock *block;
    unsigned long pages;

    /* Skip setting bitmap if there is no RAM */
    if (ram_bytes_total()) {
        RAMBLOCK_FOREACH_MIGRATABLE(block) {
            pages = block->max_length >> TARGET_PAGE_BITS;
            block->bmap = bitmap_new(pages);
            bitmap_set(block->bmap, 0, pages);
            if (migrate_postcopy_ram()) {
                block->unsentmap = bitmap_new(pages);
                bitmap_set(block->unsentmap, 0, pages);
            }
        }
    }
}

static void ram_init_bitmaps(RAMState *rs)
{
    /* For memory_global_dirty_log_start below.  */
    qemu_mutex_lock_iothread();
    qemu_mutex_lock_ramlist();
    rcu_read_lock();

    ram_list_init_bitmaps();
    memory_global_dirty_log_start();
    migration_bitmap_sync(rs);

    rcu_read_unlock();
    qemu_mutex_unlock_ramlist();
    qemu_mutex_unlock_iothread();
}

static int ram_init_all(RAMState **rsp)
{
    if (ram_state_init(rsp)) {
        return -1;
    }

    if (xbzrle_init()) {
        ram_state_cleanup(rsp);
        return -1;
    }

    ram_init_bitmaps(*rsp);

    return 0;
}

static void ram_state_resume_prepare(RAMState *rs, QEMUFile *out)
{
    RAMBlock *block;
    uint64_t pages = 0;

    /*
     * Postcopy is not using xbzrle/compression, so no need for that.
     * Also, since the source is already halted, we don't need to care
     * about dirty page logging either.
     */

    RAMBLOCK_FOREACH(block) {
        pages += bitmap_count_one(block->bmap,
                                  block->used_length >> TARGET_PAGE_BITS);
    }

    /* This may not be aligned with current bitmaps. Recalculate. */
    rs->migration_dirty_pages = pages;

    rs->last_seen_block = NULL;
    rs->last_sent_block = NULL;
    rs->last_page = 0;
    rs->last_version = ram_list.version;
    /*
     * Disable the bulk stage, otherwise we'll resend the whole RAM no
     * matter what we have sent.
     */
    rs->ram_bulk_stage = false;

    /* Update RAMState cache of output QEMUFile */
    rs->f = out;

    trace_ram_state_resume_prepare(pages);
}

/*
 * Each of ram_save_setup, ram_save_iterate and ram_save_complete has
 * a long-running RCU critical section.  When rcu-reclaims in the code
 * start to become numerous it will be necessary to reduce the
 * granularity of these critical sections.
 */

/**
 * ram_save_setup: Setup RAM for migration
 *
 * Returns zero to indicate success and negative for error
 *
 * @f: QEMUFile where to send the data
 * @opaque: RAMState pointer
 */
static int ram_save_setup(QEMUFile *f, void *opaque)
{
    RAMState **rsp = opaque;
    RAMBlock *block;

    if (compress_threads_save_setup()) {
        return -1;
    }

    /* migration has already setup the bitmap, reuse it. */
    if (!migration_in_colo_state()) {
        if (ram_init_all(rsp) != 0) {
            compress_threads_save_cleanup();
            return -1;
        }
    }
    (*rsp)->f = f;

    rcu_read_lock();

    qemu_put_be64(f, ram_bytes_total() | RAM_SAVE_FLAG_MEM_SIZE);

    RAMBLOCK_FOREACH_MIGRATABLE(block) {
        qemu_put_byte(f, strlen(block->idstr));
        qemu_put_buffer(f, (uint8_t *)block->idstr, strlen(block->idstr));
        qemu_put_be64(f, block->used_length);
        if (migrate_postcopy_ram() && block->page_size != qemu_host_page_size) {
            qemu_put_be64(f, block->page_size);
        }
    }

    rcu_read_unlock();

    ram_control_before_iterate(f, RAM_CONTROL_SETUP);
    ram_control_after_iterate(f, RAM_CONTROL_SETUP);

    qemu_put_be64(f, RAM_SAVE_FLAG_EOS);

    return 0;
}
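
/*
 * Rough sketch of the setup section emitted by ram_save_setup() above, as
 * implied by its qemu_put_* calls (field names here are descriptive only,
 * not taken from a wire-format specification):
 *
 *     be64  total ram size, with RAM_SAVE_FLAG_MEM_SIZE or'd into the low bits
 *     for each migratable RAMBlock:
 *         u8    length of idstr
 *         bytes idstr (not NUL terminated)
 *         be64  used_length
 *         be64  page_size  (only when postcopy is enabled and the block's
 *                           page size differs from qemu_host_page_size)
 *     be64  RAM_SAVE_FLAG_EOS
 */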

/**
 * ram_save_iterate: iterative stage for migration
 *
 * Returns zero to indicate success and negative for error
 *
 * @f: QEMUFile where to send the data
 * @opaque: RAMState pointer
 */
static int ram_save_iterate(QEMUFile *f, void *opaque)
{
    RAMState **temp = opaque;
    RAMState *rs = *temp;
    int ret;
    int i;
    int64_t t0;
    int done = 0;

    if (blk_mig_bulk_active()) {
        /* Avoid transferring ram during bulk phase of block migration as
         * the bulk phase will usually take a long time and transferring
         * ram updates during that time is pointless. */
        goto out;
    }

    rcu_read_lock();
    if (ram_list.version != rs->last_version) {
        ram_state_reset(rs);
    }

    /* Read version before ram_list.blocks */
    smp_rmb();

    ram_control_before_iterate(f, RAM_CONTROL_ROUND);

    t0 = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
    i = 0;
    while ((ret = qemu_file_rate_limit(f)) == 0) {
        int pages;

        pages = ram_find_and_save_block(rs, false);
        /* no more pages to send */
        if (pages == 0) {
            done = 1;
            break;
        }
        rs->iterations++;

        /* we want to check in the 1st loop, just in case it was the 1st time
           and we had to sync the dirty bitmap.
           qemu_get_clock_ns() is a bit expensive, so we only check once
           every few iterations
        */
        if ((i & 63) == 0) {
            uint64_t t1 = (qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - t0) / 1000000;
            if (t1 > MAX_WAIT) {
                trace_ram_save_iterate_big_wait(t1, i);
                break;
            }
        }
        i++;
    }
    flush_compressed_data(rs);
    rcu_read_unlock();

    /*
     * Must occur before EOS (or any QEMUFile operation)
     * because of RDMA protocol.
     */
    ram_control_after_iterate(f, RAM_CONTROL_ROUND);

out:
    qemu_put_be64(f, RAM_SAVE_FLAG_EOS);
    ram_counters.transferred += 8;

    ret = qemu_file_get_error(f);
    if (ret < 0) {
        return ret;
    }

    return done;
}

/**
 * ram_save_complete: function called to send the remaining amount of ram
 *
 * Returns zero to indicate success
 *
 * Called with iothread lock
 *
 * @f: QEMUFile where to send the data
 * @opaque: RAMState pointer
 */
static int ram_save_complete(QEMUFile *f, void *opaque)
{
    RAMState **temp = opaque;
    RAMState *rs = *temp;

    rcu_read_lock();

    if (!migration_in_postcopy()) {
        migration_bitmap_sync(rs);
    }

    ram_control_before_iterate(f, RAM_CONTROL_FINISH);

    /* try transferring iterative blocks of memory */

    /* flush all remaining blocks regardless of rate limiting */
    while (true) {
        int pages;

        pages = ram_find_and_save_block(rs, !migration_in_colo_state());
        /* no more blocks to send */
        if (pages == 0) {
            break;
        }
    }

    flush_compressed_data(rs);
    ram_control_after_iterate(f, RAM_CONTROL_FINISH);

    rcu_read_unlock();

    qemu_put_be64(f, RAM_SAVE_FLAG_EOS);

    return 0;
}

static void ram_save_pending(QEMUFile *f, void *opaque, uint64_t max_size,
                             uint64_t *res_precopy_only,
                             uint64_t *res_compatible,
                             uint64_t *res_postcopy_only)
{
    RAMState **temp = opaque;
    RAMState *rs = *temp;
    uint64_t remaining_size;

    remaining_size = rs->migration_dirty_pages * TARGET_PAGE_SIZE;

    if (!migration_in_postcopy() &&
        remaining_size < max_size) {
        qemu_mutex_lock_iothread();
        rcu_read_lock();
        migration_bitmap_sync(rs);
        rcu_read_unlock();
        qemu_mutex_unlock_iothread();
        remaining_size = rs->migration_dirty_pages * TARGET_PAGE_SIZE;
    }

    if (migrate_postcopy_ram()) {
        /* We can do postcopy, and all the data is postcopiable */
        *res_compatible += remaining_size;
    } else {
        *res_precopy_only += remaining_size;
    }
}

static int load_xbzrle(QEMUFile *f, ram_addr_t addr, void *host)
{
    unsigned int xh_len;
    int xh_flags;
    uint8_t *loaded_data;

    /* extract RLE header */
    xh_flags = qemu_get_byte(f);
    xh_len = qemu_get_be16(f);

    if (xh_flags != ENCODING_FLAG_XBZRLE) {
        error_report("Failed to load XBZRLE page - wrong compression!");
        return -1;
    }

    if (xh_len > TARGET_PAGE_SIZE) {
        error_report("Failed to load XBZRLE page - len overflow!");
        return -1;
    }
    loaded_data = XBZRLE.decoded_buf;
    /* load data and decode */
    /* it can change loaded_data to point to an internal buffer */
    qemu_get_buffer_in_place(f, &loaded_data, xh_len);

    /* decode RLE */
    if (xbzrle_decode_buffer(loaded_data, xh_len, host,
                             TARGET_PAGE_SIZE) == -1) {
        error_report("Failed to load XBZRLE page - decode error!");
        return -1;
    }

    return 0;
}
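
/*
 * Sketch of the XBZRLE page record consumed by load_xbzrle() above, derived
 * from its qemu_get_* calls (field names here are descriptive only):
 *
 *     u8    xh_flags   must be ENCODING_FLAG_XBZRLE
 *     be16  xh_len     encoded length, at most TARGET_PAGE_SIZE
 *     bytes xh_len bytes of XBZRLE-encoded data, decoded in place over the
 *           current contents of the target page at @host
 */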

/**
 * ram_block_from_stream: read a RAMBlock id from the migration stream
 *
 * Must be called from within a rcu critical section.
 *
 * Returns a pointer from within the RCU-protected ram_list.
 *
 * @f: QEMUFile where to read the data from
 * @flags: Page flags (mostly to see if it's a continuation of previous block)
 */
static inline RAMBlock *ram_block_from_stream(QEMUFile *f, int flags)
{
    static RAMBlock *block = NULL;
    char id[256];
    uint8_t len;

    if (flags & RAM_SAVE_FLAG_CONTINUE) {
        if (!block) {
            error_report("Ack, bad migration stream!");
            return NULL;
        }
        return block;
    }

    len = qemu_get_byte(f);
    qemu_get_buffer(f, (uint8_t *)id, len);
    id[len] = 0;

    block = qemu_ram_block_by_name(id);
    if (!block) {
        error_report("Can't find block %s", id);
        return NULL;
    }

    if (!qemu_ram_is_migratable(block)) {
        error_report("block %s should not be migrated !", id);
        return NULL;
    }

    return block;
}

static inline void *host_from_ram_block_offset(RAMBlock *block,
                                               ram_addr_t offset)
{
    if (!offset_in_ramblock(block, offset)) {
        return NULL;
    }

    return block->host + offset;
}

/**
 * ram_handle_compressed: handle the zero page case
 *
 * If a page (or a whole RDMA chunk) has been
 * determined to be zero, then zap it.
 *
 * @host: host address for the zero page
 * @ch: what the page is filled from.  We only support zero
 * @size: size of the zero page
 */
void ram_handle_compressed(void *host, uint8_t ch, uint64_t size)
{
    if (ch != 0 || !is_zero_range(host, size)) {
        memset(host, ch, size);
    }
}
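
/*
 * Minimal usage sketch, mirroring the RAM_SAVE_FLAG_ZERO handling in
 * ram_load() further below: the fill byte is read from the stream and the
 * page is only written when it is not already zero.
 *
 *     ch = qemu_get_byte(f);
 *     ram_handle_compressed(host, ch, TARGET_PAGE_SIZE);
 *
 * Skipping the memset for already-zero pages avoids dirtying untouched (and
 * possibly still unallocated) destination pages.
 */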

/* return the size after decompression, or negative value on error */
static int
qemu_uncompress_data(z_stream *stream, uint8_t *dest, size_t dest_len,
                     const uint8_t *source, size_t source_len)
{
    int err;

    err = inflateReset(stream);
    if (err != Z_OK) {
        return -1;
    }

    stream->avail_in = source_len;
    stream->next_in = (uint8_t *)source;
    stream->avail_out = dest_len;
    stream->next_out = dest;

    err = inflate(stream, Z_NO_FLUSH);
    if (err != Z_STREAM_END) {
        return -1;
    }

    return stream->total_out;
}
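
/*
 * Minimal usage sketch, assuming "stream" was set up with inflateInit() and
 * "compbuf"/"comp_len" hold one compressed page (this mirrors what
 * do_data_decompress() below does for each request):
 *
 *     int n = qemu_uncompress_data(&stream, page, TARGET_PAGE_SIZE,
 *                                  compbuf, comp_len);
 *     if (n < 0) {
 *         // decompression failed; the caller decides whether to abort
 *     }
 */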

static void *do_data_decompress(void *opaque)
{
    DecompressParam *param = opaque;
    unsigned long pagesize;
    uint8_t *des;
    int len, ret;

    qemu_mutex_lock(&param->mutex);
    while (!param->quit) {
        if (param->des) {
            des = param->des;
            len = param->len;
            param->des = 0;
            qemu_mutex_unlock(&param->mutex);

            pagesize = TARGET_PAGE_SIZE;

            ret = qemu_uncompress_data(&param->stream, des, pagesize,
                                       param->compbuf, len);
            if (ret < 0 && migrate_get_current()->decompress_error_check) {
                error_report("decompress data failed");
                qemu_file_set_error(decomp_file, ret);
            }

            qemu_mutex_lock(&decomp_done_lock);
            param->done = true;
            qemu_cond_signal(&decomp_done_cond);
            qemu_mutex_unlock(&decomp_done_lock);

            qemu_mutex_lock(&param->mutex);
        } else {
            qemu_cond_wait(&param->cond, &param->mutex);
        }
    }
    qemu_mutex_unlock(&param->mutex);

    return NULL;
}

static int wait_for_decompress_done(void)
{
    int idx, thread_count;

    if (!migrate_use_compression()) {
        return 0;
    }

    thread_count = migrate_decompress_threads();
    qemu_mutex_lock(&decomp_done_lock);
    for (idx = 0; idx < thread_count; idx++) {
        while (!decomp_param[idx].done) {
            qemu_cond_wait(&decomp_done_cond, &decomp_done_lock);
        }
    }
    qemu_mutex_unlock(&decomp_done_lock);
    return qemu_file_get_error(decomp_file);
}

static void compress_threads_load_cleanup(void)
{
    int i, thread_count;

    if (!migrate_use_compression()) {
        return;
    }
    thread_count = migrate_decompress_threads();
    for (i = 0; i < thread_count; i++) {
        /*
         * we use it as an indicator of whether the thread is
         * properly initialized or not
         */
        if (!decomp_param[i].compbuf) {
            break;
        }

        qemu_mutex_lock(&decomp_param[i].mutex);
        decomp_param[i].quit = true;
        qemu_cond_signal(&decomp_param[i].cond);
        qemu_mutex_unlock(&decomp_param[i].mutex);
    }
    for (i = 0; i < thread_count; i++) {
        if (!decomp_param[i].compbuf) {
            break;
        }

        qemu_thread_join(decompress_threads + i);
        qemu_mutex_destroy(&decomp_param[i].mutex);
        qemu_cond_destroy(&decomp_param[i].cond);
        inflateEnd(&decomp_param[i].stream);
        g_free(decomp_param[i].compbuf);
        decomp_param[i].compbuf = NULL;
    }
    g_free(decompress_threads);
    g_free(decomp_param);
    decompress_threads = NULL;
    decomp_param = NULL;
    decomp_file = NULL;
}

static int compress_threads_load_setup(QEMUFile *f)
{
    int i, thread_count;

    if (!migrate_use_compression()) {
        return 0;
    }

    thread_count = migrate_decompress_threads();
    decompress_threads = g_new0(QemuThread, thread_count);
    decomp_param = g_new0(DecompressParam, thread_count);
    qemu_mutex_init(&decomp_done_lock);
    qemu_cond_init(&decomp_done_cond);
    decomp_file = f;
    for (i = 0; i < thread_count; i++) {
        if (inflateInit(&decomp_param[i].stream) != Z_OK) {
            goto exit;
        }

        decomp_param[i].compbuf = g_malloc0(compressBound(TARGET_PAGE_SIZE));
        qemu_mutex_init(&decomp_param[i].mutex);
        qemu_cond_init(&decomp_param[i].cond);
        decomp_param[i].done = true;
        decomp_param[i].quit = false;
        qemu_thread_create(decompress_threads + i, "decompress",
                           do_data_decompress, decomp_param + i,
                           QEMU_THREAD_JOINABLE);
    }
    return 0;
exit:
    compress_threads_load_cleanup();
    return -1;
}

static void decompress_data_with_multi_threads(QEMUFile *f,
                                               void *host, int len)
{
    int idx, thread_count;

    thread_count = migrate_decompress_threads();
    qemu_mutex_lock(&decomp_done_lock);
    while (true) {
        for (idx = 0; idx < thread_count; idx++) {
            if (decomp_param[idx].done) {
                decomp_param[idx].done = false;
                qemu_mutex_lock(&decomp_param[idx].mutex);
                qemu_get_buffer(f, decomp_param[idx].compbuf, len);
                decomp_param[idx].des = host;
                decomp_param[idx].len = len;
                qemu_cond_signal(&decomp_param[idx].cond);
                qemu_mutex_unlock(&decomp_param[idx].mutex);
                break;
            }
        }
        if (idx < thread_count) {
            break;
        } else {
            qemu_cond_wait(&decomp_done_cond, &decomp_done_lock);
        }
    }
    qemu_mutex_unlock(&decomp_done_lock);
}

/**
 * ram_load_setup: Setup RAM for migration incoming side
 *
 * Returns zero to indicate success and negative for error
 *
 * @f: QEMUFile where to receive the data
 * @opaque: RAMState pointer
 */
static int ram_load_setup(QEMUFile *f, void *opaque)
{
    if (compress_threads_load_setup(f)) {
        return -1;
    }

    xbzrle_load_setup();
    ramblock_recv_map_init();
    return 0;
}

static int ram_load_cleanup(void *opaque)
{
    RAMBlock *rb;

    xbzrle_load_cleanup();
    compress_threads_load_cleanup();

    RAMBLOCK_FOREACH_MIGRATABLE(rb) {
        g_free(rb->receivedmap);
        rb->receivedmap = NULL;
    }
    return 0;
}

/**
 * ram_postcopy_incoming_init: allocate postcopy data structures
 *
 * Returns 0 for success and negative if there was one error
 *
 * @mis: current migration incoming state
 *
 * Allocate data structures etc needed by incoming migration with
 * postcopy-ram. postcopy-ram's similarly named
 * postcopy_ram_incoming_init does the work.
 */
int ram_postcopy_incoming_init(MigrationIncomingState *mis)
{
    unsigned long ram_pages = last_ram_page();

    return postcopy_ram_incoming_init(mis, ram_pages);
}

/**
 * ram_load_postcopy: load a page in postcopy case
 *
 * Returns 0 for success or -errno in case of error
 *
 * Called in postcopy mode by ram_load().
 * rcu_read_lock is taken prior to this being called.
 *
 * @f: QEMUFile to receive the data from
 */
static int ram_load_postcopy(QEMUFile *f)
{
    int flags = 0, ret = 0;
    bool place_needed = false;
    bool matching_page_sizes = false;
    MigrationIncomingState *mis = migration_incoming_get_current();
    /* Temporary page that is later 'placed' */
    void *postcopy_host_page = postcopy_get_tmp_page(mis);
    void *last_host = NULL;
    bool all_zero = false;

    while (!ret && !(flags & RAM_SAVE_FLAG_EOS)) {
        ram_addr_t addr;
        void *host = NULL;
        void *page_buffer = NULL;
        void *place_source = NULL;
        RAMBlock *block = NULL;
        uint8_t ch;

        addr = qemu_get_be64(f);

        /*
         * If there is a QEMUFile error, we should stop here; "addr"
         * may be invalid afterwards.
         */
        ret = qemu_file_get_error(f);
        if (ret) {
            break;
        }

        flags = addr & ~TARGET_PAGE_MASK;
        addr &= TARGET_PAGE_MASK;

        trace_ram_load_postcopy_loop((uint64_t)addr, flags);
        place_needed = false;
        if (flags & (RAM_SAVE_FLAG_ZERO | RAM_SAVE_FLAG_PAGE)) {
            block = ram_block_from_stream(f, flags);

            host = host_from_ram_block_offset(block, addr);
            if (!host) {
                error_report("Illegal RAM offset " RAM_ADDR_FMT, addr);
                ret = -EINVAL;
                break;
            }
            matching_page_sizes = block->page_size == TARGET_PAGE_SIZE;
            /*
             * Postcopy requires that we place whole host pages atomically;
             * these may be huge pages for RAMBlocks that are backed by
             * hugetlbfs.
             * To make it atomic, the data is read into a temporary page
             * that's moved into place later.
             * The migration protocol uses, possibly smaller, target pages;
             * however the source ensures it always sends all the components
             * of a host page in order.
             */
            page_buffer = postcopy_host_page +
                          ((uintptr_t)host & (block->page_size - 1));
            /* If all TP are zero then we can optimise the place */
            if (!((uintptr_t)host & (block->page_size - 1))) {
                all_zero = true;
            } else {
                /* not the 1st TP within the HP */
                if (host != (last_host + TARGET_PAGE_SIZE)) {
                    error_report("Non-sequential target page %p/%p",
                                 host, last_host);
                    ret = -EINVAL;
                    break;
                }
            }

            /*
             * If it's the last part of a host page then we place the host
             * page
             */
            place_needed = (((uintptr_t)host + TARGET_PAGE_SIZE) &
                            (block->page_size - 1)) == 0;
            place_source = postcopy_host_page;
        }
        last_host = host;

        switch (flags & ~RAM_SAVE_FLAG_CONTINUE) {
        case RAM_SAVE_FLAG_ZERO:
            ch = qemu_get_byte(f);
            memset(page_buffer, ch, TARGET_PAGE_SIZE);
            if (ch) {
                all_zero = false;
            }
            break;

        case RAM_SAVE_FLAG_PAGE:
            all_zero = false;
            if (!place_needed || !matching_page_sizes) {
                qemu_get_buffer(f, page_buffer, TARGET_PAGE_SIZE);
            } else {
                /* Avoids the qemu_file copy during postcopy, which is
                 * going to do a copy later; can only do it when we
                 * do this read in one go (matching page sizes)
                 */
                qemu_get_buffer_in_place(f, (uint8_t **)&place_source,
                                         TARGET_PAGE_SIZE);
            }
            break;
        case RAM_SAVE_FLAG_EOS:
            /* normal exit */
            break;
        default:
            error_report("Unknown combination of migration flags: %#x"
                         " (postcopy mode)", flags);
            ret = -EINVAL;
        }

        /* Detect for any possible file errors */
        if (!ret && qemu_file_get_error(f)) {
            ret = qemu_file_get_error(f);
        }

        if (!ret && place_needed) {
            /* This gets called at the last target page in the host page */
            void *place_dest = host + TARGET_PAGE_SIZE - block->page_size;

            if (all_zero) {
                ret = postcopy_place_page_zero(mis, place_dest, block);
            } else {
                ret = postcopy_place_page(mis, place_dest,
                                          place_source, block);
            }
        }
    }

    return ret;
}
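
/*
 * Worked example for the host-page assembly above (illustrative only,
 * assuming a RAMBlock backed by 2 MiB huge pages and 4 KiB target pages):
 * the source sends 512 consecutive target pages belonging to one host page.
 * Each one is copied (or zero-filled) into the matching offset inside
 * postcopy_host_page; place_needed only becomes true for the 512th target
 * page, at which point the whole 2 MiB buffer is handed to
 * postcopy_place_page() (or the zero-page variant) and mapped atomically at
 * place_dest, the start of the huge page.
 */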

static bool postcopy_is_advised(void)
{
    PostcopyState ps = postcopy_state_get();

    return ps >= POSTCOPY_INCOMING_ADVISE && ps < POSTCOPY_INCOMING_END;
}

static bool postcopy_is_running(void)
{
    PostcopyState ps = postcopy_state_get();

    return ps >= POSTCOPY_INCOMING_LISTENING && ps < POSTCOPY_INCOMING_END;
}

static int ram_load(QEMUFile *f, void *opaque, int version_id)
{
    int flags = 0, ret = 0, invalid_flags = 0;
    static uint64_t seq_iter;
    int len = 0;
    /*
     * If the system is running in postcopy mode, page inserts to host memory
     * must be atomic.
     */
    bool postcopy_running = postcopy_is_running();
    /* ADVISE is earlier, it shows the source has the postcopy capability on */
    bool postcopy_advised = postcopy_is_advised();

    seq_iter++;

    if (version_id != 4) {
        ret = -EINVAL;
    }

    if (!migrate_use_compression()) {
        invalid_flags |= RAM_SAVE_FLAG_COMPRESS_PAGE;
    }
    /* This RCU critical section can be very long running.
     * When RCU reclaims in the code start to become numerous,
     * it will be necessary to reduce the granularity of this
     * critical section.
     */
    rcu_read_lock();

    if (postcopy_running) {
        ret = ram_load_postcopy(f);
    }

    while (!postcopy_running && !ret && !(flags & RAM_SAVE_FLAG_EOS)) {
        ram_addr_t addr, total_ram_bytes;
        void *host = NULL;
        uint8_t ch;

        addr = qemu_get_be64(f);
        flags = addr & ~TARGET_PAGE_MASK;
        addr &= TARGET_PAGE_MASK;

        if (flags & invalid_flags) {
            if (flags & invalid_flags & RAM_SAVE_FLAG_COMPRESS_PAGE) {
                error_report("Received an unexpected compressed page");
            }

            ret = -EINVAL;
            break;
        }

        if (flags & (RAM_SAVE_FLAG_ZERO | RAM_SAVE_FLAG_PAGE |
                     RAM_SAVE_FLAG_COMPRESS_PAGE | RAM_SAVE_FLAG_XBZRLE)) {
            RAMBlock *block = ram_block_from_stream(f, flags);

            host = host_from_ram_block_offset(block, addr);
            if (!host) {
                error_report("Illegal RAM offset " RAM_ADDR_FMT, addr);
                ret = -EINVAL;
                break;
            }
            ramblock_recv_bitmap_set(block, host);
            trace_ram_load_loop(block->idstr, (uint64_t)addr, flags, host);
        }

        switch (flags & ~RAM_SAVE_FLAG_CONTINUE) {
        case RAM_SAVE_FLAG_MEM_SIZE:
            /* Synchronize RAM block list */
            total_ram_bytes = addr;
            while (!ret && total_ram_bytes) {
                RAMBlock *block;
                char id[256];
                ram_addr_t length;

                len = qemu_get_byte(f);
                qemu_get_buffer(f, (uint8_t *)id, len);
                id[len] = 0;
                length = qemu_get_be64(f);

                block = qemu_ram_block_by_name(id);
                if (block && !qemu_ram_is_migratable(block)) {
                    error_report("block %s should not be migrated !", id);
                    ret = -EINVAL;
                } else if (block) {
                    if (length != block->used_length) {
                        Error *local_err = NULL;

                        ret = qemu_ram_resize(block, length,
                                              &local_err);
                        if (local_err) {
                            error_report_err(local_err);
                        }
                    }
                    /* For postcopy we need to check hugepage sizes match */
                    if (postcopy_advised &&
                        block->page_size != qemu_host_page_size) {
                        uint64_t remote_page_size = qemu_get_be64(f);
                        if (remote_page_size != block->page_size) {
                            error_report("Mismatched RAM page size %s "
                                         "(local) %zd != %" PRId64,
                                         id, block->page_size,
                                         remote_page_size);
                            ret = -EINVAL;
                        }
                    }
                    ram_control_load_hook(f, RAM_CONTROL_BLOCK_REG,
                                          block->idstr);
                } else {
                    error_report("Unknown ramblock \"%s\", cannot "
                                 "accept migration", id);
                    ret = -EINVAL;
                }

                total_ram_bytes -= length;
            }
            break;

        case RAM_SAVE_FLAG_ZERO:
            ch = qemu_get_byte(f);
            ram_handle_compressed(host, ch, TARGET_PAGE_SIZE);
            break;

        case RAM_SAVE_FLAG_PAGE:
            qemu_get_buffer(f, host, TARGET_PAGE_SIZE);
            break;

        case RAM_SAVE_FLAG_COMPRESS_PAGE:
            len = qemu_get_be32(f);
            if (len < 0 || len > compressBound(TARGET_PAGE_SIZE)) {
                error_report("Invalid compressed data length: %d", len);
                ret = -EINVAL;
                break;
            }
            decompress_data_with_multi_threads(f, host, len);
            break;

        case RAM_SAVE_FLAG_XBZRLE:
            if (load_xbzrle(f, addr, host) < 0) {
                error_report("Failed to decompress XBZRLE page at "
                             RAM_ADDR_FMT, addr);
                ret = -EINVAL;
                break;
            }
            break;
        case RAM_SAVE_FLAG_EOS:
            /* normal exit */
            break;
        default:
            if (flags & RAM_SAVE_FLAG_HOOK) {
                ram_control_load_hook(f, RAM_CONTROL_HOOK, NULL);
            } else {
                error_report("Unknown combination of migration flags: %#x",
                             flags);
                ret = -EINVAL;
            }
        }
        if (!ret) {
            ret = qemu_file_get_error(f);
        }
    }

    ret |= wait_for_decompress_done();
    rcu_read_unlock();
    trace_ram_load_complete(ret, seq_iter);
    return ret;
}

static bool ram_has_postcopy(void *opaque)
{
    return migrate_postcopy_ram();
}

/* Sync all the dirty bitmaps with the destination VM. */
static int ram_dirty_bitmap_sync_all(MigrationState *s, RAMState *rs)
{
    RAMBlock *block;
    QEMUFile *file = s->to_dst_file;
    int ramblock_count = 0;

    trace_ram_dirty_bitmap_sync_start();

    RAMBLOCK_FOREACH(block) {
        qemu_savevm_send_recv_bitmap(file, block->idstr);
        trace_ram_dirty_bitmap_request(block->idstr);
        ramblock_count++;
    }

    trace_ram_dirty_bitmap_sync_wait();

    /* Wait until all the ramblocks' dirty bitmaps are synced */
    while (ramblock_count--) {
        qemu_sem_wait(&s->rp_state.rp_sem);
    }

    trace_ram_dirty_bitmap_sync_complete();

    return 0;
}

static void ram_dirty_bitmap_reload_notify(MigrationState *s)
{
    qemu_sem_post(&s->rp_state.rp_sem);
}

/*
 * Read the received bitmap, revert it as the initial dirty bitmap.
 * This is only used when the postcopy migration is paused but wants
 * to resume from a middle point.
 */
int ram_dirty_bitmap_reload(MigrationState *s, RAMBlock *block)
{
    int ret = -EINVAL;
    QEMUFile *file = s->rp_state.from_dst_file;
    unsigned long *le_bitmap, nbits = block->used_length >> TARGET_PAGE_BITS;
    uint64_t local_size = nbits / 8;
    uint64_t size, end_mark;

    trace_ram_dirty_bitmap_reload_begin(block->idstr);

    if (s->state != MIGRATION_STATUS_POSTCOPY_RECOVER) {
        error_report("%s: incorrect state %s", __func__,
                     MigrationStatus_str(s->state));
        return -EINVAL;
    }

    /*
     * Note: see comments in ramblock_recv_bitmap_send() on why we
     * need the endianness conversion, and the paddings.
     */
    local_size = ROUND_UP(local_size, 8);

    /* Add paddings */
    le_bitmap = bitmap_new(nbits + BITS_PER_LONG);

    size = qemu_get_be64(file);

    /* The size of the bitmap should match with our ramblock */
    if (size != local_size) {
        error_report("%s: ramblock '%s' bitmap size mismatch "
                     "(0x%"PRIx64" != 0x%"PRIx64")", __func__,
                     block->idstr, size, local_size);
        ret = -EINVAL;
        goto out;
    }

    size = qemu_get_buffer(file, (uint8_t *)le_bitmap, local_size);
    end_mark = qemu_get_be64(file);

    ret = qemu_file_get_error(file);
    if (ret || size != local_size) {
        error_report("%s: read bitmap failed for ramblock '%s': %d"
                     " (size 0x%"PRIx64", got: 0x%"PRIx64")",
                     __func__, block->idstr, ret, local_size, size);
        ret = -EIO;
        goto out;
    }

    if (end_mark != RAMBLOCK_RECV_BITMAP_ENDING) {
        error_report("%s: ramblock '%s' end mark incorrect: 0x%"PRIx64,
                     __func__, block->idstr, end_mark);
        ret = -EINVAL;
        goto out;
    }

    /*
     * Endianness conversion. We are during postcopy (though paused).
     * The dirty bitmap won't change. We can directly modify it.
     */
    bitmap_from_le(block->bmap, le_bitmap, nbits);

    /*
     * What we received is the "received bitmap". Revert it as the initial
     * dirty bitmap for this ramblock.
     */
    bitmap_complement(block->bmap, block->bmap, nbits);

    trace_ram_dirty_bitmap_reload_complete(block->idstr);

    /*
     * We have successfully synced the bitmap for the current ramblock. If
     * this is the last one to sync, we need to notify the main send thread.
     */
    ram_dirty_bitmap_reload_notify(s);

    ret = 0;
out:
    g_free(le_bitmap);
    return ret;
}
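
/*
 * Sketch of the per-RAMBlock bitmap chunk that ram_dirty_bitmap_reload()
 * above reads from the return path, as implied by its qemu_get_* calls
 * (field names here are descriptive only):
 *
 *     be64   size      bitmap payload size in bytes, rounded up to 8
 *     bytes  size bytes of little-endian bitmap data
 *     be64   end mark  must equal RAMBLOCK_RECV_BITMAP_ENDING
 *
 * The received bitmap marks pages the destination already has, so it is
 * complemented before being used as the block's new dirty bitmap.
 */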

static int ram_resume_prepare(MigrationState *s, void *opaque)
{
    RAMState *rs = *(RAMState **)opaque;
    int ret;

    ret = ram_dirty_bitmap_sync_all(s, rs);
    if (ret) {
        return ret;
    }

    ram_state_resume_prepare(rs, s->to_dst_file);

    return 0;
}

static SaveVMHandlers savevm_ram_handlers = {
    .save_setup = ram_save_setup,
    .save_live_iterate = ram_save_iterate,
    .save_live_complete_postcopy = ram_save_complete,
    .save_live_complete_precopy = ram_save_complete,
    .has_postcopy = ram_has_postcopy,
    .save_live_pending = ram_save_pending,
    .load_state = ram_load,
    .save_cleanup = ram_save_cleanup,
    .load_setup = ram_load_setup,
    .load_cleanup = ram_load_cleanup,
    .resume_prepare = ram_resume_prepare,
};

void ram_mig_init(void)
{
    qemu_mutex_init(&XBZRLE.lock);
    register_savevm_live(NULL, "ram", 0, 4, &savevm_ram_handlers, &ram_state);
}