4 * Copyright (c) 2003-2008 Fabrice Bellard
5 * Copyright (c) 2011-2015 Red Hat Inc
8 * Juan Quintela <quintela@redhat.com>
10 * Permission is hereby granted, free of charge, to any person obtaining a copy
11 * of this software and associated documentation files (the "Software"), to deal
12 * in the Software without restriction, including without limitation the rights
13 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
14 * copies of the Software, and to permit persons to whom the Software is
15 * furnished to do so, subject to the following conditions:
17 * The above copyright notice and this permission notice shall be included in
18 * all copies or substantial portions of the Software.
20 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
21 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
23 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
24 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
25 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
28 #include "qemu/osdep.h"
29 #include "qemu-common.h"
32 #include "qapi-event.h"
33 #include "qemu/cutils.h"
34 #include "qemu/bitops.h"
35 #include "qemu/bitmap.h"
36 #include "qemu/timer.h"
37 #include "qemu/main-loop.h"
39 #include "migration/migration.h"
40 #include "migration/qemu-file.h"
41 #include "migration/vmstate.h"
42 #include "postcopy-ram.h"
43 #include "exec/address-spaces.h"
44 #include "migration/page_cache.h"
45 #include "qemu/error-report.h"
47 #include "exec/ram_addr.h"
48 #include "qemu/rcu_queue.h"
49 #include "migration/colo.h"
51 /***********************************************************/
52 /* ram save/restore */
54 /* RAM_SAVE_FLAG_ZERO used to be named RAM_SAVE_FLAG_COMPRESS, it
55 * worked for pages that were filled with the same char. We switched
56 * it to only search for the zero value. And to avoid confusion with
57 * RAM_SAVE_FLAG_COMPRESS_PAGE just rename it.
60 #define RAM_SAVE_FLAG_FULL 0x01 /* Obsolete, not used anymore */
61 #define RAM_SAVE_FLAG_ZERO 0x02
62 #define RAM_SAVE_FLAG_MEM_SIZE 0x04
63 #define RAM_SAVE_FLAG_PAGE 0x08
64 #define RAM_SAVE_FLAG_EOS 0x10
65 #define RAM_SAVE_FLAG_CONTINUE 0x20
66 #define RAM_SAVE_FLAG_XBZRLE 0x40
67 /* 0x80 is reserved in migration.h; start with 0x100 next */
68 #define RAM_SAVE_FLAG_COMPRESS_PAGE 0x100
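/*
 * Illustrative note: each page on the wire is announced by a be64 word that
 * combines the page offset within its RAMBlock with the flags above in the
 * low bits (see save_page_header() below). For example, a normal page at
 * block offset 0x2000 is announced as 0x2000 | RAM_SAVE_FLAG_PAGE == 0x2008,
 * with RAM_SAVE_FLAG_CONTINUE OR-ed in when the page belongs to the same
 * RAMBlock as the previous one.
 */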
70 static uint8_t *ZERO_TARGET_PAGE;
72 static inline bool is_zero_range(uint8_t *p, uint64_t size)
74 return buffer_is_zero(p, size);
77 /* struct contains XBZRLE cache and a static page
78 used by the compression */
80 /* buffer used for XBZRLE encoding */
82 /* buffer for storing page content */
84 /* Cache for XBZRLE, Protected by lock. */
89 /* buffer used for XBZRLE decoding */
90 static uint8_t *xbzrle_decoded_buf;
92 static void XBZRLE_cache_lock(void)
94 if (migrate_use_xbzrle())
95 qemu_mutex_lock(&XBZRLE.lock);
98 static void XBZRLE_cache_unlock(void)
100 if (migrate_use_xbzrle())
101 qemu_mutex_unlock(&XBZRLE.lock);
105 * xbzrle_cache_resize: resize the xbzrle cache
107 * This function is called from qmp_migrate_set_cache_size in main
108 * thread, possibly while a migration is in progress. A running
109 * migration may be using the cache and might finish during this call,
110 * hence changes to the cache are protected by XBZRLE.lock().
112 * Returns the new_size or negative in case of error.
114 * @new_size: new cache size
116 int64_t xbzrle_cache_resize(int64_t new_size)
118 PageCache *new_cache;
121 if (new_size < TARGET_PAGE_SIZE) {
127 if (XBZRLE.cache != NULL) {
128 if (pow2floor(new_size) == migrate_xbzrle_cache_size()) {
131 new_cache = cache_init(new_size / TARGET_PAGE_SIZE,
134 error_report("Error creating cache");
139 cache_fini(XBZRLE.cache);
140 XBZRLE.cache = new_cache;
144 ret = pow2floor(new_size);
146 XBZRLE_cache_unlock();
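/*
 * Illustrative example: the accepted size is rounded down to a power of two,
 * so a request for a 100 MiB cache (new_size = 100 * 1024 * 1024) results in
 * ret = pow2floor(new_size) == 64 MiB being returned to the caller.
 */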
151 * An outstanding page request, on the source, having been received
154 struct RAMSrcPageRequest {
159 QSIMPLEQ_ENTRY(RAMSrcPageRequest) next_req;
162 /* State of RAM for migration */
164 /* QEMUFile used for this migration */
166 /* Last block that we have visited searching for dirty pages */
167 RAMBlock *last_seen_block;
168 /* Last block from where we have sent data */
169 RAMBlock *last_sent_block;
170 /* Last dirty target page we have sent */
171 ram_addr_t last_page;
172 /* last ram version we have seen */
173 uint32_t last_version;
174 /* We are in the first round */
176 /* How many times we have dirty too many pages */
177 int dirty_rate_high_cnt;
178 /* How many times we have synchronized the bitmap */
179 uint64_t bitmap_sync_count;
180 /* these variables are used for bitmap sync */
181 /* last time we did a full bitmap_sync */
182 int64_t time_last_bitmap_sync;
183 /* bytes transferred at start_time */
184 uint64_t bytes_xfer_prev;
185 /* number of dirty pages since start_time */
186 uint64_t num_dirty_pages_period;
187 /* xbzrle misses since the beginning of the period */
188 uint64_t xbzrle_cache_miss_prev;
189 /* number of iterations at the beginning of period */
190 uint64_t iterations_prev;
191 /* Accounting fields */
192 /* number of zero pages. It used to be pages filled by the same char. */
194 /* number of normal transferred pages */
196 /* Iterations since start */
198 /* xbzrle transmitted bytes. Notice that this is with
199 * compression, they can't be calculated from the pages */
200 uint64_t xbzrle_bytes;
201 /* xbzrle transmitted pages */
202 uint64_t xbzrle_pages;
203 /* xbzrle number of cache misses */
204 uint64_t xbzrle_cache_miss;
205 /* xbzrle miss rate */
206 double xbzrle_cache_miss_rate;
207 /* xbzrle number of overflows */
208 uint64_t xbzrle_overflows;
209 /* number of dirty bits in the bitmap */
210 uint64_t migration_dirty_pages;
211 /* total number of bytes transferred */
212 uint64_t bytes_transferred;
213 /* number of dirtied pages in the last second */
214 uint64_t dirty_pages_rate;
215 /* Count of requests incoming from destination */
216 uint64_t postcopy_requests;
217 /* protects modification of the bitmap */
218 QemuMutex bitmap_mutex;
219 /* The RAMBlock used in the last src_page_requests */
220 RAMBlock *last_req_rb;
221 /* Queue of outstanding page requests from the destination */
222 QemuMutex src_page_req_mutex;
223 QSIMPLEQ_HEAD(src_page_requests, RAMSrcPageRequest) src_page_requests;
225 typedef struct RAMState RAMState;
227 static RAMState ram_state;
229 uint64_t dup_mig_pages_transferred(void)
231 return ram_state.zero_pages;
234 uint64_t norm_mig_pages_transferred(void)
236 return ram_state.norm_pages;
239 uint64_t xbzrle_mig_bytes_transferred(void)
241 return ram_state.xbzrle_bytes;
244 uint64_t xbzrle_mig_pages_transferred(void)
246 return ram_state.xbzrle_pages;
249 uint64_t xbzrle_mig_pages_cache_miss(void)
251 return ram_state.xbzrle_cache_miss;
254 double xbzrle_mig_cache_miss_rate(void)
256 return ram_state.xbzrle_cache_miss_rate;
259 uint64_t xbzrle_mig_pages_overflow(void)
261 return ram_state.xbzrle_overflows;
264 uint64_t ram_bytes_transferred(void)
266 return ram_state.bytes_transferred;
269 uint64_t ram_bytes_remaining(void)
271 return ram_state.migration_dirty_pages * TARGET_PAGE_SIZE;
274 uint64_t ram_dirty_sync_count(void)
276 return ram_state.bitmap_sync_count;
279 uint64_t ram_dirty_pages_rate(void)
281 return ram_state.dirty_pages_rate;
284 uint64_t ram_postcopy_requests(void)
286 return ram_state.postcopy_requests;
289 /* used by the search for pages to send */
290 struct PageSearchStatus {
291 /* Current block being searched */
293 /* Current page to search from */
295 /* Set once we wrap around */
298 typedef struct PageSearchStatus PageSearchStatus;
300 struct CompressParam {
309 typedef struct CompressParam CompressParam;
311 struct DecompressParam {
320 typedef struct DecompressParam DecompressParam;
322 static CompressParam *comp_param;
323 static QemuThread *compress_threads;
324 /* comp_done_cond is used to wake up the migration thread when
325 * one of the compression threads has finished the compression.
326 * comp_done_lock is used to co-work with comp_done_cond.
328 static QemuMutex comp_done_lock;
329 static QemuCond comp_done_cond;
330 /* The empty QEMUFileOps will be used by file in CompressParam */
331 static const QEMUFileOps empty_ops = { };
333 static DecompressParam *decomp_param;
334 static QemuThread *decompress_threads;
335 static QemuMutex decomp_done_lock;
336 static QemuCond decomp_done_cond;
338 static int do_compress_ram_page(QEMUFile *f, RAMBlock *block,
341 static void *do_data_compress(void *opaque)
343 CompressParam *param = opaque;
347 qemu_mutex_lock(&param->mutex);
348 while (!param->quit) {
350 block = param->block;
351 offset = param->offset;
353 qemu_mutex_unlock(&param->mutex);
355 do_compress_ram_page(param->file, block, offset);
357 qemu_mutex_lock(&comp_done_lock);
359 qemu_cond_signal(&comp_done_cond);
360 qemu_mutex_unlock(&comp_done_lock);
362 qemu_mutex_lock(&param->mutex);
364 qemu_cond_wait(&param->cond, &param->mutex);
367 qemu_mutex_unlock(&param->mutex);
372 static inline void terminate_compression_threads(void)
374 int idx, thread_count;
376 thread_count = migrate_compress_threads();
378 for (idx = 0; idx < thread_count; idx++) {
379 qemu_mutex_lock(&comp_param[idx].mutex);
380 comp_param[idx].quit = true;
381 qemu_cond_signal(&comp_param[idx].cond);
382 qemu_mutex_unlock(&comp_param[idx].mutex);
386 void migrate_compress_threads_join(void)
390 if (!migrate_use_compression()) {
393 terminate_compression_threads();
394 thread_count = migrate_compress_threads();
395 for (i = 0; i < thread_count; i++) {
396 qemu_thread_join(compress_threads + i);
397 qemu_fclose(comp_param[i].file);
398 qemu_mutex_destroy(&comp_param[i].mutex);
399 qemu_cond_destroy(&comp_param[i].cond);
401 qemu_mutex_destroy(&comp_done_lock);
402 qemu_cond_destroy(&comp_done_cond);
403 g_free(compress_threads);
405 compress_threads = NULL;
409 void migrate_compress_threads_create(void)
413 if (!migrate_use_compression()) {
416 thread_count = migrate_compress_threads();
417 compress_threads = g_new0(QemuThread, thread_count);
418 comp_param = g_new0(CompressParam, thread_count);
419 qemu_cond_init(&comp_done_cond);
420 qemu_mutex_init(&comp_done_lock);
421 for (i = 0; i < thread_count; i++) {
422 /* comp_param[i].file is just used as a dummy buffer to save data,
423 * set its ops to empty.
425 comp_param[i].file = qemu_fopen_ops(NULL, &empty_ops);
426 comp_param[i].done = true;
427 comp_param[i].quit = false;
428 qemu_mutex_init(&comp_param[i].mutex);
429 qemu_cond_init(&comp_param[i].cond);
430 qemu_thread_create(compress_threads + i, "compress",
431 do_data_compress, comp_param + i,
432 QEMU_THREAD_JOINABLE);
437 * save_page_header: write page header to wire
439 * If this is the 1st block, it also writes the block identification
441 * Returns the number of bytes written
443 * @f: QEMUFile where to send the data
444 * @block: block that contains the page we want to send
445 * @offset: offset inside the block for the page
446 * in the lower bits, it contains flags
448 static size_t save_page_header(RAMState *rs, QEMUFile *f, RAMBlock *block,
453 if (block == rs->last_sent_block) {
454 offset |= RAM_SAVE_FLAG_CONTINUE;
456 qemu_put_be64(f, offset);
459 if (!(offset & RAM_SAVE_FLAG_CONTINUE)) {
460 len = strlen(block->idstr);
461 qemu_put_byte(f, len);
462 qemu_put_buffer(f, (uint8_t *)block->idstr, len);
464 rs->last_sent_block = block;
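/*
 * Sketch of the header layout written above:
 *   be64  page offset within the RAMBlock, OR-ed with RAM_SAVE_FLAG_* bits
 *   byte  length of the block idstr   \ only when RAM_SAVE_FLAG_CONTINUE is
 *   bytes the block idstr (no NUL)    / clear, i.e. the block changed
 */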
470 * mig_throttle_guest_down: throttle down the guest
472 * Reduce amount of guest cpu execution to hopefully slow down memory
473 * writes. If guest dirty memory rate is reduced below the rate at
474 * which we can transfer pages to the destination then we should be
475 * able to complete migration. Some workloads dirty memory way too
476 * fast and will not effectively converge, even with auto-converge.
478 static void mig_throttle_guest_down(void)
480 MigrationState *s = migrate_get_current();
481 uint64_t pct_initial = s->parameters.cpu_throttle_initial;
482 uint64_t pct_increment = s->parameters.cpu_throttle_increment;
484 /* We have not started throttling yet. Let's start it. */
485 if (!cpu_throttle_active()) {
486 cpu_throttle_set(pct_initial);
488 /* Throttling already on, just increase the rate */
489 cpu_throttle_set(cpu_throttle_get_percentage() + pct_increment);
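/*
 * Illustrative progression (assuming, say, cpu_throttle_initial=20 and
 * cpu_throttle_increment=10): successive calls throttle the guest at
 * 20%, 30%, 40%, ... until migration converges or the throttle limit is hit.
 */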
494 * xbzrle_cache_zero_page: insert a zero page in the XBZRLE cache
496 * @rs: current RAM state
497 * @current_addr: address for the zero page
499 * Update the xbzrle cache to reflect a page that's been sent as all 0.
500 * The important thing is that a stale (not-yet-0'd) page be replaced
502 * As a bonus, if the page wasn't in the cache it gets added so that
503 * when a small write is made into the 0'd page it gets XBZRLE sent.
505 static void xbzrle_cache_zero_page(RAMState *rs, ram_addr_t current_addr)
507 if (rs->ram_bulk_stage || !migrate_use_xbzrle()) {
511 /* We don't care if this fails to allocate a new cache page
512 * as long as it updated an old one */
513 cache_insert(XBZRLE.cache, current_addr, ZERO_TARGET_PAGE,
514 rs->bitmap_sync_count);
517 #define ENCODING_FLAG_XBZRLE 0x1
520 * save_xbzrle_page: compress and send current page
522 * Returns: 1 means that we wrote the page
523 * 0 means that page is identical to the one already sent
524 * -1 means that xbzrle would be longer than normal
526 * @rs: current RAM state
527 * @current_data: pointer to the address of the page contents
528 * @current_addr: addr of the page
529 * @block: block that contains the page we want to send
530 * @offset: offset inside the block for the page
531 * @last_stage: if we are at the completion stage
533 static int save_xbzrle_page(RAMState *rs, uint8_t **current_data,
534 ram_addr_t current_addr, RAMBlock *block,
535 ram_addr_t offset, bool last_stage)
537 int encoded_len = 0, bytes_xbzrle;
538 uint8_t *prev_cached_page;
540 if (!cache_is_cached(XBZRLE.cache, current_addr, rs->bitmap_sync_count)) {
541 rs->xbzrle_cache_miss++;
543 if (cache_insert(XBZRLE.cache, current_addr, *current_data,
544 rs->bitmap_sync_count) == -1) {
547 /* update *current_data when the page has been
548 inserted into cache */
549 *current_data = get_cached_data(XBZRLE.cache, current_addr);
555 prev_cached_page = get_cached_data(XBZRLE.cache, current_addr);
557 /* save current buffer into memory */
558 memcpy(XBZRLE.current_buf, *current_data, TARGET_PAGE_SIZE);
560 /* XBZRLE encoding (if there is no overflow) */
561 encoded_len = xbzrle_encode_buffer(prev_cached_page, XBZRLE.current_buf,
562 TARGET_PAGE_SIZE, XBZRLE.encoded_buf,
564 if (encoded_len == 0) {
565 trace_save_xbzrle_page_skipping();
567 } else if (encoded_len == -1) {
568 trace_save_xbzrle_page_overflow();
569 rs->xbzrle_overflows++;
570 /* update data in the cache */
572 memcpy(prev_cached_page, *current_data, TARGET_PAGE_SIZE);
573 *current_data = prev_cached_page;
578 /* we need to update the data in the cache, in order to get the same data */
580 memcpy(prev_cached_page, XBZRLE.current_buf, TARGET_PAGE_SIZE);
583 /* Send XBZRLE based compressed page */
584 bytes_xbzrle = save_page_header(rs, rs->f, block,
585 offset | RAM_SAVE_FLAG_XBZRLE);
586 qemu_put_byte(rs->f, ENCODING_FLAG_XBZRLE);
587 qemu_put_be16(rs->f, encoded_len);
588 qemu_put_buffer(rs->f, XBZRLE.encoded_buf, encoded_len);
589 bytes_xbzrle += encoded_len + 1 + 2;
591 rs->xbzrle_bytes += bytes_xbzrle;
592 rs->bytes_transferred += bytes_xbzrle;
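/*
 * Sketch of the XBZRLE record sent above:
 *   save_page_header(..., offset | RAM_SAVE_FLAG_XBZRLE)
 *   byte  ENCODING_FLAG_XBZRLE
 *   be16  encoded_len
 *   encoded_len bytes of delta against the cached copy
 * which is why bytes_xbzrle grows by encoded_len + 1 + 2.
 */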
598 * migration_bitmap_find_dirty: find the next dirty page from start
600 * Called with rcu_read_lock() to protect migration_bitmap
602 * Returns the byte offset within memory region of the start of a dirty page
604 * @rs: current RAM state
605 * @rb: RAMBlock where to search for dirty pages
606 * @start: page where we start the search
609 unsigned long migration_bitmap_find_dirty(RAMState *rs, RAMBlock *rb,
612 unsigned long size = rb->used_length >> TARGET_PAGE_BITS;
613 unsigned long *bitmap = rb->bmap;
616 if (rs->ram_bulk_stage && start > 0) {
619 next = find_next_bit(bitmap, size, start);
625 static inline bool migration_bitmap_clear_dirty(RAMState *rs,
631 ret = test_and_clear_bit(page, rb->bmap);
634 rs->migration_dirty_pages--;
639 static void migration_bitmap_sync_range(RAMState *rs, RAMBlock *rb,
640 ram_addr_t start, ram_addr_t length)
642 rs->migration_dirty_pages +=
643 cpu_physical_memory_sync_dirty_bitmap(rb, start, length,
644 &rs->num_dirty_pages_period);
648 * ram_pagesize_summary: calculate all the pagesizes of a VM
650 * Returns a summary bitmap of the page sizes of all RAMBlocks
652 * For VMs with just normal pages this is equivalent to the host page
653 * size. If it's got some huge pages then it's the OR of all the
654 * different page sizes.
656 uint64_t ram_pagesize_summary(void)
659 uint64_t summary = 0;
661 RAMBLOCK_FOREACH(block) {
662 summary |= block->page_size;
668 static void migration_bitmap_sync(RAMState *rs)
672 uint64_t bytes_xfer_now;
674 rs->bitmap_sync_count++;
676 if (!rs->time_last_bitmap_sync) {
677 rs->time_last_bitmap_sync = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
680 trace_migration_bitmap_sync_start();
681 memory_global_dirty_log_sync();
683 qemu_mutex_lock(&rs->bitmap_mutex);
685 RAMBLOCK_FOREACH(block) {
686 migration_bitmap_sync_range(rs, block, 0, block->used_length);
689 qemu_mutex_unlock(&rs->bitmap_mutex);
691 trace_migration_bitmap_sync_end(rs->num_dirty_pages_period);
693 end_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
695 /* more than 1 second = 1000 milliseconds */
696 if (end_time > rs->time_last_bitmap_sync + 1000) {
697 /* calculate period counters */
698 rs->dirty_pages_rate = rs->num_dirty_pages_period * 1000
699 / (end_time - rs->time_last_bitmap_sync);
700 bytes_xfer_now = ram_bytes_transferred();
702 if (migrate_auto_converge()) {
703 /* The following detection logic can be refined later. For now:
704 Check to see if the dirtied bytes are 50% more than the approx.
705 amount of bytes that just got transferred since the last time we
706 were in this routine. If that happens twice, start or increase
709 if ((rs->num_dirty_pages_period * TARGET_PAGE_SIZE >
710 (bytes_xfer_now - rs->bytes_xfer_prev) / 2) &&
711 (++rs->dirty_rate_high_cnt >= 2)) {
712 trace_migration_throttle();
713 rs->dirty_rate_high_cnt = 0;
714 mig_throttle_guest_down();
718 if (migrate_use_xbzrle()) {
719 if (rs->iterations_prev != rs->iterations) {
720 rs->xbzrle_cache_miss_rate =
721 (double)(rs->xbzrle_cache_miss -
722 rs->xbzrle_cache_miss_prev) /
723 (rs->iterations - rs->iterations_prev);
725 rs->iterations_prev = rs->iterations;
726 rs->xbzrle_cache_miss_prev = rs->xbzrle_cache_miss;
729 /* reset period counters */
730 rs->time_last_bitmap_sync = end_time;
731 rs->num_dirty_pages_period = 0;
732 rs->bytes_xfer_prev = bytes_xfer_now;
734 if (migrate_use_events()) {
735 qapi_event_send_migration_pass(rs->bitmap_sync_count, NULL);
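/*
 * Worked example of the auto-converge test above (illustrative): if the
 * guest dirtied 1 GiB worth of pages during the period while only 1 GiB was
 * transferred, then num_dirty_pages_period * TARGET_PAGE_SIZE (1 GiB) exceeds
 * (bytes_xfer_now - bytes_xfer_prev) / 2 (0.5 GiB); once that happens twice
 * in a row the guest is throttled down.
 */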
740 * save_zero_page: send the zero page to the stream
742 * Returns the number of pages written.
744 * @rs: current RAM state
745 * @block: block that contains the page we want to send
746 * @offset: offset inside the block for the page
747 * @p: pointer to the page
749 static int save_zero_page(RAMState *rs, RAMBlock *block, ram_addr_t offset,
754 if (is_zero_range(p, TARGET_PAGE_SIZE)) {
756 rs->bytes_transferred +=
757 save_page_header(rs, rs->f, block, offset | RAM_SAVE_FLAG_ZERO);
758 qemu_put_byte(rs->f, 0);
759 rs->bytes_transferred += 1;
766 static void ram_release_pages(const char *rbname, uint64_t offset, int pages)
768 if (!migrate_release_ram() || !migration_in_postcopy()) {
772 ram_discard_range(rbname, offset, pages << TARGET_PAGE_BITS);
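/*
 * Note on cost (per save_zero_page() above): a zero page costs only the
 * save_page_header() word with RAM_SAVE_FLAG_ZERO set plus a single byte,
 * which is why bytes_transferred grows by the header size + 1.
 */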
776 * ram_save_page: send the given page to the stream
778 * Returns the number of pages written.
780 * >=0 - Number of pages written - this might legally be 0
781 * if xbzrle noticed the page was the same.
783 * @rs: current RAM state
784 * @block: block that contains the page we want to send
785 * @offset: offset inside the block for the page
786 * @last_stage: if we are at the completion stage
788 static int ram_save_page(RAMState *rs, PageSearchStatus *pss, bool last_stage)
792 ram_addr_t current_addr;
795 bool send_async = true;
796 RAMBlock *block = pss->block;
797 ram_addr_t offset = pss->page << TARGET_PAGE_BITS;
799 p = block->host + offset;
800 trace_ram_save_page(block->idstr, (uint64_t)offset, p);
802 /* In doubt, send page as normal */
804 ret = ram_control_save_page(rs->f, block->offset,
805 offset, TARGET_PAGE_SIZE, &bytes_xmit);
807 rs->bytes_transferred += bytes_xmit;
813 current_addr = block->offset + offset;
815 if (ret != RAM_SAVE_CONTROL_NOT_SUPP) {
816 if (ret != RAM_SAVE_CONTROL_DELAYED) {
817 if (bytes_xmit > 0) {
819 } else if (bytes_xmit == 0) {
824 pages = save_zero_page(rs, block, offset, p);
826 /* Must let xbzrle know, otherwise a previous (now 0'd) cached
827 * page would be stale
829 xbzrle_cache_zero_page(rs, current_addr);
830 ram_release_pages(block->idstr, offset, pages);
831 } else if (!rs->ram_bulk_stage &&
832 !migration_in_postcopy() && migrate_use_xbzrle()) {
833 pages = save_xbzrle_page(rs, &p, current_addr, block,
836 /* Can't send this cached data async, since the cache page
837 * might get updated before it gets to the wire
844 /* XBZRLE overflow or normal page */
846 rs->bytes_transferred += save_page_header(rs, rs->f, block,
847 offset | RAM_SAVE_FLAG_PAGE);
849 qemu_put_buffer_async(rs->f, p, TARGET_PAGE_SIZE,
850 migrate_release_ram() &
851 migration_in_postcopy());
853 qemu_put_buffer(rs->f, p, TARGET_PAGE_SIZE);
855 rs->bytes_transferred += TARGET_PAGE_SIZE;
860 XBZRLE_cache_unlock();
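/*
 * Sketch of the decision tree in ram_save_page() above: first offer the page
 * to ram_control_save_page() (e.g. RDMA); if that is not supported, try
 * save_zero_page(); outside the bulk stage and outside postcopy, try
 * save_xbzrle_page(); otherwise send the full TARGET_PAGE_SIZE bytes with
 * RAM_SAVE_FLAG_PAGE.
 */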
865 static int do_compress_ram_page(QEMUFile *f, RAMBlock *block,
868 RAMState *rs = &ram_state;
869 int bytes_sent, blen;
870 uint8_t *p = block->host + (offset & TARGET_PAGE_MASK);
872 bytes_sent = save_page_header(rs, f, block, offset |
873 RAM_SAVE_FLAG_COMPRESS_PAGE);
874 blen = qemu_put_compression_data(f, p, TARGET_PAGE_SIZE,
875 migrate_compress_level());
878 qemu_file_set_error(migrate_get_current()->to_dst_file, blen);
879 error_report("compressed data failed!");
882 ram_release_pages(block->idstr, offset & TARGET_PAGE_MASK, 1);
888 static void flush_compressed_data(RAMState *rs)
890 int idx, len, thread_count;
892 if (!migrate_use_compression()) {
895 thread_count = migrate_compress_threads();
897 qemu_mutex_lock(&comp_done_lock);
898 for (idx = 0; idx < thread_count; idx++) {
899 while (!comp_param[idx].done) {
900 qemu_cond_wait(&comp_done_cond, &comp_done_lock);
903 qemu_mutex_unlock(&comp_done_lock);
905 for (idx = 0; idx < thread_count; idx++) {
906 qemu_mutex_lock(&comp_param[idx].mutex);
907 if (!comp_param[idx].quit) {
908 len = qemu_put_qemu_file(rs->f, comp_param[idx].file);
909 rs->bytes_transferred += len;
911 qemu_mutex_unlock(&comp_param[idx].mutex);
915 static inline void set_compress_params(CompressParam *param, RAMBlock *block,
918 param->block = block;
919 param->offset = offset;
922 static int compress_page_with_multi_thread(RAMState *rs, RAMBlock *block,
925 int idx, thread_count, bytes_xmit = -1, pages = -1;
927 thread_count = migrate_compress_threads();
928 qemu_mutex_lock(&comp_done_lock);
930 for (idx = 0; idx < thread_count; idx++) {
931 if (comp_param[idx].done) {
932 comp_param[idx].done = false;
933 bytes_xmit = qemu_put_qemu_file(rs->f, comp_param[idx].file);
934 qemu_mutex_lock(&comp_param[idx].mutex);
935 set_compress_params(&comp_param[idx], block, offset);
936 qemu_cond_signal(&comp_param[idx].cond);
937 qemu_mutex_unlock(&comp_param[idx].mutex);
940 rs->bytes_transferred += bytes_xmit;
947 qemu_cond_wait(&comp_done_cond, &comp_done_lock);
950 qemu_mutex_unlock(&comp_done_lock);
956 * ram_save_compressed_page: compress the given page and send it to the stream
958 * Returns the number of pages written.
960 * @rs: current RAM state
961 * @block: block that contains the page we want to send
962 * @offset: offset inside the block for the page
963 * @last_stage: if we are at the completion stage
965 static int ram_save_compressed_page(RAMState *rs, PageSearchStatus *pss,
969 uint64_t bytes_xmit = 0;
972 RAMBlock *block = pss->block;
973 ram_addr_t offset = pss->page << TARGET_PAGE_BITS;
975 p = block->host + offset;
977 ret = ram_control_save_page(rs->f, block->offset,
978 offset, TARGET_PAGE_SIZE, &bytes_xmit);
980 rs->bytes_transferred += bytes_xmit;
983 if (ret != RAM_SAVE_CONTROL_NOT_SUPP) {
984 if (ret != RAM_SAVE_CONTROL_DELAYED) {
985 if (bytes_xmit > 0) {
987 } else if (bytes_xmit == 0) {
992 /* When starting the process of a new block, the first page of
993 * the block should be sent out before other pages in the same
994 * block, and all the pages in last block should have been sent
995 * out, keeping this order is important, because the 'cont' flag
996 * is used to avoid resending the block name.
998 if (block != rs->last_sent_block) {
999 flush_compressed_data(rs);
1000 pages = save_zero_page(rs, block, offset, p);
1002 /* Make sure the first page is sent out before other pages */
1003 bytes_xmit = save_page_header(rs, rs->f, block, offset |
1004 RAM_SAVE_FLAG_COMPRESS_PAGE);
1005 blen = qemu_put_compression_data(rs->f, p, TARGET_PAGE_SIZE,
1006 migrate_compress_level());
1008 rs->bytes_transferred += bytes_xmit + blen;
1012 qemu_file_set_error(rs->f, blen);
1013 error_report("compressed data failed!");
1017 ram_release_pages(block->idstr, offset, pages);
1020 pages = save_zero_page(rs, block, offset, p);
1022 pages = compress_page_with_multi_thread(rs, block, offset);
1024 ram_release_pages(block->idstr, offset, pages);
1033 * find_dirty_block: find the next dirty page and update any state
1034 * associated with the search process.
1036 * Returns if a page is found
1038 * @rs: current RAM state
1039 * @pss: data about the state of the current dirty page scan
1040 * @again: set to false if the search has scanned the whole of RAM
1042 static bool find_dirty_block(RAMState *rs, PageSearchStatus *pss, bool *again)
1044 pss->page = migration_bitmap_find_dirty(rs, pss->block, pss->page);
1045 if (pss->complete_round && pss->block == rs->last_seen_block &&
1046 pss->page >= rs->last_page) {
1048 * We've been once around the RAM and haven't found anything.
1054 if ((pss->page << TARGET_PAGE_BITS) >= pss->block->used_length) {
1055 /* Didn't find anything in this RAM Block */
1057 pss->block = QLIST_NEXT_RCU(pss->block, next);
1059 /* Hit the end of the list */
1060 pss->block = QLIST_FIRST_RCU(&ram_list.blocks);
1061 /* Flag that we've looped */
1062 pss->complete_round = true;
1063 rs->ram_bulk_stage = false;
1064 if (migrate_use_xbzrle()) {
1065 /* If xbzrle is on, stop using the data compression at this
1066 * point. In theory, xbzrle can do better than compression.
1068 flush_compressed_data(rs);
1071 /* Didn't find anything this time, but try again on the new block */
1075 /* Can go around again, but... */
1077 /* We've found something so probably don't need to */
1083 * unqueue_page: gets a page of the queue
1085 * Helper for 'get_queued_page' - gets a page off the queue
1087 * Returns the block of the page (or NULL if none available)
1089 * @rs: current RAM state
1090 * @offset: used to return the offset within the RAMBlock
1092 static RAMBlock *unqueue_page(RAMState *rs, ram_addr_t *offset)
1094 RAMBlock *block = NULL;
1096 qemu_mutex_lock(&rs->src_page_req_mutex);
1097 if (!QSIMPLEQ_EMPTY(&rs->src_page_requests)) {
1098 struct RAMSrcPageRequest *entry =
1099 QSIMPLEQ_FIRST(&rs->src_page_requests);
1101 *offset = entry->offset;
1103 if (entry->len > TARGET_PAGE_SIZE) {
1104 entry->len -= TARGET_PAGE_SIZE;
1105 entry->offset += TARGET_PAGE_SIZE;
1107 memory_region_unref(block->mr);
1108 QSIMPLEQ_REMOVE_HEAD(&rs->src_page_requests, next_req);
1112 qemu_mutex_unlock(&rs->src_page_req_mutex);
1118 * get_queued_page: unqueue a page from the postcopy requests
1120 * Skips pages that are already sent (!dirty)
1122 * Returns if a queued page is found
1124 * @rs: current RAM state
1125 * @pss: data about the state of the current dirty page scan
1127 static bool get_queued_page(RAMState *rs, PageSearchStatus *pss)
1134 block = unqueue_page(rs, &offset);
1136 * We're sending this page, and since it's postcopy nothing else
1137 * will dirty it, and we must make sure it doesn't get sent again
1138 * even if this queue request was received after the background
1139 * search already sent it.
1144 page = offset >> TARGET_PAGE_BITS;
1145 dirty = test_bit(page, block->bmap);
1147 trace_get_queued_page_not_dirty(block->idstr, (uint64_t)offset,
1148 page, test_bit(page, block->unsentmap));
1150 trace_get_queued_page(block->idstr, (uint64_t)offset, page);
1154 } while (block && !dirty);
1158 * As soon as we start servicing pages out of order, then we have
1159 * to kill the bulk stage, since the bulk stage assumes
1160 * in (migration_bitmap_find_and_reset_dirty) that every page is
1161 * dirty, that's no longer true.
1163 rs->ram_bulk_stage = false;
1166 * We want the background search to continue from the queued page
1167 * since the guest is likely to want other pages near to the page
1168 * it just requested.
1171 pss->page = offset >> TARGET_PAGE_BITS;
1178 * migration_page_queue_free: drop any remaining pages in the ram
1181 * It should be empty at the end anyway, but in error cases there may
1182 * be some left. In case there is any page left, we drop it.
1185 void migration_page_queue_free(void)
1187 struct RAMSrcPageRequest *mspr, *next_mspr;
1188 RAMState *rs = &ram_state;
1189 /* This queue generally should be empty - but in the case of a failed
1190 * migration might have some droppings in.
1193 QSIMPLEQ_FOREACH_SAFE(mspr, &rs->src_page_requests, next_req, next_mspr) {
1194 memory_region_unref(mspr->rb->mr);
1195 QSIMPLEQ_REMOVE_HEAD(&rs->src_page_requests, next_req);
1202 * ram_save_queue_pages: queue the page for transmission
1204 * A request from postcopy destination for example.
1206 * Returns zero on success or negative on error
1208 * @rbname: Name of the RAMBLock of the request. NULL means the
1209 * same that last one.
1210 * @start: starting address from the start of the RAMBlock
1211 * @len: length (in bytes) to send
1213 int ram_save_queue_pages(const char *rbname, ram_addr_t start, ram_addr_t len)
1216 RAMState *rs = &ram_state;
1218 rs->postcopy_requests++;
1221 /* Reuse last RAMBlock */
1222 ramblock = rs->last_req_rb;
1226 * Shouldn't happen, we can't reuse the last RAMBlock if
1227 * it's the 1st request.
1229 error_report("ram_save_queue_pages no previous block");
1233 ramblock = qemu_ram_block_by_name(rbname);
1236 /* We shouldn't be asked for a non-existent RAMBlock */
1237 error_report("ram_save_queue_pages no block '%s'", rbname);
1240 rs->last_req_rb = ramblock;
1242 trace_ram_save_queue_pages(ramblock->idstr, start, len);
1243 if (start + len > ramblock->used_length) {
1244 error_report("%s request overrun start=" RAM_ADDR_FMT " len="
1245 RAM_ADDR_FMT " blocklen=" RAM_ADDR_FMT,
1246 __func__, start, len, ramblock->used_length);
1250 struct RAMSrcPageRequest *new_entry =
1251 g_malloc0(sizeof(struct RAMSrcPageRequest));
1252 new_entry->rb = ramblock;
1253 new_entry->offset = start;
1254 new_entry->len = len;
1256 memory_region_ref(ramblock->mr);
1257 qemu_mutex_lock(&rs->src_page_req_mutex);
1258 QSIMPLEQ_INSERT_TAIL(&rs->src_page_requests, new_entry, next_req);
1259 qemu_mutex_unlock(&rs->src_page_req_mutex);
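/*
 * Typical caller (sketch): the source's return-path code invokes this when
 * the destination faults on a page during postcopy, e.g. something like
 * ram_save_queue_pages("pc.ram", addr & TARGET_PAGE_MASK, TARGET_PAGE_SIZE);
 * the queued request is then picked up by get_queued_page() ahead of the
 * background scan. "pc.ram" is only an example block name.
 */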
1270 * ram_save_target_page: save one target page
1272 * Returns the number of pages written
1274 * @rs: current RAM state
1275 * @ms: current migration state
1276 * @pss: data about the page we want to send
1277 * @last_stage: if we are at the completion stage
1279 static int ram_save_target_page(RAMState *rs, PageSearchStatus *pss,
1284 /* Check if the page is dirty and if it is, send it */
1285 if (migration_bitmap_clear_dirty(rs, pss->block, pss->page)) {
1287 * If xbzrle is on, stop using the data compression after first
1288 * round of migration even if compression is enabled. In theory,
1289 * xbzrle can do better than compression.
1291 if (migrate_use_compression() &&
1292 (rs->ram_bulk_stage || !migrate_use_xbzrle())) {
1293 res = ram_save_compressed_page(rs, pss, last_stage);
1295 res = ram_save_page(rs, pss, last_stage);
1301 if (pss->block->unsentmap) {
1302 clear_bit(pss->page, pss->block->unsentmap);
1310 * ram_save_host_page: save a whole host page
1312 * Starting at *offset send pages up to the end of the current host
1313 * page. It's valid for the initial offset to point into the middle of
1314 * a host page in which case the remainder of the hostpage is sent.
1315 * Only dirty target pages are sent. Note that the host page size may
1316 * be a huge page for this block.
1317 * The saving stops at the boundary of the used_length of the block
1318 * if the RAMBlock isn't a multiple of the host page size.
1320 * Returns the number of pages written or negative on error
1322 * @rs: current RAM state
1323 * @ms: current migration state
1324 * @pss: data about the page we want to send
1325 * @last_stage: if we are at the completion stage
1327 static int ram_save_host_page(RAMState *rs, PageSearchStatus *pss,
1330 int tmppages, pages = 0;
1331 size_t pagesize_bits =
1332 qemu_ram_pagesize(pss->block) >> TARGET_PAGE_BITS;
1335 tmppages = ram_save_target_page(rs, pss, last_stage);
1342 } while ((pss->page & (pagesize_bits - 1)) &&
1343 offset_in_ramblock(pss->block, pss->page << TARGET_PAGE_BITS));
1345 /* The offset we leave with is the last one we looked at */
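/*
 * Example (illustrative): for a RAMBlock backed by 2 MiB huge pages with a
 * 4 KiB TARGET_PAGE_SIZE, pagesize_bits is 512, so one call walks up to 512
 * consecutive target pages and sends the dirty ones before returning,
 * keeping the whole host page together for postcopy placement.
 */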
1351 * ram_find_and_save_block: finds a dirty page and sends it to f
1353 * Called within an RCU critical section.
1355 * Returns the number of pages written where zero means no dirty pages
1357 * @rs: current RAM state
1358 * @last_stage: if we are at the completion stage
1360 * On systems where host-page-size > target-page-size it will send all the
1361 * pages in a host page that are dirty.
1364 static int ram_find_and_save_block(RAMState *rs, bool last_stage)
1366 PageSearchStatus pss;
1370 /* No dirty page as there is zero RAM */
1371 if (!ram_bytes_total()) {
1375 pss.block = rs->last_seen_block;
1376 pss.page = rs->last_page;
1377 pss.complete_round = false;
1380 pss.block = QLIST_FIRST_RCU(&ram_list.blocks);
1385 found = get_queued_page(rs, &pss);
1388 /* priority queue empty, so just search for something dirty */
1389 found = find_dirty_block(rs, &pss, &again);
1393 pages = ram_save_host_page(rs, &pss, last_stage);
1395 } while (!pages && again);
1397 rs->last_seen_block = pss.block;
1398 rs->last_page = pss.page;
1403 void acct_update_position(QEMUFile *f, size_t size, bool zero)
1405 uint64_t pages = size / TARGET_PAGE_SIZE;
1406 RAMState *rs = &ram_state;
1409 rs->zero_pages += pages;
1411 rs->norm_pages += pages;
1412 rs->bytes_transferred += size;
1413 qemu_update_position(f, size);
1417 uint64_t ram_bytes_total(void)
1423 RAMBLOCK_FOREACH(block) {
1424 total += block->used_length;
1430 void free_xbzrle_decoded_buf(void)
1432 g_free(xbzrle_decoded_buf);
1433 xbzrle_decoded_buf = NULL;
1436 static void ram_migration_cleanup(void *opaque)
1440 /* The caller should have held the iothread lock or be in a bh, so there is
1441 * no writing race against this migration_bitmap
1443 memory_global_dirty_log_stop();
1445 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1446 g_free(block->bmap);
1448 g_free(block->unsentmap);
1449 block->unsentmap = NULL;
1452 XBZRLE_cache_lock();
1454 cache_fini(XBZRLE.cache);
1455 g_free(XBZRLE.encoded_buf);
1456 g_free(XBZRLE.current_buf);
1457 g_free(ZERO_TARGET_PAGE);
1458 XBZRLE.cache = NULL;
1459 XBZRLE.encoded_buf = NULL;
1460 XBZRLE.current_buf = NULL;
1462 XBZRLE_cache_unlock();
1465 static void ram_state_reset(RAMState *rs)
1467 rs->last_seen_block = NULL;
1468 rs->last_sent_block = NULL;
1470 rs->last_version = ram_list.version;
1471 rs->ram_bulk_stage = true;
1474 #define MAX_WAIT 50 /* ms, half buffered_file limit */
1477 * 'expected' is the value you expect the bitmap mostly to be full
1478 * of; it won't bother printing lines that are all this value.
1479 * If 'todump' is null the migration bitmap is dumped.
1481 void ram_debug_dump_bitmap(unsigned long *todump
, bool expected
,
1482 unsigned long pages
)
1485 int64_t linelen
= 128;
1488 for (cur
= 0; cur
< pages
; cur
+= linelen
) {
1492 * Last line; catch the case where the line length
1493 * is longer than remaining ram
1495 if (cur
+ linelen
> pages
) {
1496 linelen
= pages
- cur
;
1498 for (curb
= 0; curb
< linelen
; curb
++) {
1499 bool thisbit
= test_bit(cur
+ curb
, todump
);
1500 linebuf
[curb
] = thisbit
? '1' : '.';
1501 found
= found
|| (thisbit
!= expected
);
1504 linebuf
[curb
] = '\0';
1505 fprintf(stderr
, "0x%08" PRIx64
" : %s\n", cur
, linebuf
);
1510 /* **** functions for postcopy ***** */
1512 void ram_postcopy_migrated_memory_release(MigrationState
*ms
)
1514 struct RAMBlock
*block
;
1516 RAMBLOCK_FOREACH(block
) {
1517 unsigned long *bitmap
= block
->bmap
;
1518 unsigned long range
= block
->used_length
>> TARGET_PAGE_BITS
;
1519 unsigned long run_start
= find_next_zero_bit(bitmap
, range
, 0);
1521 while (run_start
< range
) {
1522 unsigned long run_end
= find_next_bit(bitmap
, range
, run_start
+ 1);
1523 ram_discard_range(block
->idstr
, run_start
<< TARGET_PAGE_BITS
,
1524 (run_end
- run_start
) << TARGET_PAGE_BITS
);
1525 run_start
= find_next_zero_bit(bitmap
, range
, run_end
+ 1);
1531 * postcopy_send_discard_bm_ram: discard a RAMBlock
1533 * Returns zero on success
1535 * Callback from postcopy_each_ram_send_discard for each RAMBlock
1536 * Note: At this point the 'unsentmap' is the processed bitmap combined
1537 * with the dirtymap; so a '1' means it's either dirty or unsent.
1539 * @ms: current migration state
1540 * @pds: state for postcopy
1541 * @start: RAMBlock starting page
1542 * @length: RAMBlock size
1544 static int postcopy_send_discard_bm_ram(MigrationState
*ms
,
1545 PostcopyDiscardState
*pds
,
1548 unsigned long end
= block
->used_length
>> TARGET_PAGE_BITS
;
1549 unsigned long current
;
1550 unsigned long *unsentmap
= block
->unsentmap
;
1552 for (current
= 0; current
< end
; ) {
1553 unsigned long one
= find_next_bit(unsentmap
, end
, current
);
1556 unsigned long zero
= find_next_zero_bit(unsentmap
, end
, one
+ 1);
1557 unsigned long discard_length
;
1560 discard_length
= end
- one
;
1562 discard_length
= zero
- one
;
1564 if (discard_length
) {
1565 postcopy_discard_send_range(ms
, pds
, one
, discard_length
);
1567 current
= one
+ discard_length
;
1577 * postcopy_each_ram_send_discard: discard all RAMBlocks
1579 * Returns 0 for success or negative for error
1581 * Utility for the outgoing postcopy code.
1582 * Calls postcopy_send_discard_bm_ram for each RAMBlock
1583 * passing it bitmap indexes and name.
1584 * (qemu_ram_foreach_block ends up passing unscaled lengths
1585 * which would mean postcopy code would have to deal with target page)
1587 * @ms: current migration state
1589 static int postcopy_each_ram_send_discard(MigrationState
*ms
)
1591 struct RAMBlock
*block
;
1594 RAMBLOCK_FOREACH(block
) {
1595 PostcopyDiscardState
*pds
=
1596 postcopy_discard_send_init(ms
, block
->idstr
);
1599 * Postcopy sends chunks of bitmap over the wire, but it
1600 * just needs indexes at this point, avoids it having
1601 * target page specific code.
1603 ret
= postcopy_send_discard_bm_ram(ms
, pds
, block
);
1604 postcopy_discard_send_finish(ms
, pds
);
1614 * postcopy_chunk_hostpages_pass: canonicalize bitmap in hostpages
1616 * Helper for postcopy_chunk_hostpages; it's called twice to
1617 * canonicalize the two bitmaps, that are similar, but one is
1620 * Postcopy requires that all target pages in a hostpage are dirty or
1621 * clean, not a mix. This function canonicalizes the bitmaps.
1623 * @ms: current migration state
1624 * @unsent_pass: if true we need to canonicalize partially unsent host pages
1625 * otherwise we need to canonicalize partially dirty host pages
1626 * @block: block that contains the page we want to canonicalize
1627 * @pds: state for postcopy
1629 static void postcopy_chunk_hostpages_pass(MigrationState
*ms
, bool unsent_pass
,
1631 PostcopyDiscardState
*pds
)
1633 RAMState
*rs
= &ram_state
;
1634 unsigned long *bitmap
= block
->bmap
;
1635 unsigned long *unsentmap
= block
->unsentmap
;
1636 unsigned int host_ratio
= block
->page_size
/ TARGET_PAGE_SIZE
;
1637 unsigned long pages
= block
->used_length
>> TARGET_PAGE_BITS
;
1638 unsigned long run_start
;
1640 if (block
->page_size
== TARGET_PAGE_SIZE
) {
1641 /* Easy case - TPS==HPS for a non-huge page RAMBlock */
1646 /* Find a sent page */
1647 run_start
= find_next_zero_bit(unsentmap
, pages
, 0);
1649 /* Find a dirty page */
1650 run_start
= find_next_bit(bitmap
, pages
, 0);
1653 while (run_start
< pages
) {
1654 bool do_fixup
= false;
1655 unsigned long fixup_start_addr
;
1656 unsigned long host_offset
;
1659 * If the start of this run of pages is in the middle of a host
1660 * page, then we need to fixup this host page.
1662 host_offset
= run_start
% host_ratio
;
1665 run_start
-= host_offset
;
1666 fixup_start_addr
= run_start
;
1667 /* For the next pass */
1668 run_start
= run_start
+ host_ratio
;
1670 /* Find the end of this run */
1671 unsigned long run_end
;
1673 run_end
= find_next_bit(unsentmap
, pages
, run_start
+ 1);
1675 run_end
= find_next_zero_bit(bitmap
, pages
, run_start
+ 1);
1678 * If the end isn't at the start of a host page, then the
1679 * run doesn't finish at the end of a host page
1680 * and we need to discard.
1682 host_offset
= run_end
% host_ratio
;
1685 fixup_start_addr
= run_end
- host_offset
;
1687 * This host page has gone, the next loop iteration starts
1688 * from after the fixup
1690 run_start
= fixup_start_addr
+ host_ratio
;
1693 * No discards on this iteration, next loop starts from
1694 * next sent/dirty page
1696 run_start
= run_end
+ 1;
1703 /* Tell the destination to discard this page */
1704 if (unsent_pass
|| !test_bit(fixup_start_addr
, unsentmap
)) {
1705 /* For the unsent_pass we:
1706 * discard partially sent pages
1707 * For the !unsent_pass (dirty) we:
1708 * discard partially dirty pages that were sent
1709 * (any partially sent pages were already discarded
1710 * by the previous unsent_pass)
1712 postcopy_discard_send_range(ms
, pds
, fixup_start_addr
,
1716 /* Clean up the bitmap */
1717 for (page
= fixup_start_addr
;
1718 page
< fixup_start_addr
+ host_ratio
; page
++) {
1719 /* All pages in this host page are now not sent */
1720 set_bit(page
, unsentmap
);
1723 * Remark them as dirty, updating the count for any pages
1724 * that weren't previously dirty.
1726 rs
->migration_dirty_pages
+= !test_and_set_bit(page
, bitmap
);
1731 /* Find the next sent page for the next iteration */
1732 run_start
= find_next_zero_bit(unsentmap
, pages
, run_start
);
1734 /* Find the next dirty page for the next iteration */
1735 run_start
= find_next_bit(bitmap
, pages
, run_start
);
1741 * postcopy_chunk_hostpages: discard any partially sent host page
1743 * Utility for the outgoing postcopy code.
1745 * Discard any partially sent host-page size chunks, mark any partially
1746 * dirty host-page size chunks as all dirty. In this case the host-page
1747 * is the host-page for the particular RAMBlock, i.e. it might be a huge page
1749 * Returns zero on success
1751 * @ms: current migration state
1752 * @block: block we want to work with
1754 static int postcopy_chunk_hostpages(MigrationState
*ms
, RAMBlock
*block
)
1756 PostcopyDiscardState
*pds
=
1757 postcopy_discard_send_init(ms
, block
->idstr
);
1759 /* First pass: Discard all partially sent host pages */
1760 postcopy_chunk_hostpages_pass(ms
, true, block
, pds
);
1762 * Second pass: Ensure that all partially dirty host pages are made
1765 postcopy_chunk_hostpages_pass(ms
, false, block
, pds
);
1767 postcopy_discard_send_finish(ms
, pds
);
1772 * ram_postcopy_send_discard_bitmap: transmit the discard bitmap
1774 * Returns zero on success
1776 * Transmit the set of pages to be discarded after precopy to the target
1777 * these are pages that:
1778 * a) Have been previously transmitted but are now dirty again
1779 * b) Pages that have never been transmitted, this ensures that
1780 * any pages on the destination that have been mapped by background
1781 * tasks get discarded (transparent huge pages is the specific concern)
1782 * Hopefully this is pretty sparse
1784 * @ms: current migration state
1786 int ram_postcopy_send_discard_bitmap(MigrationState
*ms
)
1788 RAMState
*rs
= &ram_state
;
1794 /* This should be our last sync, the src is now paused */
1795 migration_bitmap_sync(rs
);
1797 /* Easiest way to make sure we don't resume in the middle of a host-page */
1798 rs
->last_seen_block
= NULL
;
1799 rs
->last_sent_block
= NULL
;
1802 QLIST_FOREACH_RCU(block
, &ram_list
.blocks
, next
) {
1803 unsigned long pages
= block
->used_length
>> TARGET_PAGE_BITS
;
1804 unsigned long *bitmap
= block
->bmap
;
1805 unsigned long *unsentmap
= block
->unsentmap
;
1808 /* We don't have a safe way to resize the sentmap, so
1809 * if the bitmap was resized it will be NULL at this
1812 error_report("migration ram resized during precopy phase");
1816 /* Deal with TPS != HPS and huge pages */
1817 ret
= postcopy_chunk_hostpages(ms
, block
);
1824 * Update the unsentmap to be unsentmap = unsentmap | dirty
1826 bitmap_or(unsentmap
, unsentmap
, bitmap
, pages
);
1827 #ifdef DEBUG_POSTCOPY
1828 ram_debug_dump_bitmap(unsentmap
, true, pages
);
1831 trace_ram_postcopy_send_discard_bitmap();
1833 ret
= postcopy_each_ram_send_discard(ms
);
1840 * ram_discard_range: discard dirtied pages at the beginning of postcopy
1842 * Returns zero on success
1844 * @rbname: name of the RAMBlock of the request. NULL means the
1845 * same that last one.
1846 * @start: RAMBlock starting page
1847 * @length: RAMBlock size
1849 int ram_discard_range(const char *rbname
, uint64_t start
, size_t length
)
1853 trace_ram_discard_range(rbname
, start
, length
);
1856 RAMBlock
*rb
= qemu_ram_block_by_name(rbname
);
1859 error_report("ram_discard_range: Failed to find block '%s'", rbname
);
1863 ret
= ram_block_discard_range(rb
, start
, length
);
1871 static int ram_state_init(RAMState
*rs
)
1873 memset(rs
, 0, sizeof(*rs
));
1874 qemu_mutex_init(&rs
->bitmap_mutex
);
1875 qemu_mutex_init(&rs
->src_page_req_mutex
);
1876 QSIMPLEQ_INIT(&rs
->src_page_requests
);
1878 if (migrate_use_xbzrle()) {
1879 XBZRLE_cache_lock();
1880 ZERO_TARGET_PAGE
= g_malloc0(TARGET_PAGE_SIZE
);
1881 XBZRLE
.cache
= cache_init(migrate_xbzrle_cache_size() /
1884 if (!XBZRLE
.cache
) {
1885 XBZRLE_cache_unlock();
1886 error_report("Error creating cache");
1889 XBZRLE_cache_unlock();
1891 /* We prefer not to abort if there is no memory */
1892 XBZRLE
.encoded_buf
= g_try_malloc0(TARGET_PAGE_SIZE
);
1893 if (!XBZRLE
.encoded_buf
) {
1894 error_report("Error allocating encoded_buf");
1898 XBZRLE
.current_buf
= g_try_malloc(TARGET_PAGE_SIZE
);
1899 if (!XBZRLE
.current_buf
) {
1900 error_report("Error allocating current_buf");
1901 g_free(XBZRLE
.encoded_buf
);
1902 XBZRLE
.encoded_buf
= NULL
;
1907 /* For memory_global_dirty_log_start below. */
1908 qemu_mutex_lock_iothread();
1910 qemu_mutex_lock_ramlist();
1912 ram_state_reset(rs
);
1914 /* Skip setting bitmap if there is no RAM */
1915 if (ram_bytes_total()) {
1918 QLIST_FOREACH_RCU(block
, &ram_list
.blocks
, next
) {
1919 unsigned long pages
= block
->max_length
>> TARGET_PAGE_BITS
;
1921 block
->bmap
= bitmap_new(pages
);
1922 bitmap_set(block
->bmap
, 0, pages
);
1923 if (migrate_postcopy_ram()) {
1924 block
->unsentmap
= bitmap_new(pages
);
1925 bitmap_set(block
->unsentmap
, 0, pages
);
1931 * Count the total number of pages used by ram blocks not including any
1932 * gaps due to alignment or unplugs.
1934 rs
->migration_dirty_pages
= ram_bytes_total() >> TARGET_PAGE_BITS
;
1936 memory_global_dirty_log_start();
1937 migration_bitmap_sync(rs
);
1938 qemu_mutex_unlock_ramlist();
1939 qemu_mutex_unlock_iothread();
1946 * Each of ram_save_setup, ram_save_iterate and ram_save_complete has
1947 * long-running RCU critical section. When rcu-reclaims in the code
1948 * start to become numerous it will be necessary to reduce the
1949 * granularity of these critical sections.
1953 * ram_save_setup: Setup RAM for migration
1955 * Returns zero to indicate success and negative for error
1957 * @f: QEMUFile where to send the data
1958 * @opaque: RAMState pointer
1960 static int ram_save_setup(QEMUFile *f, void *opaque)
1962 RAMState *rs = opaque;
1965 /* migration has already setup the bitmap, reuse it. */
1966 if (!migration_in_colo_state()) {
1967 if (ram_state_init(rs) < 0) {
1975 qemu_put_be64(f, ram_bytes_total() | RAM_SAVE_FLAG_MEM_SIZE);
1977 RAMBLOCK_FOREACH(block) {
1978 qemu_put_byte(f, strlen(block->idstr));
1979 qemu_put_buffer(f, (uint8_t *)block->idstr, strlen(block->idstr));
1980 qemu_put_be64(f, block->used_length);
1981 if (migrate_postcopy_ram() && block->page_size != qemu_host_page_size) {
1982 qemu_put_be64(f, block->page_size);
1988 ram_control_before_iterate(f, RAM_CONTROL_SETUP);
1989 ram_control_after_iterate(f, RAM_CONTROL_SETUP);
1991 qemu_put_be64(f, RAM_SAVE_FLAG_EOS);
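/*
 * Sketch of the setup section written above:
 *   be64  total ram size | RAM_SAVE_FLAG_MEM_SIZE
 *   per RAMBlock: byte idstr length, idstr bytes, be64 used_length,
 *                 and (postcopy with non-host-sized pages) be64 page_size
 *   be64  RAM_SAVE_FLAG_EOS
 */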
1997 * ram_save_iterate: iterative stage for migration
1999 * Returns zero to indicate success and negative for error
2001 * @f: QEMUFile where to send the data
2002 * @opaque: RAMState pointer
2004 static int ram_save_iterate(QEMUFile *f, void *opaque)
2006 RAMState *rs = opaque;
2013 if (ram_list.version != rs->last_version) {
2014 ram_state_reset(rs);
2017 /* Read version before ram_list.blocks */
2020 ram_control_before_iterate(f, RAM_CONTROL_ROUND);
2022 t0 = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
2024 while ((ret = qemu_file_rate_limit(f)) == 0) {
2027 pages = ram_find_and_save_block(rs, false);
2028 /* no more pages to send */
2035 /* we want to check in the 1st loop, just in case it was the 1st time
2036 and we had to sync the dirty bitmap.
2037 qemu_get_clock_ns() is a bit expensive, so we only check each some
2040 if ((i & 63) == 0) {
2041 uint64_t t1 = (qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - t0) / 1000000;
2042 if (t1 > MAX_WAIT) {
2043 trace_ram_save_iterate_big_wait(t1, i);
2049 flush_compressed_data(rs);
2053 * Must occur before EOS (or any QEMUFile operation)
2054 * because of RDMA protocol.
2056 ram_control_after_iterate(f, RAM_CONTROL_ROUND);
2058 qemu_put_be64(f, RAM_SAVE_FLAG_EOS);
2059 rs->bytes_transferred += 8;
2061 ret = qemu_file_get_error(f);
2070 * ram_save_complete: function called to send the remaining amount of ram
2072 * Returns zero to indicate success
2074 * Called with iothread lock
2076 * @f: QEMUFile where to send the data
2077 * @opaque: RAMState pointer
2079 static int ram_save_complete(QEMUFile *f, void *opaque)
2081 RAMState *rs = opaque;
2085 if (!migration_in_postcopy()) {
2086 migration_bitmap_sync(rs);
2089 ram_control_before_iterate(f, RAM_CONTROL_FINISH);
2091 /* try transferring iterative blocks of memory */
2093 /* flush all remaining blocks regardless of rate limiting */
2097 pages = ram_find_and_save_block(rs, !migration_in_colo_state());
2098 /* no more blocks to send */
2104 flush_compressed_data(rs);
2105 ram_control_after_iterate(f, RAM_CONTROL_FINISH);
2109 qemu_put_be64(f, RAM_SAVE_FLAG_EOS);
2114 static void ram_save_pending(QEMUFile
*f
, void *opaque
, uint64_t max_size
,
2115 uint64_t *non_postcopiable_pending
,
2116 uint64_t *postcopiable_pending
)
2118 RAMState
*rs
= opaque
;
2119 uint64_t remaining_size
;
2121 remaining_size
= rs
->migration_dirty_pages
* TARGET_PAGE_SIZE
;
2123 if (!migration_in_postcopy() &&
2124 remaining_size
< max_size
) {
2125 qemu_mutex_lock_iothread();
2127 migration_bitmap_sync(rs
);
2129 qemu_mutex_unlock_iothread();
2130 remaining_size
= rs
->migration_dirty_pages
* TARGET_PAGE_SIZE
;
2133 /* We can do postcopy, and all the data is postcopiable */
2134 *postcopiable_pending
+= remaining_size
;
2137 static int load_xbzrle(QEMUFile
*f
, ram_addr_t addr
, void *host
)
2139 unsigned int xh_len
;
2141 uint8_t *loaded_data
;
2143 if (!xbzrle_decoded_buf
) {
2144 xbzrle_decoded_buf
= g_malloc(TARGET_PAGE_SIZE
);
2146 loaded_data
= xbzrle_decoded_buf
;
2148 /* extract RLE header */
2149 xh_flags
= qemu_get_byte(f
);
2150 xh_len
= qemu_get_be16(f
);
2152 if (xh_flags
!= ENCODING_FLAG_XBZRLE
) {
2153 error_report("Failed to load XBZRLE page - wrong compression!");
2157 if (xh_len
> TARGET_PAGE_SIZE
) {
2158 error_report("Failed to load XBZRLE page - len overflow!");
2161 /* load data and decode */
2162 qemu_get_buffer_in_place(f
, &loaded_data
, xh_len
);
2165 if (xbzrle_decode_buffer(loaded_data
, xh_len
, host
,
2166 TARGET_PAGE_SIZE
) == -1) {
2167 error_report("Failed to load XBZRLE page - decode error!");
2175 * ram_block_from_stream: read a RAMBlock id from the migration stream
2177 * Must be called from within a rcu critical section.
2179 * Returns a pointer from within the RCU-protected ram_list.
2181 * @f: QEMUFile where to read the data from
2182 * @flags: Page flags (mostly to see if it's a continuation of previous block)
2184 static inline RAMBlock
*ram_block_from_stream(QEMUFile
*f
, int flags
)
2186 static RAMBlock
*block
= NULL
;
2190 if (flags
& RAM_SAVE_FLAG_CONTINUE
) {
2192 error_report("Ack, bad migration stream!");
2198 len
= qemu_get_byte(f
);
2199 qemu_get_buffer(f
, (uint8_t *)id
, len
);
2202 block
= qemu_ram_block_by_name(id
);
2204 error_report("Can't find block %s", id
);
2211 static inline void *host_from_ram_block_offset(RAMBlock
*block
,
2214 if (!offset_in_ramblock(block
, offset
)) {
2218 return block
->host
+ offset
;
2222 * ram_handle_compressed: handle the zero page case
2224 * If a page (or a whole RDMA chunk) has been
2225 * determined to be zero, then zap it.
2227 * @host: host address for the zero page
2228 * @ch: what the page is filled from. We only support zero
2229 * @size: size of the zero page
2231 void ram_handle_compressed(void *host
, uint8_t ch
, uint64_t size
)
2233 if (ch
!= 0 || !is_zero_range(host
, size
)) {
2234 memset(host
, ch
, size
);
2238 static void *do_data_decompress(void *opaque)
2240 DecompressParam *param = opaque;
2241 unsigned long pagesize;
2245 qemu_mutex_lock(&param->mutex);
2246 while (!param->quit) {
2251 qemu_mutex_unlock(&param->mutex);
2253 pagesize = TARGET_PAGE_SIZE;
2254 /* uncompress() will return failed in some case, especially
2255 * when the page is dirtied while doing the compression, it's
2256 * not a problem because the dirty page will be retransferred
2257 * and uncompress() won't break the data in other pages.
2259 uncompress((Bytef *)des, &pagesize,
2260 (const Bytef *)param->compbuf, len);
2262 qemu_mutex_lock(&decomp_done_lock);
2264 qemu_cond_signal(&decomp_done_cond);
2265 qemu_mutex_unlock(&decomp_done_lock);
2267 qemu_mutex_lock(&param->mutex);
2269 qemu_cond_wait(&param->cond, &param->mutex);
2272 qemu_mutex_unlock(&param->mutex);
2277 static void wait_for_decompress_done(void)
2279 int idx
, thread_count
;
2281 if (!migrate_use_compression()) {
2285 thread_count
= migrate_decompress_threads();
2286 qemu_mutex_lock(&decomp_done_lock
);
2287 for (idx
= 0; idx
< thread_count
; idx
++) {
2288 while (!decomp_param
[idx
].done
) {
2289 qemu_cond_wait(&decomp_done_cond
, &decomp_done_lock
);
2292 qemu_mutex_unlock(&decomp_done_lock
);
void migrate_decompress_threads_create(void)
{
    int i, thread_count;

    thread_count = migrate_decompress_threads();
    decompress_threads = g_new0(QemuThread, thread_count);
    decomp_param = g_new0(DecompressParam, thread_count);
    qemu_mutex_init(&decomp_done_lock);
    qemu_cond_init(&decomp_done_cond);
    for (i = 0; i < thread_count; i++) {
        qemu_mutex_init(&decomp_param[i].mutex);
        qemu_cond_init(&decomp_param[i].cond);
        decomp_param[i].compbuf = g_malloc0(compressBound(TARGET_PAGE_SIZE));
        decomp_param[i].done = true;
        decomp_param[i].quit = false;
        qemu_thread_create(decompress_threads + i, "decompress",
                           do_data_decompress, decomp_param + i,
                           QEMU_THREAD_JOINABLE);
    }
}
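/*
 * Everything initialised here (threads, per-thread mutex/cond, compbuf
 * allocations, decomp_done_lock/cond) is torn down by
 * migrate_decompress_threads_join() below; the two are intended to be
 * called as a pair by the incoming-migration setup and cleanup code.
 */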
void migrate_decompress_threads_join(void)
{
    int i, thread_count;

    thread_count = migrate_decompress_threads();
    for (i = 0; i < thread_count; i++) {
        qemu_mutex_lock(&decomp_param[i].mutex);
        decomp_param[i].quit = true;
        qemu_cond_signal(&decomp_param[i].cond);
        qemu_mutex_unlock(&decomp_param[i].mutex);
    }
    for (i = 0; i < thread_count; i++) {
        qemu_thread_join(decompress_threads + i);
        qemu_mutex_destroy(&decomp_param[i].mutex);
        qemu_cond_destroy(&decomp_param[i].cond);
        g_free(decomp_param[i].compbuf);
    }
    g_free(decompress_threads);
    g_free(decomp_param);
    decompress_threads = NULL;
    decomp_param = NULL;
}
static void decompress_data_with_multi_threads(QEMUFile *f,
                                               void *host, int len)
{
    int idx, thread_count;

    thread_count = migrate_decompress_threads();
    qemu_mutex_lock(&decomp_done_lock);
    while (true) {
        for (idx = 0; idx < thread_count; idx++) {
            if (decomp_param[idx].done) {
                decomp_param[idx].done = false;
                qemu_mutex_lock(&decomp_param[idx].mutex);
                qemu_get_buffer(f, decomp_param[idx].compbuf, len);
                decomp_param[idx].des = host;
                decomp_param[idx].len = len;
                qemu_cond_signal(&decomp_param[idx].cond);
                qemu_mutex_unlock(&decomp_param[idx].mutex);
                break;
            }
        }
        if (idx < thread_count) {
            break;
        } else {
            qemu_cond_wait(&decomp_done_cond, &decomp_done_lock);
        }
    }
    qemu_mutex_unlock(&decomp_done_lock);
}
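/*
 * Note: when every worker is busy the scan above falls through with
 * idx == thread_count and the caller blocks on decomp_done_cond until a
 * worker finishes, so compressed pages are only ever delayed, never
 * dropped.
 */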
/**
 * ram_postcopy_incoming_init: allocate postcopy data structures
 *
 * Returns 0 for success and negative if there was one error
 *
 * @mis: current migration incoming state
 *
 * Allocate data structures etc needed by incoming migration with
 * postcopy-ram. postcopy-ram's similarly named
 * postcopy_ram_incoming_init does the work.
 */
int ram_postcopy_incoming_init(MigrationIncomingState *mis)
{
    unsigned long ram_pages = last_ram_page();

    return postcopy_ram_incoming_init(mis, ram_pages);
}
/**
 * ram_load_postcopy: load a page in postcopy case
 *
 * Returns 0 for success or -errno in case of error
 *
 * Called in postcopy mode by ram_load().
 * rcu_read_lock is taken prior to this being called.
 *
 * @f: QEMUFile where to read the data from
 */
static int ram_load_postcopy(QEMUFile *f)
{
    int flags = 0, ret = 0;
    bool place_needed = false;
    bool matching_page_sizes = false;
    MigrationIncomingState *mis = migration_incoming_get_current();
    /* Temporary page that is later 'placed' */
    void *postcopy_host_page = postcopy_get_tmp_page(mis);
    void *last_host = NULL;
    bool all_zero = false;

    while (!ret && !(flags & RAM_SAVE_FLAG_EOS)) {
        ram_addr_t addr;
        void *host = NULL;
        void *page_buffer = NULL;
        void *place_source = NULL;
        RAMBlock *block = NULL;
        uint8_t ch;

        addr = qemu_get_be64(f);
        flags = addr & ~TARGET_PAGE_MASK;
        addr &= TARGET_PAGE_MASK;

        trace_ram_load_postcopy_loop((uint64_t)addr, flags);
        place_needed = false;
        if (flags & (RAM_SAVE_FLAG_ZERO | RAM_SAVE_FLAG_PAGE)) {
            block = ram_block_from_stream(f, flags);

            host = host_from_ram_block_offset(block, addr);
            if (!host) {
                error_report("Illegal RAM offset " RAM_ADDR_FMT, addr);
                ret = -EINVAL;
                break;
            }
            matching_page_sizes = block->page_size == TARGET_PAGE_SIZE;
            /*
             * Postcopy requires that we place whole host pages atomically;
             * these may be huge pages for RAMBlocks that are backed by
             * hugetlbfs.
             * To make it atomic, the data is read into a temporary page
             * that's moved into place later.
             * The migration protocol uses, possibly smaller, target-pages
             * however the source ensures it always sends all the components
             * of a host page in order.
             */
            page_buffer = postcopy_host_page +
                          ((uintptr_t)host & (block->page_size - 1));
            /* If all TP are zero then we can optimise the place */
            if (!((uintptr_t)host & (block->page_size - 1))) {
                all_zero = true;
            } else {
                /* not the 1st TP within the HP */
                if (host != (last_host + TARGET_PAGE_SIZE)) {
                    error_report("Non-sequential target page %p/%p",
                                 host, last_host);
                    ret = -EINVAL;
                    break;
                }
            }

            /*
             * If it's the last part of a host page then we place the host
             * page
             */
            place_needed = (((uintptr_t)host + TARGET_PAGE_SIZE) &
                            (block->page_size - 1)) == 0;
            place_source = postcopy_host_page;
        }
        last_host = host;

        switch (flags & ~RAM_SAVE_FLAG_CONTINUE) {
        case RAM_SAVE_FLAG_ZERO:
            ch = qemu_get_byte(f);
            memset(page_buffer, ch, TARGET_PAGE_SIZE);
            if (ch) {
                all_zero = false;
            }
            break;

        case RAM_SAVE_FLAG_PAGE:
            all_zero = false;
            if (!place_needed || !matching_page_sizes) {
                qemu_get_buffer(f, page_buffer, TARGET_PAGE_SIZE);
            } else {
                /* Avoids the qemu_file copy during postcopy, which is
                 * going to do a copy later; can only do it when we
                 * do this read in one go (matching page sizes)
                 */
                qemu_get_buffer_in_place(f, (uint8_t **)&place_source,
                                         TARGET_PAGE_SIZE);
            }
            break;
        case RAM_SAVE_FLAG_EOS:
            /* normal exit */
            break;
        default:
            error_report("Unknown combination of migration flags: %#x"
                         " (postcopy mode)", flags);
            ret = -EINVAL;
        }

        if (place_needed) {
            /* This gets called at the last target page in the host page */
            void *place_dest = host + TARGET_PAGE_SIZE - block->page_size;

            if (all_zero) {
                ret = postcopy_place_page_zero(mis, place_dest,
                                               block->page_size);
            } else {
                ret = postcopy_place_page(mis, place_dest,
                                          place_source, block->page_size);
            }
        }
        if (!ret) {
            ret = qemu_file_get_error(f);
        }
    }

    return ret;
}
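/*
 * Sizing example (assuming a 4 KiB target page and a RAMBlock backed by
 * 2 MiB hugetlbfs pages): block->page_size is 512 * TARGET_PAGE_SIZE, so
 * the loop above gathers 512 consecutive target pages into
 * postcopy_host_page and only the 512th one sets place_needed, at which
 * point the whole 2 MiB host page is placed atomically.
 */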
static int ram_load(QEMUFile *f, void *opaque, int version_id)
{
    int flags = 0, ret = 0;
    static uint64_t seq_iter;
    int len = 0;
    /*
     * If system is running in postcopy mode, page inserts to host memory must
     * be atomic
     */
    bool postcopy_running = postcopy_state_get() >= POSTCOPY_INCOMING_LISTENING;
    /* ADVISE is earlier, it shows the source has the postcopy capability on */
    bool postcopy_advised = postcopy_state_get() >= POSTCOPY_INCOMING_ADVISE;

    seq_iter++;

    if (version_id != 4) {
        ret = -EINVAL;
    }

    /* This RCU critical section can be very long running.
     * When RCU reclaims in the code start to become numerous,
     * it will be necessary to reduce the granularity of this
     * critical section.
     */
    rcu_read_lock();

    if (postcopy_running) {
        ret = ram_load_postcopy(f);
    }

    while (!postcopy_running && !ret && !(flags & RAM_SAVE_FLAG_EOS)) {
        ram_addr_t addr, total_ram_bytes;
        void *host = NULL;
        uint8_t ch;

        addr = qemu_get_be64(f);
        flags = addr & ~TARGET_PAGE_MASK;
        addr &= TARGET_PAGE_MASK;

        if (flags & (RAM_SAVE_FLAG_ZERO | RAM_SAVE_FLAG_PAGE |
                     RAM_SAVE_FLAG_COMPRESS_PAGE | RAM_SAVE_FLAG_XBZRLE)) {
            RAMBlock *block = ram_block_from_stream(f, flags);

            host = host_from_ram_block_offset(block, addr);
            if (!host) {
                error_report("Illegal RAM offset " RAM_ADDR_FMT, addr);
                ret = -EINVAL;
                break;
            }
            trace_ram_load_loop(block->idstr, (uint64_t)addr, flags, host);
        }

        switch (flags & ~RAM_SAVE_FLAG_CONTINUE) {
        case RAM_SAVE_FLAG_MEM_SIZE:
            /* Synchronize RAM block list */
            total_ram_bytes = addr;
            while (!ret && total_ram_bytes) {
                RAMBlock *block;
                char id[256];
                ram_addr_t length;

                len = qemu_get_byte(f);
                qemu_get_buffer(f, (uint8_t *)id, len);
                id[len] = 0;
                length = qemu_get_be64(f);

                block = qemu_ram_block_by_name(id);
                if (block) {
                    if (length != block->used_length) {
                        Error *local_err = NULL;

                        ret = qemu_ram_resize(block, length,
                                              &local_err);
                        if (local_err) {
                            error_report_err(local_err);
                        }
                    }
                    /* For postcopy we need to check hugepage sizes match */
                    if (postcopy_advised &&
                        block->page_size != qemu_host_page_size) {
                        uint64_t remote_page_size = qemu_get_be64(f);
                        if (remote_page_size != block->page_size) {
                            error_report("Mismatched RAM page size %s "
                                         "(local) %zd != %" PRId64,
                                         id, block->page_size,
                                         remote_page_size);
                            ret = -EINVAL;
                        }
                    }
                    ram_control_load_hook(f, RAM_CONTROL_BLOCK_REG,
                                          block->idstr);
                } else {
                    error_report("Unknown ramblock \"%s\", cannot "
                                 "accept migration", id);
                    ret = -EINVAL;
                }

                total_ram_bytes -= length;
            }
            break;

        case RAM_SAVE_FLAG_ZERO:
            ch = qemu_get_byte(f);
            ram_handle_compressed(host, ch, TARGET_PAGE_SIZE);
            break;

        case RAM_SAVE_FLAG_PAGE:
            qemu_get_buffer(f, host, TARGET_PAGE_SIZE);
            break;

        case RAM_SAVE_FLAG_COMPRESS_PAGE:
            len = qemu_get_be32(f);
            if (len < 0 || len > compressBound(TARGET_PAGE_SIZE)) {
                error_report("Invalid compressed data length: %d", len);
                ret = -EINVAL;
                break;
            }
            decompress_data_with_multi_threads(f, host, len);
            break;

        case RAM_SAVE_FLAG_XBZRLE:
            if (load_xbzrle(f, addr, host) < 0) {
                error_report("Failed to decompress XBZRLE page at "
                             RAM_ADDR_FMT, addr);
                ret = -EINVAL;
                break;
            }
            break;
        case RAM_SAVE_FLAG_EOS:
            /* normal exit */
            break;
        default:
            if (flags & RAM_SAVE_FLAG_HOOK) {
                ram_control_load_hook(f, RAM_CONTROL_HOOK, NULL);
            } else {
                error_report("Unknown combination of migration flags: %#x",
                             flags);
                ret = -EINVAL;
            }
        }
        if (!ret) {
            ret = qemu_file_get_error(f);
        }
    }

    wait_for_decompress_done();
    rcu_read_unlock();
    trace_ram_load_complete(ret, seq_iter);
    return ret;
}
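/*
 * Header decoding example (assuming a 4 KiB target page, so the low 12 bits
 * of each record header are free for flags): a header value of 0x7f2a4008
 * splits into addr = 0x7f2a4000 and flags = RAM_SAVE_FLAG_PAGE (0x08); the
 * loops in ram_load()/ram_load_postcopy() exit once a header carries
 * RAM_SAVE_FLAG_EOS.
 */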
static SaveVMHandlers savevm_ram_handlers = {
    .save_live_setup = ram_save_setup,
    .save_live_iterate = ram_save_iterate,
    .save_live_complete_postcopy = ram_save_complete,
    .save_live_complete_precopy = ram_save_complete,
    .save_live_pending = ram_save_pending,
    .load_state = ram_load,
    .cleanup = ram_migration_cleanup,
};
void ram_mig_init(void)
{
    qemu_mutex_init(&XBZRLE.lock);
    register_savevm_live(NULL, "ram", 0, 4, &savevm_ram_handlers, &ram_state);
}
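/*
 * Note: the '4' passed to register_savevm_live() is the "ram" section
 * version and matches the 'version_id != 4' check at the top of ram_load()
 * above.
 */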