2 * QEMU migration support
4 * Copyright (C) 2007 Anthony Liguori <anthony@codemonkey.ws>
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to deal
8 * in the Software without restriction, including without limitation the rights
9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 * copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
26 #include "qemu_socket.h"
33 #define MIN_FINALIZE_SIZE (200 << 10)
35 typedef struct MigrationState
41 int last_updated_pages
;
43 int n_buffer
; /* number of bytes in @buffer already sent */
44 int l_buffer
; /* number of bytes in @buffer to send */
47 char buffer
[TARGET_PAGE_SIZE
+ 4 + 4];
52 int (*release
)(void *opaque
);
55 static uint32_t max_throttle
= (32 << 20);
56 static MigrationState
*current_migration
;
57 static int wait_for_message_timeout
= 3000; /* 3 seconds */
59 //#define MIGRATION_VERIFY
60 #ifdef MIGRATION_VERIFY
61 static int save_verify_memory(QEMUFile
*f
, void *opaque
);
62 static int load_verify_memory(QEMUFile
*f
, void *opaque
, int version_id
);
63 #endif /* MIGRATION_VERIFY */
65 /* QEMUFile migration implementation */
67 static void migrate_put_buffer(void *opaque
, const uint8_t *buf
, int64_t pos
, int size
)
69 MigrationState
*s
= opaque
;
75 while (offset
< size
) {
78 len
= write(s
->fd
, buf
+ offset
, size
- offset
);
80 if (errno
== EAGAIN
|| errno
== EINTR
)
82 term_printf("migration: write failed (%s)\n", strerror(errno
));
85 } else if (len
== 0) {
86 term_printf("migration: other side closed connection\n");
95 static void migrate_close(void *opaque
)
97 MigrationState
*s
= opaque
;
99 if (s
->release
&& s
->release(s
->opaque
))
103 current_migration
= NULL
;
106 /* Outgoing migration routines */
107 static void migrate_finish(MigrationState
*s
)
111 int *has_error
= s
->has_error
;
113 fcntl(s
->fd
, F_SETFL
, 0);
116 f
= qemu_fopen(s
, migrate_put_buffer
, NULL
, migrate_close
);
120 ret
= qemu_live_savevm_state(f
);
121 #ifdef MIGRATION_VERIFY
122 save_verify_memory(f
, NULL
);
123 #endif /* MIGRATION_VERIFY */
126 if (ret
!= 0 || *has_error
) {
127 term_printf("Migration failed! ret=%d error=%d\n", ret
, *has_error
);
132 qemu_free(has_error
);
133 cpu_physical_memory_set_dirty_tracking(0);
136 static int migrate_write_buffer(MigrationState
*s
)
141 if (s
->n_buffer
< s
->l_buffer
) {
144 len
= write(s
->fd
, s
->buffer
+ s
->n_buffer
, s
->l_buffer
- s
->n_buffer
);
158 s
->throttle_count
+= len
;
160 if (s
->n_buffer
< s
->l_buffer
)
164 if (s
->throttle_count
> max_throttle
) {
166 qemu_set_fd_handler2(s
->fd
, NULL
, NULL
, NULL
, NULL
);
173 static int migrate_check_convergence(MigrationState
*s
)
178 for (addr
= 0; addr
< phys_ram_size
; addr
+= TARGET_PAGE_SIZE
) {
180 if (kvm_allowed
&& (addr
>=0xa0000) && (addr
<0xc0000)) /* do not access video-addresses */
183 if (cpu_physical_memory_get_dirty(addr
, MIGRATION_DIRTY_FLAG
))
187 return ((dirty_count
* TARGET_PAGE_SIZE
) < MIN_FINALIZE_SIZE
);
190 static int ram_page_is_homogeneous(uint32_t addr
)
195 n
= TARGET_PAGE_SIZE
/ sizeof(v
);
196 p
= (uint32
*)(phys_ram_base
+ addr
);
205 static void migrate_prepare_page(MigrationState
*s
)
212 value
= cpu_to_be32(s
->addr
);
213 memcpy(s
->buffer
, &value
, 4);
215 if (ram_page_is_homogeneous(s
->addr
)) {
216 type
= 1; /* keeping ram_get_page() values */
218 value
= cpu_to_be32(s
->addr
);
219 buff
= (const char *)&value
;
223 bufflen
= TARGET_PAGE_SIZE
;
224 buff
= phys_ram_base
+ s
->addr
;
228 memcpy(s
->buffer
+ 4 + 1, phys_ram_base
+ s
->addr
, bufflen
);
230 s
->l_buffer
= 4 + 1 + bufflen
;
233 static void migrate_write(void *opaque
)
235 MigrationState
*s
= opaque
;
237 if (migrate_write_buffer(s
))
241 if (kvm_allowed
&& !*s
->has_error
)
242 *s
->has_error
= kvm_update_dirty_pages_log();
245 if (migrate_check_convergence(s
) || *s
->has_error
) {
246 qemu_del_timer(s
->timer
);
247 qemu_free_timer(s
->timer
);
248 qemu_set_fd_handler2(s
->fd
, NULL
, NULL
, NULL
, NULL
);
253 while (s
->addr
< phys_ram_size
) {
255 if (kvm_allowed
&& (s
->addr
>=0xa0000) && (s
->addr
<0xc0000)) /* do not access video-addresses */
259 if (cpu_physical_memory_get_dirty(s
->addr
, MIGRATION_DIRTY_FLAG
)) {
260 migrate_prepare_page(s
);
261 cpu_physical_memory_reset_dirty(s
->addr
, s
->addr
+ TARGET_PAGE_SIZE
, MIGRATION_DIRTY_FLAG
);
263 s
->addr
+= TARGET_PAGE_SIZE
;
267 if (migrate_write_buffer(s
))
270 s
->addr
+= TARGET_PAGE_SIZE
;
273 s
->last_updated_pages
= s
->updated_pages
;
274 s
->updated_pages
= 0;
279 static void migrate_reset_throttle(void *opaque
)
281 MigrationState
*s
= opaque
;
283 s
->bps
= s
->throttle_count
;
287 qemu_set_fd_handler2(s
->fd
, NULL
, NULL
, migrate_write
, s
);
289 s
->throttle_count
= 0;
290 qemu_mod_timer(s
->timer
, qemu_get_clock(rt_clock
) + 1000);
293 static int start_migration(MigrationState
*s
)
295 uint32_t value
= cpu_to_be32(phys_ram_size
);
296 target_phys_addr_t addr
;
299 while (offset
!= 4) {
300 ssize_t len
= write(s
->fd
, ((char *)&value
) + offset
, 4 - offset
);
301 if (len
== -1 && errno
== EINTR
)
310 fcntl(s
->fd
, F_SETFL
, O_NONBLOCK
);
312 for (addr
= 0; addr
< phys_ram_size
; addr
+= TARGET_PAGE_SIZE
) {
314 if (kvm_allowed
&& (addr
>=0xa0000) && (addr
<0xc0000)) /* do not access video-addresses */
317 if (!cpu_physical_memory_get_dirty(addr
, MIGRATION_DIRTY_FLAG
))
318 cpu_physical_memory_set_dirty(addr
);
321 if (cpu_physical_memory_set_dirty_tracking(1)) {
328 s
->updated_pages
= 0;
329 s
->last_updated_pages
= 0;
330 s
->n_buffer
= s
->l_buffer
= 0;
331 s
->timer
= qemu_new_timer(rt_clock
, migrate_reset_throttle
, s
);
333 qemu_mod_timer(s
->timer
, qemu_get_clock(rt_clock
));
334 qemu_set_fd_handler2(s
->fd
, NULL
, NULL
, migrate_write
, s
);
338 static MigrationState
*migration_init_fd(int detach
, int fd
)
342 s
= qemu_mallocz(sizeof(MigrationState
));
344 term_printf("Allocation error\n");
349 s
->has_error
= qemu_mallocz(sizeof(int));
350 if (s
->has_error
== NULL
) {
351 term_printf("malloc failed (for has_error)\n");
356 current_migration
= s
;
358 if (start_migration(s
) == -1) {
359 term_printf("Could not start migration\n");
369 typedef struct MigrationCmdState
375 static int cmd_release(void *opaque
)
377 MigrationCmdState
*c
= opaque
;
383 ret
= waitpid(c
->pid
, &status
, 0);
384 if (ret
== -1 && errno
== EINTR
)
388 term_printf("migration: waitpid failed (%s)\n", strerror(errno
));
391 /* FIXME: check and uncomment
392 * if (WIFEXITED(status))
393 * status = WEXITSTATUS(status);
398 static MigrationState
*migration_init_cmd(int detach
, const char *command
, char **argv
)
405 if (pipe(fds
) == -1) {
406 term_printf("pipe() (%s)\n", strerror(errno
));
414 term_printf("fork error (%s)\n", strerror(errno
));
419 dup2(fds
[0], STDIN_FILENO
);
420 execvp(command
, argv
);
425 for (i
= 0; argv
[i
]; i
++)
429 s
= migration_init_fd(detach
, fds
[1]);
431 MigrationCmdState
*c
= qemu_mallocz(sizeof(*c
));
434 s
->release
= cmd_release
;
441 static MigrationState
*migration_init_exec(int detach
, const char *command
)
445 argv
= qemu_mallocz(sizeof(char *) * 4);
446 argv
[0] = strdup("sh");
447 argv
[1] = strdup("-c");
448 argv
[2] = strdup(command
);
451 return migration_init_cmd(detach
, "/bin/sh", argv
);
454 static MigrationState
*migration_init_ssh(int detach
, const char *host
)
456 int qemu_argc
, daemonize
= 0, argc
, i
;
457 char **qemu_argv
, **argv
;
458 const char *incoming
= NULL
;
460 qemu_get_launch_info(&qemu_argc
, &qemu_argv
, &daemonize
, &incoming
);
462 argc
= 3 + qemu_argc
;
468 argv
= qemu_mallocz(sizeof(char *) * (argc
+ 1));
469 argv
[0] = strdup("ssh");
470 argv
[1] = strdup("-XC");
471 argv
[2] = strdup(host
);
473 for (i
= 0; i
< qemu_argc
; i
++)
474 argv
[3 + i
] = strdup(qemu_argv
[i
]);
477 argv
[3 + i
++] = strdup("-daemonize");
479 argv
[3 + i
++] = strdup("-incoming");
480 argv
[3 + i
++] = strdup("stdio");
485 return migration_init_cmd(detach
, "ssh", argv
);
488 /* (busy) wait timeout (milliseconds) for a message to arrive on fd. */
489 /* returns 0 on success, an error code otherwise (18 for timeout) */
490 static int wait_for_message(const char *msg
, int fd
, int timeout
)
494 int64_t now
, expiration
, delta
; /* milliseconds */
497 now
= qemu_get_clock(rt_clock
);
498 expiration
= now
+ timeout
;
502 tv
.tv_sec
= tv
.tv_usec
= 0;
503 now
= qemu_get_clock(rt_clock
);
504 delta
= expiration
- now
;
506 tv
.tv_usec
= delta
* 1000;
507 n
= select(fd
+ 1, &rfds
, NULL
, NULL
, &tv
);
508 } while ( (n
== -1) && (errno
== EINTR
) );
512 fprintf(stderr
, "%s FAILED: ", msg
);
515 case 0: /* timeout */
516 fprintf(stderr
, "%s: timeout reached\n", msg
);
521 fprintf(stderr
, "wait_for_message: %s: select returned %d\n", msg
, n
);
523 if (!FD_ISSET(fd
, &rfds
)) {
524 fprintf(stderr
, "wait_for_message: %s: s->fd not set\n", msg
);
531 static int tcp_release(void *opaque
)
533 MigrationState
*s
= opaque
;
541 n
= wait_for_message("WAIT FOR ACK", s
->fd
, wait_for_message_timeout
);
548 len
= read(s
->fd
, &status
, 1);
549 if (len
== -1 && errno
== EINTR
)
551 if (len
!= 1 || status
!= 0)
555 len
= write(s
->fd
, &status
, 1);
556 if (len
== -1 && errno
== EINTR
)
562 return (len
!= 1 || status
!= 0);
565 static MigrationState
*migration_init_tcp(int detach
, const char *host
)
568 struct sockaddr_in addr
;
571 fd
= socket(PF_INET
, SOCK_STREAM
, 0);
573 term_printf("socket() failed %s\n", strerror(errno
));
577 addr
.sin_family
= AF_INET
;
578 if (parse_host_port(&addr
, host
) == -1) {
579 term_printf("parse_host_port() FAILED for %s\n", host
);
585 if (connect(fd
, (struct sockaddr
*)&addr
, sizeof(addr
)) == -1) {
588 term_printf("connect() failed %s\n", strerror(errno
));
593 s
= migration_init_fd(detach
, fd
);
596 s
->release
= tcp_release
;
601 /* Incoming migration */
603 static void migrate_incoming_homogeneous_page(uint32_t addr
, uint32_t v
)
608 n
= TARGET_PAGE_SIZE
/ sizeof(v
);
609 p
= (uint32
*)(phys_ram_base
+ addr
);
615 static int migrate_incoming_page(QEMUFile
*f
, uint32_t addr
)
619 switch (qemu_get_byte(f
)) {
620 case 0: /* the whole page */
621 l
= qemu_get_buffer(f
, phys_ram_base
+ addr
, TARGET_PAGE_SIZE
);
622 if (l
!= TARGET_PAGE_SIZE
)
625 case 1: /* homogeneous page -- a single byte */
626 v
= qemu_get_be32(f
);
627 migrate_incoming_homogeneous_page(addr
, v
);
636 static int migrate_incoming_fd(int fd
)
639 QEMUFile
*f
= qemu_fopen_fd(fd
);
641 extern void qemu_announce_self(void);
643 if (qemu_get_be32(f
) != phys_ram_size
)
647 addr
= qemu_get_be32(f
);
650 ret
= migrate_incoming_page(f
, addr
);
658 ret
= qemu_live_loadvm_state(f
);
659 #ifdef MIGRATION_VERIFY
660 if (ret
==0) ret
=load_verify_memory(f
, NULL
, 1);
661 #endif /* MIGRATION_VERIFY */
667 static int migrate_incoming_tcp(const char *host
)
669 struct sockaddr_in addr
;
670 socklen_t addrlen
= sizeof(addr
);
677 addr
.sin_family
= AF_INET
;
678 if (parse_host_port(&addr
, host
) == -1) {
679 fprintf(stderr
, "parse_host_port() failed for %s\n", host
);
684 fd
= socket(PF_INET
, SOCK_STREAM
, 0);
686 perror("socket failed");
691 if (setsockopt(fd
, SOL_SOCKET
, SO_REUSEADDR
, &reuse
, sizeof(reuse
)) == -1) {
692 perror("setsockopt() failed");
697 if (bind(fd
, (struct sockaddr
*)&addr
, sizeof(addr
)) == -1) {
698 perror("bind() failed");
703 if (listen(fd
, 1) == -1) {
704 perror("listen() failed");
710 sfd
= accept(fd
, (struct sockaddr
*)&addr
, &addrlen
);
714 perror("accept() failed");
719 rc
= migrate_incoming_fd(sfd
);
722 fprintf(stderr
, "migrate_incoming_fd failed (rc=%d)\n", rc
);
727 len
= write(sfd
, &status
, 1);
728 if (len
== -1 && errno
== EAGAIN
)
735 rc
= wait_for_message("WAIT FOR GO", sfd
, wait_for_message_timeout
);
741 len
= read(sfd
, &status
, 1);
742 if (len
== -1 && errno
== EAGAIN
)
755 int migrate_incoming(const char *device
)
760 if (strcmp(device
, "stdio") == 0)
761 ret
= migrate_incoming_fd(STDIN_FILENO
);
762 else if (strstart(device
, "tcp://", &ptr
)) {
765 end
= strchr(host
, '/');
767 ret
= migrate_incoming_tcp(host
);
777 /* Migration monitor command */
780 1) audit all error paths
783 void do_migrate(int detach
, const char *uri
)
787 if (strstart(uri
, "exec:", &ptr
)) {
788 char *command
= urldecode(ptr
);
789 migration_init_exec(detach
, command
);
791 } else if (strstart(uri
, "ssh://", &ptr
)) {
795 end
= strchr(host
, '/');
797 migration_init_ssh(detach
, host
);
799 } else if (strstart(uri
, "tcp://", &ptr
)) {
803 end
= strchr(host
, '/');
806 if (migration_init_tcp(detach
, host
) == NULL
)
807 term_printf("migration failed (migration_init_tcp for %s failed)\n", host
);
810 term_printf("Unknown migration protocol '%s'\n", uri
);
815 void do_migrate_set_speed(const char *value
)
820 d
= strtod(value
, &ptr
);
832 max_throttle
= (uint32_t)d
;
835 void do_info_migration(void)
837 MigrationState
*s
= current_migration
;
840 term_printf("Migration active\n");
841 if (s
->bps
< (1 << 20))
842 term_printf("Transfer rate %3.1f kb/s\n",
843 (double)s
->bps
/ 1024);
845 term_printf("Transfer rate %3.1f mb/s\n",
846 (double)s
->bps
/ (1024 * 1024));
847 term_printf("Iteration %d\n", s
->iteration
);
848 term_printf("Transferred %d/%d pages\n", s
->updated_pages
, phys_ram_size
>> TARGET_PAGE_BITS
);
850 term_printf("Last iteration found %d dirty pages\n", s
->last_updated_pages
);
852 term_printf("Migration inactive\n");
854 term_printf("Maximum migration speed is ");
855 if (max_throttle
< (1 << 20))
856 term_printf("%3.1f kb/s\n", (double)max_throttle
/ 1024);
858 term_printf("%3.1f mb/s\n", (double)max_throttle
/ (1024 * 1024));
861 void do_migrate_cancel(void)
863 MigrationState
*s
= current_migration
;
871 #ifdef MIGRATION_VERIFY
872 unsigned int calc_page_checksum(target_ulong addr
)
875 unsigned int *p
= (unsigned int *)(phys_ram_base
+ addr
);
876 unsigned int *q
= p
+ (TARGET_PAGE_SIZE
/ sizeof(unsigned int));
878 for ( /*initialized already */ ; p
<q
; p
++)
884 static int save_verify_memory(QEMUFile
*f
, void *opaque
)
889 for (addr
= 0; addr
< phys_ram_size
; addr
+= TARGET_PAGE_SIZE
) {
891 if (kvm_allowed
&& (addr
>=0xa0000) && (addr
<0xc0000)) /* do not access video-addresses */
894 sum
= calc_page_checksum(addr
);
895 qemu_put_be32(f
, addr
);
896 qemu_put_be32(f
, sum
);
901 static int load_verify_memory(QEMUFile
*f
, void *opaque
, int version_id
)
903 unsigned int addr
, raddr
;
904 unsigned int sum
, rsum
;
907 for (addr
= 0; addr
< phys_ram_size
; addr
+= TARGET_PAGE_SIZE
) {
909 if (kvm_allowed
&& (addr
>=0xa0000) && (addr
<0xc0000)) /* do not access video-addresses */
912 sum
= calc_page_checksum(addr
);
913 raddr
= qemu_get_be32(f
);
914 rsum
= qemu_get_be32(f
);
915 if ((raddr
!= addr
) || (rsum
!= sum
)) {
916 term_printf("checksum mismatch: src:0x%x 0x%x , dst:0x%x 0x%x\n",
917 raddr
, rsum
, addr
, sum
);
921 printf("memory_verify: num_errors=%d\n", num_errors
);
922 term_printf("memory_verify: num_errors=%d\n", num_errors
);
923 return 0/* num_errors */;
925 #endif /* MIGRATION_VERIFY */