4 * Copyright (c) 2006-2007 CodeSourcery.
5 * Written by Paul Brook
7 * This code is licensed under the GPL.
10 #include "qemu/osdep.h"
11 #include "qemu/datadir.h"
12 #include "qemu/error-report.h"
13 #include "qapi/error.h"
15 #include "hw/arm/boot.h"
16 #include "hw/arm/linux-boot-if.h"
17 #include "sysemu/kvm.h"
18 #include "sysemu/sysemu.h"
19 #include "sysemu/numa.h"
20 #include "hw/boards.h"
21 #include "sysemu/reset.h"
22 #include "hw/loader.h"
24 #include "sysemu/device_tree.h"
25 #include "qemu/config-file.h"
26 #include "qemu/option.h"
27 #include "qemu/units.h"
29 /* Kernel boot protocol is specified in the kernel docs
30 * Documentation/arm/Booting and Documentation/arm64/booting.txt
31 * They have different preferred image load offsets from system RAM base.
33 #define KERNEL_ARGS_ADDR 0x100
34 #define KERNEL_NOLOAD_ADDR 0x02000000
35 #define KERNEL_LOAD_ADDR 0x00010000
36 #define KERNEL64_LOAD_ADDR 0x00080000
38 #define ARM64_TEXT_OFFSET_OFFSET 8
39 #define ARM64_MAGIC_OFFSET 56
41 #define BOOTLOADER_MAX_SIZE (4 * KiB)
43 AddressSpace
*arm_boot_address_space(ARMCPU
*cpu
,
44 const struct arm_boot_info
*info
)
46 /* Return the address space to use for bootloader reads and writes.
47 * We prefer the secure address space if the CPU has it and we're
48 * going to boot the guest into it.
51 CPUState
*cs
= CPU(cpu
);
53 if (arm_feature(&cpu
->env
, ARM_FEATURE_EL3
) && info
->secure_boot
) {
59 return cpu_get_address_space(cs
, asidx
);
63 FIXUP_NONE
= 0, /* do nothing */
64 FIXUP_TERMINATOR
, /* end of insns */
65 FIXUP_BOARDID
, /* overwrite with board ID number */
66 FIXUP_BOARD_SETUP
, /* overwrite with board specific setup code address */
67 FIXUP_ARGPTR_LO
, /* overwrite with pointer to kernel args */
68 FIXUP_ARGPTR_HI
, /* overwrite with pointer to kernel args (high half) */
69 FIXUP_ENTRYPOINT_LO
, /* overwrite with kernel entry point */
70 FIXUP_ENTRYPOINT_HI
, /* overwrite with kernel entry point (high half) */
71 FIXUP_GIC_CPU_IF
, /* overwrite with GIC CPU interface address */
72 FIXUP_BOOTREG
, /* overwrite with boot register address */
73 FIXUP_DSB
, /* overwrite with correct DSB insn for cpu */
77 typedef struct ARMInsnFixup
{
/*
 * Primary-CPU boot stub for AArch64: load the DTB address into x0,
 * zero x1-x3, then branch to the kernel entry point held in x4.
 * Both 64-bit values come from the pairs of literal words at the end
 * of the array (LO word first, then HI word), which carry FIXUP_*
 * tags instead of fixed instruction encodings.
 */
82 static const ARMInsnFixup bootloader_aarch64
[] = {
83 { 0x580000c0 }, /* ldr x0, arg ; Load the 64-bit DTB address (arg LO+HI words) */
84 { 0xaa1f03e1 }, /* mov x1, xzr */
85 { 0xaa1f03e2 }, /* mov x2, xzr */
86 { 0xaa1f03e3 }, /* mov x3, xzr */
87 { 0x58000084 }, /* ldr x4, entry ; Load the 64-bit kernel entry (entry LO+HI words) */
88 { 0xd61f0080 }, /* br x4 ; Jump to the kernel entry point */
89 { 0, FIXUP_ARGPTR_LO
}, /* arg: .word @DTB Lower 32-bits */
90 { 0, FIXUP_ARGPTR_HI
}, /* .word @DTB Higher 32-bits */
91 { 0, FIXUP_ENTRYPOINT_LO
}, /* entry: .word @Kernel Entry Lower 32-bits */
92 { 0, FIXUP_ENTRYPOINT_HI
}, /* .word @Kernel Entry Higher 32-bits */
93 { 0, FIXUP_TERMINATOR
}
96 /* A very small bootloader: call the board-setup code (if needed),
97 * set r0-r2, then jump to the kernel.
98 * If we're not calling boot setup code then we don't copy across
99 * the first BOOTLOADER_NO_BOARD_SETUP_OFFSET insns in this array.
/*
 * Primary-CPU boot stub for AArch32. In ARM state a read of pc yields
 * the address of the current instruction plus 8, which is what the
 * pc-relative offsets below rely on.
 */
102 static const ARMInsnFixup bootloader
[] = {
103 { 0xe28fe004 }, /* add lr, pc, #4 ; lr = address of "mov r0, #0" below */
104 { 0xe51ff004 }, /* ldr pc, [pc, #-4] ; jump to the address in the next word */
105 { 0, FIXUP_BOARD_SETUP
}, /* .word board-specific setup code address */
/* Number of leading entries above that form the board-setup call */
106 #define BOOTLOADER_NO_BOARD_SETUP_OFFSET 3
107 { 0xe3a00000 }, /* mov r0, #0 */
108 { 0xe59f1004 }, /* ldr r1, [pc, #4] ; r1 = board ID word below */
109 { 0xe59f2004 }, /* ldr r2, [pc, #4] ; r2 = kernel args pointer word below */
110 { 0xe59ff004 }, /* ldr pc, [pc, #4] ; jump to the kernel entry point word below */
111 { 0, FIXUP_BOARDID
}, /* .word board ID number */
112 { 0, FIXUP_ARGPTR_LO
}, /* .word pointer to kernel args */
113 { 0, FIXUP_ENTRYPOINT_LO
}, /* .word kernel entry point */
114 { 0, FIXUP_TERMINATOR
}
117 /* Handling for secondary CPU boot in a multicore system.
118 * Unlike the uniprocessor/primary CPU boot, this is platform
119 * dependent. The default code here is based on the secondary
120 * CPU boot protocol used on realview/vexpress boards, with
121 * some parameterisation to increase its flexibility.
122 * QEMU platform models for which this code is not appropriate
123 * should override write_secondary_boot and secondary_cpu_reset_hook
126 * This code enables the interrupt controllers for the secondary
127 * CPUs and then puts all the secondary CPUs into a loop waiting
128 * for an interprocessor interrupt and polling a configurable
129 * location for the kernel secondary CPU entry point.
131 #define DSB_INSN 0xf57ff04f
132 #define CP15_DSB_INSN 0xee070f9a /* mcr cp15, 0, r0, c7, c10, 4 */
/*
 * Default secondary-CPU stub: enable the GIC CPU interface, open the
 * priority mask, then sleep in wfi and re-read the boot register each
 * time the CPU wakes. As soon as the register reads non-zero, branch to
 * the address it holds. The two addresses used are the literal words at
 * the end of the array.
 */
134 static const ARMInsnFixup smpboot
[] = {
135 { 0xe59f2028 }, /* ldr r2, gic_cpu_if */
136 { 0xe59f0028 }, /* ldr r0, bootreg_addr */
137 { 0xe3a01001 }, /* mov r1, #1 */
138 { 0xe5821000 }, /* str r1, [r2] - set GICC_CTLR.Enable */
139 { 0xe3a010ff }, /* mov r1, #0xff */
140 { 0xe5821004 }, /* str r1, [r2, 4] - set GIC_PMR.Priority to 0xff */
141 { 0, FIXUP_DSB
}, /* dsb - encoding is substituted per CPU via FIXUP_DSB */
142 { 0xe320f003 }, /* wfi */
143 { 0xe5901000 }, /* ldr r1, [r0] - read the boot register */
144 { 0xe1110001 }, /* tst r1, r1 - is it still zero? */
145 { 0x0afffffb }, /* beq <wfi> - yes: go back to sleep */
146 { 0xe12fff11 }, /* bx r1 - no: jump to the address that was read */
147 { 0, FIXUP_GIC_CPU_IF
}, /* gic_cpu_if: .word 0x.... */
148 { 0, FIXUP_BOOTREG
}, /* bootreg_addr: .word 0x.... */
149 { 0, FIXUP_TERMINATOR
}
/*
 * Build a boot stub from an ARMInsnFixup table and register it as a ROM
 * blob called @name at guest address @addr.
 *
 * The table length is found by scanning @insns up to the entry tagged
 * FIXUP_TERMINATOR; a uint32_t buffer of that many words is allocated
 * with g_new0(). For the fixup kinds listed in the switch, the word is
 * taken from @fixupcontext[fixup] instead of the table's insn field.
 * Every word is passed through tswap32() before being stored
 * (presumably host-to-target byte order -- confirm against tswap32()).
 * The stub size is asserted to be below BOOTLOADER_MAX_SIZE before the
 * buffer is handed to rom_add_blob_fixed_as().
 */
152 static void write_bootloader(const char *name
, hwaddr addr
,
153 const ARMInsnFixup
*insns
, uint32_t *fixupcontext
,
156 /* Fix up the specified bootloader fragment and write it into
157 * guest memory using rom_add_blob_fixed_as(). fixupcontext is
158 * an array giving the values to write in for the fixup types
159 * which write a value into the code array.
165 while (insns
[len
].fixup
!= FIXUP_TERMINATOR
) {
169 code
= g_new0(uint32_t, len
);
171 for (i
= 0; i
< len
; i
++) {
172 uint32_t insn
= insns
[i
].insn
;
173 FixupType fixup
= insns
[i
].fixup
;
179 case FIXUP_BOARD_SETUP
:
180 case FIXUP_ARGPTR_LO
:
181 case FIXUP_ARGPTR_HI
:
182 case FIXUP_ENTRYPOINT_LO
:
183 case FIXUP_ENTRYPOINT_HI
:
184 case FIXUP_GIC_CPU_IF
:
187 insn
= fixupcontext
[fixup
];
192 code
[i
] = tswap32(insn
);
195 assert((len
* sizeof(uint32_t)) < BOOTLOADER_MAX_SIZE
);
197 rom_add_blob_fixed_as(name
, code
, len
* sizeof(uint32_t), addr
, as
);
/*
 * Write the smpboot[] stub at info->smp_loader_start, in the address
 * space chosen by arm_boot_address_space().
 *
 * Only the fixup slots that smpboot[] references are filled in:
 * the GIC CPU interface address, the boot register address and the
 * DSB instruction encoding. The remaining fixupcontext[] entries are
 * left unset.
 */
202 static void default_write_secondary(ARMCPU
*cpu
,
203 const struct arm_boot_info
*info
)
205 uint32_t fixupcontext
[FIXUP_MAX
];
206 AddressSpace
*as
= arm_boot_address_space(cpu
, info
);
208 fixupcontext
[FIXUP_GIC_CPU_IF
] = info
->gic_cpu_if_addr
;
209 fixupcontext
[FIXUP_BOOTREG
] = info
->smp_bootreg_addr
;
/*
 * CPUs with ARM_FEATURE_V7 get the DSB instruction encoding. The
 * CP15_DSB_INSN assignment further down is presumably the alternative
 * for CPUs without that feature (the branch line is not visible in
 * this view -- confirm).
 */
210 if (arm_feature(&cpu
->env
, ARM_FEATURE_V7
)) {
211 fixupcontext
[FIXUP_DSB
] = DSB_INSN
;
213 fixupcontext
[FIXUP_DSB
] = CP15_DSB_INSN
;
216 write_bootloader("smpboot", info
->smp_loader_start
,
217 smpboot
, fixupcontext
, as
);
220 void arm_write_secure_board_setup_dummy_smc(ARMCPU
*cpu
,
221 const struct arm_boot_info
*info
,
224 AddressSpace
*as
= arm_boot_address_space(cpu
, info
);
226 uint32_t mvbar_blob
[] = {
227 /* mvbar_addr: secure monitor vectors
228 * Default unimplemented and unused vectors to spin. Makes it
229 * easier to debug (as opposed to the CPU running away).
231 0xeafffffe, /* (spin) */
232 0xeafffffe, /* (spin) */
233 0xe1b0f00e, /* movs pc, lr ;SMC exception return */
234 0xeafffffe, /* (spin) */
235 0xeafffffe, /* (spin) */
236 0xeafffffe, /* (spin) */
237 0xeafffffe, /* (spin) */
238 0xeafffffe, /* (spin) */
240 uint32_t board_setup_blob
[] = {
241 /* board setup addr */
242 0xee110f51, /* mrc p15, 0, r0, c1, c1, 2 ;read NSACR */
243 0xe3800b03, /* orr r0, #0xc00 ;set CP11, CP10 */
244 0xee010f51, /* mcr p15, 0, r0, c1, c1, 2 ;write NSACR */
245 0xe3a00e00 + (mvbar_addr
>> 4), /* mov r0, #mvbar_addr */
246 0xee0c0f30, /* mcr p15, 0, r0, c12, c0, 1 ;set MVBAR */
247 0xee110f11, /* mrc p15, 0, r0, c1 , c1, 0 ;read SCR */
248 0xe3800031, /* orr r0, #0x31 ;enable AW, FW, NS */
249 0xee010f11, /* mcr p15, 0, r0, c1, c1, 0 ;write SCR */
250 0xe1a0100e, /* mov r1, lr ;save LR across SMC */
251 0xe1600070, /* smc #0 ;call monitor to flush SCR */
252 0xe1a0f001, /* mov pc, r1 ;return */
255 /* check that mvbar_addr is correctly aligned and relocatable (using MOV) */
256 assert((mvbar_addr
& 0x1f) == 0 && (mvbar_addr
>> 4) < 0x100);
258 /* check that these blobs don't overlap */
259 assert((mvbar_addr
+ sizeof(mvbar_blob
) <= info
->board_setup_addr
)
260 || (info
->board_setup_addr
+ sizeof(board_setup_blob
) <= mvbar_addr
));
262 for (n
= 0; n
< ARRAY_SIZE(mvbar_blob
); n
++) {
263 mvbar_blob
[n
] = tswap32(mvbar_blob
[n
]);
265 rom_add_blob_fixed_as("board-setup-mvbar", mvbar_blob
, sizeof(mvbar_blob
),
268 for (n
= 0; n
< ARRAY_SIZE(board_setup_blob
); n
++) {
269 board_setup_blob
[n
] = tswap32(board_setup_blob
[n
]);
271 rom_add_blob_fixed_as("board-setup", board_setup_blob
,
272 sizeof(board_setup_blob
), info
->board_setup_addr
, as
);
/*
 * Reset handling for a secondary CPU that runs the smpboot[] stub:
 * store 0 to the boot register at info->smp_bootreg_addr and point the
 * CPU's PC at info->smp_loader_start.
 *
 * Clearing the register matters because smpboot[] branches to whatever
 * non-zero value it reads from that location.
 */
275 static void default_reset_secondary(ARMCPU
*cpu
,
276 const struct arm_boot_info
*info
)
278 AddressSpace
*as
= arm_boot_address_space(cpu
, info
);
279 CPUState
*cs
= CPU(cpu
);
281 address_space_stl_notdirty(as
, info
->smp_bootreg_addr
,
282 0, MEMTXATTRS_UNSPECIFIED
, NULL
);
283 cpu_set_pc(cs
, info
->smp_loader_start
);
/*
 * Return true if the board provides a device tree, either as a file
 * name (info->dtb_filename) or through the info->get_dtb function
 * pointer.
 */
286 static inline bool have_dtb(const struct arm_boot_info
*info
)
288 return info
->dtb_filename
|| info
->get_dtb
;
291 #define WRITE_WORD(p, value) do { \
292 address_space_stl_notdirty(as, p, value, \
293 MEMTXATTRS_UNSPECIFIED, NULL); \
297 static void set_kernel_args(const struct arm_boot_info
*info
, AddressSpace
*as
)
299 int initrd_size
= info
->initrd_size
;
300 hwaddr base
= info
->loader_start
;
303 p
= base
+ KERNEL_ARGS_ADDR
;
306 WRITE_WORD(p
, 0x54410001);
308 WRITE_WORD(p
, 0x1000);
311 /* TODO: handle multiple chips on one ATAG list */
313 WRITE_WORD(p
, 0x54410002);
314 WRITE_WORD(p
, info
->ram_size
);
315 WRITE_WORD(p
, info
->loader_start
);
319 WRITE_WORD(p
, 0x54420005);
320 WRITE_WORD(p
, info
->initrd_start
);
321 WRITE_WORD(p
, initrd_size
);
323 if (info
->kernel_cmdline
&& *info
->kernel_cmdline
) {
327 cmdline_size
= strlen(info
->kernel_cmdline
);
328 address_space_write(as
, p
+ 8, MEMTXATTRS_UNSPECIFIED
,
329 info
->kernel_cmdline
, cmdline_size
+ 1);
330 cmdline_size
= (cmdline_size
>> 2) + 1;
331 WRITE_WORD(p
, cmdline_size
+ 2);
332 WRITE_WORD(p
, 0x54410009);
333 p
+= cmdline_size
* 4;
335 if (info
->atag_board
) {
338 uint8_t atag_board_buf
[0x1000];
340 atag_board_len
= (info
->atag_board(info
, atag_board_buf
) + 3) & ~3;
341 WRITE_WORD(p
, (atag_board_len
+ 8) >> 2);
342 WRITE_WORD(p
, 0x414f4d50);
343 address_space_write(as
, p
, MEMTXATTRS_UNSPECIFIED
,
344 atag_board_buf
, atag_board_len
);
352 static void set_kernel_args_old(const struct arm_boot_info
*info
,
357 int initrd_size
= info
->initrd_size
;
358 hwaddr base
= info
->loader_start
;
360 /* see linux/include/asm-arm/setup.h */
361 p
= base
+ KERNEL_ARGS_ADDR
;
365 WRITE_WORD(p
, info
->ram_size
/ 4096);
368 #define FLAG_READONLY 1
369 #define FLAG_RDLOAD 4
370 #define FLAG_RDPROMPT 8
372 WRITE_WORD(p
, FLAG_READONLY
| FLAG_RDLOAD
| FLAG_RDPROMPT
);
374 WRITE_WORD(p
, (31 << 8) | 0); /* /dev/mtdblock0 */
383 /* memc_control_reg */
385 /* unsigned char sounddefault */
386 /* unsigned char adfsdrives */
387 /* unsigned char bytes_per_char_h */
388 /* unsigned char bytes_per_char_v */
390 /* pages_in_bank[4] */
399 WRITE_WORD(p
, info
->initrd_start
);
404 WRITE_WORD(p
, initrd_size
);
409 /* system_serial_low */
411 /* system_serial_high */
415 /* zero unused fields */
416 while (p
< base
+ KERNEL_ARGS_ADDR
+ 256 + 1024) {
419 s
= info
->kernel_cmdline
;
421 address_space_write(as
, p
, MEMTXATTRS_UNSPECIFIED
, s
, strlen(s
) + 1);
427 static int fdt_add_memory_node(void *fdt
, uint32_t acells
, hwaddr mem_base
,
428 uint32_t scells
, hwaddr mem_len
,
434 nodename
= g_strdup_printf("/memory@%" PRIx64
, mem_base
);
435 qemu_fdt_add_subnode(fdt
, nodename
);
436 qemu_fdt_setprop_string(fdt
, nodename
, "device_type", "memory");
437 ret
= qemu_fdt_setprop_sized_cells(fdt
, nodename
, "reg", acells
, mem_base
,
443 /* only set the NUMA ID if it is specified */
444 if (numa_node_id
>= 0) {
445 ret
= qemu_fdt_setprop_cell(fdt
, nodename
,
446 "numa-node-id", numa_node_id
);
453 static void fdt_add_psci_node(void *fdt
)
455 uint32_t cpu_suspend_fn
;
459 ARMCPU
*armcpu
= ARM_CPU(qemu_get_cpu(0));
460 const char *psci_method
;
461 int64_t psci_conduit
;
464 psci_conduit
= object_property_get_int(OBJECT(armcpu
),
467 switch (psci_conduit
) {
468 case QEMU_PSCI_CONDUIT_DISABLED
:
470 case QEMU_PSCI_CONDUIT_HVC
:
473 case QEMU_PSCI_CONDUIT_SMC
:
477 g_assert_not_reached();
481 * A pre-existing /psci node might specify function ID values
482 * that don't match QEMU's PSCI implementation. Delete the whole
483 * node and put our own in instead.
485 rc
= fdt_path_offset(fdt
, "/psci");
487 qemu_fdt_nop_node(fdt
, "/psci");
490 qemu_fdt_add_subnode(fdt
, "/psci");
491 if (armcpu
->psci_version
>= QEMU_PSCI_VERSION_0_2
) {
492 if (armcpu
->psci_version
< QEMU_PSCI_VERSION_1_0
) {
493 const char comp
[] = "arm,psci-0.2\0arm,psci";
494 qemu_fdt_setprop(fdt
, "/psci", "compatible", comp
, sizeof(comp
));
496 const char comp
[] = "arm,psci-1.0\0arm,psci-0.2\0arm,psci";
497 qemu_fdt_setprop(fdt
, "/psci", "compatible", comp
, sizeof(comp
));
500 cpu_off_fn
= QEMU_PSCI_0_2_FN_CPU_OFF
;
501 if (arm_feature(&armcpu
->env
, ARM_FEATURE_AARCH64
)) {
502 cpu_suspend_fn
= QEMU_PSCI_0_2_FN64_CPU_SUSPEND
;
503 cpu_on_fn
= QEMU_PSCI_0_2_FN64_CPU_ON
;
504 migrate_fn
= QEMU_PSCI_0_2_FN64_MIGRATE
;
506 cpu_suspend_fn
= QEMU_PSCI_0_2_FN_CPU_SUSPEND
;
507 cpu_on_fn
= QEMU_PSCI_0_2_FN_CPU_ON
;
508 migrate_fn
= QEMU_PSCI_0_2_FN_MIGRATE
;
511 qemu_fdt_setprop_string(fdt
, "/psci", "compatible", "arm,psci");
513 cpu_suspend_fn
= QEMU_PSCI_0_1_FN_CPU_SUSPEND
;
514 cpu_off_fn
= QEMU_PSCI_0_1_FN_CPU_OFF
;
515 cpu_on_fn
= QEMU_PSCI_0_1_FN_CPU_ON
;
516 migrate_fn
= QEMU_PSCI_0_1_FN_MIGRATE
;
519 /* We adopt the PSCI spec's nomenclature, and use 'conduit' to refer
520 * to the instruction that should be used to invoke PSCI functions.
521 * However, the device tree binding uses 'method' instead, so that is
522 * what we should use here.
524 qemu_fdt_setprop_string(fdt
, "/psci", "method", psci_method
);
526 qemu_fdt_setprop_cell(fdt
, "/psci", "cpu_suspend", cpu_suspend_fn
);
527 qemu_fdt_setprop_cell(fdt
, "/psci", "cpu_off", cpu_off_fn
);
528 qemu_fdt_setprop_cell(fdt
, "/psci", "cpu_on", cpu_on_fn
);
529 qemu_fdt_setprop_cell(fdt
, "/psci", "migrate", migrate_fn
);
532 int arm_load_dtb(hwaddr addr
, const struct arm_boot_info
*binfo
,
533 hwaddr addr_limit
, AddressSpace
*as
, MachineState
*ms
)
537 uint32_t acells
, scells
;
539 hwaddr mem_base
, mem_len
;
543 if (binfo
->dtb_filename
) {
545 filename
= qemu_find_file(QEMU_FILE_TYPE_BIOS
, binfo
->dtb_filename
);
547 fprintf(stderr
, "Couldn't open dtb file %s\n", binfo
->dtb_filename
);
551 fdt
= load_device_tree(filename
, &size
);
553 fprintf(stderr
, "Couldn't open dtb file %s\n", filename
);
559 fdt
= binfo
->get_dtb(binfo
, &size
);
561 fprintf(stderr
, "Board was unable to create a dtb blob\n");
566 if (addr_limit
> addr
&& size
> (addr_limit
- addr
)) {
567 /* Installing the device tree blob at addr would exceed addr_limit.
568 * Whether this constitutes failure is up to the caller to decide,
569 * so just return 0 as size, i.e., no error.
575 acells
= qemu_fdt_getprop_cell(fdt
, "/", "#address-cells",
577 scells
= qemu_fdt_getprop_cell(fdt
, "/", "#size-cells",
579 if (acells
== 0 || scells
== 0) {
580 fprintf(stderr
, "dtb file invalid (#address-cells or #size-cells 0)\n");
584 if (scells
< 2 && binfo
->ram_size
>= 4 * GiB
) {
585 /* This is user error so deserves a friendlier error message
586 * than the failure of setprop_sized_cells would provide
588 fprintf(stderr
, "qemu: dtb file not compatible with "
593 /* nop all root nodes matching /memory or /memory@unit-address */
594 node_path
= qemu_fdt_node_unit_path(fdt
, "memory", &err
);
596 error_report_err(err
);
599 while (node_path
[n
]) {
600 if (g_str_has_prefix(node_path
[n
], "/memory")) {
601 qemu_fdt_nop_node(fdt
, node_path
[n
]);
605 g_strfreev(node_path
);
608 * We drop all the memory nodes which correspond to empty NUMA nodes
609 * from the device tree, because the Linux NUMA binding document
610 * states they should not be generated. Linux will get the NUMA node
611 * IDs of the empty NUMA nodes from the distance map if they are needed.
612 * This means QEMU users may be obliged to provide command lines which
613 * configure distance maps when the empty NUMA node IDs are needed and
614 * Linux's default distance map isn't sufficient.
616 if (ms
->numa_state
!= NULL
&& ms
->numa_state
->num_nodes
> 0) {
617 mem_base
= binfo
->loader_start
;
618 for (i
= 0; i
< ms
->numa_state
->num_nodes
; i
++) {
619 mem_len
= ms
->numa_state
->nodes
[i
].node_mem
;
624 rc
= fdt_add_memory_node(fdt
, acells
, mem_base
,
627 fprintf(stderr
, "couldn't add /memory@%"PRIx64
" node\n",
635 rc
= fdt_add_memory_node(fdt
, acells
, binfo
->loader_start
,
636 scells
, binfo
->ram_size
, -1);
638 fprintf(stderr
, "couldn't add /memory@%"PRIx64
" node\n",
639 binfo
->loader_start
);
644 rc
= fdt_path_offset(fdt
, "/chosen");
646 qemu_fdt_add_subnode(fdt
, "/chosen");
649 if (ms
->kernel_cmdline
&& *ms
->kernel_cmdline
) {
650 rc
= qemu_fdt_setprop_string(fdt
, "/chosen", "bootargs",
653 fprintf(stderr
, "couldn't set /chosen/bootargs\n");
658 if (binfo
->initrd_size
) {
659 rc
= qemu_fdt_setprop_cell(fdt
, "/chosen", "linux,initrd-start",
660 binfo
->initrd_start
);
662 fprintf(stderr
, "couldn't set /chosen/linux,initrd-start\n");
666 rc
= qemu_fdt_setprop_cell(fdt
, "/chosen", "linux,initrd-end",
667 binfo
->initrd_start
+ binfo
->initrd_size
);
669 fprintf(stderr
, "couldn't set /chosen/linux,initrd-end\n");
674 fdt_add_psci_node(fdt
);
676 if (binfo
->modify_dtb
) {
677 binfo
->modify_dtb(binfo
, fdt
);
680 qemu_fdt_dumpdtb(fdt
, size
);
682 /* Put the DTB into the memory map as a ROM image: this will ensure
683 * the DTB is copied again upon reset, even if addr points into RAM.
685 rom_add_blob_fixed_as("dtb", fdt
, size
, addr
, as
);
686 qemu_register_reset_nosnapshotload(qemu_fdt_randomize_seeds
,
687 rom_ptr_for_as(as
, addr
, size
));
698 static void do_cpu_reset(void *opaque
)
700 ARMCPU
*cpu
= opaque
;
701 CPUState
*cs
= CPU(cpu
);
702 CPUARMState
*env
= &cpu
->env
;
703 const struct arm_boot_info
*info
= env
->boot_info
;
707 if (!info
->is_linux
) {
709 /* Jump to the entry point. */
710 uint64_t entry
= info
->entry
;
712 switch (info
->endianness
) {
713 case ARM_ENDIANNESS_LE
:
714 env
->cp15
.sctlr_el
[1] &= ~SCTLR_E0E
;
715 for (i
= 1; i
< 4; ++i
) {
716 env
->cp15
.sctlr_el
[i
] &= ~SCTLR_EE
;
718 env
->uncached_cpsr
&= ~CPSR_E
;
720 case ARM_ENDIANNESS_BE8
:
721 env
->cp15
.sctlr_el
[1] |= SCTLR_E0E
;
722 for (i
= 1; i
< 4; ++i
) {
723 env
->cp15
.sctlr_el
[i
] |= SCTLR_EE
;
725 env
->uncached_cpsr
|= CPSR_E
;
727 case ARM_ENDIANNESS_BE32
:
728 env
->cp15
.sctlr_el
[1] |= SCTLR_B
;
730 case ARM_ENDIANNESS_UNKNOWN
:
731 break; /* Board's decision */
733 g_assert_not_reached();
736 cpu_set_pc(cs
, entry
);
738 /* If we are booting Linux then we need to check whether we are
739 * booting into secure or non-secure state and adjust the state
740 * accordingly. Out of reset, ARM is defined to be in secure state
741 * (SCR.NS = 0), we change that here if non-secure boot has been
744 if (arm_feature(env
, ARM_FEATURE_EL3
)) {
745 /* AArch64 is defined to come out of reset into EL3 if enabled.
746 * If we are booting Linux then we need to adjust our EL as
747 * Linux expects us to be in EL2 or EL1. AArch32 resets into
748 * SVC, which Linux expects, so no privilege/exception level to
752 env
->cp15
.scr_el3
|= SCR_RW
;
753 if (arm_feature(env
, ARM_FEATURE_EL2
)) {
754 env
->cp15
.hcr_el2
|= HCR_RW
;
755 env
->pstate
= PSTATE_MODE_EL2h
;
757 env
->pstate
= PSTATE_MODE_EL1h
;
759 if (cpu_isar_feature(aa64_pauth
, cpu
)) {
760 env
->cp15
.scr_el3
|= SCR_API
| SCR_APK
;
762 if (cpu_isar_feature(aa64_mte
, cpu
)) {
763 env
->cp15
.scr_el3
|= SCR_ATA
;
765 if (cpu_isar_feature(aa64_sve
, cpu
)) {
766 env
->cp15
.cptr_el
[3] |= R_CPTR_EL3_EZ_MASK
;
768 if (cpu_isar_feature(aa64_sme
, cpu
)) {
769 env
->cp15
.cptr_el
[3] |= R_CPTR_EL3_ESM_MASK
;
770 env
->cp15
.scr_el3
|= SCR_ENTP2
;
772 /* AArch64 kernels never boot in secure mode */
773 assert(!info
->secure_boot
);
774 /* This hook is only supported for AArch32 currently:
775 * bootloader_aarch64[] will not call the hook, and
776 * the code above has already dropped us into EL2 or EL1.
778 assert(!info
->secure_board_setup
);
781 if (arm_feature(env
, ARM_FEATURE_EL2
)) {
782 /* If we have EL2 then Linux expects the HVC insn to work */
783 env
->cp15
.scr_el3
|= SCR_HCE
;
786 /* Set to non-secure if not a secure boot */
787 if (!info
->secure_boot
&&
788 (cs
!= first_cpu
|| !info
->secure_board_setup
)) {
789 /* Linux expects non-secure state */
790 env
->cp15
.scr_el3
|= SCR_NS
;
791 /* Set NSACR.{CP11,CP10} so NS can access the FPU */
792 env
->cp15
.nsacr
|= 3 << 10;
796 if (!env
->aarch64
&& !info
->secure_boot
&&
797 arm_feature(env
, ARM_FEATURE_EL2
)) {
799 * This is an AArch32 boot not to Secure state, and
800 * we have Hyp mode available, so boot the kernel into
801 * Hyp mode. This is not how the CPU comes out of reset,
802 * so we need to manually put it there.
804 cpsr_write(env
, ARM_CPU_MODE_HYP
, CPSR_M
, CPSRWriteRaw
);
807 if (cs
== first_cpu
) {
808 AddressSpace
*as
= arm_boot_address_space(cpu
, info
);
810 cpu_set_pc(cs
, info
->loader_start
);
812 if (!have_dtb(info
)) {
814 set_kernel_args_old(info
, as
);
816 set_kernel_args(info
, as
);
819 } else if (info
->secondary_cpu_reset_hook
) {
820 info
->secondary_cpu_reset_hook(cpu
, info
);
823 arm_rebuild_hflags(env
);
/*
 * Per-object callback passed to object_child_foreach_recursive().
 *
 * @obj:    object being visited
 * @opaque: the struct arm_boot_info for this boot
 *
 * Objects that implement TYPE_ARM_LINUX_BOOT_IF and whose class sets
 * an arm_linux_init hook have that hook called with the boot info's
 * secure_boot flag. Other objects are left alone.
 */
827 static int do_arm_linux_init(Object
*obj
, void *opaque
)
829 if (object_dynamic_cast(obj
, TYPE_ARM_LINUX_BOOT_IF
)) {
830 ARMLinuxBootIf
*albif
= ARM_LINUX_BOOT_IF(obj
);
831 ARMLinuxBootIfClass
*albifc
= ARM_LINUX_BOOT_IF_GET_CLASS(obj
);
832 struct arm_boot_info
*info
= opaque
;
834 if (albifc
->arm_linux_init
) {
835 albifc
->arm_linux_init(albif
, info
->secure_boot
);
/*
 * Load info->kernel_filename as an ELF image.
 *
 * The ELF header is read first with load_elf_hdr() and used to set
 * info->endianness: the EI_DATA byte of e_ident decides big- versus
 * little-endian, and for 32-bit big-endian files the EF_ARM_BE8 bit in
 * e_flags selects BE8 over BE32. The image is then loaded with
 * load_elf_as(), which receives @pentry, @lowaddr, @highaddr,
 * the big_endian flag and @elf_machine.
 */
841 static ssize_t
arm_load_elf(struct arm_boot_info
*info
, uint64_t *pentry
,
842 uint64_t *lowaddr
, uint64_t *highaddr
,
843 int elf_machine
, AddressSpace
*as
)
856 load_elf_hdr(info
->kernel_filename
, &elf_header
, &elf_is64
, &err
);
863 big_endian
= elf_header
.h64
.e_ident
[EI_DATA
] == ELFDATA2MSB
;
864 info
->endianness
= big_endian
? ARM_ENDIANNESS_BE8
867 big_endian
= elf_header
.h32
.e_ident
[EI_DATA
] == ELFDATA2MSB
;
/*
 * NOTE(review): bswap32() swaps regardless of host byte order. If
 * elf_header holds the raw on-disk bytes of a big-endian file, this
 * gives the right e_flags only on a little-endian host; be32_to_cpu()
 * may be what is intended -- confirm against load_elf_hdr().
 */
869 if (bswap32(elf_header
.h32
.e_flags
) & EF_ARM_BE8
) {
870 info
->endianness
= ARM_ENDIANNESS_BE8
;
872 info
->endianness
= ARM_ENDIANNESS_BE32
;
873 /* In BE32, the CPU has a different view of the per-byte
874 * address map than the rest of the system. BE32 ELF files
875 * are organised such that they can be programmed through
876 * the CPU's per-word byte-reversed view of the world. QEMU
877 * however loads ELF files independently of the CPU. So
878 * tell the ELF loader to byte reverse the data for us.
883 info
->endianness
= ARM_ENDIANNESS_LE
;
887 ret
= load_elf_as(info
->kernel_filename
, NULL
, NULL
, NULL
,
888 pentry
, lowaddr
, highaddr
, NULL
, big_endian
, elf_machine
,
891 /* The header loaded but the image didn't */
898 static uint64_t load_aarch64_image(const char *filename
, hwaddr mem_base
,
899 hwaddr
*entry
, AddressSpace
*as
)
901 hwaddr kernel_load_offset
= KERNEL64_LOAD_ADDR
;
902 uint64_t kernel_size
= 0;
906 /* On aarch64, it's the bootloader's job to uncompress the kernel. */
907 size
= load_image_gzipped_buffer(filename
, LOAD_IMAGE_MAX_GUNZIP_BYTES
,
913 /* Load as raw file otherwise */
914 if (!g_file_get_contents(filename
, (char **)&buffer
, &len
, NULL
)) {
920 /* check the arm64 magic header value -- very old kernels may not have it */
921 if (size
> ARM64_MAGIC_OFFSET
+ 4 &&
922 memcmp(buffer
+ ARM64_MAGIC_OFFSET
, "ARM\x64", 4) == 0) {
925 /* The arm64 Image header has text_offset and image_size fields at 8 and
926 * 16 bytes into the Image header, respectively. The text_offset field
927 * is only valid if the image_size is non-zero.
929 memcpy(&hdrvals
, buffer
+ ARM64_TEXT_OFFSET_OFFSET
, sizeof(hdrvals
));
931 kernel_size
= le64_to_cpu(hdrvals
[1]);
933 if (kernel_size
!= 0) {
934 kernel_load_offset
= le64_to_cpu(hdrvals
[0]);
937 * We write our startup "bootloader" at the very bottom of RAM,
938 * so that bit can't be used for the image. Luckily the Image
939 * format specification is that the image requests only an offset
940 * from a 2MB boundary, not an absolute load address. So if the
941 * image requests an offset that might mean it overlaps with the
942 * bootloader, we can just load it starting at 2MB+offset rather
945 if (kernel_load_offset
< BOOTLOADER_MAX_SIZE
) {
946 kernel_load_offset
+= 2 * MiB
;
952 * Kernels before v3.17 don't populate the image_size field, and
953 * raw images have no header. For those our best guess at the size
954 * is the size of the Image file itself.
956 if (kernel_size
== 0) {
960 *entry
= mem_base
+ kernel_load_offset
;
961 rom_add_blob_fixed_as(filename
, buffer
, size
, *entry
, as
);
968 static void arm_setup_direct_kernel_boot(ARMCPU
*cpu
,
969 struct arm_boot_info
*info
)
971 /* Set up for a direct boot of a kernel image file. */
973 AddressSpace
*as
= arm_boot_address_space(cpu
, info
);
978 /* Addresses of first byte used and first byte not used by the image */
979 uint64_t image_low_addr
= 0, image_high_addr
= 0;
982 static const ARMInsnFixup
*primary_loader
;
983 uint64_t ram_end
= info
->loader_start
+ info
->ram_size
;
985 if (arm_feature(&cpu
->env
, ARM_FEATURE_AARCH64
)) {
986 primary_loader
= bootloader_aarch64
;
987 elf_machine
= EM_AARCH64
;
989 primary_loader
= bootloader
;
990 if (!info
->write_board_setup
) {
991 primary_loader
+= BOOTLOADER_NO_BOARD_SETUP_OFFSET
;
993 elf_machine
= EM_ARM
;
996 /* Assume that raw images are linux kernels, and ELF images are not. */
997 kernel_size
= arm_load_elf(info
, &elf_entry
, &image_low_addr
,
998 &image_high_addr
, elf_machine
, as
);
999 if (kernel_size
> 0 && have_dtb(info
)) {
1001 * If there is still some room left at the base of RAM, try and put
1002 * the DTB there like we do for images loaded with -bios or -pflash.
1004 if (image_low_addr
> info
->loader_start
1005 || image_high_addr
< info
->loader_start
) {
1007 * Set image_low_addr as address limit for arm_load_dtb if it may be
1008 * pointing into RAM, otherwise pass '0' (no limit)
1010 if (image_low_addr
< info
->loader_start
) {
1013 info
->dtb_start
= info
->loader_start
;
1014 info
->dtb_limit
= image_low_addr
;
1018 if (kernel_size
< 0) {
1019 uint64_t loadaddr
= info
->loader_start
+ KERNEL_NOLOAD_ADDR
;
1020 kernel_size
= load_uimage_as(info
->kernel_filename
, &entry
, &loadaddr
,
1021 &is_linux
, NULL
, NULL
, as
);
1022 if (kernel_size
>= 0) {
1023 image_low_addr
= loadaddr
;
1024 image_high_addr
= image_low_addr
+ kernel_size
;
1027 if (arm_feature(&cpu
->env
, ARM_FEATURE_AARCH64
) && kernel_size
< 0) {
1028 kernel_size
= load_aarch64_image(info
->kernel_filename
,
1029 info
->loader_start
, &entry
, as
);
1031 if (kernel_size
>= 0) {
1032 image_low_addr
= entry
;
1033 image_high_addr
= image_low_addr
+ kernel_size
;
1035 } else if (kernel_size
< 0) {
1037 entry
= info
->loader_start
+ KERNEL_LOAD_ADDR
;
1038 kernel_size
= load_image_targphys_as(info
->kernel_filename
, entry
,
1039 ram_end
- KERNEL_LOAD_ADDR
, as
);
1041 if (kernel_size
>= 0) {
1042 image_low_addr
= entry
;
1043 image_high_addr
= image_low_addr
+ kernel_size
;
1046 if (kernel_size
< 0) {
1047 error_report("could not load kernel '%s'", info
->kernel_filename
);
1051 if (kernel_size
> info
->ram_size
) {
1052 error_report("kernel '%s' is too large to fit in RAM "
1053 "(kernel size %zd, RAM size %" PRId64
")",
1054 info
->kernel_filename
, kernel_size
, info
->ram_size
);
1058 info
->entry
= entry
;
1061 * We want to put the initrd far enough into RAM that when the
1062 * kernel is uncompressed it will not clobber the initrd. However
1063 * on boards without much RAM we must ensure that we still leave
1064 * enough room for a decent sized initrd, and on boards with large
1065 * amounts of RAM we must avoid the initrd being so far up in RAM
1066 * that it is outside lowmem and inaccessible to the kernel.
1067 * So for boards with less than 256MB of RAM we put the initrd
1068 * halfway into RAM, and for boards with 256MB of RAM or more we put
1069 * the initrd at 128MB.
1070 * We also refuse to put the initrd somewhere that will definitely
1071 * overlay the kernel we just loaded, though for kernel formats which
1072 * don't tell us their exact size (eg self-decompressing 32-bit kernels)
1073 * we might still make a bad choice here.
1075 info
->initrd_start
= info
->loader_start
+
1076 MIN(info
->ram_size
/ 2, 128 * MiB
);
1077 if (image_high_addr
) {
1078 info
->initrd_start
= MAX(info
->initrd_start
, image_high_addr
);
1080 info
->initrd_start
= TARGET_PAGE_ALIGN(info
->initrd_start
);
1083 uint32_t fixupcontext
[FIXUP_MAX
];
1085 if (info
->initrd_filename
) {
1087 if (info
->initrd_start
>= ram_end
) {
1088 error_report("not enough space after kernel to load initrd");
1092 initrd_size
= load_ramdisk_as(info
->initrd_filename
,
1094 ram_end
- info
->initrd_start
, as
);
1095 if (initrd_size
< 0) {
1096 initrd_size
= load_image_targphys_as(info
->initrd_filename
,
1102 if (initrd_size
< 0) {
1103 error_report("could not load initrd '%s'",
1104 info
->initrd_filename
);
1107 if (info
->initrd_start
+ initrd_size
> ram_end
) {
1108 error_report("could not load initrd '%s': "
1109 "too big to fit into RAM after the kernel",
1110 info
->initrd_filename
);
1116 info
->initrd_size
= initrd_size
;
1118 fixupcontext
[FIXUP_BOARDID
] = info
->board_id
;
1119 fixupcontext
[FIXUP_BOARD_SETUP
] = info
->board_setup_addr
;
1122 * for device tree boot, we pass the DTB directly in r2. Otherwise
1123 * we point to the kernel args.
1125 if (have_dtb(info
)) {
1128 if (elf_machine
== EM_AARCH64
) {
1130 * Some AArch64 kernels on early bootup map the fdt region as
1132 * [ ALIGN_DOWN(fdt, 2MB) ... ALIGN_DOWN(fdt, 2MB) + 2MB ]
1134 * Let's play safe and prealign it to 2MB to give us some space.
1139 * Some 32bit kernels will trash anything in the 4K page the
1140 * initrd ends in, so make sure the DTB isn't caught up in that.
1145 /* Place the DTB after the initrd in memory with alignment. */
1146 info
->dtb_start
= QEMU_ALIGN_UP(info
->initrd_start
+ initrd_size
,
1148 if (info
->dtb_start
>= ram_end
) {
1149 error_report("Not enough space for DTB after kernel/initrd");
1152 fixupcontext
[FIXUP_ARGPTR_LO
] = info
->dtb_start
;
1153 fixupcontext
[FIXUP_ARGPTR_HI
] = info
->dtb_start
>> 32;
1155 fixupcontext
[FIXUP_ARGPTR_LO
] =
1156 info
->loader_start
+ KERNEL_ARGS_ADDR
;
1157 fixupcontext
[FIXUP_ARGPTR_HI
] =
1158 (info
->loader_start
+ KERNEL_ARGS_ADDR
) >> 32;
1159 if (info
->ram_size
>= 4 * GiB
) {
1160 error_report("RAM size must be less than 4GB to boot"
1161 " Linux kernel using ATAGS (try passing a device tree"
1166 fixupcontext
[FIXUP_ENTRYPOINT_LO
] = entry
;
1167 fixupcontext
[FIXUP_ENTRYPOINT_HI
] = entry
>> 32;
1169 write_bootloader("bootloader", info
->loader_start
,
1170 primary_loader
, fixupcontext
, as
);
1172 if (info
->write_board_setup
) {
1173 info
->write_board_setup(cpu
, info
);
1177 * Notify devices which need to fake up firmware initialization
1178 * that we're doing a direct kernel boot.
1180 object_child_foreach_recursive(object_get_root(),
1181 do_arm_linux_init
, info
);
1183 info
->is_linux
= is_linux
;
1185 for (cs
= first_cpu
; cs
; cs
= CPU_NEXT(cs
)) {
1186 ARM_CPU(cs
)->env
.boot_info
= info
;
1190 static void arm_setup_firmware_boot(ARMCPU
*cpu
, struct arm_boot_info
*info
)
1192 /* Set up for booting firmware (which might load a kernel via fw_cfg) */
1194 if (have_dtb(info
)) {
1196 * If we have a device tree blob, but no kernel to supply it to (or
1197 * the kernel is supposed to be loaded by the bootloader), copy the
1198 * DTB to the base of RAM for the bootloader to pick up.
1200 info
->dtb_start
= info
->loader_start
;
1203 if (info
->kernel_filename
) {
1205 bool try_decompressing_kernel
;
1207 fw_cfg
= fw_cfg_find();
1210 error_report("This machine type does not support loading both "
1211 "a guest firmware/BIOS image and a guest kernel at "
1212 "the same time. You should change your QEMU command "
1213 "line to specify one or the other, but not both.");
1217 try_decompressing_kernel
= arm_feature(&cpu
->env
,
1218 ARM_FEATURE_AARCH64
);
1221 * Expose the kernel, the command line, and the initrd in fw_cfg.
1222 * We don't process them here at all, it's all left to the
1225 load_image_to_fw_cfg(fw_cfg
,
1226 FW_CFG_KERNEL_SIZE
, FW_CFG_KERNEL_DATA
,
1227 info
->kernel_filename
,
1228 try_decompressing_kernel
);
1229 load_image_to_fw_cfg(fw_cfg
,
1230 FW_CFG_INITRD_SIZE
, FW_CFG_INITRD_DATA
,
1231 info
->initrd_filename
, false);
1233 if (info
->kernel_cmdline
) {
1234 fw_cfg_add_i32(fw_cfg
, FW_CFG_CMDLINE_SIZE
,
1235 strlen(info
->kernel_cmdline
) + 1);
1236 fw_cfg_add_string(fw_cfg
, FW_CFG_CMDLINE_DATA
,
1237 info
->kernel_cmdline
);
1242 * We will start from address 0 (typically a boot ROM image) in the
1243 * same way as hardware. Leave env->boot_info NULL, so that
1244 * do_cpu_reset() knows it does not need to alter the PC on reset.
1248 void arm_load_kernel(ARMCPU
*cpu
, MachineState
*ms
, struct arm_boot_info
*info
)
1251 AddressSpace
*as
= arm_boot_address_space(cpu
, info
);
1253 CPUARMState
*env
= &cpu
->env
;
1257 * CPU objects (unlike devices) are not automatically reset on system
1258 * reset, so we must always register a handler to do so. If we're
1259 * actually loading a kernel, the handler is also responsible for
1260 * arranging that we start it correctly.
1262 for (cs
= first_cpu
; cs
; cs
= CPU_NEXT(cs
)) {
1263 qemu_register_reset(do_cpu_reset
, ARM_CPU(cs
));
1268 * The board code is not supposed to set secure_board_setup unless
1269 * running its code in secure mode is actually possible, and KVM
1270 * doesn't support secure.
1272 assert(!(info
->secure_board_setup
&& kvm_enabled()));
1273 info
->kernel_filename
= ms
->kernel_filename
;
1274 info
->kernel_cmdline
= ms
->kernel_cmdline
;
1275 info
->initrd_filename
= ms
->initrd_filename
;
1276 info
->dtb_filename
= ms
->dtb
;
1277 info
->dtb_limit
= 0;
1279 /* Load the kernel. */
1280 if (!info
->kernel_filename
|| info
->firmware_loaded
) {
1281 arm_setup_firmware_boot(cpu
, info
);
1283 arm_setup_direct_kernel_boot(cpu
, info
);
1287 * Disable the PSCI conduit if it is set up to target the same
1288 * or a lower EL than the one we're going to start the guest code in.
1289 * This logic needs to agree with the code in do_cpu_reset() which
1290 * decides whether we're going to boot the guest in the highest
1291 * supported exception level or in a lower one.
1295 * If PSCI is enabled, then SMC calls all go to the PSCI handler and
1296 * are never emulated to trap into guest code. It therefore does not
1297 * make sense for the board to have a setup code fragment that runs
1298 * in Secure, because this will probably need to itself issue an SMC of some
1299 * kind as part of its operation.
1301 assert(info
->psci_conduit
== QEMU_PSCI_CONDUIT_DISABLED
||
1302 !info
->secure_board_setup
);
1304 /* Boot into highest supported EL ... */
1305 if (arm_feature(env
, ARM_FEATURE_EL3
)) {
1307 } else if (arm_feature(env
, ARM_FEATURE_EL2
)) {
1312 /* ...except that if we're booting Linux we adjust the EL we boot into */
1313 if (info
->is_linux
&& !info
->secure_boot
) {
1314 boot_el
= arm_feature(env
, ARM_FEATURE_EL2
) ? 2 : 1;
1317 if ((info
->psci_conduit
== QEMU_PSCI_CONDUIT_HVC
&& boot_el
>= 2) ||
1318 (info
->psci_conduit
== QEMU_PSCI_CONDUIT_SMC
&& boot_el
== 3)) {
1319 info
->psci_conduit
= QEMU_PSCI_CONDUIT_DISABLED
;
1322 if (info
->psci_conduit
!= QEMU_PSCI_CONDUIT_DISABLED
) {
1323 for (cs
= first_cpu
; cs
; cs
= CPU_NEXT(cs
)) {
1324 Object
*cpuobj
= OBJECT(cs
);
1326 object_property_set_int(cpuobj
, "psci-conduit", info
->psci_conduit
,
1329 * Secondary CPUs start in PSCI powered-down state. Like the
1330 * code in do_cpu_reset(), we assume first_cpu is the primary
1333 if (cs
!= first_cpu
) {
1334 object_property_set_bool(cpuobj
, "start-powered-off", true,
1340 if (info
->psci_conduit
== QEMU_PSCI_CONDUIT_DISABLED
&&
1341 info
->is_linux
&& nb_cpus
> 1) {
1343 * We're booting Linux but not using PSCI, so for SMP we need
1344 * to write a custom secondary CPU boot loader stub, and arrange
1345 * for the secondary CPU reset to make the accompanying initialization.
1347 if (!info
->secondary_cpu_reset_hook
) {
1348 info
->secondary_cpu_reset_hook
= default_reset_secondary
;
1350 if (!info
->write_secondary_boot
) {
1351 info
->write_secondary_boot
= default_write_secondary
;
1353 info
->write_secondary_boot(cpu
, info
);
1356 * No secondary boot stub; don't use the reset hook that would
1357 * have set the CPU up to call it
1359 info
->write_secondary_boot
= NULL
;
1360 info
->secondary_cpu_reset_hook
= NULL
;
1364 * arm_load_dtb() may add a PSCI node so it must be called after we have
1365 * decided whether to enable PSCI and set the psci-conduit CPU properties.
1367 if (!info
->skip_dtb_autoload
&& have_dtb(info
)) {
1368 if (arm_load_dtb(info
->dtb_start
, info
, info
->dtb_limit
, as
, ms
) < 0) {
1374 static const TypeInfo arm_linux_boot_if_info
= {
1375 .name
= TYPE_ARM_LINUX_BOOT_IF
,
1376 .parent
= TYPE_INTERFACE
,
1377 .class_size
= sizeof(ARMLinuxBootIfClass
),
1380 static void arm_linux_boot_register_types(void)
1382 type_register_static(&arm_linux_boot_if_info
);
/*
 * Hook arm_linux_boot_register_types() into type registration.
 * NOTE(review): type_init is defined outside this file; presumably it
 * arranges for the function to run during startup - confirm.
 */
type_init(arm_linux_boot_register_types)