/*
 * Machine specific setup for xen
 *
 * Jeremy Fitzhardinge <jeremy@xensource.com>, XenSource Inc, 2007
 */
7 #include <linux/module.h>
8 #include <linux/sched.h>
11 #include <linux/memblock.h>
16 #include <asm/setup.h>
18 #include <asm/xen/hypervisor.h>
19 #include <asm/xen/hypercall.h>
23 #include <xen/interface/callback.h>
24 #include <xen/interface/memory.h>
25 #include <xen/interface/physdev.h>
26 #include <xen/features.h>
/*
 * These are code, but not functions.  Defined in entry.S.
 *
 * The two callbacks are declared as char arrays so that only their
 * addresses can be taken; the three *_target symbols are declared as
 * functions so they can be handed to register_callback().
 */
extern const char xen_hypervisor_callback[];
extern const char xen_failsafe_callback[];
extern void xen_sysenter_target(void);
extern void xen_syscall_target(void);
extern void xen_syscall32_target(void);
38 /* Amount of extra memory space we add to the e820 ranges */
39 phys_addr_t xen_extra_mem_start
, xen_extra_mem_size
;
/*
 * The maximum amount of extra memory compared to the base size.  The
 * main scaling factor is the size of struct page.  At extreme ratios
 * of base:extra, all the base memory can be filled with page
 * structures for the extra memory, leaving no space for anything
 * else.
 *
 * 10x seems like a reasonable balance between scaling flexibility and
 * leaving a practically usable system.
 */
#define EXTRA_MEM_RATIO		(10)
53 static void __init
xen_add_extra_mem(unsigned long pages
)
57 u64 size
= (u64
)pages
* PAGE_SIZE
;
58 u64 extra_start
= xen_extra_mem_start
+ xen_extra_mem_size
;
63 e820_add_region(extra_start
, size
, E820_RAM
);
64 sanitize_e820_map(e820
.map
, ARRAY_SIZE(e820
.map
), &e820
.nr_map
);
66 memblock_x86_reserve_range(extra_start
, extra_start
+ size
, "XEN EXTRA");
68 xen_extra_mem_size
+= size
;
70 xen_max_p2m_pfn
= PFN_DOWN(extra_start
+ size
);
72 for (pfn
= PFN_DOWN(extra_start
); pfn
<= xen_max_p2m_pfn
; pfn
++)
73 __set_phys_to_machine(pfn
, INVALID_P2M_ENTRY
);
76 static unsigned long __init
xen_release_chunk(phys_addr_t start_addr
,
79 struct xen_memory_reservation reservation
= {
84 unsigned long start
, end
;
85 unsigned long len
= 0;
89 start
= PFN_UP(start_addr
);
90 end
= PFN_DOWN(end_addr
);
95 for(pfn
= start
; pfn
< end
; pfn
++) {
96 unsigned long mfn
= pfn_to_mfn(pfn
);
98 /* Make sure pfn exists to start with */
99 if (mfn
== INVALID_P2M_ENTRY
|| mfn_to_pfn(mfn
) != pfn
)
102 set_xen_guest_handle(reservation
.extent_start
, &mfn
);
103 reservation
.nr_extents
= 1;
105 ret
= HYPERVISOR_memory_op(XENMEM_decrease_reservation
,
107 WARN(ret
!= 1, "Failed to release pfn %lx err=%d\n", pfn
, ret
);
109 __set_phys_to_machine(pfn
, INVALID_P2M_ENTRY
);
113 printk(KERN_INFO
"Freeing %lx-%lx pfn range: %lu pages freed\n",
119 static unsigned long __init
xen_return_unused_memory(unsigned long max_pfn
,
120 const struct e820map
*e820
)
122 phys_addr_t max_addr
= PFN_PHYS(max_pfn
);
123 phys_addr_t last_end
= ISA_END_ADDRESS
;
124 unsigned long released
= 0;
127 /* Free any unused memory above the low 1Mbyte. */
128 for (i
= 0; i
< e820
->nr_map
&& last_end
< max_addr
; i
++) {
129 phys_addr_t end
= e820
->map
[i
].addr
;
130 end
= min(max_addr
, end
);
133 released
+= xen_release_chunk(last_end
, end
);
134 last_end
= max(last_end
, e820
->map
[i
].addr
+ e820
->map
[i
].size
);
137 if (last_end
< max_addr
)
138 released
+= xen_release_chunk(last_end
, max_addr
);
140 printk(KERN_INFO
"released %lu pages of unused memory\n", released
);
144 static unsigned long __init
xen_set_identity(const struct e820entry
*list
,
147 phys_addr_t last
= xen_initial_domain() ? 0 : ISA_END_ADDRESS
;
148 phys_addr_t start_pci
= last
;
149 const struct e820entry
*entry
;
150 unsigned long identity
= 0;
153 for (i
= 0, entry
= list
; i
< map_size
; i
++, entry
++) {
154 phys_addr_t start
= entry
->addr
;
155 phys_addr_t end
= start
+ entry
->size
;
163 /* Skip over the 1MB region. */
167 if ((entry
->type
== E820_RAM
) || (entry
->type
== E820_UNUSABLE
)) {
168 if (start
> start_pci
)
169 identity
+= set_phys_range_identity(
170 PFN_UP(start_pci
), PFN_DOWN(start
));
172 /* Without saving 'last' we would gooble RAM too
173 * at the end of the loop. */
178 start_pci
= min(start
, start_pci
);
181 if (last
> start_pci
)
182 identity
+= set_phys_range_identity(
183 PFN_UP(start_pci
), PFN_DOWN(last
));
187 static unsigned long __init
xen_get_max_pages(void)
189 unsigned long max_pages
= MAX_DOMAIN_PAGES
;
190 domid_t domid
= DOMID_SELF
;
193 ret
= HYPERVISOR_memory_op(XENMEM_maximum_reservation
, &domid
);
196 return min(max_pages
, MAX_DOMAIN_PAGES
);
200 * machine_specific_memory_setup - Hook for machine specific memory setup.
202 char * __init
xen_memory_setup(void)
204 static struct e820entry map
[E820MAX
] __initdata
;
205 static struct e820entry map_raw
[E820MAX
] __initdata
;
207 unsigned long max_pfn
= xen_start_info
->nr_pages
;
208 unsigned long long mem_end
;
210 struct xen_memory_map memmap
;
211 unsigned long extra_pages
= 0;
212 unsigned long extra_limit
;
213 unsigned long identity_pages
= 0;
217 max_pfn
= min(MAX_DOMAIN_PAGES
, max_pfn
);
218 mem_end
= PFN_PHYS(max_pfn
);
220 memmap
.nr_entries
= E820MAX
;
221 set_xen_guest_handle(memmap
.buffer
, map
);
223 op
= xen_initial_domain() ?
224 XENMEM_machine_memory_map
:
226 rc
= HYPERVISOR_memory_op(op
, &memmap
);
228 BUG_ON(xen_initial_domain());
229 memmap
.nr_entries
= 1;
231 map
[0].size
= mem_end
;
232 /* 8MB slack (to balance backend allocations). */
233 map
[0].size
+= 8ULL << 20;
234 map
[0].type
= E820_RAM
;
239 memcpy(map_raw
, map
, sizeof(map
));
241 xen_extra_mem_start
= mem_end
;
242 for (i
= 0; i
< memmap
.nr_entries
; i
++) {
243 unsigned long long end
;
245 /* Guard against non-page aligned E820 entries. */
246 if (map
[i
].type
== E820_RAM
)
247 map
[i
].size
-= (map
[i
].size
+ map
[i
].addr
) % PAGE_SIZE
;
249 end
= map
[i
].addr
+ map
[i
].size
;
250 if (map
[i
].type
== E820_RAM
&& end
> mem_end
) {
251 /* RAM off the end - may be partially included */
252 u64 delta
= min(map
[i
].size
, end
- mem_end
);
254 map
[i
].size
-= delta
;
257 extra_pages
+= PFN_DOWN(delta
);
259 * Set RAM below 4GB that is not for us to be unusable.
260 * This prevents "System RAM" address space from being
261 * used as potential resource for I/O address (happens
262 * when 'allocate_resource' is called).
265 (xen_initial_domain() && end
< 0x100000000ULL
))
266 e820_add_region(end
, delta
, E820_UNUSABLE
);
269 if (map
[i
].size
> 0 && end
> xen_extra_mem_start
)
270 xen_extra_mem_start
= end
;
272 /* Add region if any remains */
274 e820_add_region(map
[i
].addr
, map
[i
].size
, map
[i
].type
);
276 /* Align the balloon area so that max_low_pfn does not get set
277 * to be at the _end_ of the PCI gap at the far end (fee01000).
278 * Note that xen_extra_mem_start gets set in the loop above to be
279 * past the last E820 region. */
280 if (xen_initial_domain() && (xen_extra_mem_start
< (1ULL<<32)))
281 xen_extra_mem_start
= (1ULL<<32);
284 * In domU, the ISA region is normal, usable memory, but we
285 * reserve ISA memory anyway because too many things poke
288 * In Dom0, the host E820 information can leave gaps in the
289 * ISA range, which would cause us to release those pages. To
290 * avoid this, we unconditionally reserve them here.
292 e820_add_region(ISA_START_ADDRESS
, ISA_END_ADDRESS
- ISA_START_ADDRESS
,
299 * See comment above "struct start_info" in <xen/interface/xen.h>
301 memblock_x86_reserve_range(__pa(xen_start_info
->mfn_list
),
302 __pa(xen_start_info
->pt_base
),
305 sanitize_e820_map(e820
.map
, ARRAY_SIZE(e820
.map
), &e820
.nr_map
);
307 extra_limit
= xen_get_max_pages();
308 if (extra_limit
>= max_pfn
)
309 extra_pages
= extra_limit
- max_pfn
;
313 extra_pages
+= xen_return_unused_memory(xen_start_info
->nr_pages
, &e820
);
316 * Clamp the amount of extra memory to a EXTRA_MEM_RATIO
317 * factor the base size. On non-highmem systems, the base
318 * size is the full initial memory allocation; on highmem it
319 * is limited to the max size of lowmem, so that it doesn't
320 * get completely filled.
322 * In principle there could be a problem in lowmem systems if
323 * the initial memory is also very large with respect to
324 * lowmem, but we won't try to deal with that here.
326 extra_limit
= min(EXTRA_MEM_RATIO
* min(max_pfn
, PFN_DOWN(MAXMEM
)),
327 max_pfn
+ extra_pages
);
329 if (extra_limit
>= max_pfn
)
330 extra_pages
= extra_limit
- max_pfn
;
334 xen_add_extra_mem(extra_pages
);
337 * Set P2M for all non-RAM pages and E820 gaps to be identity
338 * type PFNs. We supply it with the non-sanitized version
341 identity_pages
= xen_set_identity(map_raw
, memmap
.nr_entries
);
342 printk(KERN_INFO
"Set %ld page(s) to 1-1 mapping.\n", identity_pages
);
347 * Set the bit indicating "nosegneg" library variants should be used.
348 * We only need to bother in pure 32-bit mode; compat 32-bit processes
349 * can have un-truncated segments, so wrapping around is allowed.
351 static void __init
fiddle_vdso(void)
355 mask
= VDSO32_SYMBOL(&vdso32_int80_start
, NOTE_MASK
);
356 *mask
|= 1 << VDSO_NOTE_NONEGSEG_BIT
;
357 mask
= VDSO32_SYMBOL(&vdso32_sysenter_start
, NOTE_MASK
);
358 *mask
|= 1 << VDSO_NOTE_NONEGSEG_BIT
;
362 static int __cpuinit
register_callback(unsigned type
, const void *func
)
364 struct callback_register callback
= {
366 .address
= XEN_CALLBACK(__KERNEL_CS
, func
),
367 .flags
= CALLBACKF_mask_events
,
370 return HYPERVISOR_callback_op(CALLBACKOP_register
, &callback
);
373 void __cpuinit
xen_enable_sysenter(void)
376 unsigned sysenter_feature
;
379 sysenter_feature
= X86_FEATURE_SEP
;
381 sysenter_feature
= X86_FEATURE_SYSENTER32
;
384 if (!boot_cpu_has(sysenter_feature
))
387 ret
= register_callback(CALLBACKTYPE_sysenter
, xen_sysenter_target
);
389 setup_clear_cpu_cap(sysenter_feature
);
392 void __cpuinit
xen_enable_syscall(void)
397 ret
= register_callback(CALLBACKTYPE_syscall
, xen_syscall_target
);
399 printk(KERN_ERR
"Failed to set syscall callback: %d\n", ret
);
400 /* Pretty fatal; 64-bit userspace has no other
401 mechanism for syscalls. */
404 if (boot_cpu_has(X86_FEATURE_SYSCALL32
)) {
405 ret
= register_callback(CALLBACKTYPE_syscall32
,
406 xen_syscall32_target
);
408 setup_clear_cpu_cap(X86_FEATURE_SYSCALL32
);
410 #endif /* CONFIG_X86_64 */
413 void __init
xen_arch_setup(void)
415 xen_panic_handler_init();
417 HYPERVISOR_vm_assist(VMASST_CMD_enable
, VMASST_TYPE_4gb_segments
);
418 HYPERVISOR_vm_assist(VMASST_CMD_enable
, VMASST_TYPE_writable_pagetables
);
420 if (!xen_feature(XENFEAT_auto_translated_physmap
))
421 HYPERVISOR_vm_assist(VMASST_CMD_enable
,
422 VMASST_TYPE_pae_extended_cr3
);
424 if (register_callback(CALLBACKTYPE_event
, xen_hypervisor_callback
) ||
425 register_callback(CALLBACKTYPE_failsafe
, xen_failsafe_callback
))
428 xen_enable_sysenter();
429 xen_enable_syscall();
432 if (!(xen_start_info
->flags
& SIF_INITDOMAIN
)) {
433 printk(KERN_INFO
"ACPI in unprivileged domain disabled\n");
438 memcpy(boot_command_line
, xen_start_info
->cmd_line
,
439 MAX_GUEST_CMDLINE
> COMMAND_LINE_SIZE
?
440 COMMAND_LINE_SIZE
: MAX_GUEST_CMDLINE
);
442 /* Set up idle, making sure it calls safe_halt() pvop */
444 boot_cpu_data
.hlt_works_ok
= 1;
446 pm_idle
= default_idle
;
447 boot_option_idle_override
= IDLE_HALT
;