Fix 32-bit overflow in parallels image support
[qemu-kvm/fedora.git] / pc-bios / bios-pq / 0014_add-srat-acpi-table-support.patch
bloba5227df259765bcdd37ddf5829b7daf54f729ac2
1 add SRAT ACPI table support (Andre Przywara)
3 Take NUMA topology info from the QEMU firmware configuration interface
4 (number of nodes, node for each (V)CPU and amount of memory) and build
5 an SRAT table describing this topology for the guest OS. Handles more than
6 4 GB of RAM by including a hole for 32-bit PCI memory mapping.
8 Signed-off-by: Andre Przywara <andre.przywara@amd.com>
9 Signed-off-by: Anthony Liguori <aliguori@us.ibm.com>
11 diff --git a/bios/rombios32.c b/bios/rombios32.c
12 index 49dfd62..d8f6d4e 100644
13 --- a/bios/rombios32.c
14 +++ b/bios/rombios32.c
15 @@ -450,6 +450,11 @@ int pm_sci_int;
16 unsigned long bios_table_cur_addr;
17 unsigned long bios_table_end_addr;
19 +static inline uint64_t le64_to_cpu(uint64_t x)
21 + return x;
24 void wrmsr_smp(uint32_t index, uint64_t val)
26 static struct { uint32_t ecx, eax, edx; } *p = (void *)SMP_MSR_ADDR;
27 @@ -468,6 +473,7 @@ void wrmsr_smp(uint32_t index, uint64_t val)
28 #define QEMU_CFG_SIGNATURE 0x00
29 #define QEMU_CFG_ID 0x01
30 #define QEMU_CFG_UUID 0x02
31 +#define QEMU_CFG_NUMA 0x0D
32 #define QEMU_CFG_ARCH_LOCAL 0x8000
33 #define QEMU_CFG_ACPI_TABLES (QEMU_CFG_ARCH_LOCAL + 0)
34 #define QEMU_CFG_SMBIOS_ENTRIES (QEMU_CFG_ARCH_LOCAL + 1)
35 @@ -529,6 +535,14 @@ static uint16_t smbios_entries(void)
37 return cnt;
40 +uint64_t qemu_cfg_get64 (void)
42 + uint64_t ret;
44 + qemu_cfg_read((uint8_t*)&ret, 8);
45 + return le64_to_cpu(ret);
47 #endif
49 void cpu_probe(void)
50 @@ -1281,7 +1295,7 @@ struct rsdt_descriptor_rev1
52 ACPI_TABLE_HEADER_DEF /* ACPI common table header */
53 #ifdef BX_QEMU
54 - uint32_t table_offset_entry [4]; /* Array of pointers to other */
55 + uint32_t table_offset_entry [5]; /* Array of pointers to other */
56 #else
57 uint32_t table_offset_entry [3]; /* Array of pointers to other */
58 #endif
59 @@ -1389,7 +1403,7 @@ struct multiple_apic_table
60 } __attribute__((__packed__));
63 -/* Values for Type in APIC_HEADER_DEF */
64 +/* Values for Type in APIC sub-headers */
66 #define APIC_PROCESSOR 0
67 #define APIC_IO 1
68 @@ -1402,18 +1416,18 @@ struct multiple_apic_table
69 #define APIC_XRUPT_SOURCE 8
70 #define APIC_RESERVED 9 /* 9 and greater are reserved */
72 -/*
73 - * MADT sub-structures (Follow MULTIPLE_APIC_DESCRIPTION_TABLE)
74 - */
75 -#define APIC_HEADER_DEF /* Common APIC sub-structure header */\
76 +#define ACPI_SUB_HEADER_DEF /* Common ACPI sub-structure header */\
77 uint8_t type; \
78 uint8_t length;
80 +/*
81 + * MADT sub-structures (Follow MULTIPLE_APIC_DESCRIPTION_TABLE)
82 + */
83 /* Sub-structures for MADT */
85 struct madt_processor_apic
87 - APIC_HEADER_DEF
88 + ACPI_SUB_HEADER_DEF
89 uint8_t processor_id; /* ACPI processor id */
90 uint8_t local_apic_id; /* Processor's local APIC id */
91 #if 0
92 @@ -1424,6 +1438,43 @@ struct madt_processor_apic
93 #endif
94 } __attribute__((__packed__));
96 +/*
97 + * SRAT (NUMA topology description) table
98 + */
100 +#define SRAT_PROCESSOR 0
101 +#define SRAT_MEMORY 1
103 +struct system_resource_affinity_table
105 + ACPI_TABLE_HEADER_DEF
106 + uint32_t reserved1;
107 + uint32_t reserved2[2];
110 +struct srat_processor_affinity
112 + ACPI_SUB_HEADER_DEF
113 + uint8_t proximity_lo;
114 + uint8_t local_apic_id;
115 + uint32_t flags;
116 + uint8_t local_sapic_eid;
117 + uint8_t proximity_hi[3];
118 + uint32_t reserved;
121 +struct srat_memory_affinity
123 + ACPI_SUB_HEADER_DEF
124 + uint8_t proximity[4];
125 + uint16_t reserved1;
126 + uint32_t base_addr_low,base_addr_high;
127 + uint32_t length_low,length_high;
128 + uint32_t reserved2;
129 + uint32_t flags;
130 + uint32_t reserved3[2];
133 #ifdef BX_QEMU
135 * * ACPI 2.0 Generic Address Space definition.
136 @@ -1452,7 +1503,7 @@ struct acpi_20_hpet {
138 struct madt_io_apic
140 - APIC_HEADER_DEF
141 + ACPI_SUB_HEADER_DEF
142 uint8_t io_apic_id; /* I/O APIC ID */
143 uint8_t reserved; /* Reserved - must be zero */
144 uint32_t address; /* APIC physical address */
145 @@ -1463,7 +1514,7 @@ struct madt_io_apic
146 #ifdef BX_QEMU
147 struct madt_int_override
149 - APIC_HEADER_DEF
150 + ACPI_SUB_HEADER_DEF
151 uint8_t bus; /* Identifies ISA Bus */
152 uint8_t source; /* Bus-relative interrupt source */
153 uint32_t gsi; /* GSI that source will signal */
154 @@ -1567,6 +1618,21 @@ int acpi_build_processor_ssdt(uint8_t *ssdt)
155 return ssdt_ptr - ssdt;
158 +static void acpi_build_srat_memory(struct srat_memory_affinity *numamem,
159 + uint64_t base, uint64_t len, int node, int enabled)
161 + numamem->type = SRAT_MEMORY;
162 + numamem->length = sizeof(*numamem);
163 + memset (numamem->proximity, 0 ,4);
164 + numamem->proximity[0] = node;
165 + numamem->flags = cpu_to_le32(!!enabled);
166 + numamem->base_addr_low = base & 0xFFFFFFFF;
167 + numamem->base_addr_high = base >> 32;
168 + numamem->length_low = len & 0xFFFFFFFF;
169 + numamem->length_high = len >> 32;
170 + return;
173 /* base_addr must be a multiple of 4KB */
174 void acpi_bios_init(void)
176 @@ -1577,12 +1643,15 @@ void acpi_bios_init(void)
177 struct multiple_apic_table *madt;
178 uint8_t *dsdt, *ssdt;
179 #ifdef BX_QEMU
180 + struct system_resource_affinity_table *srat;
181 struct acpi_20_hpet *hpet;
182 uint32_t hpet_addr;
183 #endif
184 uint32_t base_addr, rsdt_addr, fadt_addr, addr, facs_addr, dsdt_addr, ssdt_addr;
185 uint32_t acpi_tables_size, madt_addr, madt_size, rsdt_size;
186 + uint32_t srat_addr,srat_size;
187 uint16_t i, external_tables;
188 + int nb_numa_nodes;
190 /* reserve memory space for tables */
191 #ifdef BX_USE_EBDA_TABLES
192 @@ -1624,6 +1693,25 @@ void acpi_bios_init(void)
193 ssdt_addr = addr;
194 ssdt = (void *)(addr);
195 addr += acpi_build_processor_ssdt(ssdt);
196 +#ifdef BX_QEMU
197 + qemu_cfg_select(QEMU_CFG_NUMA);
198 + nb_numa_nodes = qemu_cfg_get64();
199 +#else
200 + nb_numa_nodes = 0;
201 +#endif
202 + if (nb_numa_nodes > 0) {
203 + addr = (addr + 7) & ~7;
204 + srat_addr = addr;
205 + srat_size = sizeof(*srat) +
206 + sizeof(struct srat_processor_affinity) * smp_cpus +
207 + sizeof(struct srat_memory_affinity) * (nb_numa_nodes + 2);
208 + srat = (void *)(addr);
209 + addr += srat_size;
210 + } else {
211 + srat_addr = addr;
212 + srat = (void*)(addr);
213 + srat_size = 0;
216 addr = (addr + 7) & ~7;
217 madt_addr = addr;
218 @@ -1733,6 +1821,69 @@ void acpi_bios_init(void)
220 memset(rsdt, 0, rsdt_size);
221 #ifdef BX_QEMU
222 + /* SRAT */
223 + if (nb_numa_nodes > 0) {
224 + struct srat_processor_affinity *core;
225 + struct srat_memory_affinity *numamem;
226 + int slots;
227 + uint64_t mem_len, mem_base, next_base = 0, curnode;
229 + qemu_cfg_select(QEMU_CFG_NUMA);
230 + qemu_cfg_get64();
231 + memset (srat, 0 , srat_size);
232 + srat->reserved1=1;
234 + core = (void*)(srat + 1);
235 + for (i = 0; i < smp_cpus; ++i) {
236 + core->type = SRAT_PROCESSOR;
237 + core->length = sizeof(*core);
238 + core->local_apic_id = i;
239 + curnode = qemu_cfg_get64();
240 + core->proximity_lo = curnode;
241 + memset (core->proximity_hi, 0, 3);
242 + core->local_sapic_eid = 0;
243 + if (i < smp_cpus)
244 + core->flags = cpu_to_le32(1);
245 + else
246 + core->flags = 0;
247 + core++;
250 + /* the memory map is a bit tricky: it contains at least one hole
251 + * from 640k-1M and possibly another one from 3.5G-4G.
252 + */
253 + numamem = (void*)core; slots = 0;
254 + acpi_build_srat_memory(numamem, 0, 640*1024, 0, 1);
255 + next_base = 1024 * 1024; numamem++;slots++;
256 + for (i = 1; i < nb_numa_nodes + 1; ++i) {
257 + mem_base = next_base;
258 + mem_len = qemu_cfg_get64();
259 + if (i == 1) mem_len -= 1024 * 1024;
260 + next_base = mem_base + mem_len;
262 + /* Cut out the PCI hole */
263 + if (mem_base <= ram_size && next_base > ram_size) {
264 + mem_len -= next_base - ram_size;
265 + if (mem_len > 0) {
266 + acpi_build_srat_memory(numamem, mem_base, mem_len, i-1, 1);
267 + numamem++; slots++;
269 + mem_base = 1ULL << 32;
270 + mem_len = next_base - ram_size;
271 + next_base += (1ULL << 32) - ram_size;
273 + acpi_build_srat_memory(numamem, mem_base, mem_len, i-1, 1);
274 + numamem++; slots++;
276 + for (; slots < nb_numa_nodes + 2; slots++) {
277 + acpi_build_srat_memory(numamem, 0, 0, 0, 0);
278 + numamem++;
281 + acpi_build_table_header((struct acpi_table_header *)srat,
282 + "SRAT", srat_size, 1);
285 /* HPET */
286 memset(hpet, 0, sizeof(*hpet));
287 /* Note timer_block_id value must be kept in sync with value advertised by
288 @@ -1761,9 +1912,11 @@ void acpi_bios_init(void)
289 rsdt->table_offset_entry[2] = cpu_to_le32(ssdt_addr);
290 #ifdef BX_QEMU
291 rsdt->table_offset_entry[3] = cpu_to_le32(hpet_addr);
292 + if (nb_numa_nodes > 0)
293 + rsdt->table_offset_entry[4] = cpu_to_le32(srat_addr);
294 #endif
295 - acpi_build_table_header((struct acpi_table_header *)rsdt,
296 - "RSDT", rsdt_size, 1);
297 + acpi_build_table_header((struct acpi_table_header *)rsdt, "RSDT",
298 + rsdt_size - (nb_numa_nodes > 0? 0: sizeof(uint32_t)), 1);
300 acpi_tables_size = addr - base_addr;
303 1.6.1.3