/* $Id: init.c,v 1.143 1999/12/16 16:15:14 davem Exp $
 * arch/sparc64/mm/init.c
 *
 * Copyright (C) 1996-1999 David S. Miller (davem@caip.rutgers.edu)
 * Copyright (C) 1997-1999 Jakub Jelinek (jj@sunsite.mff.cuni.cz)
 */
#include <linux/config.h>
#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/string.h>
#include <linux/init.h>
#include <linux/bootmem.h>
#include <linux/mm.h>
#include <linux/malloc.h>
#include <linux/blk.h>
#include <linux/swap.h>
#include <linux/swapctl.h>

#include <asm/head.h>
#include <asm/system.h>
#include <asm/page.h>
#include <asm/pgalloc.h>
#include <asm/pgtable.h>
#include <asm/oplib.h>
#include <asm/iommu.h>
#include <asm/io.h>
#include <asm/uaccess.h>
#include <asm/mmu_context.h>
#include <asm/vaddrs.h>
#include <asm/dma.h>
extern void show_net_buffers(void);
extern void device_scan(void);

struct sparc_phys_banks sp_banks[SPARC_PHYS_BANKS];

unsigned long *sparc64_valid_addr_bitmap;

/* Ugly, but necessary... -DaveM */
unsigned long phys_base;

static unsigned long totalram_pages = 0;

/* get_new_mmu_context() uses "cache + 1". */
spinlock_t ctx_alloc_lock = SPIN_LOCK_UNLOCKED;
unsigned long tlb_context_cache = CTX_FIRST_VERSION - 1;
#define CTX_BMAP_SLOTS (1UL << (CTX_VERSION_SHIFT - 6))
unsigned long mmu_context_bmap[CTX_BMAP_SLOTS];

/* References to section boundaries */
extern char __init_begin, __init_end, _start, _end, etext, edata;
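
/* Trim the page table quicklists back down into the [low, high]
 * watermark range and return the number of pages freed.
 */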
int do_check_pgt_cache(int low, int high)
{
	int freed = 0;

	if(pgtable_cache_size > high) {
		do {
#ifdef __SMP__
			if(pgd_quicklist)
				free_pgd_slow(get_pgd_fast()), freed++;
#endif
			if(pte_quicklist[0])
				free_pte_slow(get_pte_fast(0)), freed++;
			if(pte_quicklist[1])
				free_pte_slow(get_pte_fast(1)), freed++;
		} while(pgtable_cache_size > low);
	}
#ifndef __SMP__
	if (pgd_cache_size > high / 4) {
		struct page *page, *page2;
		for (page2 = NULL, page = (struct page *)pgd_quicklist; page;) {
			if ((unsigned long)page->pprev_hash == 3) {
				if (page2)
					page2->next_hash = page->next_hash;
				else
					(struct page *)pgd_quicklist = page->next_hash;
				page->next_hash = NULL;
				page->pprev_hash = NULL;
				pgd_cache_size -= 2;
				__free_page(page);
				freed++;
				if (page2)
					page = page2->next_hash;
				else
					page = (struct page *)pgd_quicklist;
				if (pgd_cache_size <= low / 4)
					break;
				continue;
			}
			page2 = page;
			page = page->next_hash;
		}
	}
#endif
	return freed;
}
/*
 * BAD_PAGE is the page that is used for page faults when linux
 * is out-of-memory. Older versions of linux just did a
 * do_exit(), but using this instead means there is less risk
 * of a process dying in kernel mode, possibly leaving an inode
 * unused, etc.
 *
 * BAD_PAGETABLE is the accompanying page-table: it is initialized
 * to point to BAD_PAGE entries.
 *
 * ZERO_PAGE is a special page that is used for zero-initialized
 * data and COW.
 */
pte_t __bad_page(void)
{
	memset((void *) &empty_bad_page, 0, PAGE_SIZE);
	return pte_mkdirty(mk_pte_phys((((unsigned long) &empty_bad_page)
					- ((unsigned long)&empty_zero_page)
					+ phys_base),
				       PAGE_SHARED));
}
void show_mem(void)
{
	printk("Mem-info:\n");
	show_free_areas();
	printk("Free swap: %6dkB\n",
	       nr_swap_pages << (PAGE_SHIFT-10));
	printk("%ld pages of RAM\n", totalram_pages);
	printk("%d free pages\n", nr_free_pages());
	printk("%d pages in page table cache\n",pgtable_cache_size);
#ifndef __SMP__
	printk("%d entries in page dir cache\n",pgd_cache_size);
#endif
	show_buffers();
#ifdef CONFIG_NET
	show_net_buffers();
#endif
}
int mmu_info(char *buf)
{
	/* We'll do the rest later to make it nice... -DaveM */
#if 0
	if (this_is_cheetah)
		sprintf(buf, "MMU Type\t: One bad ass cpu\n");
	else
#endif
	return sprintf(buf, "MMU Type\t: Spitfire\n");
}
struct linux_prom_translation {
	unsigned long virt;
	unsigned long size;
	unsigned long data;
};

extern unsigned long prom_boot_page;
extern void prom_remap(unsigned long physpage, unsigned long virtpage, int mmu_ihandle);
extern int prom_get_mmu_ihandle(void);
extern void register_prom_callbacks(void);

/* Exported for SMP bootup purposes. */
unsigned long kern_locked_tte_data;
void __init early_pgtable_allocfail(char *type)
{
	prom_printf("inherit_prom_mappings: Cannot alloc kernel %s.\n", type);
	prom_halt();
}
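
/* Mirror the firmware's "translations" property entries that live in
 * the 0xf0000000-0xffffffff window into the kernel page tables, lock a
 * temporary mapping of the PROM boot page at virtual address 0 into
 * I/D TLB entry 59, use the relocated prom_remap() to have OBP relocate
 * itself against the kernel's locked translation at empty_zero_page,
 * then flush the temporary mapping and prom_unmap() any low firmware
 * mappings outside the kernel's own 4MB region.
 */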
static void inherit_prom_mappings(void)
{
	struct linux_prom_translation *trans;
	unsigned long phys_page, tte_vaddr, tte_data;
	void (*remap_func)(unsigned long, unsigned long, int);
	pgd_t *pgdp;
	pmd_t *pmdp;
	pte_t *ptep;
	int node, n, i, tsz;

	node = prom_finddevice("/virtual-memory");
	n = prom_getproplen(node, "translations");
	if (n == 0 || n == -1) {
		prom_printf("Couldn't get translation property\n");
		prom_halt();
	}
	n += 5 * sizeof(struct linux_prom_translation);
	for (tsz = 1; tsz < n; tsz <<= 1)
		/* empty */;
	trans = __alloc_bootmem(tsz, SMP_CACHE_BYTES, 0UL);
	if (trans == NULL) {
		prom_printf("inherit_prom_mappings: Cannot alloc translations.\n");
		prom_halt();
	}
	memset(trans, 0, tsz);

	if ((n = prom_getproperty(node, "translations", (char *)trans, tsz)) == -1) {
		prom_printf("Couldn't get translation property\n");
		prom_halt();
	}
	n = n / sizeof(*trans);

	for (i = 0; i < n; i++) {
		unsigned long vaddr;

		if (trans[i].virt >= 0xf0000000 && trans[i].virt < 0x100000000) {
			for (vaddr = trans[i].virt;
			     vaddr < trans[i].virt + trans[i].size;
			     vaddr += PAGE_SIZE) {
				pgdp = pgd_offset(&init_mm, vaddr);
				if (pgd_none(*pgdp)) {
					pmdp = __alloc_bootmem(PMD_TABLE_SIZE,
							       PMD_TABLE_SIZE,
							       0UL);
					if (pmdp == NULL)
						early_pgtable_allocfail("pmd");
					memset(pmdp, 0, PMD_TABLE_SIZE);
					pgd_set(pgdp, pmdp);
				}
				pmdp = pmd_offset(pgdp, vaddr);
				if (pmd_none(*pmdp)) {
					ptep = __alloc_bootmem(PTE_TABLE_SIZE,
							       PTE_TABLE_SIZE,
							       0UL);
					if (ptep == NULL)
						early_pgtable_allocfail("pte");
					memset(ptep, 0, PTE_TABLE_SIZE);
					pmd_set(pmdp, ptep);
				}
				ptep = pte_offset(pmdp, vaddr);
				set_pte (ptep, __pte(trans[i].data | _PAGE_MODIFIED));
				trans[i].data += PAGE_SIZE;
			}
		}
	}
	/* Now fixup OBP's idea about where we really are mapped. */
	prom_printf("Remapping the kernel... ");
	phys_page = spitfire_get_dtlb_data(63) & _PAGE_PADDR;
	phys_page += ((unsigned long)&prom_boot_page -
		      (unsigned long)&empty_zero_page);

	/* Lock this into i/d tlb entry 59 */
	__asm__ __volatile__(
		"stxa %%g0, [%2] %3\n\t"
		"stxa %0, [%1] %4\n\t"
		"membar #Sync\n\t"
		"flush %%g6\n\t"
		"stxa %%g0, [%2] %5\n\t"
		"stxa %0, [%1] %6\n\t"
		"membar #Sync\n\t"
		"flush %%g6"
	: : "r" (phys_page | _PAGE_VALID | _PAGE_SZ8K | _PAGE_CP |
		 _PAGE_CV | _PAGE_P | _PAGE_L | _PAGE_W),
	    "r" (59 << 3), "r" (TLB_TAG_ACCESS),
	    "i" (ASI_DMMU), "i" (ASI_DTLB_DATA_ACCESS),
	    "i" (ASI_IMMU), "i" (ASI_ITLB_DATA_ACCESS)
	: "memory");

	tte_vaddr = (unsigned long) &empty_zero_page;
	kern_locked_tte_data = tte_data = spitfire_get_dtlb_data(63);

	remap_func = (void *) ((unsigned long) &prom_remap -
			       (unsigned long) &prom_boot_page);

	remap_func(spitfire_get_dtlb_data(63) & _PAGE_PADDR,
		   (unsigned long) &empty_zero_page,
		   prom_get_mmu_ihandle());

	/* Flush out that temporary mapping. */
	spitfire_flush_dtlb_nucleus_page(0x0);
	spitfire_flush_itlb_nucleus_page(0x0);

	/* Now lock us back into the TLBs via OBP. */
	prom_dtlb_load(63, tte_data, tte_vaddr);
	prom_itlb_load(63, tte_data, tte_vaddr);

	/* Re-read translations property. */
	if ((n = prom_getproperty(node, "translations", (char *)trans, tsz)) == -1) {
		prom_printf("Couldn't get translation property\n");
		prom_halt();
	}
	n = n / sizeof(*trans);

	for (i = 0; i < n; i++) {
		unsigned long vaddr = trans[i].virt;
		unsigned long size = trans[i].size;

		if (vaddr < 0xf0000000UL) {
			unsigned long avoid_start = (unsigned long) &empty_zero_page;
			unsigned long avoid_end = avoid_start + (4 * 1024 * 1024);

			if (vaddr < avoid_start) {
				unsigned long top = vaddr + size;

				if (top > avoid_start)
					top = avoid_start;
				prom_unmap(top - vaddr, vaddr);
			}
			if ((vaddr + size) > avoid_end) {
				unsigned long bottom = vaddr;

				if (bottom < avoid_end)
					bottom = avoid_end;
				prom_unmap((vaddr + size) - bottom, bottom);
			}
		}
	}

	prom_printf("done.\n");

	register_prom_callbacks();
}
/* The OBP specifications for sun4u mark 0xfffffffc00000000 and
 * upwards as reserved for use by the firmware (I wonder if this
 * will be the same on Cheetah...). We use this virtual address
 * range for the VPTE table mappings of the nucleus so we need
 * to zap them when we enter the PROM. -DaveM
 */
static void __flush_nucleus_vptes(void)
{
	unsigned long prom_reserved_base = 0xfffffffc00000000UL;
	int i;

	/* Only DTLB must be checked for VPTE entries. */
	for(i = 0; i < 63; i++) {
		unsigned long tag = spitfire_get_dtlb_tag(i);

		if(((tag & ~(PAGE_MASK)) == 0) &&
		   ((tag & (PAGE_MASK)) >= prom_reserved_base)) {
			__asm__ __volatile__("stxa %%g0, [%0] %1"
					     : /* no outputs */
					     : "r" (TLB_TAG_ACCESS), "i" (ASI_DMMU));
			membar("#Sync");
			spitfire_put_dtlb_data(i, 0x0UL);
			membar("#Sync");
		}
	}
}
static int prom_ditlb_set = 0;
struct prom_tlb_entry {
	int		tlb_ent;
	unsigned long	tlb_tag;
	unsigned long	tlb_data;
};
struct prom_tlb_entry prom_itlb[8], prom_dtlb[8];
void prom_world(int enter)
{
	unsigned long pstate;
	int i;

	if (!enter)
		set_fs(current->thread.current_ds);

	if (!prom_ditlb_set)
		return;

	/* Make sure the following runs atomically. */
	__asm__ __volatile__("flushw\n\t"
			     "rdpr %%pstate, %0\n\t"
			     "wrpr %0, %1, %%pstate"
			     : "=r" (pstate)
			     : "i" (PSTATE_IE));

	if (enter) {
		/* Kick out nucleus VPTEs. */
		__flush_nucleus_vptes();

		/* Install PROM world. */
		for (i = 0; i < 8; i++) {
			if (prom_dtlb[i].tlb_ent != -1) {
				__asm__ __volatile__("stxa %0, [%1] %2"
					: : "r" (prom_dtlb[i].tlb_tag), "r" (TLB_TAG_ACCESS),
					    "i" (ASI_DMMU));
				membar("#Sync");
				spitfire_put_dtlb_data(prom_dtlb[i].tlb_ent,
						       prom_dtlb[i].tlb_data);
				membar("#Sync");
			}

			if (prom_itlb[i].tlb_ent != -1) {
				__asm__ __volatile__("stxa %0, [%1] %2"
					: : "r" (prom_itlb[i].tlb_tag), "r" (TLB_TAG_ACCESS),
					    "i" (ASI_IMMU));
				membar("#Sync");
				spitfire_put_itlb_data(prom_itlb[i].tlb_ent,
						       prom_itlb[i].tlb_data);
				membar("#Sync");
			}
		}
	} else {
		for (i = 0; i < 8; i++) {
			if (prom_dtlb[i].tlb_ent != -1) {
				__asm__ __volatile__("stxa %%g0, [%0] %1"
					: : "r" (TLB_TAG_ACCESS), "i" (ASI_DMMU));
				membar("#Sync");
				spitfire_put_dtlb_data(prom_dtlb[i].tlb_ent, 0x0UL);
				membar("#Sync");
			}
			if (prom_itlb[i].tlb_ent != -1) {
				__asm__ __volatile__("stxa %%g0, [%0] %1"
					: : "r" (TLB_TAG_ACCESS), "i" (ASI_IMMU));
				membar("#Sync");
				spitfire_put_itlb_data(prom_itlb[i].tlb_ent, 0x0UL);
				membar("#Sync");
			}
		}
	}
	__asm__ __volatile__("wrpr %0, 0, %%pstate"
			     : : "r" (pstate));
}
void inherit_locked_prom_mappings(int save_p)
{
	int i;
	int dtlb_seen = 0;
	int itlb_seen = 0;

	/* Fucking losing PROM has more mappings in the TLB, but
	 * it (conveniently) fails to mention any of these in the
	 * translations property.  The only ones that matter are
	 * the locked PROM tlb entries, so we impose the following
	 * irrecoverable rule on the PROM, it is allowed 8 locked
	 * entries in the ITLB and 8 in the DTLB.
	 *
	 * Supposedly the upper 16GB of the address space is
	 * reserved for OBP, BUT I WISH THIS WAS DOCUMENTED
	 * SOMEWHERE!!!!!!!!!!!!!!!!! Furthermore the entire interface
	 * used between the client program and the firmware on sun5
	 * systems to coordinate mmu mappings is also COMPLETELY
	 * UNDOCUMENTED!!!!!! Thanks S(t)un!
	 */
	if (save_p) {
		for(i = 0; i < 8; i++) {
			prom_dtlb[i].tlb_ent = -1;
			prom_itlb[i].tlb_ent = -1;
		}
	}
	for(i = 0; i < 63; i++) {
		unsigned long data;

		data = spitfire_get_dtlb_data(i);
		if((data & (_PAGE_L|_PAGE_VALID)) == (_PAGE_L|_PAGE_VALID)) {
			unsigned long tag = spitfire_get_dtlb_tag(i);

			if(save_p) {
				prom_dtlb[dtlb_seen].tlb_ent = i;
				prom_dtlb[dtlb_seen].tlb_tag = tag;
				prom_dtlb[dtlb_seen].tlb_data = data;
			}
			__asm__ __volatile__("stxa %%g0, [%0] %1"
					     : : "r" (TLB_TAG_ACCESS), "i" (ASI_DMMU));
			membar("#Sync");
			spitfire_put_dtlb_data(i, 0x0UL);
			membar("#Sync");

			dtlb_seen++;
			if(dtlb_seen > 7)
				break;
		}
	}
	for(i = 0; i < 63; i++) {
		unsigned long data;

		data = spitfire_get_itlb_data(i);
		if((data & (_PAGE_L|_PAGE_VALID)) == (_PAGE_L|_PAGE_VALID)) {
			unsigned long tag = spitfire_get_itlb_tag(i);

			if(save_p) {
				prom_itlb[itlb_seen].tlb_ent = i;
				prom_itlb[itlb_seen].tlb_tag = tag;
				prom_itlb[itlb_seen].tlb_data = data;
			}
			__asm__ __volatile__("stxa %%g0, [%0] %1"
					     : : "r" (TLB_TAG_ACCESS), "i" (ASI_IMMU));
			membar("#Sync");
			spitfire_put_itlb_data(i, 0x0UL);
			membar("#Sync");

			itlb_seen++;
			if(itlb_seen > 7)
				break;
		}
	}
	if (save_p)
		prom_ditlb_set = 1;
}
/* Give PROM back his world, done during reboots... */
void prom_reload_locked(void)
{
	int i;

	for (i = 0; i < 8; i++) {
		if (prom_dtlb[i].tlb_ent != -1) {
			__asm__ __volatile__("stxa %0, [%1] %2"
				: : "r" (prom_dtlb[i].tlb_tag), "r" (TLB_TAG_ACCESS),
				    "i" (ASI_DMMU));
			membar("#Sync");
			spitfire_put_dtlb_data(prom_dtlb[i].tlb_ent,
					       prom_dtlb[i].tlb_data);
			membar("#Sync");
		}

		if (prom_itlb[i].tlb_ent != -1) {
			__asm__ __volatile__("stxa %0, [%1] %2"
				: : "r" (prom_itlb[i].tlb_tag), "r" (TLB_TAG_ACCESS),
				    "i" (ASI_IMMU));
			membar("#Sync");
			spitfire_put_itlb_data(prom_itlb[i].tlb_ent,
					       prom_itlb[i].tlb_data);
			membar("#Sync");
		}
	}
}
void __flush_dcache_range(unsigned long start, unsigned long end)
{
	unsigned long va;
	int n = 0;

	for (va = start; va < end; va += 32) {
		spitfire_put_dcache_tag(va & 0x3fe0, 0x0);
		if (++n >= 512)
			break;
	}
}
void __flush_cache_all(void)
{
	unsigned long va;

	flushw_all();
	for(va = 0; va < (PAGE_SIZE << 1); va += 32)
		spitfire_put_icache_tag(va, 0x0);
}
/* If not locked, zap it. */
void __flush_tlb_all(void)
{
	unsigned long pstate;
	int i;

	__asm__ __volatile__("flushw\n\t"
			     "rdpr %%pstate, %0\n\t"
			     "wrpr %0, %1, %%pstate"
			     : "=r" (pstate)
			     : "i" (PSTATE_IE));
	for(i = 0; i < 64; i++) {
		if(!(spitfire_get_dtlb_data(i) & _PAGE_L)) {
			__asm__ __volatile__("stxa %%g0, [%0] %1"
					     : /* no outputs */
					     : "r" (TLB_TAG_ACCESS), "i" (ASI_DMMU));
			membar("#Sync");
			spitfire_put_dtlb_data(i, 0x0UL);
			membar("#Sync");
		}
		if(!(spitfire_get_itlb_data(i) & _PAGE_L)) {
			__asm__ __volatile__("stxa %%g0, [%0] %1"
					     : /* no outputs */
					     : "r" (TLB_TAG_ACCESS), "i" (ASI_IMMU));
			membar("#Sync");
			spitfire_put_itlb_data(i, 0x0UL);
			membar("#Sync");
		}
	}
	__asm__ __volatile__("wrpr %0, 0, %%pstate"
			     : : "r" (pstate));
}
/* Caller does TLB context flushing on local CPU if necessary.
 *
 * We must be careful about boundary cases so that we never
 * let the user have CTX 0 (nucleus) or ever use a CTX
 * version of zero (and thus NO_CONTEXT would not be caught
 * by version mis-match tests in mmu_context.h).
 */
void get_new_mmu_context(struct mm_struct *mm)
{
	unsigned long ctx, new_ctx;

	spin_lock(&ctx_alloc_lock);
	ctx = CTX_HWBITS(tlb_context_cache + 1);
	if (ctx == 0)
		ctx = 1;
	if (CTX_VALID(mm->context)) {
		unsigned long nr = CTX_HWBITS(mm->context);
		mmu_context_bmap[nr>>6] &= ~(1UL << (nr & 63));
	}
	new_ctx = find_next_zero_bit(mmu_context_bmap, 1UL << CTX_VERSION_SHIFT, ctx);
	if (new_ctx >= (1UL << CTX_VERSION_SHIFT)) {
		new_ctx = find_next_zero_bit(mmu_context_bmap, ctx, 1);
		if (new_ctx >= ctx) {
			int i;
			new_ctx = (tlb_context_cache & CTX_VERSION_MASK) +
				CTX_FIRST_VERSION;
			if (new_ctx == 1)
				new_ctx = CTX_FIRST_VERSION;

			/* Don't call memset, for 16 entries that's just
			 * plain silly...
			 */
			mmu_context_bmap[0] = 3;
			mmu_context_bmap[1] = 0;
			mmu_context_bmap[2] = 0;
			mmu_context_bmap[3] = 0;
			for(i = 4; i < CTX_BMAP_SLOTS; i += 4) {
				mmu_context_bmap[i + 0] = 0;
				mmu_context_bmap[i + 1] = 0;
				mmu_context_bmap[i + 2] = 0;
				mmu_context_bmap[i + 3] = 0;
			}
			goto out;
		}
	}
	mmu_context_bmap[new_ctx>>6] |= (1UL << (new_ctx & 63));
	new_ctx |= (tlb_context_cache & CTX_VERSION_MASK);
out:
	tlb_context_cache = new_ctx;
	spin_unlock(&ctx_alloc_lock);

	mm->context = new_ctx;
}
#ifndef __SMP__
struct pgtable_cache_struct pgt_quicklists;
#endif
/* For PMDs we don't care about the color, writes are
 * only done via Dcache which is write-thru, so non-Dcache
 * reads will always see correct data.
 */
pmd_t *get_pmd_slow(pgd_t *pgd, unsigned long offset)
{
	pmd_t *pmd;

	pmd = (pmd_t *) __get_free_page(GFP_KERNEL);
	if(pmd) {
		memset(pmd, 0, PAGE_SIZE);
		pgd_set(pgd, pmd);
		return pmd + offset;
	}
	return NULL;
}
/* OK, we have to color these pages because during DTLB
 * protection faults we set the dirty bit via a non-Dcache
 * enabled mapping in the VPTE area.  The kernel can end
 * up missing the dirty bit resulting in processes crashing
 * _iff_ the VPTE mapping of the ptes has a virtual address
 * bit 13 which is different from bit 13 of the physical address.
 *
 * The sequence is:
 *	1) DTLB protection fault, write dirty bit into pte via VPTE
 *	   mappings.
 *	2) Swapper checks pte, does not see dirty bit, frees page.
 *	3) Process faults back in the page, the old pre-dirtied copy
 *	   is provided and here is the corruption.
 */
pte_t *get_pte_slow(pmd_t *pmd, unsigned long offset, unsigned long color)
{
	unsigned long paddr = __get_free_pages(GFP_KERNEL, 1);

	if (paddr) {
		struct page *page2 = mem_map + MAP_NR(paddr + PAGE_SIZE);
		unsigned long *to_free;
		pte_t *pte;

		/* Set count of second page, so we can free it
		 * separately later on.
		 */
		atomic_set(&page2->count, 1);

		/* Clear out both pages now. */
		memset((char *)paddr, 0, (PAGE_SIZE << 1));

		/* Determine which page we give to this request. */
		if (!color) {
			pte = (pte_t *) paddr;
			to_free = (unsigned long *) (paddr + PAGE_SIZE);
		} else {
			pte = (pte_t *) (paddr + PAGE_SIZE);
			to_free = (unsigned long *) paddr;
		}

		/* Now free the other one up, adjust cache size. */
		*to_free = (unsigned long) pte_quicklist[color ^ 0x1];
		pte_quicklist[color ^ 0x1] = to_free;
		pgtable_cache_size++;

		pmd_set(pmd, pte);
		return pte + offset;
	}
	return NULL;
}
void sparc_ultra_dump_itlb(void)
{
	int slot;

	printk ("Contents of itlb: ");
	for (slot = 0; slot < 14; slot++) printk (" ");
	printk ("%2x:%016lx,%016lx\n", 0, spitfire_get_itlb_tag(0), spitfire_get_itlb_data(0));
	for (slot = 1; slot < 64; slot+=3) {
		printk ("%2x:%016lx,%016lx %2x:%016lx,%016lx %2x:%016lx,%016lx\n",
			slot, spitfire_get_itlb_tag(slot), spitfire_get_itlb_data(slot),
			slot+1, spitfire_get_itlb_tag(slot+1), spitfire_get_itlb_data(slot+1),
			slot+2, spitfire_get_itlb_tag(slot+2), spitfire_get_itlb_data(slot+2));
	}
}
void sparc_ultra_dump_dtlb(void)
{
	int slot;

	printk ("Contents of dtlb: ");
	for (slot = 0; slot < 14; slot++) printk (" ");
	printk ("%2x:%016lx,%016lx\n", 0, spitfire_get_dtlb_tag(0),
		spitfire_get_dtlb_data(0));
	for (slot = 1; slot < 64; slot+=3) {
		printk ("%2x:%016lx,%016lx %2x:%016lx,%016lx %2x:%016lx,%016lx\n",
			slot, spitfire_get_dtlb_tag(slot), spitfire_get_dtlb_data(slot),
			slot+1, spitfire_get_dtlb_tag(slot+1), spitfire_get_dtlb_data(slot+1),
			slot+2, spitfire_get_dtlb_tag(slot+2), spitfire_get_dtlb_data(slot+2));
	}
}
#undef DEBUG_BOOTMEM

extern unsigned long cmdline_memory_size;
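
/* Scan sp_banks[], apply any mem= command line limit, initialize the
 * bootmem allocator over [start_pfn, end_pfn), register each physical
 * bank as free, and reserve the kernel image plus the bootmem bitmap.
 * Returns the page frame number at the end of physical memory.
 */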
unsigned long __init bootmem_init(void)
{
	unsigned long bootmap_size, start_pfn, end_pfn;
	unsigned long end_of_phys_memory = 0UL;
	int i;

	/* XXX It is a bit ambiguous here, whether we should
	 * XXX treat the user specified mem=xxx as total wanted
	 * XXX physical memory, or as a limit to the upper
	 * XXX physical address we allow.  For now it is the
	 * XXX latter. -DaveM
	 */
#ifdef DEBUG_BOOTMEM
	prom_printf("bootmem_init: Scan sp_banks, ");
#endif
	for (i = 0; sp_banks[i].num_bytes != 0; i++) {
		end_of_phys_memory = sp_banks[i].base_addr +
			sp_banks[i].num_bytes;
		if (cmdline_memory_size) {
			if (end_of_phys_memory > cmdline_memory_size) {
				if (cmdline_memory_size > sp_banks[i].base_addr) {
					end_of_phys_memory =
						sp_banks[i-1].base_addr +
						sp_banks[i-1].num_bytes;
					sp_banks[i].base_addr = 0xdeadbeef;
					sp_banks[i].num_bytes = 0;
				} else {
					sp_banks[i].num_bytes -=
						(end_of_phys_memory -
						 cmdline_memory_size);
					end_of_phys_memory = cmdline_memory_size;
					sp_banks[++i].base_addr = 0xdeadbeef;
					sp_banks[i].num_bytes = 0;
				}
				break;
			}
		}
	}

	/* Start with page aligned address of last symbol in kernel
	 * image.  The kernel is hard mapped below PAGE_OFFSET in a
	 * 4MB locked TLB translation.
	 */
	start_pfn = PAGE_ALIGN((unsigned long) &_end) -
		((unsigned long) &empty_zero_page);

	/* Adjust up to the physical address where the kernel begins. */
	start_pfn += phys_base;

	/* Now shift down to get the real physical page frame number. */
	start_pfn >>= PAGE_SHIFT;

	end_pfn = end_of_phys_memory >> PAGE_SHIFT;

	/* Initialize the boot-time allocator. */
#ifdef DEBUG_BOOTMEM
	prom_printf("init_bootmem(spfn[%lx],epfn[%lx])\n",
		    start_pfn, end_pfn);
#endif
	bootmap_size = init_bootmem(start_pfn, end_pfn);

	/* Now register the available physical memory with the
	 * allocator.
	 */
	for (i = 0; sp_banks[i].num_bytes != 0; i++) {
#ifdef DEBUG_BOOTMEM
		prom_printf("free_bootmem: base[%lx] size[%lx]\n",
			    sp_banks[i].base_addr,
			    sp_banks[i].num_bytes);
#endif
		free_bootmem(sp_banks[i].base_addr,
			     sp_banks[i].num_bytes);
	}

	/* Reserve the kernel text/data/bss and the bootmem bitmap. */
#ifdef DEBUG_BOOTMEM
	prom_printf("reserve_bootmem: base[%lx] size[%lx]\n",
		    phys_base,
		    (((start_pfn << PAGE_SHIFT) +
		      bootmap_size) - phys_base));
#endif
	reserve_bootmem(phys_base, (((start_pfn << PAGE_SHIFT) +
				     bootmap_size) - phys_base));

#ifdef DEBUG_BOOTMEM
	prom_printf("init_bootmem: return end_pfn[%lx]\n", end_pfn);
#endif
	return end_pfn;
}
/* paging_init() sets up the page tables */

extern void sun_serial_setup(void);

static unsigned long last_valid_pfn;
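
/* Rough order of events below: lock an extra 4MB mapping for the
 * kernel's physical alias into DTLB entry 61 (and 60 if the image is
 * large), shift init_mm.pgd and swapper_pg_dir to that alias so
 * physical address arithmetic works, patch the VPTE miss handler, run
 * bootmem_init(), inherit the firmware's mappings, install the trap
 * table via setup_tba() (peeking for StarFire first), and finally size
 * the single DMA zone from end_pfn.
 */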
void __init paging_init(void)
{
	extern pmd_t swapper_pmd_dir[1024];
	extern unsigned int sparc64_vpte_patchme1[1];
	extern unsigned int sparc64_vpte_patchme2[1];
	unsigned long alias_base = phys_base + PAGE_OFFSET;
	unsigned long second_alias_page = 0;
	unsigned long pt, flags, end_pfn;
	unsigned long shift = alias_base - ((unsigned long)&empty_zero_page);

	set_bit(0, mmu_context_bmap);
	/* We assume physical memory starts at some 4mb multiple,
	 * if this were not true we wouldn't boot up to this point
	 * anyways.
	 */
	pt  = phys_base | _PAGE_VALID | _PAGE_SZ4MB;
	pt |= _PAGE_CP | _PAGE_CV | _PAGE_P | _PAGE_L | _PAGE_W;
	__save_and_cli(flags);
	__asm__ __volatile__("
	stxa	%1, [%0] %3
	stxa	%2, [%5] %4
	membar	#Sync
	flush	%%g6
	nop
	nop
	nop"
	: /* No outputs */
	: "r" (TLB_TAG_ACCESS), "r" (alias_base), "r" (pt),
	  "i" (ASI_DMMU), "i" (ASI_DTLB_DATA_ACCESS), "r" (61 << 3)
	: "memory");
	if (((unsigned long)&_end) >= KERNBASE + 0x340000) {
		second_alias_page = alias_base + 0x400000;
		__asm__ __volatile__("
		stxa	%1, [%0] %3
		stxa	%2, [%5] %4
		membar	#Sync
		flush	%%g6
		nop
		nop
		nop"
		: /* No outputs */
		: "r" (TLB_TAG_ACCESS), "r" (second_alias_page), "r" (pt + 0x400000),
		  "i" (ASI_DMMU), "i" (ASI_DTLB_DATA_ACCESS), "r" (60 << 3)
		: "memory");
	}
	__restore_flags(flags);

	/* Now set kernel pgd to upper alias so physical page computations
	 * work.
	 */
	init_mm.pgd += ((shift) / (sizeof(pgd_t)));

	memset(swapper_pmd_dir, 0, sizeof(swapper_pmd_dir));

	/* Now can init the kernel/bad page tables. */
	pgd_set(&swapper_pg_dir[0], swapper_pmd_dir + (shift / sizeof(pgd_t)));

	sparc64_vpte_patchme1[0] |= (pgd_val(init_mm.pgd[0]) >> 10);
	sparc64_vpte_patchme2[0] |= (pgd_val(init_mm.pgd[0]) & 0x3ff);
	flushi((long)&sparc64_vpte_patchme1[0]);

	/* Setup bootmem... */
	last_valid_pfn = end_pfn = bootmem_init();

#ifdef CONFIG_SUN_SERIAL
	/* This does not logically belong here, but we need to
	 * call it at the moment we are able to use the bootmem
	 * allocator.
	 */
	sun_serial_setup();
#endif

	/* Inherit non-locked OBP mappings. */
	inherit_prom_mappings();

	/* Ok, we can use our TLB miss and window trap handlers safely.
	 * We need to do a quick peek here to see if we are on StarFire
	 * or not, so setup_tba can setup the IRQ globals correctly (it
	 * needs to get the hard smp processor id correctly).
	 */
	{
		extern void setup_tba(int);
		int is_starfire = prom_finddevice("/ssp-serial");
		if (is_starfire != 0 && is_starfire != -1)
			is_starfire = 1;
		else
			is_starfire = 0;
		setup_tba(is_starfire);
	}

	inherit_locked_prom_mappings(1);

	/* We only created DTLB mapping of this stuff. */
	spitfire_flush_dtlb_nucleus_page(alias_base);
	if (second_alias_page)
		spitfire_flush_dtlb_nucleus_page(second_alias_page);

	flush_tlb_all();

	{
		unsigned int zones_size[MAX_NR_ZONES] = { 0, 0, 0};

		zones_size[ZONE_DMA] = end_pfn;
		free_area_init(zones_size);
	}

	device_scan();
}
/* Ok, it seems that the prom can allocate some more memory chunks
 * as a side effect of some prom calls we perform during the
 * boot sequence.  My most likely theory is that it is from the
 * prom_set_traptable() call, and OBP is allocating a scratchpad
 * for saving client program register state etc.
 */
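/* Sort the firmware's "available" memory list in place by ascending
 * start_adr; the list is small, so a simple shift-insert pass is fine.
 */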
void __init sort_memlist(struct linux_mlist_p1275 *thislist)
{
	int swapi = 0;
	int i, mitr;
	unsigned long tmpaddr, tmpsize;
	unsigned long lowest;

	for (i = 0; thislist[i].theres_more != 0; i++) {
		lowest = thislist[i].start_adr;
		for (mitr = i+1; thislist[mitr-1].theres_more != 0; mitr++)
			if (thislist[mitr].start_adr < lowest) {
				lowest = thislist[mitr].start_adr;
				swapi = mitr;
			}
		if (lowest == thislist[i].start_adr)
			continue;
		tmpaddr = thislist[swapi].start_adr;
		tmpsize = thislist[swapi].num_bytes;
		for (mitr = swapi; mitr > i; mitr--) {
			thislist[mitr].start_adr = thislist[mitr-1].start_adr;
			thislist[mitr].num_bytes = thislist[mitr-1].num_bytes;
		}
		thislist[i].start_adr = tmpaddr;
		thislist[i].num_bytes = tmpsize;
	}
}
void __init rescan_sp_banks(void)
{
	struct linux_prom64_registers memlist[64];
	struct linux_mlist_p1275 avail[64], *mlist;
	unsigned long bytes, base_paddr;
	int num_regs, node = prom_finddevice("/memory");
	int i;

	num_regs = prom_getproperty(node, "available",
				    (char *) memlist, sizeof(memlist));
	num_regs = (num_regs / sizeof(struct linux_prom64_registers));
	for (i = 0; i < num_regs; i++) {
		avail[i].start_adr = memlist[i].phys_addr;
		avail[i].num_bytes = memlist[i].reg_size;
		avail[i].theres_more = &avail[i + 1];
	}
	avail[i - 1].theres_more = NULL;
	sort_memlist(avail);

	mlist = &avail[0];
	i = 0;
	bytes = mlist->num_bytes;
	base_paddr = mlist->start_adr;

	sp_banks[0].base_addr = base_paddr;
	sp_banks[0].num_bytes = bytes;

	while (mlist->theres_more != NULL){
		i++;
		mlist = mlist->theres_more;
		bytes = mlist->num_bytes;
		if (i >= SPARC_PHYS_BANKS-1) {
			printk ("The machine has more banks than "
				"this kernel can support\n"
				"Increase the SPARC_PHYS_BANKS "
				"setting (currently %d)\n",
				SPARC_PHYS_BANKS);
			i = SPARC_PHYS_BANKS-1;
			break;
		}

		sp_banks[i].base_addr = mlist->start_adr;
		sp_banks[i].num_bytes = mlist->num_bytes;
	}

	i++;
	sp_banks[i].base_addr = 0xdeadbeefbeefdeadUL;
	sp_banks[i].num_bytes = 0;

	for (i = 0; sp_banks[i].num_bytes != 0; i++)
		sp_banks[i].num_bytes &= PAGE_MASK;
}
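
/* Re-read the firmware's idea of available memory (it may have shrunk
 * during boot-time PROM calls), mark every 4MB chunk that is still
 * present in sparc64_valid_addr_bitmap, and reserve in bootmem any
 * page that has disappeared since the first scan.
 */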
static void __init taint_real_pages(void)
{
	struct sparc_phys_banks saved_sp_banks[SPARC_PHYS_BANKS];
	int i;

#ifdef DEBUG_BOOTMEM
	prom_printf("taint_real_pages: Rescan sp_banks[].\n");
#endif
	for (i = 0; i < SPARC_PHYS_BANKS; i++) {
		saved_sp_banks[i].base_addr =
			sp_banks[i].base_addr;
		saved_sp_banks[i].num_bytes =
			sp_banks[i].num_bytes;
	}

	rescan_sp_banks();

	/* Find changes discovered in the sp_bank rescan and
	 * reserve the lost portions in the bootmem maps.
	 */
	for (i = 0; saved_sp_banks[i].num_bytes; i++) {
		unsigned long old_start, old_end;

		old_start = saved_sp_banks[i].base_addr;
		old_end = old_start +
			saved_sp_banks[i].num_bytes;
		while (old_start < old_end) {
			int n;

			for (n = 0; sp_banks[n].num_bytes; n++) {
				unsigned long new_start, new_end;

				new_start = sp_banks[n].base_addr;
				new_end = new_start + sp_banks[n].num_bytes;

				if (new_start <= old_start &&
				    new_end >= (old_start + PAGE_SIZE)) {
					set_bit (old_start >> 22,
						 sparc64_valid_addr_bitmap);
					goto do_next_page;
				}
			}
#ifdef DEBUG_BOOTMEM
			prom_printf("taint: Page went away, reserve page %lx.\n",
				    old_start);
#endif
			reserve_bootmem(old_start, PAGE_SIZE);

		do_next_page:
			old_start += PAGE_SIZE;
		}
	}
}
void __init free_mem_map_range(struct page *first, struct page *last)
{
	first = (struct page *) PAGE_ALIGN((unsigned long)first);
	last  = (struct page *) ((unsigned long)last & PAGE_MASK);
#ifdef DEBUG_BOOTMEM
	prom_printf("[%p,%p] ", first, last);
#endif
	while (first < last) {
		ClearPageReserved(mem_map + MAP_NR(first));
		set_page_count(mem_map + MAP_NR(first), 1);
		free_page((unsigned long)first);
		totalram_pages++;
		num_physpages++;

		first = (struct page *)((unsigned long)first + PAGE_SIZE);
	}
}
/* Walk through holes in sp_banks regions, if the mem_map array
 * areas representing those holes consume a page or more, free
 * up such pages.  This helps a lot on machines where physical
 * ram is configured such that it begins at some huge value.
 *
 * The sp_banks array is sorted by base address.
 */
void __init free_unused_mem_map(void)
{
	int i;

#ifdef DEBUG_BOOTMEM
	prom_printf("free_unused_mem_map: ");
#endif
	for (i = 0; sp_banks[i].num_bytes; i++) {
		if (i == 0) {
			struct page *first, *last;

			first = mem_map;
			last = &mem_map[sp_banks[i].base_addr >> PAGE_SHIFT];
			free_mem_map_range(first, last);
		} else {
			struct page *first, *last;
			unsigned long prev_end;

			prev_end = sp_banks[i-1].base_addr +
				sp_banks[i-1].num_bytes;
			prev_end = PAGE_ALIGN(prev_end);
			first = &mem_map[prev_end >> PAGE_SHIFT];
			last = &mem_map[sp_banks[i].base_addr >> PAGE_SHIFT];

			free_mem_map_range(first, last);

			if (!sp_banks[i+1].num_bytes) {
				prev_end = sp_banks[i].base_addr +
					sp_banks[i].num_bytes;
				first = &mem_map[prev_end >> PAGE_SHIFT];
				last = &mem_map[last_valid_pfn];
				free_mem_map_range(first, last);
			}
		}
	}
#ifdef DEBUG_BOOTMEM
	prom_printf("\n");
#endif
}
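
/* The valid-address bitmap carries one bit per 4MB (1 << 22) chunk of
 * the physical address space up to last_valid_pfn; the shift by
 * (22 - PAGE_SHIFT) + 6 below sizes it in 64-bit words and i << 3
 * converts that to bytes for the bootmem allocation.
 */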
void __init mem_init(void)
{
	unsigned long codepages, datapages, initpages;
	unsigned long addr, last;
	int i;

	i = last_valid_pfn >> ((22 - PAGE_SHIFT) + 6);
	i += 1;
	sparc64_valid_addr_bitmap = (unsigned long *)
		__alloc_bootmem(i << 3, SMP_CACHE_BYTES, 0UL);
	if (sparc64_valid_addr_bitmap == NULL) {
		prom_printf("mem_init: Cannot alloc valid_addr_bitmap.\n");
		prom_halt();
	}
	memset(sparc64_valid_addr_bitmap, 0, i << 3);

	addr = PAGE_OFFSET + phys_base;
	last = PAGE_ALIGN((unsigned long)&_end) -
		((unsigned long) &empty_zero_page);
	last += PAGE_OFFSET + phys_base;
	while (addr < last) {
#ifdef CONFIG_BLK_DEV_INITRD
		// FIXME to use bootmem scheme...
		if (initrd_below_start_ok && addr >= initrd_start && addr < initrd_end)
			mem_map[MAP_NR(addr)].flags &= ~(1<<PG_reserved);
#endif
		set_bit(__pa(addr) >> 22, sparc64_valid_addr_bitmap);
		addr += PAGE_SIZE;
	}

	taint_real_pages();

	max_mapnr = last_valid_pfn;
	high_memory = __va(last_valid_pfn << PAGE_SHIFT);

#ifdef DEBUG_BOOTMEM
	prom_printf("mem_init: Calling free_all_bootmem().\n");
#endif
	num_physpages = totalram_pages = free_all_bootmem();
#if 0
	free_unused_mem_map();
#endif
	codepages = (((unsigned long) &etext) - ((unsigned long)&_start));
	codepages = PAGE_ALIGN(codepages) >> PAGE_SHIFT;
	datapages = (((unsigned long) &edata) - ((unsigned long)&etext));
	datapages = PAGE_ALIGN(datapages) >> PAGE_SHIFT;
	initpages = (((unsigned long) &__init_end) - ((unsigned long) &__init_begin));
	initpages = PAGE_ALIGN(initpages) >> PAGE_SHIFT;

#ifndef __SMP__
	{
		/* Put empty_pg_dir on pgd_quicklist */
		extern pgd_t empty_pg_dir[1024];
		unsigned long addr = (unsigned long)empty_pg_dir;
		unsigned long alias_base = phys_base + PAGE_OFFSET -
			(long)(&empty_zero_page);

		memset(empty_pg_dir, 0, sizeof(empty_pg_dir));
		addr += alias_base;
		free_pgd_fast((pgd_t *)addr);
		totalram_pages++;
		num_physpages++;
	}
#endif

	printk("Memory: %uk available (%ldk kernel code, %ldk data, %ldk init) [%016lx,%016lx]\n",
	       nr_free_pages() << (PAGE_SHIFT-10),
	       codepages << (PAGE_SHIFT-10),
	       datapages << (PAGE_SHIFT-10),
	       initpages << (PAGE_SHIFT-10),
	       PAGE_OFFSET, (last_valid_pfn << PAGE_SHIFT));

	/* NOTE NOTE NOTE NOTE
	 * Please keep track of things and make sure this
	 * always matches the code in mm/page_alloc.c -DaveM
	 */
	i = nr_free_pages() >> 7;
	if (i < 48)
		i = 48;
	if (i > 256)
		i = 256;
	freepages.min = i;
	freepages.low = i << 1;
	freepages.high = freepages.low + i;
}
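
/* Release the pages between __init_begin and __init_end back to the
 * page allocator, translating their link addresses to the physical
 * alias before looking up the corresponding mem_map entries.
 */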
void free_initmem (void)
{
	unsigned long addr;

	addr = (unsigned long)(&__init_begin);
	for (; addr < (unsigned long)(&__init_end); addr += PAGE_SIZE) {
		unsigned long page;
		struct page *p;

		page = (addr +
			((unsigned long) __va(phys_base)) -
			((unsigned long) &empty_zero_page));
		p = mem_map + MAP_NR(page);

		ClearPageReserved(p);
		set_page_count(p, 1);
		__free_page(p);
		totalram_pages++;
		num_physpages++;
	}
}
void si_meminfo(struct sysinfo *val)
{
	val->totalram = totalram_pages;
	val->sharedram = 0;
	val->freeram = nr_free_pages();
	val->bufferram = atomic_read(&buffermem_pages);

	/* These are always zero on Sparc64. */
	val->totalhigh = 0;
	val->freehigh = 0;

	val->mem_unit = PAGE_SIZE;
}