/*
 *  $Id: init.c,v 1.195 1999/10/15 16:39:39 cort Exp $
 *
 *  Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org)
 *
 *  Modifications by Paul Mackerras (PowerMac) (paulus@cs.anu.edu.au)
 *  and Cort Dougan (PReP) (cort@cs.nmt.edu)
 *  Copyright (C) 1996 Paul Mackerras
 *  Amiga/APUS changes by Jesper Skov (jskov@cygnus.co.uk).
 *
 *  Derived from "arch/i386/mm/init.c"
 *    Copyright (C) 1991, 1992, 1993, 1994  Linus Torvalds
 *
 *  This program is free software; you can redistribute it and/or
 *  modify it under the terms of the GNU General Public License
 *  as published by the Free Software Foundation; either version
 *  2 of the License, or (at your option) any later version.
 */
#include <linux/config.h>
#include <linux/signal.h>
#include <linux/sched.h>
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/string.h>
#include <linux/types.h>
#include <linux/ptrace.h>
#include <linux/mman.h>
#include <linux/mm.h>
#include <linux/swap.h>
#include <linux/stddef.h>
#include <linux/vmalloc.h>
#include <linux/init.h>
#include <linux/delay.h>
#include <linux/openpic.h>
#include <linux/bootmem.h>
#ifdef CONFIG_BLK_DEV_INITRD
#include <linux/blk.h>		/* for initrd_* */
#endif

#include <asm/pgalloc.h>
#include <asm/prom.h>
#include <asm/io.h>
#include <asm/mmu_context.h>
#include <asm/pgtable.h>
#include <asm/mmu.h>
#include <asm/residual.h>
#include <asm/uaccess.h>
#include <asm/8xx_immap.h>
#include <asm/mpc8xx.h>
#include <asm/smp.h>
#include <asm/bootx.h>
#include <asm/machdep.h>
#include <asm/setup.h>
#include <asm/amigahw.h>
#include <asm/gemini.h>

#include "mem_pieces.h"

#if defined(CONFIG_4xx)
#include "4xx_tlb.h"
#endif

#define PGTOKB(pages)	(((pages) * PAGE_SIZE) >> 10)
atomic_t next_mmu_context;
unsigned long *end_of_DRAM;
int mem_init_done;
int init_bootmem_done;
unsigned long boot_mapsize;
unsigned long totalram_pages = 0;
extern pgd_t swapper_pg_dir[];
extern char _start[], _end[];
extern char etext[], _stext[];
extern char __init_begin, __init_end;
extern char __prep_begin, __prep_end;
extern char __pmac_begin, __pmac_end;
extern char __apus_begin, __apus_end;
extern char __openfirmware_begin, __openfirmware_end;
struct device_node *memory_node;
unsigned long ioremap_base;
unsigned long ioremap_bot;
unsigned long avail_start;
extern int num_memory;
extern struct mem_info memory[];
extern boot_infos_t *boot_infos;

struct pgtable_cache_struct quicklists;
static void *MMU_get_page(void);
unsigned long *prep_find_end_of_memory(void);
unsigned long *pmac_find_end_of_memory(void);
unsigned long *apus_find_end_of_memory(void);
unsigned long *gemini_find_end_of_memory(void);
extern unsigned long *find_end_of_memory(void);
#ifdef CONFIG_8xx
unsigned long *m8xx_find_end_of_memory(void);
#endif /* CONFIG_8xx */
unsigned long *oak_find_end_of_memory(void);
static void mapin_ram(void);
void map_page(unsigned long va, unsigned long pa, int flags);
extern void die_if_kernel(char *, struct pt_regs *, long);

struct mem_pieces phys_mem;

extern struct task_struct *current_set[NR_CPUS];
PTE *Hash, *Hash_end;
unsigned long Hash_size, Hash_mask;
#if !defined(CONFIG_4xx) && !defined(CONFIG_8xx)

static void hash_init(void);

union ubat {			/* BAT register values to be loaded */
	BAT	bat;
	u32	word[2];
} BATS[4][2];			/* 4 pairs of IBAT, DBAT */

struct batrange {		/* stores address ranges mapped by BATs */
	unsigned long start;
	unsigned long limit;
	unsigned long phys;
} bat_addrs[4];
/*
 * Return PA for this VA if it is mapped by a BAT, or 0
 */
static inline unsigned long v_mapped_by_bats(unsigned long va)
{
	int b;

	for (b = 0; b < 4; ++b)
		if (va >= bat_addrs[b].start && va < bat_addrs[b].limit)
			return bat_addrs[b].phys + (va - bat_addrs[b].start);
	return 0;
}

/*
 * Return VA for a given PA or 0 if not mapped
 */
static inline unsigned long p_mapped_by_bats(unsigned long pa)
{
	int b;

	for (b = 0; b < 4; ++b)
		if (pa >= bat_addrs[b].phys
		    && pa < (bat_addrs[b].limit - bat_addrs[b].start)
			    + bat_addrs[b].phys)
			return bat_addrs[b].start + (pa - bat_addrs[b].phys);
	return 0;
}

#else /* CONFIG_4xx || CONFIG_8xx */
#define v_mapped_by_bats(x)	(0UL)
#define p_mapped_by_bats(x)	(0UL)
#endif /* !CONFIG_4xx && !CONFIG_8xx */
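
/*
 * Worked example (illustrative values only): if bat_addrs[2] records a
 * block with start = 0xc0000000, limit = 0xc07fffff and phys = 0,
 * then v_mapped_by_bats(0xc0123456) returns 0x00123456 and
 * p_mapped_by_bats(0x00123456) returns 0xc0123456.  Addresses outside
 * every recorded block fall through both loops and return 0.
 */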
/*
 * this tells the system to map all of ram with the segregs
 * (i.e. page tables) instead of the bats.
 */
int __map_without_bats = 0;

/* max amount of RAM to use */
unsigned long __max_memory;
void __bad_pte(pmd_t *pmd)
{
	printk("Bad pmd in pte_alloc: %08lx\n", pmd_val(*pmd));
	pmd_val(*pmd) = (unsigned long) BAD_PAGETABLE;
}

pte_t *get_pte_slow(pmd_t *pmd, unsigned long offset)
{
	pte_t *pte;

	if (pmd_none(*pmd)) {
		if (!mem_init_done)
			pte = (pte_t *) MMU_get_page();
		else if ((pte = (pte_t *) __get_free_page(GFP_KERNEL)))
			clear_page(pte);
		if (pte) {
			pmd_val(*pmd) = (unsigned long)pte;
			return pte + offset;
		}
		pmd_val(*pmd) = (unsigned long)BAD_PAGETABLE;
		return NULL;
	}
	if (pmd_bad(*pmd)) {
		__bad_pte(pmd);
		return NULL;
	}
	return (pte_t *) pmd_page(*pmd) + offset;
}
int do_check_pgt_cache(int low, int high)
{
	int freed = 0;

	if (pgtable_cache_size > high) {
		do {
			if (pgd_quicklist)
				free_pgd_slow(get_pgd_fast()), freed++;
			if (pmd_quicklist)
				free_pmd_slow(get_pmd_fast()), freed++;
			if (pte_quicklist)
				free_pte_slow(get_pte_fast()), freed++;
		} while (pgtable_cache_size > low);
	}
	return freed;
}
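
/*
 * Usage sketch (illustrative, not a call site in this file): callers
 * pass low/high watermarks, e.g. do_check_pgt_cache(50, 100), so the
 * page-table quicklists are only trimmed once they grow past the high
 * mark, and are then drained back down to the low mark.
 */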
/*
 * BAD_PAGE is the page that is used for page faults when linux
 * is out-of-memory. Older versions of linux just did a
 * do_exit(), but using this instead means there is less risk
 * for a process dying in kernel mode, possibly leaving an inode
 * unused etc..
 *
 * BAD_PAGETABLE is the accompanying page-table: it is initialized
 * to point to BAD_PAGE entries.
 *
 * ZERO_PAGE is a special page that is used for zero-initialized
 * data and COW.
 */
pte_t *empty_bad_page_table;

pte_t * __bad_pagetable(void)
{
	clear_page(empty_bad_page_table);
	return empty_bad_page_table;
}

void *empty_bad_page;

pte_t __bad_page(void)
{
	clear_page(empty_bad_page);
	return pte_mkdirty(mk_pte_phys(__pa(empty_bad_page), PAGE_SHARED));
}
void show_mem(void)
{
	int i, free = 0, total = 0, reserved = 0;
	int shared = 0, cached = 0;
	struct task_struct *p;

	printk("Mem-info:\n");
	show_free_areas();
	printk("Free swap:       %6dkB\n", nr_swap_pages<<(PAGE_SHIFT-10));
	i = max_mapnr;
	while (i-- > 0) {
		total++;
		if (PageReserved(mem_map+i))
			reserved++;
		else if (PageSwapCache(mem_map+i))
			cached++;
		else if (!atomic_read(&mem_map[i].count))
			free++;
		else
			shared += atomic_read(&mem_map[i].count) - 1;
	}
	printk("%d pages of RAM\n", total);
	printk("%d free pages\n", free);
	printk("%d reserved pages\n", reserved);
	printk("%d pages shared\n", shared);
	printk("%d pages swap cached\n", cached);
	printk("%d pages in page table cache\n", (int)pgtable_cache_size);
	show_buffers();
	printk("%-8s %3s %8s %8s %8s %9s %8s", "Process", "Pid",
	       "Ctx", "Ctx<<4", "Last Sys", "pc", "task");
#ifdef __SMP__
	printk(" %3s", "CPU");
#endif /* __SMP__ */
	printk("\n");
	for_each_task(p)
	{
		printk("%-8.8s %3d %8ld %8ld %8ld %c%08lx %08lx ",
		       p->comm, p->pid,
		       (p->mm) ? p->mm->context : 0,
		       (p->mm) ? (p->mm->context<<4) : 0,
		       p->thread.last_syscall,
		       (p->thread.regs) ? (user_mode(p->thread.regs) ? 'u' : 'k') : '?',
		       (p->thread.regs) ? p->thread.regs->nip : 0,
		       (ulong)p);
#ifdef __SMP__
		printk("%3d ", p->processor);
		if ( (p->processor != NO_PROC_ID) &&
		     (p == current_set[p->processor]) )
			printk("current");
#else
		if (p == current)
			printk("current");
#endif /* __SMP__ */
		if (p == last_task_used_math)
			printk(" last math");
		printk("\n");
	}
}
void si_meminfo(struct sysinfo *val)
{
	int i;

	i = max_mapnr;
	val->totalram = 0;
	val->sharedram = 0;
	val->freeram = nr_free_pages();
	val->bufferram = atomic_read(&buffermem_pages);
	while (i-- > 0) {
		if (PageReserved(mem_map+i))
			continue;
		val->totalram++;
		if (!atomic_read(&mem_map[i].count))
			continue;
		val->sharedram += atomic_read(&mem_map[i].count) - 1;
	}
	val->mem_unit = PAGE_SIZE;
}
void *
ioremap(unsigned long addr, unsigned long size)
{
	return __ioremap(addr, size, _PAGE_NO_CACHE);
}
void *
__ioremap(unsigned long addr, unsigned long size, unsigned long flags)
{
	unsigned long p, v, i;

	/*
	 * Choose an address to map it to.
	 * Once the vmalloc system is running, we use it.
	 * Before then, we map addresses >= ioremap_base
	 * virt == phys; for addresses below this we use
	 * space going down from ioremap_base (ioremap_bot
	 * records where we're up to).
	 */
	p = addr & PAGE_MASK;
	size = PAGE_ALIGN(addr + size) - p;

	/*
	 * If the address lies within the first 16 MB, assume it's in ISA
	 * memory space.
	 */
	if (p < 16*1024*1024)
		p += _ISA_MEM_BASE;

	/*
	 * Don't allow anybody to remap normal RAM that we're using.
	 * mem_init() sets high_memory so only do the check after that.
	 */
	if ( mem_init_done && (p < virt_to_phys(high_memory)) )
	{
		printk("__ioremap(): phys addr %0lx is RAM lr %p\n", p,
		       __builtin_return_address(0));
		return NULL;
	}

	if (size == 0)
		return NULL;

	/*
	 * Is it already mapped?  Perhaps overlapped by a previous
	 * BAT mapping.  If the whole area is mapped then we're done,
	 * otherwise remap it since we want to keep the virt addrs for
	 * each request contiguous.
	 *
	 * We make the assumption here that if the bottom and top
	 * of the range we want are mapped then it's mapped to the
	 * same virt address (and this is contiguous).
	 */
	if ((v = p_mapped_by_bats(p)) /*&& p_mapped_by_bats(p+size-1)*/ )
		goto out;

	if (mem_init_done) {
		struct vm_struct *area;
		area = get_vm_area(size, VM_IOREMAP);
		if (area == 0)
			return NULL;
		v = VMALLOC_VMADDR(area->addr);
	} else {
		if (p >= ioremap_base)
			v = p;
		else
			v = (ioremap_bot -= size);
	}

	if ((flags & _PAGE_PRESENT) == 0)
		flags |= pgprot_val(PAGE_KERNEL);
	if (flags & (_PAGE_NO_CACHE | _PAGE_WRITETHRU))
		flags |= _PAGE_GUARDED;

	/*
	 * Is it a candidate for a BAT mapping?
	 */
	for (i = 0; i < size; i += PAGE_SIZE)
		map_page(v+i, p+i, flags);
out:
	return (void *) (v + (addr & ~PAGE_MASK));
}
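
/*
 * Usage sketch (hypothetical addresses, not from this file): a driver
 * with a device register block at physical 0xf2000000 would do
 *
 *	regs = (volatile unsigned int *) ioremap(0xf2000000, 0x1000);
 *
 * and get back a cache-inhibited, guarded mapping.  Before mem_init()
 * an address >= ioremap_base is mapped virt == phys; lower addresses
 * are allocated downward from ioremap_bot.
 */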
void iounmap(void *addr)
{
	/* XXX todo */
}
unsigned long iopa(unsigned long addr)
{
	unsigned long pa;
	pmd_t *pd;
	pte_t *pg;

	/* Check the BATs */
	pa = v_mapped_by_bats(addr);
	if (pa)
		return pa;

	/* Do we have a page table? */
	if (init_mm.pgd == NULL)
		return 0;

	/* Use upper 10 bits of addr to index the first level map */
	pd = (pmd_t *) (init_mm.pgd + (addr >> PGDIR_SHIFT));
	if (pmd_none(*pd))
		return 0;

	/* Use middle 10 bits of addr to index the second-level map */
	pg = pte_offset(pd, addr);
	return (pte_val(*pg) & PAGE_MASK) | (addr & ~PAGE_MASK);
}
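
/*
 * Address-split reminder for the two-level soft walk above: a 32-bit
 * address decomposes into 10 bits of first-level (pgd) index, 10 bits
 * of second-level (pte) index and 12 bits of page offset; e.g.
 * 0xc0123456 -> pgd index 0x300, pte index 0x123, offset 0x456.
 */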
void
map_page(unsigned long va, unsigned long pa, int flags)
{
	pmd_t *pd, oldpd;
	pte_t *pg;

	/* Use upper 10 bits of VA to index the first level map */
	pd = pmd_offset(pgd_offset_k(va), va);
	oldpd = *pd;
	/* Use middle 10 bits of VA to index the second-level map */
	pg = pte_alloc(pd, va);
	if (pmd_none(oldpd) && mem_init_done)
		set_pgdir(va, *(pgd_t *)pd);
	set_pte(pg, mk_pte_phys(pa & PAGE_MASK, __pgprot(flags)));
	flush_hash_page(0, va);
}
#ifndef CONFIG_8xx
/*
 * TLB flushing:
 *
 *  - flush_tlb_all() flushes all processes' TLBs
 *  - flush_tlb_mm(mm) flushes the specified mm context TLB's
 *  - flush_tlb_page(vma, vmaddr) flushes one page
 *  - flush_tlb_range(mm, start, end) flushes a range of pages
 *
 * since the hardware hash table functions as an extension of the
 * tlb as far as the linux tables are concerned, flush it too.
 */

/*
 * Flush all tlb/hash table entries (except perhaps for those
 * mapping RAM starting at PAGE_OFFSET, since they never change).
 */
void
local_flush_tlb_all(void)
{
	__clear_user(Hash, Hash_size);
	_tlbia();
#ifdef __SMP__
	smp_send_tlb_invalidate(0);
#endif
}

/*
 * Flush all the (user) entries for the address space described
 * by mm.  We can't rely on mm->mmap describing all the entries
 * that might be in the hash table.
 */
void
local_flush_tlb_mm(struct mm_struct *mm)
{
	mm->context = NO_CONTEXT;
	if (mm == current->mm)
		activate_mm(mm, mm);
#ifdef __SMP__
	smp_send_tlb_invalidate(0);
#endif
}

void
local_flush_tlb_page(struct vm_area_struct *vma, unsigned long vmaddr)
{
	if (vmaddr < TASK_SIZE)
		flush_hash_page(vma->vm_mm->context, vmaddr);
	else
		flush_hash_page(0, vmaddr);
#ifdef __SMP__
	smp_send_tlb_invalidate(0);
#endif
}

/*
 * for each page addr in the range, call MMU_invalidate_page()
 * if the range is very large and the hash table is small it might be
 * faster to do a search of the hash table and just invalidate pages
 * that are in the range but that's for study later.
 */
void
local_flush_tlb_range(struct mm_struct *mm, unsigned long start, unsigned long end)
{
	start &= PAGE_MASK;

	if (end - start > 20 * PAGE_SIZE) {
		flush_tlb_mm(mm);
		return;
	}

	for (; start < end && start < TASK_SIZE; start += PAGE_SIZE)
		flush_hash_page(mm->context, start);
#ifdef __SMP__
	smp_send_tlb_invalidate(0);
#endif
}
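
/*
 * Why 20 pages: a rough cost cutoff.  Each page flushed individually
 * costs one hash-table search, so a 16-page range does 16 targeted
 * flushes, while anything larger than 20 pages is assumed cheaper to
 * handle by invalidating the whole context via flush_tlb_mm().  The
 * constant is a heuristic, not derived from any hardware limit.
 */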
/*
 * The context counter has overflowed.
 * We set mm->context to NO_CONTEXT for all mm's in the system.
 * We assume we can get to all mm's by looking at tsk->mm for
 * all tasks in the system.
 */
void
mmu_context_overflow(void)
{
	struct task_struct *tsk;

	printk(KERN_DEBUG "mmu_context_overflow\n");
	read_lock(&tasklist_lock);
	for_each_task(tsk) {
		if (tsk->mm)
			tsk->mm->context = NO_CONTEXT;
	}
	read_unlock(&tasklist_lock);
	flush_hash_segments(0x10, 0xffffff);
#ifdef __SMP__
	smp_send_tlb_invalidate(0);
#endif
	atomic_set(&next_mmu_context, 0);
	/* make sure current always has a context */
	current->mm->context = MUNGE_CONTEXT(atomic_inc_return(&next_mmu_context));
	set_context(current->mm->context);
}
#endif /* CONFIG_8xx */
#if !defined(CONFIG_4xx) && !defined(CONFIG_8xx)
static void get_mem_prop(char *, struct mem_pieces *);

#if defined(CONFIG_ALL_PPC)
/*
 * Read in a property describing some pieces of memory.
 */
static void __init get_mem_prop(char *name, struct mem_pieces *mp)
{
	struct reg_property *rp;
	int s;

	rp = (struct reg_property *) get_property(memory_node, name, &s);
	if (rp == NULL) {
		printk(KERN_ERR "error: couldn't get %s property on /memory\n",
		       name);
		abort();
	}
	mp->n_regions = s / sizeof(mp->regions[0]);
	memcpy(mp->regions, rp, s);

	/* Make sure the pieces are sorted. */
	mem_pieces_sort(mp);
	mem_pieces_coalesce(mp);
}
#endif /* CONFIG_ALL_PPC */
/*
 * Set up one of the I/D BAT (block address translation) register pairs.
 * The parameters are not checked; in particular size must be a power
 * of 2 between 128k and 256M.
 */
void __init setbat(int index, unsigned long virt, unsigned long phys,
		   unsigned int size, int flags)
{
	unsigned int bl;
	int wimgxpp;
	union ubat *bat = BATS[index];

	bl = (size >> 17) - 1;
	if ((_get_PVR() >> 16) != 1) {
		/* 603, 604, etc. */
		/* Do DBAT first */
		wimgxpp = flags & (_PAGE_WRITETHRU | _PAGE_NO_CACHE
				   | _PAGE_COHERENT | _PAGE_GUARDED);
		wimgxpp |= (flags & _PAGE_RW)? BPP_RW: BPP_RX;
		bat[1].word[0] = virt | (bl << 2) | 2; /* Vs=1, Vp=0 */
		bat[1].word[1] = phys | wimgxpp;
#ifndef CONFIG_KGDB /* want user access for breakpoints */
		if (flags & _PAGE_USER)
#endif
			bat[1].bat.batu.vp = 1;
		if (flags & _PAGE_GUARDED) {
			/* G bit must be zero in IBATs */
			bat[0].word[0] = bat[0].word[1] = 0;
		} else {
			/* make IBAT same as DBAT */
			bat[0] = bat[1];
		}
	} else {
		/* 601 cpu */
		wimgxpp = flags & (_PAGE_WRITETHRU | _PAGE_NO_CACHE
				   | _PAGE_COHERENT);
		wimgxpp |= (flags & _PAGE_RW)?
			((flags & _PAGE_USER)? PP_RWRW: PP_RWXX): PP_RXRX;
		bat->word[0] = virt | wimgxpp | 4;	/* Ks=0, Ku=1 */
		bat->word[1] = phys | bl | 0x40;	/* V=1 */
	}

	bat_addrs[index].start = virt;
	bat_addrs[index].limit = virt + ((bl + 1) << 17) - 1;
	bat_addrs[index].phys = phys;
}
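
/*
 * Example call (values illustrative): setbat(2, 0xc0000000, 0x00000000,
 * 0x800000, RAM_PAGE) maps 8MB of RAM at KERNELBASE.  bl becomes
 * (0x800000 >> 17) - 1 = 0x3f, i.e. 64 blocks of 128kB, and
 * bat_addrs[2] records 0xc0000000..0xc07fffff -> 0x0.
 */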
#define IO_PAGE	(_PAGE_NO_CACHE | _PAGE_GUARDED | _PAGE_RW)
#ifdef __SMP__
#define RAM_PAGE (_PAGE_RW|_PAGE_COHERENT)
#else
#define RAM_PAGE (_PAGE_RW)
#endif
#endif /* CONFIG_8xx */
/*
 * Map in all of physical memory starting at KERNELBASE.
 */
#define PAGE_KERNEL_RO	__pgprot(_PAGE_PRESENT | _PAGE_ACCESSED)

static void __init mapin_ram(void)
{
	int i;
	unsigned long v, p, s, f;

#if !defined(CONFIG_4xx) && !defined(CONFIG_8xx)
	if (!__map_without_bats) {
		unsigned long tot, mem_base, bl, done;
		unsigned long max_size = (256<<20);
		unsigned long align;

		/* Set up BAT2 and if necessary BAT3 to cover RAM. */
		mem_base = __pa(KERNELBASE);

		/* Make sure we don't map a block larger than the
		   smallest alignment of the physical address. */
		/* alignment of mem_base */
		align = ~(mem_base-1) & mem_base;
		/* set BAT block size to MIN(max_size, align) */
		if (align && align < max_size)
			max_size = align;

		tot = (unsigned long)end_of_DRAM - KERNELBASE;
		for (bl = 128<<10; bl < max_size; bl <<= 1) {
			if (bl * 2 > tot)
				break;
		}

		setbat(2, KERNELBASE, mem_base, bl, RAM_PAGE);
		done = (unsigned long)bat_addrs[2].limit - KERNELBASE + 1;
		if ((done < tot) && !bat_addrs[3].limit) {
			/* use BAT3 to cover a bit more */
			tot -= done;
			for (bl = 128<<10; bl < max_size; bl <<= 1)
				if (bl * 2 > tot)
					break;
			setbat(3, KERNELBASE+done, mem_base+done, bl,
			       RAM_PAGE);
		}
	}
#endif /* !CONFIG_4xx && !CONFIG_8xx */

	for (i = 0; i < phys_mem.n_regions; ++i) {
		v = (ulong)__va(phys_mem.regions[i].address);
		p = phys_mem.regions[i].address;
		for (s = 0; s < phys_mem.regions[i].size; s += PAGE_SIZE) {
			/* On the MPC8xx, we want the page shared so we
			 * don't get ASID compares on kernel space.
			 */
			f = _PAGE_PRESENT | _PAGE_ACCESSED | _PAGE_SHARED;
#if defined(CONFIG_KGDB) || defined(CONFIG_XMON)
			/* Allows stub to set breakpoints everywhere */
			f |= _PAGE_RW | _PAGE_DIRTY | _PAGE_HWWRITE;
#else
			if ((char *) v < _stext || (char *) v >= etext)
				f |= _PAGE_RW | _PAGE_DIRTY | _PAGE_HWWRITE;
#ifndef CONFIG_8xx
			else
				/* On the powerpc (not 8xx), no user access
				   forces R/W kernel access */
				f |= _PAGE_USER;
#endif /* CONFIG_8xx */
#endif /* CONFIG_KGDB */
			map_page(v, p, f);
			v += PAGE_SIZE;
			p += PAGE_SIZE;
		}
	}
}
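
/*
 * BAT sizing arithmetic (worked example, illustrative): with
 * mem_base = 0x00800000, align = ~(mem_base-1) & mem_base picks out
 * the lowest set bit, 0x00800000, so max_size drops from 256MB to 8MB
 * and no BAT block can exceed the 8MB alignment of the physical base.
 * With mem_base = 0 the expression yields 0 and the 256MB cap stands.
 */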
/* In fact this is only called until mem_init is done. */
static void __init *MMU_get_page(void)
{
	void *p;

	if (mem_init_done) {
		p = (void *) __get_free_page(GFP_KERNEL);
	} else if (init_bootmem_done) {
		p = alloc_bootmem_pages(PAGE_SIZE);
	} else {
		p = mem_pieces_find(PAGE_SIZE, PAGE_SIZE);
	}
	if (p == 0)
		panic("couldn't get a page in MMU_get_page");
	__clear_user(p, PAGE_SIZE);
	return p;
}
void __init free_initmem(void)
{
	unsigned long a;
	unsigned long num_freed_pages = 0, num_prep_pages = 0,
		num_pmac_pages = 0, num_openfirmware_pages = 0,
		num_apus_pages = 0;

#define FREESEC(START,END,CNT) do { \
	a = (unsigned long)(&START); \
	for (; a < (unsigned long)(&END); a += PAGE_SIZE) { \
		clear_bit(PG_reserved, &mem_map[MAP_NR(a)].flags); \
		set_page_count(mem_map+MAP_NR(a), 1); \
		free_page(a); \
		CNT++; \
		totalram_pages++; \
	} \
} while (0)

	FREESEC(__init_begin,__init_end,num_freed_pages);
	switch (_machine)
	{
	case _MACH_Pmac:
		FREESEC(__apus_begin,__apus_end,num_apus_pages);
		FREESEC(__prep_begin,__prep_end,num_prep_pages);
		break;
	case _MACH_chrp:
		FREESEC(__apus_begin,__apus_end,num_apus_pages);
		FREESEC(__pmac_begin,__pmac_end,num_pmac_pages);
		FREESEC(__prep_begin,__prep_end,num_prep_pages);
		break;
	case _MACH_prep:
		FREESEC(__apus_begin,__apus_end,num_apus_pages);
		FREESEC(__pmac_begin,__pmac_end,num_pmac_pages);
		break;
	case _MACH_mbx:
		FREESEC(__apus_begin,__apus_end,num_apus_pages);
		FREESEC(__pmac_begin,__pmac_end,num_pmac_pages);
		FREESEC(__prep_begin,__prep_end,num_prep_pages);
		break;
	case _MACH_apus:
		FREESEC(__pmac_begin,__pmac_end,num_pmac_pages);
		FREESEC(__prep_begin,__prep_end,num_prep_pages);
		break;
	case _MACH_gemini:
		FREESEC(__apus_begin,__apus_end,num_apus_pages);
		FREESEC(__pmac_begin,__pmac_end,num_pmac_pages);
		FREESEC(__prep_begin,__prep_end,num_prep_pages);
		break;
	}

	if ( !have_of )
		FREESEC( __openfirmware_begin, __openfirmware_end,
			 num_openfirmware_pages );

	printk ("Freeing unused kernel memory: %ldk init",
		PGTOKB(num_freed_pages));
	if ( num_prep_pages )
		printk(" %ldk prep", PGTOKB(num_prep_pages));
	if ( num_pmac_pages )
		printk(" %ldk pmac", PGTOKB(num_pmac_pages));
	if ( num_openfirmware_pages )
		printk(" %ldk open firmware", PGTOKB(num_openfirmware_pages));
	if ( num_apus_pages )
		printk(" %ldk apus", PGTOKB(num_apus_pages));
	printk("\n");
}
#ifdef CONFIG_BLK_DEV_INITRD
void free_initrd_mem(unsigned long start, unsigned long end)
{
	unsigned long freed = end - start;	/* compute before start is advanced */

	for (; start < end; start += PAGE_SIZE) {
		ClearPageReserved(mem_map + MAP_NR(start));
		set_page_count(mem_map+MAP_NR(start), 1);
		free_page(start);
		totalram_pages++;
	}
	printk ("Freeing initrd memory: %ldk freed\n", freed >> 10);
}
#endif
extern boot_infos_t *disp_bi;
/*
 * Do very early mm setup such as finding the size of memory
 * and setting up the hash table.
 * A lot of this is prep/pmac specific but a lot of it could
 * still be merged.
 */
#if defined(CONFIG_4xx)
void __init MMU_init(void)
{
	/*
	 * The Zone Protection Register (ZPR) defines how protection will
	 * be applied to every page which is a member of a given zone. At
	 * present, we utilize only two of the 4xx's zones. The first, zone
	 * 0, is set at '00b and only allows access in supervisor-mode based
	 * on the EX and WR bits. No user-mode access is allowed. The second,
	 * zone 1, is set at '10b and in supervisor-mode allows access
	 * without regard to the EX and WR bits. In user-mode, access is
	 * allowed based on the EX and WR bits.
	 */
	mtspr(SPRN_ZPR, 0x2aaaaaaa);
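
	/*
	 * Decoding the constant (2-bit fields, zone 0 in the two most
	 * significant bits): 0x2aaaaaaa = 0b00 10 10 ... 10, i.e. zone 0
	 * gets '00b and zones 1-15 all get '10b, matching the comment
	 * above; only zones 0 and 1 are actually used at present.
	 */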
	/* Hardwire any TLB entries necessary here. */
	PPC4xx_tlb_pin(KERNELBASE, 0, TLB_PAGESZ(PAGESZ_16M), 1);

	/*
	 * Find the top of physical memory and map all of it in starting
	 * at KERNELBASE.
	 */
	end_of_DRAM = oak_find_end_of_memory();
	mapin_ram();

	/*
	 * Set up the real-mode cache parameters for the exception vector
	 * handlers (which are run in real-mode).
	 */
	mtspr(SPRN_DCWR, 0x00000000);	/* All caching is write-back */

	/*
	 * Cache instruction and data space where the exception
	 * vectors and the kernel live in real-mode.
	 */
	mtspr(SPRN_DCCR, 0x80000000);	/* 128 MB of data space at 0x0. */
	mtspr(SPRN_ICCR, 0x80000000);	/* 128 MB of instr. space at 0x0. */
}
#else /* !CONFIG_4xx */
void __init MMU_init(void)
{
	if ( ppc_md.progress ) ppc_md.progress("MMU:enter", 0x111);
#ifndef CONFIG_8xx
	if (have_of)
		end_of_DRAM = pmac_find_end_of_memory();
#ifdef CONFIG_APUS
	else if (_machine == _MACH_apus)
		end_of_DRAM = apus_find_end_of_memory();
#endif
#ifdef CONFIG_GEMINI
	else if ( _machine == _MACH_gemini )
		end_of_DRAM = gemini_find_end_of_memory();
#endif /* CONFIG_GEMINI */
	else /* prep */
		end_of_DRAM = prep_find_end_of_memory();

	if ( ppc_md.progress ) ppc_md.progress("MMU:hash init", 0x300);
	hash_init();
#ifdef CONFIG_PPC64BRIDGE
	_SDR1 = __pa(Hash) | (ffz(~Hash_size) - 7) - 11;
#else
	_SDR1 = __pa(Hash) | (Hash_mask >> 10);
#endif

	ioremap_base = 0xf8000000;

	if ( ppc_md.progress ) ppc_md.progress("MMU:mapin", 0x301);
	/* Map in all of RAM starting at KERNELBASE */
	mapin_ram();

	/*
	 * Setup the bat mappings we're going to load that cover
	 * the io areas.  RAM was mapped by mapin_ram().
	 */
	if ( ppc_md.progress ) ppc_md.progress("MMU:setbat", 0x302);
	switch (_machine) {
	case _MACH_prep:
		setbat(0, 0x80000000, 0x80000000, 0x10000000, IO_PAGE);
		setbat(1, 0xf0000000, 0xc0000000, 0x08000000, IO_PAGE);
		ioremap_base = 0xf0000000;
		break;
	case _MACH_chrp:
		setbat(0, 0xf8000000, 0xf8000000, 0x08000000, IO_PAGE);
		/* temporary hack to get working until page tables are stable -- Cort*/
/*		setbat(1, 0x80000000, 0xc0000000, 0x10000000, IO_PAGE);*/
		setbat(3, 0xd0000000, 0xd0000000, 0x10000000, IO_PAGE);

		setbat(1, 0x80000000, 0x80000000, 0x10000000, IO_PAGE);
		setbat(3, 0x90000000, 0x90000000, 0x10000000, IO_PAGE);
		break;
	case _MACH_Pmac:
		{
			unsigned long base = 0xf3000000;
			struct device_node *macio = find_devices("mac-io");
			if (macio && macio->n_addrs)
				base = macio->addrs[0].address;
			setbat(0, base, base, 0x100000, IO_PAGE);
		}
#if 0
		setbat(0, disp_bi->dispDeviceBase, disp_bi->dispDeviceBase, 0x100000, IO_PAGE);
		disp_bi->logicalDisplayBase = disp_bi->dispDeviceBase;
#endif
		ioremap_base = 0xf0000000;
		break;
	case _MACH_apus:
		/* Map PPC exception vectors. */
		setbat(0, 0xfff00000, 0xfff00000, 0x00020000, RAM_PAGE);
		/* Map chip and ZorroII memory */
		setbat(1, zTwoBase, 0x00000000, 0x01000000, IO_PAGE);
		break;
	case _MACH_gemini:
		setbat(0, 0xf0000000, 0xf0000000, 0x10000000, IO_PAGE);
		setbat(1, 0x80000000, 0x80000000, 0x10000000, IO_PAGE);
		break;
	}
	ioremap_bot = ioremap_base;
#else /* CONFIG_8xx */

	end_of_DRAM = m8xx_find_end_of_memory();

	/* Map in all of RAM starting at KERNELBASE */
	mapin_ram();

	/* Now map in some of the I/O space that is generically needed
	 * or shared with multiple devices.
	 * All of this fits into the same 4Mbyte region, so it only
	 * requires one page table page.
	 */
	ioremap(IMAP_ADDR, IMAP_SIZE);
#ifdef CONFIG_MBX
	ioremap(NVRAM_ADDR, NVRAM_SIZE);
	ioremap(MBX_CSR_ADDR, MBX_CSR_SIZE);
	ioremap(PCI_CSR_ADDR, PCI_CSR_SIZE);

	/* Map some of the PCI/ISA I/O space to get the IDE interface.
	 */
	ioremap(PCI_ISA_IO_ADDR, 0x4000);
	ioremap(PCI_IDE_ADDR, 0x4000);
#endif
#ifdef CONFIG_RPXLITE
	ioremap(RPX_CSR_ADDR, RPX_CSR_SIZE);
	ioremap(HIOX_CSR_ADDR, HIOX_CSR_SIZE);
#endif
#ifdef CONFIG_RPXCLASSIC
	ioremap(PCI_CSR_ADDR, PCI_CSR_SIZE);
	ioremap(RPX_CSR_ADDR, RPX_CSR_SIZE);
#endif
#endif /* CONFIG_8xx */
	if ( ppc_md.progress ) ppc_md.progress("MMU:exit", 0x211);
}
#endif /* CONFIG_4xx */
/*
 * Initialize the bootmem system and give it all the memory we
 * have available.
 */
void __init do_init_bootmem(void)
{
	unsigned long start, size;
	int i;

	/*
	 * Find an area to use for the bootmem bitmap.
	 * We look for the first area which is at least
	 * 128kB in length (128kB is enough for a bitmap
	 * for 4GB of memory, using 4kB pages), plus 1 page
	 * (in case the address isn't page-aligned).
	 */
	start = 0;
	size = 0;
	for (i = 0; i < phys_avail.n_regions; ++i) {
		unsigned long a = phys_avail.regions[i].address;
		unsigned long s = phys_avail.regions[i].size;
		if (s <= size)
			continue;
		start = a;
		size = s;
		if (s >= 33 * PAGE_SIZE)
			break;
	}
	start = PAGE_ALIGN(start);

	boot_mapsize = init_bootmem(start >> PAGE_SHIFT,
				    __pa(end_of_DRAM) >> PAGE_SHIFT);

	/* remove the bootmem bitmap from the available memory */
	mem_pieces_remove(&phys_avail, start, boot_mapsize, 1);
	/* add everything in phys_avail into the bootmem map */
	for (i = 0; i < phys_avail.n_regions; ++i)
		free_bootmem(phys_avail.regions[i].address,
			     phys_avail.regions[i].size);

	init_bootmem_done = 1;
}
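
/*
 * Why 33 pages: a bootmem bitmap needs one bit per page, so 4GB of
 * 4kB pages is 1M pages = 1M bits = 128kB = 32 pages of bitmap; the
 * extra page absorbs a non-page-aligned start, hence 33.
 */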
/*
 * Find some memory for setup_arch to return.
 * We use the largest chunk of available memory as the area
 * that setup_arch returns, making sure that there are at
 * least 32 pages unused before this for MMU_get_page to use.
 */
unsigned long __init find_available_memory(void)
{
	int i, rn;
	unsigned long a, free;
	unsigned long start, end;

	if (_machine == _MACH_mbx) {
		/* Return the first, not the last region, because we
		 * may not yet have properly initialized the additional
		 * memory DIMM.
		 */
		a = PAGE_ALIGN(phys_avail.regions[0].address);
		avail_start = (unsigned long) __va(a);
		return avail_start;
	}

	rn = 0;
	for (i = 1; i < phys_avail.n_regions; ++i)
		if (phys_avail.regions[i].size > phys_avail.regions[rn].size)
			rn = i;

	free = 0;
	for (i = 0; i < rn; ++i) {
		start = phys_avail.regions[i].address;
		end = start + phys_avail.regions[i].size;
		free += (end & PAGE_MASK) - PAGE_ALIGN(start);
	}
	a = PAGE_ALIGN(phys_avail.regions[rn].address);
	if (free < 32 * PAGE_SIZE)
		a += 32 * PAGE_SIZE - free;
	avail_start = (unsigned long) __va(a);
	return avail_start;
}
/*
 * paging_init() sets up the page tables - in fact we've already done this.
 */
void __init paging_init(void)
{
	unsigned long zones_size[MAX_NR_ZONES], i;

	/*
	 * Grab some memory for bad_page and bad_pagetable to use.
	 */
	empty_bad_page = alloc_bootmem_pages(PAGE_SIZE);
	empty_bad_page_table = alloc_bootmem_pages(PAGE_SIZE);

	/*
	 * All pages are DMA-able so we put them all in the DMA zone.
	 */
	zones_size[0] = ((unsigned long)end_of_DRAM - KERNELBASE) >> PAGE_SHIFT;
	for (i = 1; i < MAX_NR_ZONES; i++)
		zones_size[i] = 0;
	free_area_init(zones_size);
}
void __init mem_init(void)
{
	extern char *sysmap;
	extern unsigned long sysmap_size;
	unsigned long addr;
	int codepages = 0;
	int datapages = 0;
	int initpages = 0;
#if defined(CONFIG_ALL_PPC)
	extern unsigned int rtas_data, rtas_size;
#endif /* defined(CONFIG_ALL_PPC) */
	max_mapnr = max_low_pfn;
	high_memory = (void *) __va(max_low_pfn * PAGE_SIZE);
	num_physpages = max_mapnr;	/* RAM is assumed contiguous */

	totalram_pages += free_all_bootmem();

#ifdef CONFIG_BLK_DEV_INITRD
	/* if we are booted from BootX with an initial ramdisk,
	   make sure the ramdisk pages aren't reserved. */
	if (initrd_start) {
		for (addr = initrd_start; addr < initrd_end; addr += PAGE_SIZE)
			clear_bit(PG_reserved, &mem_map[MAP_NR(addr)].flags);
	}
#endif /* CONFIG_BLK_DEV_INITRD */

#if defined(CONFIG_ALL_PPC)
	/* mark the RTAS pages as reserved */
	if ( rtas_data )
		for (addr = rtas_data; addr < PAGE_ALIGN(rtas_data+rtas_size) ;
		     addr += PAGE_SIZE)
			SetPageReserved(mem_map + MAP_NR(addr));
#endif /* defined(CONFIG_ALL_PPC) */
	if ( sysmap_size )
		for (addr = (unsigned long)sysmap;
		     addr < PAGE_ALIGN((unsigned long)sysmap+sysmap_size) ;
		     addr += PAGE_SIZE)
			SetPageReserved(mem_map + MAP_NR(addr));

	for (addr = PAGE_OFFSET; addr < (unsigned long)end_of_DRAM;
	     addr += PAGE_SIZE) {
		if (!PageReserved(mem_map + MAP_NR(addr)))
			continue;
		if (addr < (ulong) etext)
			codepages++;
		else if (addr >= (unsigned long)&__init_begin
			 && addr < (unsigned long)&__init_end)
			initpages++;
		else if (addr < (ulong) klimit)
			datapages++;
	}

	printk("Memory: %luk available (%dk kernel code, %dk data, %dk init) [%08x,%08lx]\n",
	       (unsigned long)nr_free_pages()<< (PAGE_SHIFT-10),
	       codepages<< (PAGE_SHIFT-10), datapages<< (PAGE_SHIFT-10),
	       initpages<< (PAGE_SHIFT-10),
	       PAGE_OFFSET, (unsigned long) end_of_DRAM);
	mem_init_done = 1;
}
#if !defined(CONFIG_4xx) && !defined(CONFIG_8xx)
#if defined(CONFIG_ALL_PPC)
/*
 * On systems with Open Firmware, collect information about
 * physical RAM and which pieces are already in use.
 * At this point, we have (at least) the first 8MB mapped with a BAT.
 * Our text, data, bss use something over 1MB, starting at 0.
 * Open Firmware may be using 1MB at the 4MB point.
 */
unsigned long __init *pmac_find_end_of_memory(void)
{
	unsigned long a, total;
	unsigned long ram_limit = 0xf0000000 - KERNELBASE;
	/* allow 0x08000000 for IO space */
	if ( _machine & (_MACH_prep|_MACH_Pmac) )
		ram_limit = 0xd8000000 - KERNELBASE;

	memory_node = find_devices("memory");
	if (memory_node == NULL) {
		printk(KERN_ERR "can't find memory node\n");
		abort();
	}

	/*
	 * Find out where physical memory is, and check that it
	 * starts at 0 and is contiguous.  It seems that RAM is
	 * always physically contiguous on Power Macintoshes,
	 * because MacOS can't cope if it isn't.
	 *
	 * Supporting discontiguous physical memory isn't hard,
	 * it just makes the virtual <-> physical mapping functions
	 * more complicated (or else you end up wasting space
	 * in mem_map).
	 */
	get_mem_prop("reg", &phys_mem);
	if (phys_mem.n_regions == 0)
		panic("No RAM??");
	a = phys_mem.regions[0].address;
	if (a != 0)
		panic("RAM doesn't start at physical address 0");
	if (__max_memory == 0 || __max_memory > ram_limit)
		__max_memory = ram_limit;
	if (phys_mem.regions[0].size >= __max_memory) {
		phys_mem.regions[0].size = __max_memory;
		phys_mem.n_regions = 1;
	}
	total = phys_mem.regions[0].size;

	if (phys_mem.n_regions > 1) {
		printk("RAM starting at 0x%x is not contiguous\n",
		       phys_mem.regions[1].address);
		printk("Using RAM from 0 to 0x%lx\n", total-1);
		phys_mem.n_regions = 1;
	}

	set_phys_avail(&phys_mem);

	return __va(total);
}
#endif /* CONFIG_ALL_PPC */
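
/*
 * The limits above in numbers: ram_limit = 0xf0000000 - KERNELBASE
 * (KERNELBASE = 0xc0000000) caps usable RAM at 0x30000000 = 768MB;
 * the prep/pmac value 0xd8000000 - KERNELBASE caps it at 384MB.
 */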
#if defined(CONFIG_ALL_PPC)
/*
 * This finds the amount of physical ram and does necessary
 * setup for prep.  This is pretty architecture specific so
 * this will likely stay separate from the pmac.
 * -- Cort
 */
unsigned long __init *prep_find_end_of_memory(void)
{
	unsigned long total;
	total = res->TotalMemory;

	if (total == 0 )
	{
		/*
		 * I need a way to probe the amount of memory if the residual
		 * data doesn't contain it. -- Cort
		 */
		printk("Ramsize from residual data was 0 -- Probing for value\n");
		total = 0x02000000;
		printk("Ramsize default to be %ldM\n", total>>20);
	}

	mem_pieces_append(&phys_mem, 0, total);
	set_phys_avail(&phys_mem);

	return (__va(total));
}
#endif /* defined(CONFIG_ALL_PPC) */
#if defined(CONFIG_GEMINI)
unsigned long __init *gemini_find_end_of_memory(void)
{
	unsigned long total, *ret;
	unsigned char reg;

	reg = readb(GEMINI_MEMCFG);
	total = ((1<<((reg & 0x7) - 1)) *
		 (8<<((reg >> 3) & 0x7)));
	total *= (1024*1024);
	phys_mem.regions[0].address = 0;
	phys_mem.regions[0].size = total;
	phys_mem.n_regions = 1;

	ret = __va(phys_mem.regions[0].size);
	set_phys_avail(&phys_mem);
	return ret;
}
#endif /* defined(CONFIG_GEMINI) */
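
/*
 * One reading of the MEMCFG formula (illustrative): with reg = 0x0a,
 * (reg & 0x7) = 2 gives 1<<1 = 2, ((reg >> 3) & 0x7) = 1 gives
 * 8<<1 = 16, so total = 2 * 16 = 32MB.  The exact field meanings are
 * a property of the Gemini memory controller, not spelled out here.
 */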
#ifdef CONFIG_APUS
#define HARDWARE_MAPPED_SIZE (512*1024)
unsigned long __init *apus_find_end_of_memory(void)
{
	int shadow = 0;

	/* The memory size reported by ADOS excludes the 512KB
	   reserved for PPC exception registers and possibly 512KB
	   containing a shadow of the ADOS ROM. */
	{
		unsigned long size = memory[0].size;

		/* If 2MB aligned, size was probably user
		   specified. We can't tell anything about shadowing
		   in this case so skip shadow assignment. */
		if (0 != (size & 0x1fffff)) {
			/* Align to 512KB to ensure correct handling
			   of both memfile and system specified
			   sizes. */
			size = ((size+0x0007ffff) & 0xfff80000);
			/* If memory is 1MB aligned, assume
			   shadowing. */
			shadow = !(size & 0x80000);
		}

		/* Add the chunk that ADOS does not see by aligning
		   the size to the nearest 2MB limit upwards. */
		memory[0].size = ((size+0x001fffff) & 0xffe00000);
	}

	/* Now register the memory block. */
	mem_pieces_append(&phys_mem, memory[0].addr, memory[0].size);
	set_phys_avail(&phys_mem);

	/* Remove the memory chunks that are controlled by special
	   hardware. */
	{
		unsigned long top = memory[0].addr + memory[0].size;

		/* Remove the upper 512KB if it contains a shadow of
		   the ADOS ROM. FIXME: It might be possible to
		   disable this shadow HW. Check the booter. */
		if (shadow) {
			top -= HARDWARE_MAPPED_SIZE;
			mem_pieces_remove(&phys_avail, top,
					  HARDWARE_MAPPED_SIZE, 0);
		}

		/* Remove the upper 512KB where the PPC exception
		   vectors are mapped. */
		top -= HARDWARE_MAPPED_SIZE;
#if 0
		/* This would be neat, but it breaks on A3000 machines!? */
		mem_pieces_remove(&phys_avail, top, 16384, 0);
#else
		mem_pieces_remove(&phys_avail, top, HARDWARE_MAPPED_SIZE, 0);
#endif
	}

	/* Linux/APUS only handles one block of memory -- the one on
	   the PowerUP board. Other system memory is horribly slow in
	   comparison. The user can use other memory for swapping
	   using the z2ram device. */
	return __va(memory[0].addr + memory[0].size);
}
#endif /* CONFIG_APUS */
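
/*
 * Alignment arithmetic (worked example, illustrative): an ADOS-reported
 * size of 0x01f40000 is not 2MB aligned, so it is rounded up to the
 * 512KB boundary 0x01f80000; shadow = !(size & 0x80000) is then 0
 * (the 512KB bit is set, i.e. not 1MB aligned), and rounding up to
 * 2MB registers 0x02000000 (32MB) as the full block.
 */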
/*
 * Initialize the hash table and patch the instructions in head.S.
 */
static void __init hash_init(void)
{
	int Hash_bits;
	unsigned long h, ramsize;

	extern unsigned int hash_page_patch_A[], hash_page_patch_B[],
		hash_page_patch_C[], hash_page[];

	if ( ppc_md.progress ) ppc_md.progress("hash:enter", 0x105);
	/*
	 * Allow 64k of hash table for every 16MB of memory,
	 * up to a maximum of 2MB.
	 */
	ramsize = (ulong)end_of_DRAM - KERNELBASE;
	for (h = 64<<10; h < ramsize / 256 && h < 2<<20; h *= 2)
		;
	Hash_size = h;
#ifdef CONFIG_PPC64BRIDGE
	Hash_mask = (h >> 7) - 1;
#else
	Hash_mask = (h >> 6) - 1;
#endif
1394 switch (_get_PVR()>>16) {
1402 /* on 601/4 let things be */
1406 if ( ppc_md
.progress
) ppc_md
.progress("hash:find piece", 0x322);
1407 /* Find some memory for the hash table. */
1409 Hash
= mem_pieces_find(Hash_size
, Hash_size
);
1413 printk("Total memory = %ldMB; using %ldkB for hash table (at %p)\n",
1414 ramsize
>> 20, Hash_size
>> 10, Hash
);
1417 if ( ppc_md
.progress
) ppc_md
.progress("hash:patch", 0x345);
1418 Hash_end
= (PTE
*) ((unsigned long)Hash
+ Hash_size
);
1419 /*__clear_user(Hash, Hash_size);*/
1422 * Patch up the instructions in head.S:hash_page
1425 Hash_bits
= ffz(~Hash_size
) - 7;
1427 Hash_bits
= ffz(~Hash_size
) - 6;
1429 hash_page_patch_A
[0] = (hash_page_patch_A
[0] & ~0xffff)
1430 | (__pa(Hash
) >> 16);
1431 hash_page_patch_A
[1] = (hash_page_patch_A
[1] & ~0x7c0)
1432 | ((26 - Hash_bits
) << 6);
1435 hash_page_patch_A
[2] = (hash_page_patch_A
[2] & ~0x7c0)
1436 | ((26 - Hash_bits
) << 6);
1437 hash_page_patch_B
[0] = (hash_page_patch_B
[0] & ~0xffff)
1439 | (Hash_mask
>> 11);
1441 | (Hash_mask
>> 10);
1443 hash_page_patch_C
[0] = (hash_page_patch_C
[0] & ~0xffff)
1445 | (Hash_mask
>> 11);
1447 | (Hash_mask
>> 10);
1449 #if 0 /* see hash_page in head.S, note also patch_C ref below */
1450 hash_page_patch_D
[0] = (hash_page_patch_D
[0] & ~0xffff)
1451 | (Hash_mask
>> 10);
1454 * Ensure that the locations we've patched have been written
1455 * out from the data cache and invalidated in the instruction
1456 * cache, on those machines with split caches.
1458 flush_icache_range((unsigned long) &hash_page_patch_A
[0],
1459 (unsigned long) &hash_page_patch_C
[1]);
1464 * Put a blr (procedure return) instruction at the
1465 * start of hash_page, since we can still get DSI
1466 * exceptions on a 603.
1468 hash_page
[0] = 0x4e800020;
1469 flush_icache_range((unsigned long) &hash_page
[0],
1470 (unsigned long) &hash_page
[1]);
1472 if ( ppc_md
.progress
) ppc_md
.progress("hash:done", 0x205);
#elif defined(CONFIG_8xx)
/*
 * This is a big hack right now, but it may turn into something real
 * someday.
 *
 * For the 8xx boards (at this time anyway), there is nothing to initialize
 * associated with the PROM.  Rather than include all of the prom.c
 * functions in the image just to get prom_init, all we really need right
 * now is the initialization of the physical memory region.
 */
unsigned long __init *m8xx_find_end_of_memory(void)
{
	bd_t *binfo;
	unsigned long *ret;
	extern unsigned char __res[];

	binfo = (bd_t *)__res;

	phys_mem.regions[0].address = 0;
	phys_mem.regions[0].size = binfo->bi_memsize;
	phys_mem.n_regions = 1;

	ret = __va(phys_mem.regions[0].address +
		   phys_mem.regions[0].size);

	set_phys_avail(&phys_mem);
	return ret;
}
#endif /* !CONFIG_4xx && !CONFIG_8xx */
#ifdef CONFIG_4xx
/*
 * Return the virtual address representing the top of physical RAM
 * on the Oak board.
 */
unsigned long __init *
oak_find_end_of_memory(void)
{
	extern unsigned char __res[];

	unsigned long *ret;
	bd_t *bip = (bd_t *)__res;

	phys_mem.regions[0].address = 0;
	phys_mem.regions[0].size = bip->bi_memsize;
	phys_mem.n_regions = 1;

	ret = __va(phys_mem.regions[0].address +
		   phys_mem.regions[0].size);

	set_phys_avail(&phys_mem);
	return ret;
}
#endif /* CONFIG_4xx */