/*
 *  linux/mm/page_alloc.c
 *
 *  Copyright (C) 1991, 1992, 1993, 1994  Linus Torvalds
 *  Swap reorganised 29.12.95, Stephen Tweedie
 */
#include <linux/config.h>
#include <linux/mm.h>
#include <linux/sched.h>
#include <linux/head.h>
#include <linux/kernel.h>
#include <linux/kernel_stat.h>
#include <linux/errno.h>
#include <linux/string.h>
#include <linux/stat.h>
#include <linux/swap.h>
#include <linux/fs.h>
#include <linux/swapctl.h>
#include <linux/interrupt.h>
#include <linux/init.h>
#include <linux/pagemap.h>

#include <asm/dma.h>
#include <asm/system.h> /* for cli()/sti() */
#include <asm/uaccess.h> /* for copy_to/from_user */
#include <asm/bitops.h>
#include <asm/pgtable.h>
#include <asm/spinlock.h>
int nr_swap_pages = 0;
int nr_free_pages = 0;

/*
 * Free area management
 *
 * The free_area_list arrays point to the queue heads of the free areas
 * of different sizes
 */
#if CONFIG_AP1000
/* the AP+ needs to allocate 8MB contiguous, aligned chunks of ram
   for the ring buffers */
#define NR_MEM_LISTS 12
#else
#define NR_MEM_LISTS 6
#endif
/* The start of this MUST match the start of "struct page" */
struct free_area_struct {
	struct page *next;
	struct page *prev;
	unsigned int * map;
};

#define memory_head(x) ((struct page *)(x))

static struct free_area_struct free_area[NR_MEM_LISTS];
static inline void init_mem_queue(struct free_area_struct * head)
{
	head->next = memory_head(head);
	head->prev = memory_head(head);
}

static inline void add_mem_queue(struct free_area_struct * head, struct page * entry)
{
	struct page * next = head->next;

	entry->prev = memory_head(head);
	entry->next = next;
	next->prev = entry;
	head->next = entry;
}

static inline void remove_mem_queue(struct page * entry)
{
	struct page * next = entry->next;
	struct page * prev = entry->prev;
	next->prev = prev;
	prev->next = next;
}
/*
 * Free_page() adds the page to the free lists. This is optimized for
 * fast normal cases (no error jumps taken normally).
 *
 * The way to optimize jumps for gcc-2.2.2 is to:
 *  - select the "normal" case and put it inside the if () { XXX }
 *  - no else-statements if you can avoid them
 *
 * With the above two rules, you get a straight-line execution path
 * for the normal case, giving better asm-code.
 */

/*
 * Buddy system. Hairy. You really aren't expected to understand this
 *
 * Hint: -mask = 1+~mask
 */
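/*
 * Worked example of that hint (a sketch, assuming 32-bit longs and
 * order 2): mask = ~0UL << 2 = 0xfffffffc, so -mask = 1 + ~mask =
 * 0x00000004 = 1 << 2, i.e. the block size in pages at that order.
 * That is why "nr_free_pages -= mask" in free_pages_ok() below adds
 * 1 << order pages, and "map_nr ^ -mask" toggles the bit selecting
 * the buddy block at the current order.
 */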
spinlock_t page_alloc_lock = SPIN_LOCK_UNLOCKED;
/*
 * This routine is used by the kernel swap daemon to determine
 * whether we have "enough" free pages. It is fairly arbitrary,
 * having a low-water and high-water mark.
 *
 * This returns:
 *  0 - urgent need for memory
 *  1 - need some memory, but do it slowly in the background
 *  2 - no need to even think about it.
 */
int free_memory_available(void)
{
	static int available = 1;

	if (nr_free_pages < freepages.low) {
		available = 0;
		return 0;
	}

	if (nr_free_pages > freepages.high) {
		available = 1;
		return 2;
	}

	return available;
}
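/*
 * Illustration of the hysteresis above (example numbers only, assuming
 * free_area_init() below chose i = 128, i.e. freepages.low == 256 and
 * freepages.high == 384): once nr_free_pages drops below 256 this keeps
 * returning 0, even while the count sits in the 256-384 band, until it
 * rises above 384 and the answer flips to 2. Coming back down from
 * there, the 256-384 band yields 1 until the low mark is crossed again.
 */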
static inline void free_pages_ok(unsigned long map_nr, unsigned long order)
{
	struct free_area_struct *area = free_area + order;
	unsigned long index = map_nr >> (1 + order);
	unsigned long mask = (~0UL) << order;
	unsigned long flags;

	spin_lock_irqsave(&page_alloc_lock, flags);

#define list(x) (mem_map+(x))

	map_nr &= mask;
	nr_free_pages -= mask;
	while (mask + (1 << (NR_MEM_LISTS-1))) {
		if (!test_and_change_bit(index, area->map))
			break;
		remove_mem_queue(list(map_nr ^ -mask));
		mask <<= 1;
		area++;
		index >>= 1;
		map_nr &= mask;
	}
	add_mem_queue(area, list(map_nr));

#undef list

	spin_unlock_irqrestore(&page_alloc_lock, flags);
}
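/*
 * Example of the coalescing loop above (a sketch with made-up page
 * numbers): freeing map_nr 5 at order 0 computes index 5 >> 1 = 2,
 * the bit covering the buddy pair {4,5}. If test_and_change_bit()
 * finds that bit set, page 4 was already free: it is unlinked from
 * the order-0 list and the merged pair starting at map_nr 4 is tried
 * again one order up, and so on, until a buddy turns out to be busy
 * or the block has grown to order NR_MEM_LISTS-1.
 */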
void __free_page(struct page *page)
{
	if (!PageReserved(page) && atomic_dec_and_test(&page->count)) {
		if (PageSwapCache(page))
			panic ("Freeing swap cache page");
		free_pages_ok(page->map_nr, 0);
		return;
	}
	if (PageSwapCache(page) && atomic_read(&page->count) == 1)
		printk(KERN_WARNING "VM: Releasing swap cache page at %p",
			__builtin_return_address(0));
}
void free_pages(unsigned long addr, unsigned long order)
{
	unsigned long map_nr = MAP_NR(addr);

	if (map_nr < max_mapnr) {
		mem_map_t * map = mem_map + map_nr;
		if (PageReserved(map))
			return;
		if (atomic_dec_and_test(&map->count)) {
			if (PageSwapCache(map))
				panic ("Freeing swap cache pages");
			free_pages_ok(map_nr, order);
			return;
		}
		if (PageSwapCache(map) && atomic_read(&map->count) == 1)
			printk(KERN_WARNING
				"VM: Releasing swap cache pages at %p",
				__builtin_return_address(0));
	}
}
/*
 * Some ugly macros to speed up __get_free_pages()..
 */
#define MARK_USED(index, order, area) \
	change_bit((index) >> (1+(order)), (area)->map)
#define CAN_DMA(x) (PageDMA(x))
#define ADDRESS(x) (PAGE_OFFSET + ((x) << PAGE_SHIFT))
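/*
 * For illustration (example numbers, assuming 4kB pages so that
 * PAGE_SHIFT == 12): ADDRESS(3) is PAGE_OFFSET + 0x3000, the kernel
 * virtual address of page frame 3, and MARK_USED(20, 2, area) toggles
 * bit 20 >> 3 = 2 in the order-2 bitmap, the bit covering that
 * block's buddy pair.
 */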
#define RMQUEUE(order, dma) \
do { struct free_area_struct * area = free_area+order; \
     unsigned long new_order = order; \
	do { struct page *prev = memory_head(area), *ret = prev->next; \
		while (memory_head(area) != ret) { \
			if (!dma || CAN_DMA(ret)) { \
				unsigned long map_nr = ret->map_nr; \
				(prev->next = ret->next)->prev = prev; \
				MARK_USED(map_nr, new_order, area); \
				nr_free_pages -= 1 << order; \
				EXPAND(ret, map_nr, order, new_order, area); \
				spin_unlock_irqrestore(&page_alloc_lock, flags); \
				return ADDRESS(map_nr); \
			} \
			prev = ret; \
			ret = ret->next; \
		} \
		new_order++; area++; \
	} while (new_order < NR_MEM_LISTS); \
} while (0)
#define EXPAND(map,index,low,high,area) \
do { unsigned long size = 1 << high; \
	while (high > low) { \
		area--; high--; size >>= 1; \
		add_mem_queue(area, map); \
		MARK_USED(index, high, area); \
		index += size; \
		map += size; \
	} \
	atomic_set(&map->count, 1); \
	map->age = PAGE_INITIAL_AGE; \
} while (0)
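/*
 * Example of how RMQUEUE and EXPAND cooperate (a sketch with made-up
 * numbers): an order-1 request that only finds a free order-3 block at
 * map_nr 32 splits it twice. The first four pages (map_nr 32) go back
 * on the order-2 list, the next two (map_nr 36) on the order-1 list,
 * and the remaining two pages at map_nr 38 are handed to the caller.
 * Because EXPAND advances the "index" argument in place, RMQUEUE's
 * final ADDRESS(map_nr) already points at that last chunk.
 */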
unsigned long __get_free_pages(int gfp_mask, unsigned long order)
{
	unsigned long flags;

	if (order >= NR_MEM_LISTS)
		goto nopage;

	if (gfp_mask & __GFP_WAIT) {
		__check_locks(1);
		if (in_interrupt()) {
			static int count = 0;
			if (++count < 5) {
				printk("gfp called nonatomically from interrupt %p\n",
					__builtin_return_address(0));
			}
			goto nopage;
		}

		if (freepages.min > nr_free_pages) {
			int freed;
			freed = try_to_free_pages(gfp_mask, SWAP_CLUSTER_MAX);
			/*
			 * Low priority (user) allocations must not
			 * succeed if we didn't have enough memory
			 * and we couldn't get more..
			 */
			if (!freed && !(gfp_mask & (__GFP_MED | __GFP_HIGH)))
				goto nopage;
		}
	}
	spin_lock_irqsave(&page_alloc_lock, flags);
	RMQUEUE(order, (gfp_mask & GFP_DMA));
	spin_unlock_irqrestore(&page_alloc_lock, flags);
nopage:
	return 0;
}
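/*
 * Typical use from elsewhere in the kernel (a sketch only; the
 * variable name and the order chosen below are made up):
 *
 *	unsigned long buf = __get_free_pages(GFP_KERNEL, 2);
 *	if (!buf)
 *		return -ENOMEM;
 *	...
 *	free_pages(buf, 2);
 *
 * The order passed to free_pages() must match the one used for the
 * allocation, and a zero return here means the request could not be
 * satisfied.
 */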
/*
 * Show free area list (used inside shift_scroll-lock stuff)
 * We also calculate the percentage fragmentation. We do this by counting the
 * memory on each free list with the exception of the first item on the list.
 */
void show_free_areas(void)
{
	unsigned long order, flags;
	unsigned long total = 0;

	printk("Free pages: %6dkB\n ( ",nr_free_pages<<(PAGE_SHIFT-10));
	printk("Free: %d (%d %d %d)\n",
		nr_free_pages,
		freepages.min,
		freepages.low,
		freepages.high);
	spin_lock_irqsave(&page_alloc_lock, flags);
	for (order=0 ; order < NR_MEM_LISTS; order++) {
		struct page * tmp;
		unsigned long nr = 0;
		for (tmp = free_area[order].next ; tmp != memory_head(free_area+order) ; tmp = tmp->next) {
			nr ++;
		}
		total += nr * ((PAGE_SIZE>>10) << order);
		printk("%lu*%lukB ", nr, (unsigned long)((PAGE_SIZE>>10) << order));
	}
	spin_unlock_irqrestore(&page_alloc_lock, flags);
	printk("= %lukB)\n", total);
#ifdef SWAP_CACHE_INFO
	show_swap_cache_info();
#endif
}
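/*
 * The resulting console output looks roughly like this (the counts are
 * invented, assuming 4kB pages, NR_MEM_LISTS == 6 and the example
 * freepages values 128/256/384):
 *
 *	Free pages:    900kB
 *	 ( Free: 225 (128 256 384)
 *	33*4kB 16*8kB 8*16kB 4*32kB 2*64kB 2*128kB = 900kB)
 */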
#define LONG_ALIGN(x) (((x)+(sizeof(long))-1)&~((sizeof(long))-1))
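/*
 * LONG_ALIGN() rounds a value up to the next sizeof(long) boundary.
 * For example, with 4-byte longs, LONG_ALIGN(0x2001), LONG_ALIGN(0x2003)
 * and LONG_ALIGN(0x2004) all evaluate to 0x2004.
 */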
/*
 * set up the free-area data structures:
 *   - mark all pages reserved
 *   - mark all memory queues empty
 *   - clear the memory bitmaps
 */
__initfunc(unsigned long free_area_init(unsigned long start_mem, unsigned long end_mem))
{
	mem_map_t * p;
	unsigned long mask = PAGE_MASK;
	unsigned long i;

	/*
	 * Select nr of pages we try to keep free for important stuff
	 * with a minimum of 10 pages and a maximum of 256 pages, so
	 * that we don't waste too much memory on large systems.
	 * This is fairly arbitrary, but based on some behaviour
	 * analysis.
	 */
	i = (end_mem - PAGE_OFFSET) >> (PAGE_SHIFT+7);
	if (i < 10)
		i = 10;
	if (i > 256)
		i = 256;
	freepages.min = i;
	freepages.low = i * 2;
	freepages.high = i * 3;
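	/*
	 * Worked example (assuming 4kB pages, i.e. PAGE_SHIFT == 12):
	 * on a 64MB machine, i = 64MB >> 19 = 128, so freepages.min,
	 * .low and .high become 128, 256 and 384 pages (512kB, 1MB and
	 * 1.5MB). A 4MB machine would clamp to the minimum of 10 pages.
	 */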
	mem_map = (mem_map_t *) LONG_ALIGN(start_mem);
	p = mem_map + MAP_NR(end_mem);
	start_mem = LONG_ALIGN((unsigned long) p);
	memset(mem_map, 0, start_mem - (unsigned long) mem_map);
	do {
		--p;
		atomic_set(&p->count, 0);
		p->flags = (1 << PG_DMA) | (1 << PG_reserved);
		p->map_nr = p - mem_map;
	} while (p > mem_map);
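	/*
	 * The loop below carves one free-area bitmap per order out of
	 * start_mem. Rough sizes for illustration (64MB of memory, 4kB
	 * pages): order 0 needs 16384 bits = 2kB, order 1 needs 1kB,
	 * and so on, halving at each step, with each size rounded up
	 * to whole bytes and then long-aligned.
	 */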
	for (i = 0 ; i < NR_MEM_LISTS ; i++) {
		unsigned long bitmap_size;
		init_mem_queue(free_area+i);
		mask += mask;
		end_mem = (end_mem + ~mask) & mask;
		bitmap_size = (end_mem - PAGE_OFFSET) >> (PAGE_SHIFT + i);
		bitmap_size = (bitmap_size + 7) >> 3;
		bitmap_size = LONG_ALIGN(bitmap_size);
		free_area[i].map = (unsigned int *) start_mem;
		memset((void *) start_mem, 0, bitmap_size);
		start_mem += bitmap_size;
	}
	return start_mem;
}
/*
 * The tests may look silly, but they essentially make sure that
 * no other process did a swap-in on us just as we were waiting.
 *
 * Also, don't bother to add to the swap cache if this page-in
 * was due to a write access.
 */
void swap_in(struct task_struct * tsk, struct vm_area_struct * vma,
	pte_t * page_table, unsigned long entry, int write_access)
{
	unsigned long page;
	struct page *page_map;

	page_map = read_swap_cache(entry);

	if (pte_val(*page_table) != entry) {
		if (page_map)
			free_page_and_swap_cache(page_address(page_map));
		return;
	}
	if (!page_map) {
		set_pte(page_table, BAD_PAGE);
		swap_free(entry);
		oom(tsk);
		return;
	}

	page = page_address(page_map);
	vma->vm_mm->rss++;
	tsk->min_flt++;
	swap_free(entry);

	if (!write_access || is_page_shared(page_map)) {
		set_pte(page_table, mk_pte(page, vma->vm_page_prot));
		return;
	}

	/* The page is unshared, and we want write access.  In this
	   case, it is safe to tear down the swap cache and give the
	   page over entirely to this process. */

	delete_from_swap_cache(page_map);
	set_pte(page_table, pte_mkwrite(pte_mkdirty(mk_pte(page, vma->vm_page_prot))));
	return;
}