/*
 *  linux/mm/page_alloc.c
 *
 *  Copyright (C) 1991, 1992, 1993, 1994  Linus Torvalds
 *  Swap reorganised 29.12.95, Stephen Tweedie
 */
#include <linux/config.h>
#include <linux/mm.h>
#include <linux/sched.h>
#include <linux/head.h>
#include <linux/kernel.h>
#include <linux/kernel_stat.h>
#include <linux/errno.h>
#include <linux/string.h>
#include <linux/stat.h>
#include <linux/swap.h>
#include <linux/fs.h>
#include <linux/swapctl.h>
#include <linux/interrupt.h>
#include <linux/init.h>
#include <linux/pagemap.h>

#include <asm/dma.h>
#include <asm/system.h> /* for cli()/sti() */
#include <asm/uaccess.h> /* for copy_to/from_user */
#include <asm/bitops.h>
#include <asm/pgtable.h>
#include <asm/spinlock.h>
int nr_swap_pages = 0;
int nr_free_pages = 0;
/*
 * Free area management
 *
 * The free_area_list arrays point to the queue heads of the free areas
 * of different sizes
 */
#if CONFIG_AP1000
/* the AP+ needs to allocate 8MB contiguous, aligned chunks of ram
   for the ring buffers */
#define NR_MEM_LISTS 12
#else
#define NR_MEM_LISTS 6
#endif
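/*
 * NR_MEM_LISTS fixes the largest block the buddy allocator will hand out:
 * 2^(NR_MEM_LISTS-1) pages.  With 4kB pages (an assumption; PAGE_SIZE is
 * per-architecture) that is 128kB for the normal 6 lists and 8MB for the
 * 12 lists used on the AP1000, which is what the ring buffers above need.
 */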
/* The start of this MUST match the start of "struct page" */
struct free_area_struct {
        struct page *next;
        struct page *prev;
        unsigned int * map;
};

#define memory_head(x) ((struct page *)(x))
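/*
 * Because next/prev come first in both structures, the list head itself can
 * be cast to a struct page and used as a sentinel node: a queue is empty
 * when head->next points back at memory_head(head).
 */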
static struct free_area_struct free_area[NR_MEM_LISTS];
static inline void init_mem_queue(struct free_area_struct * head)
{
        head->next = memory_head(head);
        head->prev = memory_head(head);
}
static inline void add_mem_queue(struct free_area_struct * head, struct page * entry)
{
        struct page * next = head->next;

        entry->prev = memory_head(head);
        entry->next = next;
        next->prev = entry;
        head->next = entry;
}
static inline void remove_mem_queue(struct page * entry)
{
        struct page * next = entry->next;
        struct page * prev = entry->prev;
        next->prev = prev;
        prev->next = next;
}
/*
 * Free_page() adds the page to the free lists. This is optimized for
 * fast normal cases (no error jumps taken normally).
 *
 * The way to optimize jumps for gcc-2.2.2 is to:
 *  - select the "normal" case and put it inside the if () { XXX }
 *  - no else-statements if you can avoid them
 *
 * With the above two rules, you get a straight-line execution path
 * for the normal case, giving better asm-code.
 */
/*
 * Buddy system. Hairy. You really aren't expected to understand this
 *
 * Hint: -mask = 1+~mask
 */
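/*
 * Worked example of the hint: with order = 2, mask = ~0UL << 2 = ...111100,
 * so -mask = 1+~mask = 100 in binary = 1 << 2.  free_pages_ok() below uses
 * this to find a block's buddy: map_nr ^ -mask flips bit "order" of the
 * index, e.g. the buddy of the 4-page block at map_nr 8 is the one at 12.
 */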
spinlock_t page_alloc_lock = SPIN_LOCK_UNLOCKED;
/*
 * This routine is used by the kernel swap daemon to determine
 * whether we have "enough" free pages. It is fairly arbitrary,
 * having a low-water and high-water mark.
 *
 * This returns:
 *  0 - urgent need for memory
 *  1 - need some memory, but do it slowly in the background
 *  2 - no need to even think about it.
 */
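/*
 * Between the two watermarks the static "available" flag provides
 * hysteresis: the function keeps answering 0 after dipping below
 * freepages.low, and 1 after climbing back above freepages.high.
 */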
int free_memory_available(void)
{
        static int available = 1;

        if (nr_free_pages < freepages.low) {
                available = 0;
                return 0;
        }

        if (nr_free_pages > freepages.high) {
                available = 1;
                return 2;
        }

        return available;
}
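/*
 * free_pages_ok() is the "free" half of the buddy system.  Each bit in
 * free_area[order].map covers one buddy pair and is set when exactly one of
 * the pair is free.  Freeing toggles the pair's bit: if it was clear, the
 * buddy is still in use and the block is simply queued at this order; if it
 * was set, the buddy is pulled off its list and the merged block is retried
 * one order higher.  For example, freeing order-0 page 5 while page 4 is
 * already free produces a 2-page block at map_nr 4, which is then checked
 * against its order-1 buddy at map_nr 6.
 */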
static inline void free_pages_ok(unsigned long map_nr, unsigned long order)
{
        struct free_area_struct *area = free_area + order;
        unsigned long index = map_nr >> (1 + order);
        unsigned long mask = (~0UL) << order;
        unsigned long flags;

        spin_lock_irqsave(&page_alloc_lock, flags);

#define list(x) (mem_map+(x))

        map_nr &= mask;
        nr_free_pages -= mask;
        while (mask + (1 << (NR_MEM_LISTS-1))) {
                if (!test_and_change_bit(index, area->map))
                        break;
                remove_mem_queue(list(map_nr ^ -mask));
                mask <<= 1;
                area++;
                index >>= 1;
                map_nr &= mask;
        }
        add_mem_queue(area, list(map_nr));

#undef list

        spin_unlock_irqrestore(&page_alloc_lock, flags);
}
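/*
 * __free_page() and free_pages() drop one reference; the memory only goes
 * back to the buddy lists once the count reaches zero, and pages marked
 * PG_reserved are never freed at all.
 */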
void __free_page(struct page *page)
{
        if (!PageReserved(page) && atomic_dec_and_test(&page->count)) {
                if (PageSwapCache(page))
                        panic ("Freeing swap cache page");
                free_pages_ok(page->map_nr, 0);
                return;
        }
        if (PageSwapCache(page) && atomic_read(&page->count) == 1)
                printk(KERN_WARNING "VM: Releasing swap cache page at %p",
                        __builtin_return_address(0));
}
void free_pages(unsigned long addr, unsigned long order)
{
        unsigned long map_nr = MAP_NR(addr);

        if (map_nr < max_mapnr) {
                mem_map_t * map = mem_map + map_nr;
                if (PageReserved(map))
                        return;
                if (atomic_dec_and_test(&map->count)) {
                        if (PageSwapCache(map))
                                panic ("Freeing swap cache pages");
                        free_pages_ok(map_nr, order);
                        return;
                }
                if (PageSwapCache(map) && atomic_read(&map->count) == 1)
                        printk(KERN_WARNING
                                "VM: Releasing swap cache pages at %p",
                                __builtin_return_address(0));
        }
}
/*
 * Some ugly macros to speed up __get_free_pages()..
 */
#define MARK_USED(index, order, area) \
        change_bit((index) >> (1+(order)), (area)->map)
#define CAN_DMA(x) (PageDMA(x))
#define ADDRESS(x) (PAGE_OFFSET + ((x) << PAGE_SHIFT))
#define RMQUEUE(order, dma) \
do { struct free_area_struct * area = free_area+order; \
     unsigned long new_order = order; \
        do { struct page *prev = memory_head(area), *ret = prev->next; \
                while (memory_head(area) != ret) { \
                        if (!dma || CAN_DMA(ret)) { \
                                unsigned long map_nr = ret->map_nr; \
                                (prev->next = ret->next)->prev = prev; \
                                MARK_USED(map_nr, new_order, area); \
                                nr_free_pages -= 1 << order; \
                                EXPAND(ret, map_nr, order, new_order, area); \
                                spin_unlock_irqrestore(&page_alloc_lock, flags); \
                                return ADDRESS(map_nr); \
                        } \
                        prev = ret; \
                        ret = ret->next; \
                } \
                new_order++; area++; \
        } while (new_order < NR_MEM_LISTS); \
} while (0)
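/*
 * RMQUEUE scans the free list of the requested order and then each larger
 * order until it finds a block (skipping non-PageDMA pages when a DMA
 * buffer was asked for).  It unlinks the block, toggles its buddy-pair bit,
 * charges nr_free_pages for only the 1 << order pages actually handed out,
 * lets EXPAND give any excess back to the smaller lists, and returns
 * straight out of __get_free_pages() with the block's kernel address.
 */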
#define EXPAND(map,index,low,high,area) \
do { unsigned long size = 1 << high; \
        while (high > low) { \
                area--; high--; size >>= 1; \
                add_mem_queue(area, map); \
                MARK_USED(index, high, area); \
                index += size; \
                map += size; \
        } \
        atomic_set(&map->count, 1); \
        map->age = PAGE_INITIAL_AGE; \
} while (0)
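/*
 * EXPAND splits an oversized block back down to the requested order: each
 * pass returns the lower half to the next-smaller free list and keeps
 * splitting the upper half.  E.g. an order-0 request satisfied from an
 * order-2 block puts a 2-page piece back on the order-1 list and one page
 * back on the order-0 list, then initialises the remaining page
 * (count = 1, age = PAGE_INITIAL_AGE) for the caller.
 */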
unsigned long __get_free_pages(int gfp_mask, unsigned long order)
{
        unsigned long flags;

        if (order >= NR_MEM_LISTS)
                goto nopage;

        if (gfp_mask & __GFP_WAIT) {
                if (in_interrupt()) {
                        static int count = 0;
                        if (++count < 5) {
                                printk("gfp called nonatomically from interrupt %p\n",
                                        __builtin_return_address(0));
                        }
                        goto nopage;
                }

                if (freepages.min > nr_free_pages) {
                        int freed;
                        freed = try_to_free_pages(gfp_mask, SWAP_CLUSTER_MAX);
                        /*
                         * Low priority (user) allocations must not
                         * succeed if we didn't have enough memory
                         * and we couldn't get more..
                         */
                        if (!freed && !(gfp_mask & (__GFP_MED | __GFP_HIGH)))
                                goto nopage;
                }
        }
        spin_lock_irqsave(&page_alloc_lock, flags);
        RMQUEUE(order, (gfp_mask & GFP_DMA));
        spin_unlock_irqrestore(&page_alloc_lock, flags);

        /*
         * If we failed to find anything, we'll return NULL, but we'll
         * wake up kswapd _now_ and even wait for it synchronously if
         * we can.. This way we'll at least make some forward progress
         * over time.
         */
        wake_up(&kswapd_wait);
        if (gfp_mask & __GFP_WAIT)
                schedule();
nopage:
        return 0;
}
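/*
 * Typical use: __get_free_pages(GFP_KERNEL, 2) asks for four physically
 * contiguous pages and returns their kernel virtual address, or 0 when
 * nothing suitable could be found or reclaimed.
 */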
/*
 * Show free area list (used inside shift_scroll-lock stuff)
 * We also calculate the percentage fragmentation. We do this by counting the
 * memory on each free list with the exception of the first item on the list.
 */
void show_free_areas(void)
{
        unsigned long order, flags;
        unsigned long total = 0;

        printk("Free pages: %6dkB\n ( ",nr_free_pages<<(PAGE_SHIFT-10));
        printk("Free: %d (%d %d %d)\n",
                nr_free_pages,
                freepages.min,
                freepages.low,
                freepages.high);
        spin_lock_irqsave(&page_alloc_lock, flags);
        for (order=0 ; order < NR_MEM_LISTS; order++) {
                struct page * tmp;
                unsigned long nr = 0;
                for (tmp = free_area[order].next ; tmp != memory_head(free_area+order) ; tmp = tmp->next) {
                        nr ++;
                }
                total += nr * ((PAGE_SIZE>>10) << order);
                printk("%lu*%lukB ", nr, (unsigned long)((PAGE_SIZE>>10) << order));
        }
        spin_unlock_irqrestore(&page_alloc_lock, flags);
        printk("= %lukB)\n", total);
#ifdef SWAP_CACHE_INFO
        show_swap_cache_info();
#endif
}
#define LONG_ALIGN(x) (((x)+(sizeof(long))-1)&~((sizeof(long))-1))
/*
 * set up the free-area data structures:
 *  - mark all pages reserved
 *  - mark all memory queues empty
 *  - clear the memory bitmaps
 */
unsigned long __init free_area_init(unsigned long start_mem, unsigned long end_mem)
{
        mem_map_t * p;
        unsigned long mask = PAGE_MASK;
        unsigned long i;

        /*
         * Select nr of pages we try to keep free for important stuff
         * with a minimum of 10 pages and a maximum of 256 pages, so
         * that we don't waste too much memory on large systems.
         * This is fairly arbitrary, but based on some behaviour
         * analysis.
         */
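        /*
         * The shift below works out to roughly 1/128th of memory, expressed
         * in pages.  E.g. with 4kB pages (an assumption) a 64MB machine has
         * 16384 pages, giving i = 128 and thus freepages.min/low/high of
         * 128/256/384 pages.
         */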
        i = (end_mem - PAGE_OFFSET) >> (PAGE_SHIFT+7);
        if (i < 10)
                i = 10;
        if (i > 256)
                i = 256;
        freepages.min = i;
        freepages.low = i * 2;
        freepages.high = i * 3;
        mem_map = (mem_map_t *) LONG_ALIGN(start_mem);
        p = mem_map + MAP_NR(end_mem);
        start_mem = LONG_ALIGN((unsigned long) p);
        memset(mem_map, 0, start_mem - (unsigned long) mem_map);
        do {
                --p;
                atomic_set(&p->count, 0);
                p->flags = (1 << PG_DMA) | (1 << PG_reserved);
                p->map_nr = p - mem_map;
        } while (p > mem_map);

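        /*
         * One bitmap per order: free_area[i].map gets one bit per 2^i-page
         * block (rounded up to whole bytes and then long-aligned), carved
         * out of start_mem right after mem_map and cleared to "no buddy
         * free".
         */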
        for (i = 0 ; i < NR_MEM_LISTS ; i++) {
                unsigned long bitmap_size;
                init_mem_queue(free_area+i);
                mask += mask;
                end_mem = (end_mem + ~mask) & mask;
                bitmap_size = (end_mem - PAGE_OFFSET) >> (PAGE_SHIFT + i);
                bitmap_size = (bitmap_size + 7) >> 3;
                bitmap_size = LONG_ALIGN(bitmap_size);
                free_area[i].map = (unsigned int *) start_mem;
                memset((void *) start_mem, 0, bitmap_size);
                start_mem += bitmap_size;
        }
        return start_mem;
}
/*
 * The tests may look silly, but it essentially makes sure that
 * no other process did a swap-in on us just as we were waiting.
 *
 * Also, don't bother to add to the swap cache if this page-in
 * was due to a write access.
 */
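/*
 * In other words: read_swap_cache() may have slept, so before installing
 * anything the pte is re-checked against the original swap entry; if it no
 * longer matches, someone else already handled the fault and the extra
 * reference is simply dropped.
 */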
void swap_in(struct task_struct * tsk, struct vm_area_struct * vma,
        pte_t * page_table, unsigned long entry, int write_access)
{
        unsigned long page;
        struct page *page_map;

        page_map = read_swap_cache(entry);

        if (pte_val(*page_table) != entry) {
                if (page_map)
                        free_page_and_swap_cache(page_address(page_map));
                return;
        }
        if (!page_map) {
                set_pte(page_table, BAD_PAGE);
                swap_free(entry);
                oom(tsk);
                return;
        }

        page = page_address(page_map);
        vma->vm_mm->rss++;
        tsk->min_flt++;
        swap_free(entry);

        if (!write_access || is_page_shared(page_map)) {
                set_pte(page_table, mk_pte(page, vma->vm_page_prot));
                return;
        }

        /* The page is unshared, and we want write access.  In this
           case, it is safe to tear down the swap cache and give the
           page over entirely to this process. */

        delete_from_swap_cache(page_map);
        set_pte(page_table, pte_mkwrite(pte_mkdirty(mk_pte(page, vma->vm_page_prot))));
        return;
}