/*
 *  linux/mm/page_alloc.c
 *
 *  Copyright (C) 1991, 1992, 1993, 1994  Linus Torvalds
 *  Swap reorganised 29.12.95, Stephen Tweedie
 */
8 #include <linux/config.h>
10 #include <linux/sched.h>
11 #include <linux/head.h>
12 #include <linux/kernel.h>
13 #include <linux/kernel_stat.h>
14 #include <linux/errno.h>
15 #include <linux/string.h>
16 #include <linux/stat.h>
17 #include <linux/swap.h>
19 #include <linux/swapctl.h>
20 #include <linux/interrupt.h>
21 #include <linux/init.h>
22 #include <linux/pagemap.h>
25 #include <asm/system.h> /* for cli()/sti() */
26 #include <asm/uaccess.h> /* for copy_to/from_user */
27 #include <asm/bitops.h>
28 #include <asm/pgtable.h>
29 #include <asm/spinlock.h>
/* Free swap entries remaining, and pages currently on the buddy free lists.
   Both are updated under page_alloc_lock (see free_pages_ok/RMQUEUE). */
int nr_swap_pages = 0;
int nr_free_pages = 0;
/*
 * Free area management
 *
 * The free_area_list arrays point to the queue heads of the free areas
 * of different sizes
 */
/* the AP+ needs to allocate 8MB contiguous, aligned chunks of ram
   for the ring buffers */
/* NOTE(review): the extracted text carried both defines unconditionally;
   the CONFIG_AP1000 guard below is reconstructed -- confirm against the
   original tree. */
#ifdef CONFIG_AP1000
#define NR_MEM_LISTS 12
#else
#define NR_MEM_LISTS 6
#endif
49 /* The start of this MUST match the start of "struct page" */
50 struct free_area_struct
{
56 #define memory_head(x) ((struct page *)(x))
58 static struct free_area_struct free_area
[NR_MEM_LISTS
];
60 static inline void init_mem_queue(struct free_area_struct
* head
)
62 head
->next
= memory_head(head
);
63 head
->prev
= memory_head(head
);
66 static inline void add_mem_queue(struct free_area_struct
* head
, struct page
* entry
)
68 struct page
* next
= head
->next
;
70 entry
->prev
= memory_head(head
);
76 static inline void remove_mem_queue(struct page
* entry
)
78 struct page
* next
= entry
->next
;
79 struct page
* prev
= entry
->prev
;
/*
 * Free_page() adds the page to the free lists. This is optimized for
 * fast normal cases (no error jumps taken normally).
 *
 * The way to optimize jumps for gcc-2.2.2 is to:
 *  - select the "normal" case and put it inside the if () { XXX }
 *  - no else-statements if you can avoid them
 *
 * With the above two rules, you get a straight-line execution path
 * for the normal case, giving better asm-code.
 */

/*
 * Buddy system. Hairy. You really aren't expected to understand this
 *
 * Hint: -mask = 1+~mask
 */
101 spinlock_t page_alloc_lock
= SPIN_LOCK_UNLOCKED
;
104 * This routine is used by the kernel swap daemon to determine
105 * whether we have "enough" free pages. It is fairly arbitrary,
106 * having a low-water and high-water mark.
109 * 0 - urgent need for memory
110 * 1 - need some memory, but do it slowly in the background
111 * 2 - no need to even think about it.
113 int free_memory_available(void)
115 static int available
= 1;
117 if (nr_free_pages
< freepages
.low
) {
122 if (nr_free_pages
> freepages
.high
) {
130 static inline void free_pages_ok(unsigned long map_nr
, unsigned long order
)
132 struct free_area_struct
*area
= free_area
+ order
;
133 unsigned long index
= map_nr
>> (1 + order
);
134 unsigned long mask
= (~0UL) << order
;
137 spin_lock_irqsave(&page_alloc_lock
, flags
);
139 #define list(x) (mem_map+(x))
142 nr_free_pages
-= mask
;
143 while (mask
+ (1 << (NR_MEM_LISTS
-1))) {
144 if (!test_and_change_bit(index
, area
->map
))
146 remove_mem_queue(list(map_nr
^ -mask
));
152 add_mem_queue(area
, list(map_nr
));
156 spin_unlock_irqrestore(&page_alloc_lock
, flags
);
159 void __free_page(struct page
*page
)
161 if (!PageReserved(page
) && atomic_dec_and_test(&page
->count
)) {
162 if (PageSwapCache(page
))
163 panic ("Freeing swap cache page");
164 free_pages_ok(page
->map_nr
, 0);
167 if (PageSwapCache(page
) && atomic_read(&page
->count
) == 1)
168 printk(KERN_WARNING
"VM: Releasing swap cache page at %p",
169 __builtin_return_address(0));
172 void free_pages(unsigned long addr
, unsigned long order
)
174 unsigned long map_nr
= MAP_NR(addr
);
176 if (map_nr
< max_mapnr
) {
177 mem_map_t
* map
= mem_map
+ map_nr
;
178 if (PageReserved(map
))
180 if (atomic_dec_and_test(&map
->count
)) {
181 if (PageSwapCache(map
))
182 panic ("Freeing swap cache pages");
183 free_pages_ok(map_nr
, order
);
186 if (PageSwapCache(map
) && atomic_read(&map
->count
) == 1)
188 "VM: Releasing swap cache pages at %p",
189 __builtin_return_address(0));
/*
 * Some ugly macros to speed up __get_free_pages()..
 */
/* Toggle the buddy-pair bit for 'index' at the given order. */
#define MARK_USED(index, order, area) \
	change_bit((index) >> (1+(order)), (area)->map)
/* Is this page usable for ISA DMA? */
#define CAN_DMA(x) (PageDMA(x))
/* Kernel virtual address of mem_map entry number x. */
#define ADDRESS(x) (PAGE_OFFSET + ((x) << PAGE_SHIFT))
/*
 * Scan the free lists from 'order' upwards for a block (optionally one
 * that CAN_DMA), unlink it, split it down with EXPAND() and return its
 * address from the ENCLOSING function (note the embedded 'return').
 * Caller holds page_alloc_lock with IRQs saved in 'flags'.
 * NOTE(review): the list-advance statements and closing lines were
 * dropped in extraction and are reconstructed.
 */
#define RMQUEUE(order, dma) \
do { struct free_area_struct * area = free_area+order; \
     unsigned long new_order = order; \
	do { struct page *prev = memory_head(area), *ret = prev->next; \
		while (memory_head(area) != ret) { \
			if (!dma || CAN_DMA(ret)) { \
				unsigned long map_nr = ret->map_nr; \
				(prev->next = ret->next)->prev = prev; \
				MARK_USED(map_nr, new_order, area); \
				nr_free_pages -= 1 << order; \
				EXPAND(ret, map_nr, order, new_order, area); \
				spin_unlock_irqrestore(&page_alloc_lock, flags); \
				return ADDRESS(map_nr); \
			} \
			prev = ret; \
			ret = ret->next; \
		} \
		new_order++; area++; \
	} while (new_order < NR_MEM_LISTS); \
} while (0)
/*
 * Split a 2^high block down to 2^low, queueing the unused upper buddy
 * half on the next-lower free list at each step, then initialise the
 * final page (count 1, initial age).
 * NOTE(review): the index/map advance and closing lines were dropped in
 * extraction and are reconstructed.
 */
#define EXPAND(map,index,low,high,area) \
do { unsigned long size = 1 << high; \
	while (high > low) { \
		area--; high--; size >>= 1; \
		add_mem_queue(area, map); \
		MARK_USED(index, high, area); \
		index += size; \
		map += size; \
	} \
	atomic_set(&map->count, 1); \
	map->age = PAGE_INITIAL_AGE; \
} while (0)
234 unsigned long __get_free_pages(int gfp_mask
, unsigned long order
)
238 if (order
>= NR_MEM_LISTS
)
241 if (gfp_mask
& __GFP_WAIT
) {
242 if (in_interrupt()) {
243 static int count
= 0;
245 printk("gfp called nonatomically from interrupt %p\n",
246 __builtin_return_address(0));
251 if (freepages
.min
> nr_free_pages
) {
253 freed
= try_to_free_pages(gfp_mask
, SWAP_CLUSTER_MAX
);
255 * Low priority (user) allocations must not
256 * succeed if we didn't have enough memory
257 * and we couldn't get more..
259 if (!freed
&& !(gfp_mask
& (__GFP_MED
| __GFP_HIGH
)))
263 spin_lock_irqsave(&page_alloc_lock
, flags
);
264 RMQUEUE(order
, (gfp_mask
& GFP_DMA
));
265 spin_unlock_irqrestore(&page_alloc_lock
, flags
);
268 * If we failed to find anything, we'll return NULL, but we'll
269 * wake up kswapd _now_ ad even wait for it synchronously if
270 * we can.. This way we'll at least make some forward progress
273 wake_up(&kswapd_wait
);
274 if (gfp_mask
& __GFP_WAIT
)
281 * Show free area list (used inside shift_scroll-lock stuff)
282 * We also calculate the percentage fragmentation. We do this by counting the
283 * memory on each free list with the exception of the first item on the list.
285 void show_free_areas(void)
287 unsigned long order
, flags
;
288 unsigned long total
= 0;
290 printk("Free pages: %6dkB\n ( ",nr_free_pages
<<(PAGE_SHIFT
-10));
291 printk("Free: %d (%d %d %d)\n",
296 spin_lock_irqsave(&page_alloc_lock
, flags
);
297 for (order
=0 ; order
< NR_MEM_LISTS
; order
++) {
299 unsigned long nr
= 0;
300 for (tmp
= free_area
[order
].next
; tmp
!= memory_head(free_area
+order
) ; tmp
= tmp
->next
) {
303 total
+= nr
* ((PAGE_SIZE
>>10) << order
);
304 printk("%lu*%lukB ", nr
, (unsigned long)((PAGE_SIZE
>>10) << order
));
306 spin_unlock_irqrestore(&page_alloc_lock
, flags
);
307 printk("= %lukB)\n", total
);
308 #ifdef SWAP_CACHE_INFO
309 show_swap_cache_info();
313 #define LONG_ALIGN(x) (((x)+(sizeof(long))-1)&~((sizeof(long))-1))
316 * set up the free-area data structures:
317 * - mark all pages reserved
318 * - mark all memory queues empty
319 * - clear the memory bitmaps
321 unsigned long __init
free_area_init(unsigned long start_mem
, unsigned long end_mem
)
324 unsigned long mask
= PAGE_MASK
;
328 * Select nr of pages we try to keep free for important stuff
329 * with a minimum of 10 pages and a maximum of 256 pages, so
330 * that we don't waste too much memory on large systems.
331 * This is fairly arbitrary, but based on some behaviour
334 i
= (end_mem
- PAGE_OFFSET
) >> (PAGE_SHIFT
+7);
340 freepages
.low
= i
* 2;
341 freepages
.high
= i
* 3;
342 mem_map
= (mem_map_t
*) LONG_ALIGN(start_mem
);
343 p
= mem_map
+ MAP_NR(end_mem
);
344 start_mem
= LONG_ALIGN((unsigned long) p
);
345 memset(mem_map
, 0, start_mem
- (unsigned long) mem_map
);
348 atomic_set(&p
->count
, 0);
349 p
->flags
= (1 << PG_DMA
) | (1 << PG_reserved
);
350 p
->map_nr
= p
- mem_map
;
351 } while (p
> mem_map
);
353 for (i
= 0 ; i
< NR_MEM_LISTS
; i
++) {
354 unsigned long bitmap_size
;
355 init_mem_queue(free_area
+i
);
357 end_mem
= (end_mem
+ ~mask
) & mask
;
358 bitmap_size
= (end_mem
- PAGE_OFFSET
) >> (PAGE_SHIFT
+ i
);
359 bitmap_size
= (bitmap_size
+ 7) >> 3;
360 bitmap_size
= LONG_ALIGN(bitmap_size
);
361 free_area
[i
].map
= (unsigned int *) start_mem
;
362 memset((void *) start_mem
, 0, bitmap_size
);
363 start_mem
+= bitmap_size
;
/*
 * The tests may look silly, but it essentially makes sure that
 * no other process did a swap-in on us just as we were waiting.
 *
 * Also, don't bother to add to the swap cache if this page-in
 * was due to a write access.
 */
375 void swap_in(struct task_struct
* tsk
, struct vm_area_struct
* vma
,
376 pte_t
* page_table
, unsigned long entry
, int write_access
)
379 struct page
*page_map
;
381 page_map
= read_swap_cache(entry
);
383 if (pte_val(*page_table
) != entry
) {
385 free_page_and_swap_cache(page_address(page_map
));
389 set_pte(page_table
, BAD_PAGE
);
395 page
= page_address(page_map
);
400 if (!write_access
|| is_page_shared(page_map
)) {
401 set_pte(page_table
, mk_pte(page
, vma
->vm_page_prot
));
405 /* The page is unshared, and we want write access. In this
406 case, it is safe to tear down the swap cache and give the
407 page over entirely to this process. */
409 delete_from_swap_cache(page_map
);
410 set_pte(page_table
, pte_mkwrite(pte_mkdirty(mk_pte(page
, vma
->vm_page_prot
))));