/*
 *  linux/mm/page_alloc.c
 *
 *  Copyright (C) 1991, 1992, 1993, 1994  Linus Torvalds
 *  Swap reorganised 29.12.95, Stephen Tweedie
 */

#include <linux/config.h>
#include <linux/mm.h>		/* for mem_map, MAP_NR(), struct page */
#include <linux/sched.h>
#include <linux/head.h>
#include <linux/kernel.h>
#include <linux/kernel_stat.h>
#include <linux/errno.h>
#include <linux/string.h>
#include <linux/stat.h>
#include <linux/swap.h>
#include <linux/swapctl.h>
#include <linux/interrupt.h>
#include <linux/init.h>
#include <linux/pagemap.h>

#include <asm/system.h> /* for cli()/sti() */
#include <asm/uaccess.h> /* for copy_to/from_user */
#include <asm/bitops.h>
#include <asm/pgtable.h>
#include <asm/spinlock.h>

int nr_swap_pages = 0;
int nr_free_pages = 0;

/*
 * Free area management
 *
 * The free_area_list arrays point to the queue heads of the free areas
 */

#if CONFIG_AP1000
/* the AP+ needs to allocate 8MB contiguous, aligned chunks of ram
   for the ring buffers */
#define NR_MEM_LISTS 12
#else
#define NR_MEM_LISTS 6
#endif
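
/*
 * Illustrative note (not from the original source), assuming 4kB pages:
 * NR_MEM_LISTS free lists allow allocations up to order NR_MEM_LISTS-1,
 * i.e. 2^5 pages = 128kB in the default configuration, and 2^11 pages
 * = 8MB when NR_MEM_LISTS is 12 for the AP+ ring buffers above.
 */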

/* The start of this MUST match the start of "struct page" */
struct free_area_struct {
	struct page *next;
	struct page *prev;
	unsigned int * map;
};

#define memory_head(x) ((struct page *)(x))

static struct free_area_struct free_area[NR_MEM_LISTS];

static inline void init_mem_queue(struct free_area_struct * head)
{
	head->next = memory_head(head);
	head->prev = memory_head(head);
}

static inline void add_mem_queue(struct free_area_struct * head, struct page * entry)
{
	struct page * next = head->next;

	entry->prev = memory_head(head);
	entry->next = next;
	next->prev = entry;
	head->next = entry;
}

static inline void remove_mem_queue(struct page * entry)
{
	struct page * next = entry->next;
	struct page * prev = entry->prev;
	next->prev = prev;
	prev->next = next;
}

/*
 * Free_page() adds the page to the free lists. This is optimized for
 * fast normal cases (no error jumps taken normally).
 *
 * The way to optimize jumps for gcc-2.2.2 is to:
 *  - select the "normal" case and put it inside the if () { XXX }
 *  - no else-statements if you can avoid them
 *
 * With the above two rules, you get a straight-line execution path
 * for the normal case, giving better asm-code.
 */

/*
 * Buddy system. Hairy. You really aren't expected to understand this
 *
 * Hint: -mask = 1+~mask
 */
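
/*
 * Illustration (not from the original source): for order 1,
 * mask = ~0UL << 1 = ...11110, so -mask = 1+~mask = 2 = 1UL << order.
 * free_pages_ok() below relies on this: "map_nr ^ -mask" flips bit
 * `order` of the map number to locate a block's buddy, and
 * "map_nr &= mask" rounds the map number down to the start of the
 * merged higher-order block.
 */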
spinlock_t page_alloc_lock = SPIN_LOCK_UNLOCKED;

/*
 * This routine is used by the kernel swap daemon to determine
 * whether we have "enough" free pages. It is fairly arbitrary,
 * having a low-water and high-water mark.
 *
 *  0 - urgent need for memory
 *  1 - need some memory, but do it slowly in the background
 *  2 - no need to even think about it.
 */
int free_memory_available(void)
{
	static int available = 1;

	if (nr_free_pages < freepages.low) {
		/* ... below the low-water mark ... */
	}
	if (nr_free_pages > freepages.high) {
		/* ... above the high-water mark ... */
	}
	/* ... */
	return available;
}

static inline void free_pages_ok(unsigned long map_nr, unsigned long order)
{
	struct free_area_struct *area = free_area + order;
	unsigned long index = map_nr >> (1 + order);
	unsigned long mask = (~0UL) << order;
	unsigned long flags;

	spin_lock_irqsave(&page_alloc_lock, flags);

#define list(x) (mem_map+(x))

	map_nr &= mask;
	nr_free_pages -= mask;
	while (mask + (1 << (NR_MEM_LISTS-1))) {
		if (!test_and_change_bit(index, area->map))
			break;
		/* The buddy is free too: take it off its list and merge. */
		remove_mem_queue(list(map_nr ^ -mask));
		mask <<= 1;
		area++;
		index >>= 1;
		map_nr &= mask;
	}
	add_mem_queue(area, list(map_nr));

#undef list

	spin_unlock_irqrestore(&page_alloc_lock, flags);
}
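
/*
 * Coalescing illustration (not from the original source): freeing the
 * order-0 page with map_nr 5 checks buddy 5 ^ 1 == 4; if page 4 is
 * already free it is taken off the order-0 list and the combined
 * order-1 block starting at map_nr 4 is then checked against its own
 * buddy, 4 ^ 2 == 6, and so on up the free lists.
 */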

void __free_page(struct page *page)
{
	if (!PageReserved(page) && atomic_dec_and_test(&page->count)) {
		if (PageSwapCache(page))
			panic ("Freeing swap cache page");
		free_pages_ok(page->map_nr, 0);
		return;
	}
	if (PageSwapCache(page) && atomic_read(&page->count) == 1)
		printk(KERN_WARNING "VM: Releasing swap cache page at %p",
			__builtin_return_address(0));
}

void free_pages(unsigned long addr, unsigned long order)
{
	unsigned long map_nr = MAP_NR(addr);

	if (map_nr < max_mapnr) {
		mem_map_t * map = mem_map + map_nr;
		if (PageReserved(map))
			return;
		if (atomic_dec_and_test(&map->count)) {
			if (PageSwapCache(map))
				panic ("Freeing swap cache pages");
			free_pages_ok(map_nr, order);
			return;
		}
		if (PageSwapCache(map) && atomic_read(&map->count) == 1)
			printk(KERN_WARNING
				"VM: Releasing swap cache pages at %p",
				__builtin_return_address(0));
	}
}

/*
 * Some ugly macros to speed up __get_free_pages()..
 */
#define MARK_USED(index, order, area) \
	change_bit((index) >> (1+(order)), (area)->map)
#define CAN_DMA(x) (PageDMA(x))
#define ADDRESS(x) (PAGE_OFFSET + ((x) << PAGE_SHIFT))
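
/*
 * Illustration (assuming the usual i386 values PAGE_OFFSET == 0xC0000000
 * and PAGE_SHIFT == 12, i.e. 4kB pages): ADDRESS(5) == 0xC0005000, the
 * kernel-virtual address of physical page frame 5.  MARK_USED() flips
 * the same buddy-pair bit that free_pages_ok() toggles above.
 */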

#define RMQUEUE(order, dma) \
do { struct free_area_struct * area = free_area+order; \
     unsigned long new_order = order; \
	do { struct page *prev = memory_head(area), *ret = prev->next; \
		while (memory_head(area) != ret) { \
			if (!dma || CAN_DMA(ret)) { \
				unsigned long map_nr = ret->map_nr; \
				(prev->next = ret->next)->prev = prev; \
				MARK_USED(map_nr, new_order, area); \
				nr_free_pages -= 1 << order; \
				EXPAND(ret, map_nr, order, new_order, area); \
				spin_unlock_irqrestore(&page_alloc_lock, flags); \
				return ADDRESS(map_nr); \
			} \
			prev = ret; \
			ret = ret->next; \
		} \
		new_order++; area++; \
	} while (new_order < NR_MEM_LISTS); \
} while (0)

#define EXPAND(map,index,low,high,area) \
do { unsigned long size = 1 << high; \
	while (high > low) { \
		area--; high--; size >>= 1; \
		add_mem_queue(area, map); \
		MARK_USED(index, high, area); \
		index += size; \
		map += size; \
	} \
	atomic_set(&map->count, 1); \
	map->age = PAGE_INITIAL_AGE; \
} while (0)
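
/*
 * Worked example (not from the original source): if RMQUEUE() finds an
 * order-3 (8 page) block while satisfying an order-1 (2 page) request,
 * EXPAND() returns the first 4 pages to the order-2 free list, the next
 * 2 pages to the order-1 free list, and hands out the remaining 2-page
 * piece with its reference count set to 1.
 */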

unsigned long __get_free_pages(int gfp_mask, unsigned long order)
{
	unsigned long flags;

	if (order >= NR_MEM_LISTS)
		goto nopage;

	if (gfp_mask & __GFP_WAIT) {
		if (in_interrupt()) {
			static int count = 0;
			if (++count < 5)	/* rate-limit the warning */
				printk("gfp called nonatomically from interrupt %p\n",
					__builtin_return_address(0));
			goto nopage;
		}

		if (freepages.min > nr_free_pages) {
			int freed;
			freed = try_to_free_pages(gfp_mask, SWAP_CLUSTER_MAX);
			/*
			 * Low priority (user) allocations must not
			 * succeed if we didn't have enough memory
			 * and we couldn't get more..
			 */
			if (!freed && !(gfp_mask & (__GFP_MED | __GFP_HIGH)))
				goto nopage;
		}
	}
	spin_lock_irqsave(&page_alloc_lock, flags);
	RMQUEUE(order, (gfp_mask & GFP_DMA));	/* returns on success */
	spin_unlock_irqrestore(&page_alloc_lock, flags);
nopage:
	return 0;
}
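
/*
 * Usage sketch (not from the original file): a caller that needs four
 * physically contiguous pages could do
 *
 *	unsigned long buf = __get_free_pages(GFP_KERNEL, 2);
 *	if (buf) {
 *		...
 *		free_pages(buf, 2);
 *	}
 *
 * GFP_KERNEL includes __GFP_WAIT, so such an allocation may sleep and
 * must not be made from interrupt context.
 */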

/*
 * Show free area list (used inside shift_scroll-lock stuff)
 * We also calculate the percentage fragmentation. We do this by counting the
 * memory on each free list with the exception of the first item on the list.
 */
void show_free_areas(void)
{
	unsigned long order, flags;
	unsigned long total = 0;

	printk("Free pages: %6dkB\n ( ",nr_free_pages<<(PAGE_SHIFT-10));
	printk("Free: %d (%d %d %d)\n",
		nr_free_pages,
		freepages.min,
		freepages.low,
		freepages.high);
	spin_lock_irqsave(&page_alloc_lock, flags);
	for (order=0 ; order < NR_MEM_LISTS ; order++) {
		struct page * tmp;
		unsigned long nr = 0;
		for (tmp = free_area[order].next ; tmp != memory_head(free_area+order) ; tmp = tmp->next) {
			nr++;
		}
		total += nr * ((PAGE_SIZE>>10) << order);
		printk("%lu*%lukB ", nr, (unsigned long)((PAGE_SIZE>>10) << order));
	}
	spin_unlock_irqrestore(&page_alloc_lock, flags);
	printk("= %lukB)\n", total);
#ifdef SWAP_CACHE_INFO
	show_swap_cache_info();
#endif
}

#define LONG_ALIGN(x) (((x)+(sizeof(long))-1)&~((sizeof(long))-1))
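
/*
 * Example (assuming a 32-bit long): LONG_ALIGN(13) == 16 and
 * LONG_ALIGN(16) == 16, i.e. round up to the next multiple of
 * sizeof(long).
 */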

/*
 * set up the free-area data structures:
 *   - mark all pages reserved
 *   - mark all memory queues empty
 *   - clear the memory bitmaps
 */
__initfunc(unsigned long free_area_init(unsigned long start_mem, unsigned long end_mem))
{
	mem_map_t * p;
	unsigned long mask = PAGE_MASK;
	unsigned long i;

	/*
	 * Select nr of pages we try to keep free for important stuff
	 * with a minimum of 10 pages and a maximum of 256 pages, so
	 * that we don't waste too much memory on large systems.
	 * This is fairly arbitrary, but based on some behaviour
	 * analysis.
	 */
	i = (end_mem - PAGE_OFFSET) >> (PAGE_SHIFT+7);
	if (i < 10)
		i = 10;
	if (i > 256)
		i = 256;
	freepages.min = i;
	freepages.low = i * 2;
	freepages.high = i * 3;
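
	/*
	 * Worked example (hypothetical 32MB machine with 4kB pages):
	 * 8192 pages >> 7 gives i = 64, which is inside the 10..256
	 * clamp, so freepages.min = 64, freepages.low = 128 and
	 * freepages.high = 192.
	 */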
	mem_map = (mem_map_t *) LONG_ALIGN(start_mem);
	p = mem_map + MAP_NR(end_mem);
	start_mem = LONG_ALIGN((unsigned long) p);
	memset(mem_map, 0, start_mem - (unsigned long) mem_map);
	do {
		--p;
		atomic_set(&p->count, 0);
		p->flags = (1 << PG_DMA) | (1 << PG_reserved);
		p->map_nr = p - mem_map;
	} while (p > mem_map);

	for (i = 0 ; i < NR_MEM_LISTS ; i++) {
		unsigned long bitmap_size;
		init_mem_queue(free_area+i);
		mask += mask;
		end_mem = (end_mem + ~mask) & mask;
		bitmap_size = (end_mem - PAGE_OFFSET) >> (PAGE_SHIFT + i);
		bitmap_size = (bitmap_size + 7) >> 3;
		bitmap_size = LONG_ALIGN(bitmap_size);
		free_area[i].map = (unsigned int *) start_mem;
		memset((void *) start_mem, 0, bitmap_size);
		start_mem += bitmap_size;
	}
	return start_mem;
}
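
/*
 * Sizing example (hypothetical 32MB of memory, 4kB pages): for i == 0
 * there are 8192 page-sized blocks, so bitmap_size is (8192+7)>>3 =
 * 1024 bytes; for i == 1 it is 512 bytes, and so on, halving for each
 * higher order.
 */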

/*
 * The tests may look silly, but it essentially makes sure that
 * no other process did a swap-in on us just as we were waiting.
 *
 * Also, don't bother to add to the swap cache if this page-in
 * was due to a write access.
 */
void swap_in(struct task_struct * tsk, struct vm_area_struct * vma,
	pte_t * page_table, unsigned long entry, int write_access)
{
	unsigned long page;
	struct page *page_map;

	page_map = read_swap_cache(entry);

	if (pte_val(*page_table) != entry) {
		/* Somebody else already faulted this entry in: drop our copy. */
		free_page_and_swap_cache(page_address(page_map));
		return;
	}
	if (!page_map) {
		/* The swap cache read failed: map in the bad page and bail out. */
		set_pte(page_table, BAD_PAGE);
		/* ... */
		return;
	}

	page = page_address(page_map);
	/* ... account for the fault and free the swap entry ... */

	if (!write_access || is_page_shared(page_map)) {
		set_pte(page_table, mk_pte(page, vma->vm_page_prot));
		return;
	}

	/* The page is unshared, and we want write access. In this
	   case, it is safe to tear down the swap cache and give the
	   page over entirely to this process. */

	delete_from_swap_cache(page_map);
	set_pte(page_table, pte_mkwrite(pte_mkdirty(mk_pte(page, vma->vm_page_prot))));
}