/*
 * linux/mm/page_alloc.c
 *
 * Copyright (C) 1991, 1992, 1993, 1994  Linus Torvalds
 * Swap reorganised 29.12.95, Stephen Tweedie
 * Support of BIGMEM added by Gerhard Wichert, Siemens AG, July 1999
 * Reshaped it to be a zoned allocator, Ingo Molnar, Red Hat, 1999
 * Discontiguous memory support, Kanoj Sarcar, SGI, Nov 1999
 * Zone balancing, Kanoj Sarcar, SGI, Jan 2000
 */

#include <linux/config.h>
#include <linux/mm.h>
#include <linux/swap.h>
#include <linux/swapctl.h>
#include <linux/interrupt.h>
#include <linux/pagemap.h>
#include <linux/bootmem.h>

int nr_swap_pages;
int nr_active_pages;
int nr_inactive_dirty_pages;
pg_data_t *pgdat_list;

static char *zone_names[MAX_NR_ZONES] = { "DMA", "Normal", "HighMem" };
static int zone_balance_ratio[MAX_NR_ZONES] = { 32, 128, 128, };
static int zone_balance_min[MAX_NR_ZONES] = { 10, 10, 10, };
static int zone_balance_max[MAX_NR_ZONES] = { 255, 255, 255, };

struct list_head active_list;
struct list_head inactive_dirty_list;
/*
 * Free_page() adds the page to the free lists. This is optimized for
 * fast normal cases (no error jumps taken normally).
 *
 * The way to optimize jumps for gcc-2.2.2 is to:
 *  - select the "normal" case and put it inside the if () { XXX }
 *  - no else-statements if you can avoid them
 *
 * With the above two rules, you get a straight-line execution path
 * for the normal case, giving better asm-code.
 */
#define memlist_init(x) INIT_LIST_HEAD(x)
#define memlist_add_head list_add
#define memlist_add_tail list_add_tail
#define memlist_del list_del
#define memlist_entry list_entry
#define memlist_next(x) ((x)->next)
#define memlist_prev(x) ((x)->prev)

/*
 * Temporary debugging check.
 */
#define BAD_RANGE(zone,x) (((zone) != (x)->zone) || (((x)-mem_map) < (zone)->offset) || (((x)-mem_map) >= (zone)->offset+(zone)->size))
/*
 * Buddy system. Hairy. You really aren't expected to understand this
 *
 * Hint: -mask = 1+~mask
 */
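/*
 * A worked example of the mask arithmetic used below: for order 2,
 * mask = (~0UL) << 2 = ...11111100, so -mask = 1+~mask = 4 = 1 << order.
 * "page_idx ^ -mask" therefore toggles bit <order> of the page index,
 * which is exactly the index of the block's buddy, and
 * "zone->free_pages -= mask" adds 1 << order pages to the free count.
 * The coalescing loop stops when mask reaches -(1 << (MAX_ORDER-1)),
 * i.e. once the merged block has grown to the largest supported order.
 */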
static void FASTCALL(__free_pages_ok (struct page *page, unsigned long order));
static void __free_pages_ok (struct page *page, unsigned long order)
{
	unsigned long index, page_idx, mask, flags;
	free_area_t *area;
	struct page *base;
	zone_t *zone;

	/*
	 * Subtle. We do not want to test this in the inlined part of
	 * __free_page() - it's a rare condition and just increases
	 * cache footprint unnecessarily. So we do an 'incorrect'
	 * decrement on page->count for reserved pages, but this part
	 * makes it safe.
	 */
	if (PageReserved(page))
		return;

	if (page->buffers)
		BUG();
	if (page->mapping)
		BUG();
	if (!VALID_PAGE(page))
		BUG();
	if (PageSwapCache(page))
		BUG();
	if (PageLocked(page))
		BUG();
	if (PageDecrAfter(page))
		BUG();
	if (PageActive(page))
		BUG();
	if (PageInactiveDirty(page))
		BUG();
	if (PageInactiveClean(page))
		BUG();

	page->flags &= ~((1<<PG_referenced) | (1<<PG_dirty));
	page->age = PAGE_AGE_START;

	zone = page->zone;

	mask = (~0UL) << order;
	base = mem_map + zone->offset;
	page_idx = page - base;
	if (page_idx & ~mask)
		BUG();
	index = page_idx >> (1 + order);

	area = zone->free_area + order;

	spin_lock_irqsave(&zone->lock, flags);

	zone->free_pages -= mask;

	while (mask + (1 << (MAX_ORDER-1))) {
		struct page *buddy1, *buddy2;

		if (area >= zone->free_area + MAX_ORDER)
			BUG();
		if (!test_and_change_bit(index, area->map))
			/*
			 * the buddy page is still allocated.
			 */
			break;
		/*
		 * Move the buddy up one level.
		 */
		buddy1 = base + (page_idx ^ -mask);
		buddy2 = base + page_idx;
		if (BAD_RANGE(zone,buddy1))
			BUG();
		if (BAD_RANGE(zone,buddy2))
			BUG();

		memlist_del(&buddy1->list);
		mask <<= 1;
		area++;
		index >>= 1;
		page_idx &= mask;
	}
	memlist_add_head(&(base + page_idx)->list, &area->free_list);

	spin_unlock_irqrestore(&zone->lock, flags);

	/*
	 * We don't want to protect this variable from race conditions
	 * since it's nothing important, but we do want to make sure
	 * it never gets negative.
	 */
	if (memory_pressure > NR_CPUS)
		memory_pressure--;
}
#define MARK_USED(index, order, area) \
	change_bit((index) >> (1+(order)), (area)->map)
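/*
 * Each free_area keeps one bit per pair of buddies of its order, and
 * MARK_USED() toggles that bit whenever either buddy changes state.
 * The bit is thus 1 while exactly one buddy of the pair is free and 0
 * otherwise, which is what the test_and_change_bit() call in
 * __free_pages_ok() relies on to decide whether coalescing is possible.
 */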
static inline struct page * expand (zone_t *zone, struct page *page,
	 unsigned long index, int low, int high, free_area_t * area)
{
	unsigned long size = 1 << high;

	while (high > low) {
		if (BAD_RANGE(zone,page))
			BUG();
		area--;
		high--;
		size >>= 1;
		memlist_add_head(&(page)->list, &(area)->free_list);
		MARK_USED(index, high, area);
		index += size;
		page += size;
	}
	if (BAD_RANGE(zone,page))
		BUG();
	return page;
}
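/*
 * expand() is the inverse of the coalescing loop in __free_pages_ok():
 * when rmqueue() has taken a block of curr_order > order off a free
 * list, expand() gives the unneeded halves back, one block per order,
 * and MARK_USED() records that the other half of each pair remains
 * allocated.  As an illustration, an order-0 request satisfied from an
 * order-2 block puts one order-1 and one order-0 block back on the
 * free lists and hands out the one remaining page.
 */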
static FASTCALL(struct page * rmqueue(zone_t *zone, unsigned long order));
static struct page * rmqueue(zone_t *zone, unsigned long order)
{
	free_area_t * area = zone->free_area + order;
	unsigned long curr_order = order;
	struct list_head *head, *curr;
	unsigned long flags;
	struct page *page;

	spin_lock_irqsave(&zone->lock, flags);
	do {
		head = &area->free_list;
		curr = memlist_next(head);

		if (curr != head) {
			unsigned int index;

			page = memlist_entry(curr, struct page, list);
			if (BAD_RANGE(zone,page))
				BUG();
			memlist_del(curr);
			index = (page - mem_map) - zone->offset;
			MARK_USED(index, curr_order, area);
			zone->free_pages -= 1 << order;

			page = expand(zone, page, index, order, curr_order, area);
			spin_unlock_irqrestore(&zone->lock, flags);

			set_page_count(page, 1);
			if (BAD_RANGE(zone,page))
				BUG();
			DEBUG_ADD_PAGE
			return page;
		}
		curr_order++;
		area++;
	} while (curr_order < MAX_ORDER);
	spin_unlock_irqrestore(&zone->lock, flags);

	return NULL;
}
#define PAGES_MIN	0
#define PAGES_LOW	1
#define PAGES_HIGH	2
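/*
 * These select which zone watermark __alloc_pages_limit() compares
 * against (zone->pages_high, pages_low or pages_min).  __alloc_pages()
 * tries them from least to most desperate: PAGES_HIGH first, then
 * PAGES_LOW, and PAGES_MIN only after kswapd has been woken up.
 */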
/*
 * This function does the dirty work for __alloc_pages
 * and is separated out to keep the code size smaller.
 * (suggested by Davem at 1:30 AM, typed by Rik at 6 AM)
 */
static struct page * __alloc_pages_limit(zonelist_t *zonelist,
			unsigned long order, int limit, int direct_reclaim)
{
	zone_t **zone = zonelist->zones;

	for (;;) {
		zone_t *z = *(zone++);
		unsigned long water_mark;

		if (!z)
			break;
		if (!z->size)
			BUG();

		/*
		 * We allocate if the number of free + inactive_clean
		 * pages is above the watermark.
		 */
		switch (limit) {
			default:
			case PAGES_MIN:
				water_mark = z->pages_min;
				break;
			case PAGES_LOW:
				water_mark = z->pages_low;
				break;
			case PAGES_HIGH:
				water_mark = z->pages_high;
		}

		if (z->free_pages + z->inactive_clean_pages > water_mark) {
			struct page *page = NULL;
			/* If possible, reclaim a page directly. */
			if (direct_reclaim && z->free_pages < z->pages_min + 8)
				page = reclaim_page(z);
			/* If that fails, fall back to rmqueue. */
			if (!page)
				page = rmqueue(z, order);
			if (page)
				return page;
		}
	}

	/* Found nothing. */
	return NULL;
}
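/*
 * Note on direct reclaim: reclaim_page() (defined in mm/vmscan.c)
 * pulls a single page straight off the zone's inactive_clean list
 * rather than going through the buddy lists; this is presumably why
 * direct_reclaim is only enabled for order-0 allocations that are
 * allowed to sleep.
 */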
/*
 * This is the 'heart' of the zoned buddy allocator:
 */
struct page * __alloc_pages(zonelist_t *zonelist, unsigned long order)
{
	zone_t **zone;
	int direct_reclaim = 0;
	unsigned int gfp_mask = zonelist->gfp_mask;
	struct page * page;

	/*
	 * Allocations put pressure on the VM subsystem.
	 */
	memory_pressure++;

	/*
	 * (If anyone calls gfp from interrupts nonatomically then it
	 * will sooner or later be tripped up by a schedule().)
	 *
	 * We are falling back to lower-level zones if allocation
	 * in a higher zone fails.
	 */

	/*
	 * Can we take pages directly from the inactive_clean
	 * list?
	 */
	if (order == 0 && (gfp_mask & __GFP_WAIT) &&
			!(current->flags & PF_MEMALLOC))
		direct_reclaim = 1;

	/*
	 * If we are about to get low on free pages and we also have
	 * an inactive page shortage, wake up kswapd.
	 */
	if (inactive_shortage() > inactive_target / 2 && free_shortage())
		wakeup_kswapd(0);
	/*
	 * If we are about to get low on free pages and cleaning
	 * the inactive_dirty pages would fix the situation,
	 * wake up bdflush.
	 */
	else if (free_shortage() && nr_inactive_dirty_pages > free_shortage()
			&& nr_inactive_dirty_pages >= freepages.high)
		wakeup_bdflush(0);

try_again:
	/*
	 * First, see if we have any zones with lots of free memory.
	 *
	 * We allocate free memory first because it doesn't contain
	 * any data ... DUH!
	 */
	zone = zonelist->zones;
	for (;;) {
		zone_t *z = *(zone++);
		if (!z)
			break;
		if (!z->size)
			BUG();

		if (z->free_pages >= z->pages_low) {
			page = rmqueue(z, order);
			if (page)
				return page;
		} else if (z->free_pages < z->pages_min &&
					waitqueue_active(&kreclaimd_wait)) {
				wake_up_interruptible(&kreclaimd_wait);
		}
	}

	/*
	 * Try to allocate a page from a zone with a HIGH
	 * amount of free + inactive_clean pages.
	 *
	 * If there is a lot of activity, inactive_target
	 * will be high and we'll have a good chance of
	 * finding a page using the HIGH limit.
	 */
	page = __alloc_pages_limit(zonelist, order, PAGES_HIGH, direct_reclaim);
	if (page)
		return page;

	/*
	 * Then try to allocate a page from a zone with more
	 * than zone->pages_low free + inactive_clean pages.
	 *
	 * When the working set is very large and VM activity
	 * is low, we're most likely to have our allocation
	 * succeed here.
	 */
	page = __alloc_pages_limit(zonelist, order, PAGES_LOW, direct_reclaim);
	if (page)
		return page;

	/*
	 * OK, none of the zones on our zonelist has lots
	 * of pages free.
	 *
	 * We wake up kswapd, in the hope that kswapd will
	 * resolve this situation before memory gets tight.
	 *
	 * We also yield the CPU, because that:
	 * - gives kswapd a chance to do something
	 * - slows down allocations, in particular the
	 *   allocations from the fast allocator that's
	 *   causing the problems ...
	 * - ... which minimises the impact the "bad guys"
	 *   have on the rest of the system
	 * - if we don't have __GFP_IO set, kswapd may be
	 *   able to free some memory we can't free ourselves
	 */
	wakeup_kswapd(0);
	if (gfp_mask & __GFP_WAIT) {
		__set_current_state(TASK_RUNNING);
		current->policy |= SCHED_YIELD;
		schedule();
	}

	/*
	 * After waking up kswapd, we try to allocate a page
	 * from any zone which isn't critical yet.
	 *
	 * Kswapd should, in most situations, bring the situation
	 * back to normal in no time.
	 */
	page = __alloc_pages_limit(zonelist, order, PAGES_MIN, direct_reclaim);
	if (page)
		return page;

	/*
	 * Damn, we didn't succeed.
	 *
	 * This can be due to 2 reasons:
	 * - we're doing a higher-order allocation
	 * 	--> move pages to the free list until we succeed
	 * - we're /really/ tight on memory
	 * 	--> wait on the kswapd waitqueue until memory is freed
	 */
	if (!(current->flags & PF_MEMALLOC)) {
		/*
		 * Are we dealing with a higher order allocation?
		 *
		 * Move pages from the inactive_clean to the free list
		 * in the hope of creating a large, physically contiguous
		 * piece of free memory.
		 */
		if (order > 0 && (gfp_mask & __GFP_WAIT)) {
			zone = zonelist->zones;
			/* First, clean some dirty pages. */
			page_launder(gfp_mask, 1);
			for (;;) {
				zone_t *z = *(zone++);
				if (!z)
					break;
				if (!z->size)
					continue;
				while (z->inactive_clean_pages) {
					struct page * page;
					/* Move one page to the free list. */
					page = reclaim_page(z);
					if (!page)
						break;
					__free_page(page);
					/* Try if the allocation succeeds. */
					page = rmqueue(z, order);
					if (page)
						return page;
				}
			}
		}
		/*
		 * When we arrive here, we are really tight on memory.
		 *
		 * We wake up kswapd and sleep until kswapd wakes us
		 * up again. After that we loop back to the start.
		 *
		 * We have to do this because something else might eat
		 * the memory kswapd frees for us and we need to be
		 * reliable. Note that we don't loop back for higher
		 * order allocations since it is possible that kswapd
		 * simply cannot free a large enough contiguous area
		 * of memory *ever*.
		 */
		if ((gfp_mask & (__GFP_WAIT|__GFP_IO)) == (__GFP_WAIT|__GFP_IO)) {
			wakeup_kswapd(1);
			memory_pressure++;
			if (!order)
				goto try_again;
		/*
		 * If __GFP_IO isn't set, we can't wait on kswapd because
		 * kswapd just might need some IO locks /we/ are holding ...
		 *
		 * SUBTLE: The scheduling point above makes sure that
		 * kswapd does get the chance to free memory we can't
		 * free ourselves...
		 */
		} else if (gfp_mask & __GFP_WAIT) {
			try_to_free_pages(gfp_mask);
			memory_pressure++;
			if (!order)
				goto try_again;
		}

	}

	/*
	 * Final phase: allocate anything we can!
	 *
	 * Higher order allocations, GFP_ATOMIC allocations and
	 * recursive allocations (PF_MEMALLOC) end up here.
	 *
	 * Only recursive allocations can use the very last pages
	 * in the system, otherwise it would be just too easy to
	 * deadlock the system...
	 */
	zone = zonelist->zones;
	for (;;) {
		zone_t *z = *(zone++);
		struct page * page = NULL;
		if (!z)
			break;
		if (!z->size)
			BUG();

		/*
		 * SUBTLE: direct_reclaim is only possible if the task
		 * becomes PF_MEMALLOC while looping above. This will
		 * happen when the OOM killer selects this task for
		 * instant execution...
		 */
		if (direct_reclaim) {
			page = reclaim_page(z);
			if (page)
				return page;
		}

		/* XXX: is pages_min/4 a good amount to reserve for this? */
		if (z->free_pages < z->pages_min / 4 &&
				!(current->flags & PF_MEMALLOC))
			continue;
		page = rmqueue(z, order);
		if (page)
			return page;
	}

	/* No luck.. */
	printk(KERN_ERR "__alloc_pages: %lu-order allocation failed.\n", order);
	return NULL;
}
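/*
 * A minimal usage sketch (not part of this file): most callers go
 * through the wrappers below rather than calling __alloc_pages()
 * directly, e.g.
 *
 *	unsigned long addr = __get_free_pages(GFP_KERNEL, 2);
 *	if (addr) {
 *		... use the four contiguous pages ...
 *		free_pages(addr, 2);
 *	}
 *
 * GFP_KERNEL includes __GFP_WAIT and __GFP_IO, so such a caller may
 * sleep in the slow paths above; GFP_ATOMIC callers skip the sleeping
 * paths and rely on the final "allocate anything we can" loop.
 */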
/*
 * Common helper functions.
 */
unsigned long __get_free_pages(int gfp_mask, unsigned long order)
{
	struct page * page;

	page = alloc_pages(gfp_mask, order);
	if (!page)
		return 0;
	return (unsigned long) page_address(page);
}

unsigned long get_zeroed_page(int gfp_mask)
{
	struct page * page;

	page = alloc_pages(gfp_mask, 0);
	if (page) {
		void *address = page_address(page);
		clear_page(address);
		return (unsigned long) address;
	}
	return 0;
}

void __free_pages(struct page *page, unsigned long order)
{
	if (put_page_testzero(page))
		__free_pages_ok(page, order);
}

void free_pages(unsigned long addr, unsigned long order)
{
	struct page *fpage;

#ifdef CONFIG_DISCONTIGMEM
	if (addr == 0) return;
#endif
	fpage = virt_to_page(addr);
	if (VALID_PAGE(fpage))
		__free_pages(fpage, order);
}
/*
 * Total amount of free (allocatable) RAM:
 */
unsigned int nr_free_pages (void)
{
	unsigned int sum;
	zone_t *zone;
	pg_data_t *pgdat = pgdat_list;

	sum = 0;
	while (pgdat) {
		for (zone = pgdat->node_zones; zone < pgdat->node_zones + MAX_NR_ZONES; zone++)
			sum += zone->free_pages;
		pgdat = pgdat->node_next;
	}
	return sum;
}

/*
 * Total amount of inactive_clean (allocatable) RAM:
 */
unsigned int nr_inactive_clean_pages (void)
{
	unsigned int sum;
	zone_t *zone;
	pg_data_t *pgdat = pgdat_list;

	sum = 0;
	while (pgdat) {
		for (zone = pgdat->node_zones; zone < pgdat->node_zones + MAX_NR_ZONES; zone++)
			sum += zone->inactive_clean_pages;
		pgdat = pgdat->node_next;
	}
	return sum;
}

/*
 * Amount of free RAM allocatable as buffer memory:
 */
unsigned int nr_free_buffer_pages (void)
{
	unsigned int sum;

	sum = nr_free_pages();
	sum += nr_inactive_clean_pages();
	sum += nr_inactive_dirty_pages;

	/*
	 * Keep our write behind queue filled, even if
	 * kswapd lags a bit right now.
	 */
	if (sum < freepages.high + inactive_target)
		sum = freepages.high + inactive_target;
	/*
	 * We don't want dirty page writebehind to put too
	 * much pressure on the working set, but we want it
	 * to be possible to have some dirty pages in the
	 * working set without upsetting the writebehind logic.
	 */
	sum += nr_active_pages >> 4;

	return sum;
}
#if CONFIG_HIGHMEM
unsigned int nr_free_highpages (void)
{
	pg_data_t *pgdat = pgdat_list;
	unsigned int pages = 0;

	while (pgdat) {
		pages += pgdat->node_zones[ZONE_HIGHMEM].free_pages;
		pgdat = pgdat->node_next;
	}
	return pages;
}
#endif
/*
 * Show free area list (used inside shift_scroll-lock stuff)
 * We also calculate the percentage fragmentation. We do this by counting the
 * memory on each free list with the exception of the first item on the list.
 */
void show_free_areas_core(pg_data_t *pgdat)
{
	unsigned long order;
	unsigned type;

	printk("Free pages: %6dkB (%6dkB HighMem)\n",
		nr_free_pages() << (PAGE_SHIFT-10),
		nr_free_highpages() << (PAGE_SHIFT-10));

	printk("( Active: %d, inactive_dirty: %d, inactive_clean: %d, free: %d (%d %d %d) )\n",
		nr_active_pages,
		nr_inactive_dirty_pages,
		nr_inactive_clean_pages(),
		nr_free_pages(),
		freepages.min,
		freepages.low,
		freepages.high);

	for (type = 0; type < MAX_NR_ZONES; type++) {
		struct list_head *head, *curr;
		zone_t *zone = pgdat->node_zones + type;
		unsigned long nr, total, flags;

		total = 0;
		if (zone->size) {
			spin_lock_irqsave(&zone->lock, flags);
			for (order = 0; order < MAX_ORDER; order++) {
				head = &(zone->free_area + order)->free_list;
				curr = head;
				nr = 0;
				for (;;) {
					curr = memlist_next(curr);
					if (curr == head)
						break;
					nr++;
				}
				total += nr * (1 << order);
				printk("%lu*%lukB ", nr,
						(PAGE_SIZE>>10) << order);
			}
			spin_unlock_irqrestore(&zone->lock, flags);
		}
		printk("= %lukB)\n", total * (PAGE_SIZE>>10));
	}

#ifdef SWAP_CACHE_INFO
	show_swap_cache_info();
#endif
}

void show_free_areas(void)
{
	show_free_areas_core(pgdat_list);
}
/*
 * Builds allocation fallback zone lists.
 */
static inline void build_zonelists(pg_data_t *pgdat)
{
	int i, j, k;

	for (i = 0; i < NR_GFPINDEX; i++) {
		zonelist_t *zonelist;
		zone_t *zone;

		zonelist = pgdat->node_zonelists + i;
		memset(zonelist, 0, sizeof(*zonelist));

		zonelist->gfp_mask = i;
		j = 0;
		k = ZONE_NORMAL;
		if (i & __GFP_HIGHMEM)
			k = ZONE_HIGHMEM;
		if (i & __GFP_DMA)
			k = ZONE_DMA;

		switch (k) {
			default:
				BUG();
			/*
			 * fallthrough:
			 */
			case ZONE_HIGHMEM:
				zone = pgdat->node_zones + ZONE_HIGHMEM;
				if (zone->size) {
#ifndef CONFIG_HIGHMEM
					BUG();
#endif
					zonelist->zones[j++] = zone;
				}
			case ZONE_NORMAL:
				zone = pgdat->node_zones + ZONE_NORMAL;
				if (zone->size)
					zonelist->zones[j++] = zone;
			case ZONE_DMA:
				zone = pgdat->node_zones + ZONE_DMA;
				if (zone->size)
					zonelist->zones[j++] = zone;
		}
		zonelist->zones[j++] = NULL;
	}
}
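/*
 * The deliberate case fall-through above builds each zonelist in
 * strictest-last order: a __GFP_HIGHMEM request may fall back to
 * ZONE_NORMAL and then ZONE_DMA, a normal request may fall back to
 * ZONE_DMA, and a __GFP_DMA request is restricted to ZONE_DMA alone.
 * __alloc_pages() walks the resulting NULL-terminated array in exactly
 * that order.
 */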
#define LONG_ALIGN(x) (((x)+(sizeof(long))-1)&~((sizeof(long))-1))

/*
 * Set up the zone data structures:
 *  - mark all pages reserved
 *  - mark all memory queues empty
 *  - clear the memory bitmaps
 */
void __init free_area_init_core(int nid, pg_data_t *pgdat, struct page **gmap,
	unsigned long *zones_size, unsigned long zone_start_paddr,
	unsigned long *zholes_size, struct page *lmem_map)
{
	struct page *p;
	unsigned long i, j;
	unsigned long map_size;
	unsigned long totalpages, offset, realtotalpages;
	unsigned int cumulative = 0;

	totalpages = 0;
	for (i = 0; i < MAX_NR_ZONES; i++) {
		unsigned long size = zones_size[i];
		totalpages += size;
	}
	realtotalpages = totalpages;
	if (zholes_size)
		for (i = 0; i < MAX_NR_ZONES; i++)
			realtotalpages -= zholes_size[i];

	printk("On node %d totalpages: %lu\n", nid, realtotalpages);

	memlist_init(&active_list);
	memlist_init(&inactive_dirty_list);

	/*
	 * Some architectures (with lots of mem and discontiguous memory
	 * maps) have to search for a good mem_map area:
	 * For discontigmem, the conceptual mem map array starts from
	 * PAGE_OFFSET, we need to align the actual array onto a mem map
	 * boundary, so that MAP_NR works.
	 */
	map_size = (totalpages + 1)*sizeof(struct page);
	if (lmem_map == (struct page *)0) {
		lmem_map = (struct page *) alloc_bootmem_node(pgdat, map_size);
		lmem_map = (struct page *)(PAGE_OFFSET +
			MAP_ALIGN((unsigned long)lmem_map - PAGE_OFFSET));
	}
	*gmap = pgdat->node_mem_map = lmem_map;
	pgdat->node_size = totalpages;
	pgdat->node_start_paddr = zone_start_paddr;
	pgdat->node_start_mapnr = (lmem_map - mem_map);

	/*
	 * Initially all pages are reserved - free ones are freed
	 * up by free_all_bootmem() once the early boot process is
	 * done.
	 */
	for (p = lmem_map; p < lmem_map + totalpages; p++) {
		set_page_count(p, 0);
		SetPageReserved(p);
		init_waitqueue_head(&p->wait);
		memlist_init(&p->list);
	}

	offset = lmem_map - mem_map;
	for (j = 0; j < MAX_NR_ZONES; j++) {
		zone_t *zone = pgdat->node_zones + j;
		unsigned long mask;
		unsigned long size, realsize;

		realsize = size = zones_size[j];
		if (zholes_size)
			realsize -= zholes_size[j];

		printk("zone(%lu): %lu pages.\n", j, size);
		zone->size = size;
		zone->name = zone_names[j];
		zone->lock = SPIN_LOCK_UNLOCKED;
		zone->zone_pgdat = pgdat;
		zone->free_pages = 0;
		zone->inactive_clean_pages = 0;
		zone->inactive_dirty_pages = 0;
		memlist_init(&zone->inactive_clean_list);
		if (!size)
			continue;

		zone->offset = offset;
		cumulative += size;
		mask = (realsize / zone_balance_ratio[j]);
		if (mask < zone_balance_min[j])
			mask = zone_balance_min[j];
		else if (mask > zone_balance_max[j])
			mask = zone_balance_max[j];
		zone->pages_min = mask;
		zone->pages_low = mask*2;
		zone->pages_high = mask*3;
		/*
		 * Add these free targets to the global free target;
		 * we have to be SURE that freepages.high is higher
		 * than SUM [zone->pages_min] for all zones, otherwise
		 * we may have bad bad problems.
		 *
		 * This means we cannot make the freepages array writable
		 * in /proc, but have to add a separate extra_free_target
		 * for people who require it to catch load spikes in eg.
		 * gigabit ethernet routing...
		 */
		freepages.min += mask;
		freepages.low += mask*2;
		freepages.high += mask*3;
		zone->zone_mem_map = mem_map + offset;
		zone->zone_start_mapnr = offset;
		zone->zone_start_paddr = zone_start_paddr;

		for (i = 0; i < size; i++) {
			struct page *page = mem_map + offset + i;
			page->zone = zone;
			if (j != ZONE_HIGHMEM) {
				page->virtual = __va(zone_start_paddr);
				zone_start_paddr += PAGE_SIZE;
			}
		}

		offset += size;
		mask = -1;
		for (i = 0; i < MAX_ORDER; i++) {
			unsigned long bitmap_size;

			memlist_init(&zone->free_area[i].free_list);
			mask += mask;
			size = (size + ~mask) & mask;
			bitmap_size = size >> i;
			bitmap_size = (bitmap_size + 7) >> 3;
			bitmap_size = LONG_ALIGN(bitmap_size);
			zone->free_area[i].map =
			  (unsigned int *) alloc_bootmem_node(pgdat, bitmap_size);
		}
	}
	build_zonelists(pgdat);
}
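/*
 * Illustrative numbers for the watermark setup above (assuming 4 kB
 * pages, not taken from a real boot): a 128 MB ZONE_NORMAL has
 * realsize = 32768, so mask = 32768 / zone_balance_ratio[1] = 256,
 * clamped to zone_balance_max[1] = 255.  The zone then gets
 * pages_min = 255, pages_low = 510 and pages_high = 765, and the same
 * amounts are added to the global freepages.{min,low,high} targets.
 */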
void __init free_area_init(unsigned long *zones_size)
{
	free_area_init_core(0, &contig_page_data, &mem_map, zones_size, 0, 0, 0);
}

static int __init setup_mem_frac(char *str)
{
	int j = 0;

	while (get_option(&str, &zone_balance_ratio[j++]) == 2);
	printk("setup_mem_frac: ");
	for (j = 0; j < MAX_NR_ZONES; j++) printk("%d ", zone_balance_ratio[j]);
	printk("\n");
	return 1;
}

__setup("memfrac=", setup_mem_frac);