1 /*
2 * linux/mm/page_alloc.c
4 * Manages the free list, the system allocates free pages here.
5 * Note that kmalloc() lives in slab.c
7 * Copyright (C) 1991, 1992, 1993, 1994 Linus Torvalds
8 * Swap reorganised 29.12.95, Stephen Tweedie
9 * Support of BIGMEM added by Gerhard Wichert, Siemens AG, July 1999
10 * Reshaped it to be a zoned allocator, Ingo Molnar, Red Hat, 1999
11 * Discontiguous memory support, Kanoj Sarcar, SGI, Nov 1999
12 * Zone balancing, Kanoj Sarcar, SGI, Jan 2000
13 * Per cpu hot/cold page lists, bulk allocation, Martin J. Bligh, Sept 2002
14 * (lots of bits borrowed from Ingo Molnar & Andrew Morton)
15 */
17 #include <linux/config.h>
18 #include <linux/stddef.h>
19 #include <linux/mm.h>
20 #include <linux/swap.h>
21 #include <linux/interrupt.h>
22 #include <linux/pagemap.h>
23 #include <linux/bootmem.h>
24 #include <linux/compiler.h>
25 #include <linux/module.h>
26 #include <linux/suspend.h>
27 #include <linux/pagevec.h>
28 #include <linux/blkdev.h>
29 #include <linux/slab.h>
30 #include <linux/notifier.h>
31 #include <linux/topology.h>
32 #include <linux/sysctl.h>
33 #include <linux/cpu.h>
35 #include <asm/tlbflush.h>
37 DECLARE_BITMAP(node_online_map, MAX_NUMNODES);
38 DECLARE_BITMAP(memblk_online_map, MAX_NR_MEMBLKS);
39 struct pglist_data *pgdat_list;
40 unsigned long totalram_pages;
41 unsigned long totalhigh_pages;
42 int nr_swap_pages;
43 int numnodes = 1;
44 int sysctl_lower_zone_protection = 0;
46 EXPORT_SYMBOL(totalram_pages);
47 EXPORT_SYMBOL(nr_swap_pages);
49 /*
50 * Used by page_zone() to look up the address of the struct zone whose
51 * id is encoded in the upper bits of page->flags
52 */
53 struct zone *zone_table[MAX_NR_ZONES*MAX_NR_NODES];
54 EXPORT_SYMBOL(zone_table);
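/*
 * Illustrative sketch, not part of the original file: free_area_init_core()
 * below fills zone_table at index nid * MAX_NR_ZONES + zone, the same value
 * memmap_init_zone() packs into page->flags via set_page_zone().  A
 * hypothetical lookup by node id and zone index would therefore be:
 */
static inline struct zone *example_zone_lookup(int nid, int zone_idx)
{
	return zone_table[nid * MAX_NR_ZONES + zone_idx];
}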
56 static char *zone_names[MAX_NR_ZONES] = { "DMA", "Normal", "HighMem" };
57 int min_free_kbytes = 1024;
60 * Temporary debugging check for pages not lying within a given zone.
62 static int bad_range(struct zone *zone, struct page *page)
64 if (page_to_pfn(page) >= zone->zone_start_pfn + zone->spanned_pages)
65 return 1;
66 if (page_to_pfn(page) < zone->zone_start_pfn)
67 return 1;
68 if (zone != page_zone(page))
69 return 1;
70 return 0;
73 static void bad_page(const char *function, struct page *page)
75 printk("Bad page state at %s\n", function);
76 printk("flags:0x%08lx mapping:%p mapped:%d count:%d\n",
77 page->flags, page->mapping,
78 page_mapped(page), page_count(page));
79 printk("Backtrace:\n");
80 dump_stack();
81 printk("Trying to fix it up, but a reboot is needed\n");
82 page->flags &= ~(1 << PG_private |
83 1 << PG_locked |
84 1 << PG_lru |
85 1 << PG_active |
86 1 << PG_dirty |
87 1 << PG_writeback);
88 set_page_count(page, 0);
89 page->mapping = NULL;
92 #ifndef CONFIG_HUGETLB_PAGE
93 #define prep_compound_page(page, order) do { } while (0)
94 #define destroy_compound_page(page, order) do { } while (0)
95 #else
96 /*
97 * Higher-order pages are called "compound pages". They are structured thusly:
99 * The first PAGE_SIZE page is called the "head page".
101 * The remaining PAGE_SIZE pages are called "tail pages".
103 * All pages have PG_compound set. All pages have their lru.next pointing at
104 * the head page (even the head page has this).
106 * The head page's lru.prev, if non-zero, holds the address of the compound
107 * page's put_page() function.
109 * The order of the allocation is stored in the first tail page's lru.prev.
110 * This is only for debug at present. This usage means that zero-order pages
111 * may not be compound.
112 */
113 static void prep_compound_page(struct page *page, unsigned long order)
115 int i;
116 int nr_pages = 1 << order;
118 page->lru.prev = NULL;
119 page[1].lru.prev = (void *)order;
120 for (i = 0; i < nr_pages; i++) {
121 struct page *p = page + i;
123 SetPageCompound(p);
124 p->lru.next = (void *)page;
128 static void destroy_compound_page(struct page *page, unsigned long order)
130 int i;
131 int nr_pages = 1 << order;
133 if (page[1].lru.prev != (void *)order)
134 bad_page(__FUNCTION__, page);
136 for (i = 0; i < nr_pages; i++) {
137 struct page *p = page + i;
139 if (!PageCompound(p))
140 bad_page(__FUNCTION__, page);
141 if (p->lru.next != (void *)page)
142 bad_page(__FUNCTION__, page);
143 ClearPageCompound(p);
146 #endif /* CONFIG_HUGETLB_PAGE */
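/*
 * Illustrative sketch, not from the original file, following the layout
 * described above: every page of a compound allocation points back at the
 * head page through lru.next, and the first tail page records the order in
 * lru.prev.  Hypothetical helpers under those assumptions:
 */
static inline struct page *example_compound_head(struct page *p)
{
	return (struct page *)p->lru.next;	/* set by prep_compound_page() */
}

static inline unsigned long example_compound_order(struct page *head)
{
	return (unsigned long)head[1].lru.prev;	/* debug-only field, see above */
}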
148 /*
149 * Freeing function for a buddy system allocator.
151 * The concept of a buddy system is to maintain direct-mapped table
152 * (containing bit values) for memory blocks of various "orders".
153 * The bottom level table contains the map for the smallest allocatable
154 * units of memory (here, pages), and each level above it describes
155 * pairs of units from the levels below, hence, "buddies".
156 * At a high level, all that happens here is marking the table entry
157 * at the bottom level available, and propagating the changes upward
158 * as necessary, plus some accounting needed to play nicely with other
159 * parts of the VM system.
160 * At each level, we keep one bit for each pair of blocks, which
161 * is set to 1 iff only one of the pair is allocated. So when we
162 * are allocating or freeing one, we can derive the state of the
163 * other. That is, if we allocate a small block, and both were
164 * free, the remainder of the region must be split into blocks.
165 * If a block is freed, and its buddy is also free, then this
166 * triggers coalescing into a block of larger size.
168 * -- wli
169 */
171 static inline void __free_pages_bulk (struct page *page, struct page *base,
172 struct zone *zone, struct free_area *area, unsigned long mask,
173 unsigned int order)
175 unsigned long page_idx, index;
177 if (order)
178 destroy_compound_page(page, order);
179 page_idx = page - base;
180 if (page_idx & ~mask)
181 BUG();
182 index = page_idx >> (1 + order);
184 zone->free_pages -= mask;
185 while (mask + (1 << (MAX_ORDER-1))) {
186 struct page *buddy1, *buddy2;
188 BUG_ON(area >= zone->free_area + MAX_ORDER);
189 if (!__test_and_change_bit(index, area->map))
191 * the buddy page is still allocated.
193 break;
195 * Move the buddy up one level.
196 * This code is taking advantage of the identity:
197 * -mask = 1+~mask
199 buddy1 = base + (page_idx ^ -mask);
200 buddy2 = base + page_idx;
201 BUG_ON(bad_range(zone, buddy1));
202 BUG_ON(bad_range(zone, buddy2));
203 list_del(&buddy1->list);
204 mask <<= 1;
205 area++;
206 index >>= 1;
207 page_idx &= mask;
209 list_add(&(base + page_idx)->list, &area->free_list);
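/*
 * Worked example of the index arithmetic above (an illustrative sketch, not
 * part of the original file).  With mask = ~0UL << order, the identity
 * -mask == 1UL << order holds, so "page_idx ^ -mask" flips exactly the bit
 * that separates a block from its buddy, and "page_idx &= mask" (after
 * mask <<= 1) rounds down to the start of the merged block.  For order 2 and
 * page_idx 12 (binary 1100): buddy index = 12 ^ 4 = 8, and the merged
 * order-3 block starts at 12 & (~0UL << 3) = 8.
 */
static inline unsigned long example_buddy_index(unsigned long page_idx,
						unsigned int order)
{
	return page_idx ^ (1UL << order);
}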
212 static inline void free_pages_check(const char *function, struct page *page)
214 if ( page_mapped(page) ||
215 page->mapping != NULL ||
216 page_count(page) != 0 ||
217 (page->flags & (
218 1 << PG_lru |
219 1 << PG_private |
220 1 << PG_locked |
221 1 << PG_active |
222 1 << PG_reclaim |
223 1 << PG_writeback )))
224 bad_page(function, page);
225 if (PageDirty(page))
226 ClearPageDirty(page);
229 /*
230 * Frees a list of pages.
231 * Assumes all pages on list are in same zone, and of same order.
232 * count is the number of pages to free, or 0 for all on the list.
234 * If the zone was previously in an "all pages pinned" state then look to
235 * see if this freeing clears that state.
237 * And clear the zone's pages_scanned counter, to hold off the "all pages are
238 * pinned" detection logic.
239 */
240 static int
241 free_pages_bulk(struct zone *zone, int count,
242 struct list_head *list, unsigned int order)
244 unsigned long mask, flags;
245 struct free_area *area;
246 struct page *base, *page = NULL;
247 int ret = 0;
249 mask = (~0UL) << order;
250 base = zone->zone_mem_map;
251 area = zone->free_area + order;
252 spin_lock_irqsave(&zone->lock, flags);
253 zone->all_unreclaimable = 0;
254 zone->pages_scanned = 0;
255 while (!list_empty(list) && count--) {
256 page = list_entry(list->prev, struct page, list);
257 /* have to delete it as __free_pages_bulk list manipulates */
258 list_del(&page->list);
259 __free_pages_bulk(page, base, zone, area, mask, order);
260 ret++;
262 spin_unlock_irqrestore(&zone->lock, flags);
263 return ret;
266 void __free_pages_ok(struct page *page, unsigned int order)
268 LIST_HEAD(list);
270 mod_page_state(pgfree, 1 << order);
271 free_pages_check(__FUNCTION__, page);
272 list_add(&page->list, &list);
273 kernel_map_pages(page, 1<<order, 0);
274 free_pages_bulk(page_zone(page), 1, &list, order);
277 #define MARK_USED(index, order, area) \
278 __change_bit((index) >> (1+(order)), (area)->map)
280 static inline struct page *
281 expand(struct zone *zone, struct page *page,
282 unsigned long index, int low, int high, struct free_area *area)
284 unsigned long size = 1 << high;
286 while (high > low) {
287 BUG_ON(bad_range(zone, page));
288 area--;
289 high--;
290 size >>= 1;
291 list_add(&page->list, &area->free_list);
292 MARK_USED(index, high, area);
293 index += size;
294 page += size;
296 return page;
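/*
 * Worked example (illustrative, not from the original file): if an order-0
 * page is requested and the smallest free block found is order 3, expand()
 * walks high = 3 down to low = 0, freeing the front half at each step: an
 * order-2 block at offset 0, an order-1 block at offset 4 and an order-0
 * page at offset 6 go back on their free lists, MARK_USED() toggles the
 * pair bit at each level, and the page at offset 7 is handed to the caller.
 */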
299 static inline void set_page_refs(struct page *page, int order)
301 #ifdef CONFIG_MMU
302 set_page_count(page, 1);
303 #else
304 int i;
307 * We need to reference all the pages for this order, otherwise if
308 * anyone accesses one of the pages with (get/put) it will be freed.
310 for (i = 0; i < (1 << order); i++)
311 set_page_count(page+i, 1);
312 #endif /* CONFIG_MMU */
316 * This page is about to be returned from the page allocator
318 static void prep_new_page(struct page *page, int order)
320 if (page->mapping || page_mapped(page) ||
321 (page->flags & (
322 1 << PG_private |
323 1 << PG_locked |
324 1 << PG_lru |
325 1 << PG_active |
326 1 << PG_dirty |
327 1 << PG_reclaim |
328 1 << PG_writeback )))
329 bad_page(__FUNCTION__, page);
331 page->flags &= ~(1 << PG_uptodate | 1 << PG_error |
332 1 << PG_referenced | 1 << PG_arch_1 |
333 1 << PG_checked | 1 << PG_mappedtodisk);
334 set_page_refs(page, order);
338 * Do the hard work of removing an element from the buddy allocator.
339 * Call me with the zone->lock already held.
341 static struct page *__rmqueue(struct zone *zone, unsigned int order)
343 struct free_area * area;
344 unsigned int current_order;
345 struct page *page;
346 unsigned int index;
348 for (current_order = order; current_order < MAX_ORDER; ++current_order) {
349 area = zone->free_area + current_order;
350 if (list_empty(&area->free_list))
351 continue;
353 page = list_entry(area->free_list.next, struct page, list);
354 list_del(&page->list);
355 index = page - zone->zone_mem_map;
356 if (current_order != MAX_ORDER-1)
357 MARK_USED(index, current_order, area);
358 zone->free_pages -= 1UL << order;
359 return expand(zone, page, index, order, current_order, area);
362 return NULL;
366 * Obtain a specified number of elements from the buddy allocator, all under
367 * a single hold of the lock, for efficiency. Add them to the supplied list.
368 * Returns the number of new pages which were placed at *list.
370 static int rmqueue_bulk(struct zone *zone, unsigned int order,
371 unsigned long count, struct list_head *list)
373 unsigned long flags;
374 int i;
375 int allocated = 0;
376 struct page *page;
378 spin_lock_irqsave(&zone->lock, flags);
379 for (i = 0; i < count; ++i) {
380 page = __rmqueue(zone, order);
381 if (page == NULL)
382 break;
383 allocated++;
384 list_add_tail(&page->list, list);
386 spin_unlock_irqrestore(&zone->lock, flags);
387 return allocated;
390 #ifdef CONFIG_SOFTWARE_SUSPEND
391 int is_head_of_free_region(struct page *page)
393 struct zone *zone = page_zone(page);
394 unsigned long flags;
395 int order;
396 struct list_head *curr;
399 * Should not matter as we need quiescent system for
400 * suspend anyway, but...
402 spin_lock_irqsave(&zone->lock, flags);
403 for (order = MAX_ORDER - 1; order >= 0; --order)
404 list_for_each(curr, &zone->free_area[order].free_list)
405 if (page == list_entry(curr, struct page, list)) {
406 spin_unlock_irqrestore(&zone->lock, flags);
407 return 1 << order;
409 spin_unlock_irqrestore(&zone->lock, flags);
410 return 0;
414 * Spill all of this CPU's per-cpu pages back into the buddy allocator.
416 void drain_local_pages(void)
418 unsigned long flags;
419 struct zone *zone;
420 int i;
422 local_irq_save(flags);
423 for_each_zone(zone) {
424 struct per_cpu_pageset *pset;
426 pset = &zone->pageset[smp_processor_id()];
427 for (i = 0; i < ARRAY_SIZE(pset->pcp); i++) {
428 struct per_cpu_pages *pcp;
430 pcp = &pset->pcp[i];
431 pcp->count -= free_pages_bulk(zone, pcp->count,
432 &pcp->list, 0);
435 local_irq_restore(flags);
437 #endif /* CONFIG_SOFTWARE_SUSPEND */
440 * Free a 0-order page
442 static void FASTCALL(free_hot_cold_page(struct page *page, int cold));
443 static void free_hot_cold_page(struct page *page, int cold)
445 struct zone *zone = page_zone(page);
446 struct per_cpu_pages *pcp;
447 unsigned long flags;
449 kernel_map_pages(page, 1, 0);
450 inc_page_state(pgfree);
451 free_pages_check(__FUNCTION__, page);
452 pcp = &zone->pageset[get_cpu()].pcp[cold];
453 local_irq_save(flags);
454 if (pcp->count >= pcp->high)
455 pcp->count -= free_pages_bulk(zone, pcp->batch, &pcp->list, 0);
456 list_add(&page->list, &pcp->list);
457 pcp->count++;
458 local_irq_restore(flags);
459 put_cpu();
462 void free_hot_page(struct page *page)
464 free_hot_cold_page(page, 0);
467 void free_cold_page(struct page *page)
469 free_hot_cold_page(page, 1);
473 * Really, prep_compound_page() should be called from __rmqueue_bulk(). But
474 * we cheat by calling it from here, in the order > 0 path. Saves a branch
475 * or two.
478 static struct page *buffered_rmqueue(struct zone *zone, int order, int cold)
480 unsigned long flags;
481 struct page *page = NULL;
483 if (order == 0) {
484 struct per_cpu_pages *pcp;
486 pcp = &zone->pageset[get_cpu()].pcp[cold];
487 local_irq_save(flags);
488 if (pcp->count <= pcp->low)
489 pcp->count += rmqueue_bulk(zone, 0,
490 pcp->batch, &pcp->list);
491 if (pcp->count) {
492 page = list_entry(pcp->list.next, struct page, list);
493 list_del(&page->list);
494 pcp->count--;
496 local_irq_restore(flags);
497 put_cpu();
500 if (page == NULL) {
501 spin_lock_irqsave(&zone->lock, flags);
502 page = __rmqueue(zone, order);
503 spin_unlock_irqrestore(&zone->lock, flags);
504 if (order && page)
505 prep_compound_page(page, order);
508 if (page != NULL) {
509 BUG_ON(bad_range(zone, page));
510 mod_page_state(pgalloc, 1 << order);
511 prep_new_page(page, order);
513 return page;
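/*
 * Worked example of the per-cpu list thresholds (illustrative figures, not
 * from the original file).  free_area_init_core() below sets the hot list to
 * low = 2*batch and high = 6*batch; with batch = 16 that is low = 32,
 * high = 96.  buffered_rmqueue() then pulls 16 more pages from the buddy
 * lists whenever the hot list has dropped to 32 pages or fewer, and
 * free_hot_cold_page() pushes 16 pages back whenever a page is freed while
 * the list already holds 96 or more.
 */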
516 /*
517 * This is the 'heart' of the zoned buddy allocator.
519 * Herein lies the mysterious "incremental min". That's the
521 * min += z->pages_low;
523 * thing. The intent here is to provide additional protection to low zones for
524 * allocation requests which _could_ use higher zones. So a GFP_HIGHMEM
525 * request is not allowed to dip as deeply into the normal zone as a GFP_KERNEL
526 * request. This preserves additional space in those lower zones for requests
527 * which really do need memory from those zones. It means that on a decent
528 * sized machine, GFP_HIGHMEM and GFP_KERNEL requests basically leave the DMA
529 * zone untouched.
530 */
531 struct page *
532 __alloc_pages(unsigned int gfp_mask, unsigned int order,
533 struct zonelist *zonelist)
535 const int wait = gfp_mask & __GFP_WAIT;
536 unsigned long min;
537 struct zone **zones, *classzone;
538 struct page *page;
539 int i;
540 int cold;
541 int do_retry;
542 struct reclaim_state reclaim_state;
544 if (wait)
545 might_sleep();
547 cold = 0;
548 if (gfp_mask & __GFP_COLD)
549 cold = 1;
551 zones = zonelist->zones; /* the list of zones suitable for gfp_mask */
552 classzone = zones[0];
553 if (classzone == NULL) /* no zones in the zonelist */
554 return NULL;
556 /* Go through the zonelist once, looking for a zone with enough free */
557 min = 1UL << order;
558 for (i = 0; zones[i] != NULL; i++) {
559 struct zone *z = zones[i];
561 min += z->pages_low;
562 if (z->free_pages >= min ||
563 (!wait && z->free_pages >= z->pages_high)) {
564 page = buffered_rmqueue(z, order, cold);
565 if (page)
566 goto got_pg;
568 min += z->pages_low * sysctl_lower_zone_protection;
571 /* we're somewhat low on memory, failed to find what we needed */
572 for (i = 0; zones[i] != NULL; i++)
573 wakeup_kswapd(zones[i]);
575 /* Go through the zonelist again, taking __GFP_HIGH into account */
576 min = 1UL << order;
577 for (i = 0; zones[i] != NULL; i++) {
578 unsigned long local_min;
579 struct zone *z = zones[i];
581 local_min = z->pages_min;
582 if (gfp_mask & __GFP_HIGH)
583 local_min >>= 2;
584 min += local_min;
585 if (z->free_pages >= min ||
586 (!wait && z->free_pages >= z->pages_high)) {
587 page = buffered_rmqueue(z, order, cold);
588 if (page)
589 goto got_pg;
591 min += local_min * sysctl_lower_zone_protection;
594 /* here we're in the low on memory slow path */
596 rebalance:
597 if ((current->flags & (PF_MEMALLOC | PF_MEMDIE)) && !in_interrupt()) {
598 /* go through the zonelist yet again, ignoring mins */
599 for (i = 0; zones[i] != NULL; i++) {
600 struct zone *z = zones[i];
602 page = buffered_rmqueue(z, order, cold);
603 if (page)
604 goto got_pg;
606 goto nopage;
609 /* Atomic allocations - we can't balance anything */
610 if (!wait)
611 goto nopage;
613 current->flags |= PF_MEMALLOC;
614 reclaim_state.reclaimed_slab = 0;
615 current->reclaim_state = &reclaim_state;
617 try_to_free_pages(classzone, gfp_mask, order);
619 current->reclaim_state = NULL;
620 current->flags &= ~PF_MEMALLOC;
622 /* go through the zonelist yet one more time */
623 min = 1UL << order;
624 for (i = 0; zones[i] != NULL; i++) {
625 struct zone *z = zones[i];
627 min += z->pages_min;
628 if (z->free_pages >= min ||
629 (!wait && z->free_pages >= z->pages_high)) {
630 page = buffered_rmqueue(z, order, cold);
631 if (page)
632 goto got_pg;
634 min += z->pages_low * sysctl_lower_zone_protection;
637 /*
638 * Don't let big-order allocations loop unless the caller explicitly
639 * requests that. Wait for some write requests to complete then retry.
641 * In this implementation, __GFP_REPEAT means __GFP_NOFAIL, but that
642 * may not be true in other implementations.
643 */
644 do_retry = 0;
645 if (!(gfp_mask & __GFP_NORETRY)) {
646 if ((order <= 3) || (gfp_mask & __GFP_REPEAT))
647 do_retry = 1;
648 if (gfp_mask & __GFP_NOFAIL)
649 do_retry = 1;
651 if (do_retry) {
652 blk_congestion_wait(WRITE, HZ/50);
653 goto rebalance;
656 nopage:
657 if (!(gfp_mask & __GFP_NOWARN)) {
658 printk("%s: page allocation failure."
659 " order:%d, mode:0x%x\n",
660 current->comm, order, gfp_mask);
662 return NULL;
663 got_pg:
664 kernel_map_pages(page, 1 << order, 1);
665 return page;
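/*
 * Worked example of the "incremental min" described above (illustrative
 * figures, not from the original file), with sysctl_lower_zone_protection
 * left at its default of 0.  Suppose the zonelist order is HighMem, Normal,
 * DMA with pages_low of 512, 256 and 32, and order = 0.  A GFP_HIGHMEM
 * request must find:
 *	HighMem:  free >= 1 + 512             = 513
 *	Normal:   free >= 1 + 512 + 256       = 769
 *	DMA:      free >= 1 + 512 + 256 + 32  = 801
 * while a GFP_DMA request only needs DMA free >= 1 + 32 = 33, so the lower
 * zones are held back for callers that cannot use anything else.
 */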
668 /*
669 * Common helper functions.
670 */
671 unsigned long __get_free_pages(unsigned int gfp_mask, unsigned int order)
673 struct page * page;
675 page = alloc_pages(gfp_mask, order);
676 if (!page)
677 return 0;
678 return (unsigned long) page_address(page);
681 unsigned long get_zeroed_page(unsigned int gfp_mask)
683 struct page * page;
686 * get_zeroed_page() returns a 32-bit address, which cannot represent
687 * a highmem page
689 BUG_ON(gfp_mask & __GFP_HIGHMEM);
691 page = alloc_pages(gfp_mask, 0);
692 if (page) {
693 void *address = page_address(page);
694 clear_page(address);
695 return (unsigned long) address;
697 return 0;
700 void __pagevec_free(struct pagevec *pvec)
702 int i = pagevec_count(pvec);
704 while (--i >= 0)
705 free_hot_cold_page(pvec->pages[i], pvec->cold);
708 void __free_pages(struct page *page, unsigned int order)
710 if (!PageReserved(page) && put_page_testzero(page)) {
711 if (order == 0)
712 free_hot_page(page);
713 else
714 __free_pages_ok(page, order);
718 void free_pages(unsigned long addr, unsigned int order)
720 if (addr != 0) {
721 BUG_ON(!virt_addr_valid(addr));
722 __free_pages(virt_to_page(addr), order);
727 * Total amount of free (allocatable) RAM:
729 unsigned int nr_free_pages(void)
731 unsigned int sum = 0;
732 struct zone *zone;
734 for_each_zone(zone)
735 sum += zone->free_pages;
737 return sum;
739 EXPORT_SYMBOL(nr_free_pages);
741 unsigned int nr_used_zone_pages(void)
743 unsigned int pages = 0;
744 struct zone *zone;
746 for_each_zone(zone)
747 pages += zone->nr_active + zone->nr_inactive;
749 return pages;
752 #ifdef CONFIG_NUMA
753 unsigned int nr_free_pages_pgdat(pg_data_t *pgdat)
755 unsigned int i, sum = 0;
757 for (i = 0; i < MAX_NR_ZONES; i++)
758 sum += pgdat->node_zones[i].free_pages;
760 return sum;
762 #endif
764 static unsigned int nr_free_zone_pages(int offset)
766 pg_data_t *pgdat;
767 unsigned int sum = 0;
769 for_each_pgdat(pgdat) {
770 struct zonelist *zonelist = pgdat->node_zonelists + offset;
771 struct zone **zonep = zonelist->zones;
772 struct zone *zone;
774 for (zone = *zonep++; zone; zone = *zonep++) {
775 unsigned long size = zone->present_pages;
776 unsigned long high = zone->pages_high;
777 if (size > high)
778 sum += size - high;
782 return sum;
786 * Amount of free RAM allocatable within ZONE_DMA and ZONE_NORMAL
788 unsigned int nr_free_buffer_pages(void)
790 return nr_free_zone_pages(GFP_USER & GFP_ZONEMASK);
794 * Amount of free RAM allocatable within all zones
796 unsigned int nr_free_pagecache_pages(void)
798 return nr_free_zone_pages(GFP_HIGHUSER & GFP_ZONEMASK);
801 #ifdef CONFIG_HIGHMEM
802 unsigned int nr_free_highpages (void)
804 pg_data_t *pgdat;
805 unsigned int pages = 0;
807 for_each_pgdat(pgdat)
808 pages += pgdat->node_zones[ZONE_HIGHMEM].free_pages;
810 return pages;
812 #endif
814 #ifdef CONFIG_NUMA
815 static void show_node(struct zone *zone)
817 printk("Node %d ", zone->zone_pgdat->node_id);
819 #else
820 #define show_node(zone) do { } while (0)
821 #endif
824 * Accumulate the page_state information across all CPUs.
825 * The result is unavoidably approximate - it can change
826 * during and after execution of this function.
828 DEFINE_PER_CPU(struct page_state, page_states) = {0};
829 EXPORT_PER_CPU_SYMBOL(page_states);
831 atomic_t nr_pagecache = ATOMIC_INIT(0);
832 EXPORT_SYMBOL(nr_pagecache);
833 #ifdef CONFIG_SMP
834 DEFINE_PER_CPU(long, nr_pagecache_local) = 0;
835 #endif
837 void __get_page_state(struct page_state *ret, int nr)
839 int cpu = 0;
841 memset(ret, 0, sizeof(*ret));
842 while (cpu < NR_CPUS) {
843 unsigned long *in, *out, off;
845 if (!cpu_online(cpu)) {
846 cpu++;
847 continue;
850 in = (unsigned long *)&per_cpu(page_states, cpu);
851 cpu++;
852 if (cpu < NR_CPUS && cpu_online(cpu))
853 prefetch(&per_cpu(page_states, cpu));
854 out = (unsigned long *)ret;
855 for (off = 0; off < nr; off++)
856 *out++ += *in++;
860 void get_page_state(struct page_state *ret)
862 int nr;
864 nr = offsetof(struct page_state, GET_PAGE_STATE_LAST);
865 nr /= sizeof(unsigned long);
867 __get_page_state(ret, nr + 1);
870 void get_full_page_state(struct page_state *ret)
872 __get_page_state(ret, sizeof(*ret) / sizeof(unsigned long));
875 void get_zone_counts(unsigned long *active,
876 unsigned long *inactive, unsigned long *free)
878 struct zone *zone;
880 *active = 0;
881 *inactive = 0;
882 *free = 0;
883 for_each_zone(zone) {
884 *active += zone->nr_active;
885 *inactive += zone->nr_inactive;
886 *free += zone->free_pages;
890 void si_meminfo(struct sysinfo *val)
892 val->totalram = totalram_pages;
893 val->sharedram = 0;
894 val->freeram = nr_free_pages();
895 val->bufferram = nr_blockdev_pages();
896 #ifdef CONFIG_HIGHMEM
897 val->totalhigh = totalhigh_pages;
898 val->freehigh = nr_free_highpages();
899 #else
900 val->totalhigh = 0;
901 val->freehigh = 0;
902 #endif
903 val->mem_unit = PAGE_SIZE;
906 #ifdef CONFIG_NUMA
907 void si_meminfo_node(struct sysinfo *val, int nid)
909 pg_data_t *pgdat = NODE_DATA(nid);
911 val->totalram = pgdat->node_present_pages;
912 val->freeram = nr_free_pages_pgdat(pgdat);
913 val->totalhigh = pgdat->node_zones[ZONE_HIGHMEM].present_pages;
914 val->freehigh = pgdat->node_zones[ZONE_HIGHMEM].free_pages;
915 val->mem_unit = PAGE_SIZE;
917 #endif
919 #define K(x) ((x) << (PAGE_SHIFT-10))
922 * Show free area list (used inside shift_scroll-lock stuff)
923 * We also calculate the percentage fragmentation. We do this by counting the
924 * memory on each free list with the exception of the first item on the list.
926 void show_free_areas(void)
928 struct page_state ps;
929 int cpu, temperature;
930 unsigned long active;
931 unsigned long inactive;
932 unsigned long free;
933 struct zone *zone;
935 for_each_zone(zone) {
936 show_node(zone);
937 printk("%s per-cpu:", zone->name);
939 if (!zone->present_pages) {
940 printk(" empty\n");
941 continue;
942 } else
943 printk("\n");
945 for (cpu = 0; cpu < NR_CPUS; ++cpu) {
946 struct per_cpu_pageset *pageset = zone->pageset + cpu;
947 for (temperature = 0; temperature < 2; temperature++)
948 printk("cpu %d %s: low %d, high %d, batch %d\n",
949 cpu,
950 temperature ? "cold" : "hot",
951 pageset->pcp[temperature].low,
952 pageset->pcp[temperature].high,
953 pageset->pcp[temperature].batch);
957 get_page_state(&ps);
958 get_zone_counts(&active, &inactive, &free);
960 printk("\nFree pages: %11ukB (%ukB HighMem)\n",
961 K(nr_free_pages()),
962 K(nr_free_highpages()));
964 printk("Active:%lu inactive:%lu dirty:%lu writeback:%lu "
965 "unstable:%lu free:%u\n",
966 active,
967 inactive,
968 ps.nr_dirty,
969 ps.nr_writeback,
970 ps.nr_unstable,
971 nr_free_pages());
973 for_each_zone(zone) {
974 show_node(zone);
975 printk("%s"
976 " free:%lukB"
977 " min:%lukB"
978 " low:%lukB"
979 " high:%lukB"
980 " active:%lukB"
981 " inactive:%lukB"
982 "\n",
983 zone->name,
984 K(zone->free_pages),
985 K(zone->pages_min),
986 K(zone->pages_low),
987 K(zone->pages_high),
988 K(zone->nr_active),
989 K(zone->nr_inactive)
993 for_each_zone(zone) {
994 struct list_head *elem;
995 unsigned long nr, flags, order, total = 0;
997 show_node(zone);
998 printk("%s: ", zone->name);
999 if (!zone->present_pages) {
1000 printk("empty\n");
1001 continue;
1004 spin_lock_irqsave(&zone->lock, flags);
1005 for (order = 0; order < MAX_ORDER; order++) {
1006 nr = 0;
1007 list_for_each(elem, &zone->free_area[order].free_list)
1008 ++nr;
1009 total += nr << order;
1010 printk("%lu*%lukB ", nr, K(1UL) << order);
1012 spin_unlock_irqrestore(&zone->lock, flags);
1013 printk("= %lukB\n", K(total));
1016 show_swap_cache_info();
1020 * Builds allocation fallback zone lists.
1022 static int __init build_zonelists_node(pg_data_t *pgdat, struct zonelist *zonelist, int j, int k)
1024 switch (k) {
1025 struct zone *zone;
1026 default:
1027 BUG();
1028 case ZONE_HIGHMEM:
1029 zone = pgdat->node_zones + ZONE_HIGHMEM;
1030 if (zone->present_pages) {
1031 #ifndef CONFIG_HIGHMEM
1032 BUG();
1033 #endif
1034 zonelist->zones[j++] = zone;
1036 case ZONE_NORMAL:
1037 zone = pgdat->node_zones + ZONE_NORMAL;
1038 if (zone->present_pages)
1039 zonelist->zones[j++] = zone;
1040 case ZONE_DMA:
1041 zone = pgdat->node_zones + ZONE_DMA;
1042 if (zone->present_pages)
1043 zonelist->zones[j++] = zone;
1046 return j;
1049 static void __init build_zonelists(pg_data_t *pgdat)
1051 int i, j, k, node, local_node;
1053 local_node = pgdat->node_id;
1054 printk("Building zonelist for node : %d\n", local_node);
1055 for (i = 0; i < MAX_NR_ZONES; i++) {
1056 struct zonelist *zonelist;
1058 zonelist = pgdat->node_zonelists + i;
1059 memset(zonelist, 0, sizeof(*zonelist));
1061 j = 0;
1062 k = ZONE_NORMAL;
1063 if (i & __GFP_HIGHMEM)
1064 k = ZONE_HIGHMEM;
1065 if (i & __GFP_DMA)
1066 k = ZONE_DMA;
1068 j = build_zonelists_node(pgdat, zonelist, j, k);
1069 /*
1070 * Now we build the zonelist so that it contains the zones
1071 * of all the other nodes.
1072 * We don't want to pressure a particular node, so when
1073 * building the zones for node N, we make sure that the
1074 * zones coming right after the local ones are those from
1075 * node N+1 (modulo N)
1076 */
1077 for (node = local_node + 1; node < numnodes; node++)
1078 j = build_zonelists_node(NODE_DATA(node), zonelist, j, k);
1079 for (node = 0; node < local_node; node++)
1080 j = build_zonelists_node(NODE_DATA(node), zonelist, j, k);
1082 zonelist->zones[j++] = NULL;
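/*
 * Worked example (illustrative, not from the original file): with
 * numnodes = 4 and local_node = 2, the local call above adds node 2's zones
 * first and the two loops then visit nodes 3, 0, 1, so each node spreads
 * fallback pressure to its successors instead of always starting at node 0.
 */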
1086 void __init build_all_zonelists(void)
1088 int i;
1090 for(i = 0 ; i < numnodes ; i++)
1091 build_zonelists(NODE_DATA(i));
1094 /*
1095 * Helper functions to size the waitqueue hash table.
1096 * Essentially these want to choose hash table sizes sufficiently
1097 * large so that collisions trying to wait on pages are rare.
1098 * But in fact, the number of active page waitqueues on typical
1099 * systems is ridiculously low, less than 200. So this is even
1100 * conservative, even though it seems large.
1102 * The constant PAGES_PER_WAITQUEUE specifies the ratio of pages to
1103 * waitqueues, i.e. the size of the waitq table given the number of pages.
1104 */
1105 #define PAGES_PER_WAITQUEUE 256
1107 static inline unsigned long wait_table_size(unsigned long pages)
1109 unsigned long size = 1;
1111 pages /= PAGES_PER_WAITQUEUE;
1113 while (size < pages)
1114 size <<= 1;
1117 * Once we have dozens or even hundreds of threads sleeping
1118 * on IO we've got bigger problems than wait queue collision.
1119 * Limit the size of the wait table to a reasonable size.
1121 size = min(size, 4096UL);
1123 return max(size, 4UL);
1126 /*
1127 * This is an integer logarithm so that shifts can be used later
1128 * to extract the more random high bits from the multiplicative
1129 * hash function before the remainder is taken.
1130 */
1131 static inline unsigned long wait_table_bits(unsigned long size)
1133 return ffz(~size);
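/*
 * Worked example (illustrative, not from the original file): a zone of
 * 131072 pages (512MB with 4KB pages) gives 131072 / PAGES_PER_WAITQUEUE
 * = 512, already a power of two, so wait_table_size() returns 512 and
 * wait_table_bits(512) = ffz(~512) = 9, i.e. log2 of the table size.
 */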
1136 #define LONG_ALIGN(x) (((x)+(sizeof(long))-1)&~((sizeof(long))-1))
1138 static void __init calculate_zone_totalpages(struct pglist_data *pgdat,
1139 unsigned long *zones_size, unsigned long *zholes_size)
1141 unsigned long realtotalpages, totalpages = 0;
1142 int i;
1144 for (i = 0; i < MAX_NR_ZONES; i++)
1145 totalpages += zones_size[i];
1146 pgdat->node_spanned_pages = totalpages;
1148 realtotalpages = totalpages;
1149 if (zholes_size)
1150 for (i = 0; i < MAX_NR_ZONES; i++)
1151 realtotalpages -= zholes_size[i];
1152 pgdat->node_present_pages = realtotalpages;
1153 printk("On node %d totalpages: %lu\n", pgdat->node_id, realtotalpages);
1157 * Get space for the valid bitmap.
1159 static void __init calculate_zone_bitmap(struct pglist_data *pgdat,
1160 unsigned long *zones_size)
1162 unsigned long size = 0;
1163 int i;
1165 for (i = 0; i < MAX_NR_ZONES; i++)
1166 size += zones_size[i];
1167 size = LONG_ALIGN((size + 7) >> 3);
1168 if (size) {
1169 pgdat->valid_addr_bitmap =
1170 (unsigned long *)alloc_bootmem_node(pgdat, size);
1171 memset(pgdat->valid_addr_bitmap, 0, size);
1176 * Initially all pages are reserved - free ones are freed
1177 * up by free_all_bootmem() once the early boot process is
1178 * done. Non-atomic initialization, single-pass.
1180 void __init memmap_init_zone(struct page *start, unsigned long size, int nid,
1181 unsigned long zone, unsigned long start_pfn)
1183 struct page *page;
1185 for (page = start; page < (start + size); page++) {
1186 set_page_zone(page, nid * MAX_NR_ZONES + zone);
1187 set_page_count(page, 0);
1188 SetPageReserved(page);
1189 INIT_LIST_HEAD(&page->list);
1190 #ifdef WANT_PAGE_VIRTUAL
1191 /* The shift won't overflow because ZONE_NORMAL is below 4G. */
1192 if (zone != ZONE_HIGHMEM)
1193 set_page_address(page, __va(start_pfn << PAGE_SHIFT));
1194 #endif
1195 start_pfn++;
1199 #ifndef __HAVE_ARCH_MEMMAP_INIT
1200 #define memmap_init(start, size, nid, zone, start_pfn) \
1201 memmap_init_zone((start), (size), (nid), (zone), (start_pfn))
1202 #endif
1205 * Set up the zone data structures:
1206 * - mark all pages reserved
1207 * - mark all memory queues empty
1208 * - clear the memory bitmaps
1210 static void __init free_area_init_core(struct pglist_data *pgdat,
1211 unsigned long *zones_size, unsigned long *zholes_size)
1213 unsigned long i, j;
1214 const unsigned long zone_required_alignment = 1UL << (MAX_ORDER-1);
1215 int cpu, nid = pgdat->node_id;
1216 struct page *lmem_map = pgdat->node_mem_map;
1217 unsigned long zone_start_pfn = pgdat->node_start_pfn;
1219 pgdat->nr_zones = 0;
1220 init_waitqueue_head(&pgdat->kswapd_wait);
1222 for (j = 0; j < MAX_NR_ZONES; j++) {
1223 struct zone *zone = pgdat->node_zones + j;
1224 unsigned long size, realsize;
1225 unsigned long batch;
1227 zone_table[nid * MAX_NR_ZONES + j] = zone;
1228 realsize = size = zones_size[j];
1229 if (zholes_size)
1230 realsize -= zholes_size[j];
1232 zone->spanned_pages = size;
1233 zone->present_pages = realsize;
1234 zone->name = zone_names[j];
1235 spin_lock_init(&zone->lock);
1236 spin_lock_init(&zone->lru_lock);
1237 zone->zone_pgdat = pgdat;
1238 zone->free_pages = 0;
1240 /*
1241 * The per-cpu-pages pools are set to around 1000th of the
1242 * size of the zone. But no more than 1/4 of a meg - there's
1243 * no point in going beyond the size of L2 cache.
1245 * OK, so we don't know how big the cache is. So guess.
1246 */
1247 batch = zone->present_pages / 1024;
1248 if (batch * PAGE_SIZE > 256 * 1024)
1249 batch = (256 * 1024) / PAGE_SIZE;
1250 batch /= 4; /* We effectively *= 4 below */
1251 if (batch < 1)
1252 batch = 1;
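/*
 * Worked example (illustrative, not from the original file): a 1GB zone has
 * 262144 4KB pages, so batch starts at 262144/1024 = 256; that is 1MB worth
 * of pages, so it is clamped to 64 and then quartered to 16, giving the hot
 * list low/high of 32/96 and the cold list high of 32 below.
 */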
1254 for (cpu = 0; cpu < NR_CPUS; cpu++) {
1255 struct per_cpu_pages *pcp;
1257 pcp = &zone->pageset[cpu].pcp[0]; /* hot */
1258 pcp->count = 0;
1259 pcp->low = 2 * batch;
1260 pcp->high = 6 * batch;
1261 pcp->batch = 1 * batch;
1262 INIT_LIST_HEAD(&pcp->list);
1264 pcp = &zone->pageset[cpu].pcp[1]; /* cold */
1265 pcp->count = 0;
1266 pcp->low = 0;
1267 pcp->high = 2 * batch;
1268 pcp->batch = 1 * batch;
1269 INIT_LIST_HEAD(&pcp->list);
1271 printk(" %s zone: %lu pages, LIFO batch:%lu\n",
1272 zone_names[j], realsize, batch);
1273 INIT_LIST_HEAD(&zone->active_list);
1274 INIT_LIST_HEAD(&zone->inactive_list);
1275 atomic_set(&zone->refill_counter, 0);
1276 zone->nr_active = 0;
1277 zone->nr_inactive = 0;
1278 if (!size)
1279 continue;
1282 * The per-page waitqueue mechanism uses hashed waitqueues
1283 * per zone.
1285 zone->wait_table_size = wait_table_size(size);
1286 zone->wait_table_bits =
1287 wait_table_bits(zone->wait_table_size);
1288 zone->wait_table = (wait_queue_head_t *)
1289 alloc_bootmem_node(pgdat, zone->wait_table_size
1290 * sizeof(wait_queue_head_t));
1292 for(i = 0; i < zone->wait_table_size; ++i)
1293 init_waitqueue_head(zone->wait_table + i);
1295 pgdat->nr_zones = j+1;
1297 zone->zone_mem_map = lmem_map;
1298 zone->zone_start_pfn = zone_start_pfn;
1300 if ((zone_start_pfn) & (zone_required_alignment-1))
1301 printk("BUG: wrong zone alignment, it will crash\n");
1303 memmap_init(lmem_map, size, nid, j, zone_start_pfn);
1305 zone_start_pfn += size;
1306 lmem_map += size;
1308 for (i = 0; ; i++) {
1309 unsigned long bitmap_size;
1311 INIT_LIST_HEAD(&zone->free_area[i].free_list);
1312 if (i == MAX_ORDER-1) {
1313 zone->free_area[i].map = NULL;
1314 break;
1317 /*
1318 * Page buddy system uses "index >> (i+1)",
1319 * where "index" is at most "size-1".
1321 * The extra "+3" is to round down to byte
1322 * size (8 bits per byte assumption). Thus
1323 * we get "(size-1) >> (i+4)" as the last byte
1324 * we can access.
1326 * The "+1" is because we want to round the
1327 * byte allocation up rather than down. So
1328 * we should have had a "+7" before we shifted
1329 * down by three. Also, we have to add one as
1330 * we actually _use_ the last bit (it's [0,n]
1331 * inclusive, not [0,n[).
1333 * So we actually had +7+1 before we shift
1334 * down by 3. But (n+8) >> 3 == (n >> 3) + 1
1335 * (modulo overflows, which we do not have).
1337 * Finally, we LONG_ALIGN because all bitmap
1338 * operations are on longs.
1339 */
1340 bitmap_size = (size-1) >> (i+4);
1341 bitmap_size = LONG_ALIGN(bitmap_size+1);
1342 zone->free_area[i].map =
1343 (unsigned long *) alloc_bootmem_node(pgdat, bitmap_size);
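/*
 * Worked example of the sizing above (illustrative, not from the original
 * file): for a zone of size = 4096 pages at i = 0, bitmap_size =
 * 4095 >> 4 = 255, and LONG_ALIGN(255 + 1) = 256 bytes = 2048 bits, one bit
 * for each of the 4096 / 2 = 2048 order-0 buddy pairs.
 */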
1348 void __init free_area_init_node(int nid, struct pglist_data *pgdat,
1349 struct page *node_mem_map, unsigned long *zones_size,
1350 unsigned long node_start_pfn, unsigned long *zholes_size)
1352 unsigned long size;
1354 pgdat->node_id = nid;
1355 pgdat->node_start_pfn = node_start_pfn;
1356 calculate_zone_totalpages(pgdat, zones_size, zholes_size);
1357 if (!node_mem_map) {
1358 size = (pgdat->node_spanned_pages + 1) * sizeof(struct page);
1359 node_mem_map = alloc_bootmem_node(pgdat, size);
1361 pgdat->node_mem_map = node_mem_map;
1363 free_area_init_core(pgdat, zones_size, zholes_size);
1364 memblk_set_online(node_to_memblk(nid));
1366 calculate_zone_bitmap(pgdat, zones_size);
1369 #ifndef CONFIG_DISCONTIGMEM
1370 static bootmem_data_t contig_bootmem_data;
1371 struct pglist_data contig_page_data = { .bdata = &contig_bootmem_data };
1373 void __init free_area_init(unsigned long *zones_size)
1375 free_area_init_node(0, &contig_page_data, NULL, zones_size,
1376 __pa(PAGE_OFFSET) >> PAGE_SHIFT, NULL);
1377 mem_map = contig_page_data.node_mem_map;
1379 #endif
1381 #ifdef CONFIG_PROC_FS
1383 #include <linux/seq_file.h>
1385 static void *frag_start(struct seq_file *m, loff_t *pos)
1387 pg_data_t *pgdat;
1388 loff_t node = *pos;
1390 for (pgdat = pgdat_list; pgdat && node; pgdat = pgdat->pgdat_next)
1391 --node;
1393 return pgdat;
1396 static void *frag_next(struct seq_file *m, void *arg, loff_t *pos)
1398 pg_data_t *pgdat = (pg_data_t *)arg;
1400 (*pos)++;
1401 return pgdat->pgdat_next;
1404 static void frag_stop(struct seq_file *m, void *arg)
1409 * This walks the freelist for each zone. Whilst this is slow, I'd rather
1410 * be slow here than slow down the fast path by keeping stats - mjbligh
1412 static int frag_show(struct seq_file *m, void *arg)
1414 pg_data_t *pgdat = (pg_data_t *)arg;
1415 struct zone *zone;
1416 struct zone *node_zones = pgdat->node_zones;
1417 unsigned long flags;
1418 int order;
1420 for (zone = node_zones; zone - node_zones < MAX_NR_ZONES; ++zone) {
1421 if (!zone->present_pages)
1422 continue;
1424 spin_lock_irqsave(&zone->lock, flags);
1425 seq_printf(m, "Node %d, zone %8s ", pgdat->node_id, zone->name);
1426 for (order = 0; order < MAX_ORDER; ++order) {
1427 unsigned long nr_bufs = 0;
1428 struct list_head *elem;
1430 list_for_each(elem, &(zone->free_area[order].free_list))
1431 ++nr_bufs;
1432 seq_printf(m, "%6lu ", nr_bufs);
1434 spin_unlock_irqrestore(&zone->lock, flags);
1435 seq_putc(m, '\n');
1437 return 0;
1440 struct seq_operations fragmentation_op = {
1441 .start = frag_start,
1442 .next = frag_next,
1443 .stop = frag_stop,
1444 .show = frag_show,
1447 static char *vmstat_text[] = {
1448 "nr_dirty",
1449 "nr_writeback",
1450 "nr_unstable",
1451 "nr_page_table_pages",
1452 "nr_mapped",
1453 "nr_slab",
1455 "pgpgin",
1456 "pgpgout",
1457 "pswpin",
1458 "pswpout",
1459 "pgalloc",
1461 "pgfree",
1462 "pgactivate",
1463 "pgdeactivate",
1464 "pgfault",
1465 "pgmajfault",
1467 "pgscan",
1468 "pgrefill",
1469 "pgsteal",
1470 "pginodesteal",
1471 "kswapd_steal",
1473 "kswapd_inodesteal",
1474 "pageoutrun",
1475 "allocstall",
1476 "pgrotated",
1479 static void *vmstat_start(struct seq_file *m, loff_t *pos)
1481 struct page_state *ps;
1483 if (*pos >= ARRAY_SIZE(vmstat_text))
1484 return NULL;
1486 ps = kmalloc(sizeof(*ps), GFP_KERNEL);
1487 m->private = ps;
1488 if (!ps)
1489 return ERR_PTR(-ENOMEM);
1490 get_full_page_state(ps);
1491 ps->pgpgin /= 2; /* sectors -> kbytes */
1492 ps->pgpgout /= 2;
1493 return (unsigned long *)ps + *pos;
1496 static void *vmstat_next(struct seq_file *m, void *arg, loff_t *pos)
1498 (*pos)++;
1499 if (*pos >= ARRAY_SIZE(vmstat_text))
1500 return NULL;
1501 return (unsigned long *)m->private + *pos;
1504 static int vmstat_show(struct seq_file *m, void *arg)
1506 unsigned long *l = arg;
1507 unsigned long off = l - (unsigned long *)m->private;
1509 seq_printf(m, "%s %lu\n", vmstat_text[off], *l);
1510 return 0;
1513 static void vmstat_stop(struct seq_file *m, void *arg)
1515 kfree(m->private);
1516 m->private = NULL;
1519 struct seq_operations vmstat_op = {
1520 .start = vmstat_start,
1521 .next = vmstat_next,
1522 .stop = vmstat_stop,
1523 .show = vmstat_show,
1526 #endif /* CONFIG_PROC_FS */
1528 static void __devinit init_page_alloc_cpu(int cpu)
1530 struct page_state *ps = &per_cpu(page_states, cpu);
1531 memset(ps, 0, sizeof(*ps));
1534 static int __devinit page_alloc_cpu_notify(struct notifier_block *self,
1535 unsigned long action, void *hcpu)
1537 int cpu = (unsigned long)hcpu;
1538 switch(action) {
1539 case CPU_UP_PREPARE:
1540 init_page_alloc_cpu(cpu);
1541 break;
1542 default:
1543 break;
1545 return NOTIFY_OK;
1548 static struct notifier_block __devinitdata page_alloc_nb = {
1549 .notifier_call = page_alloc_cpu_notify,
1552 void __init page_alloc_init(void)
1554 init_page_alloc_cpu(smp_processor_id());
1555 register_cpu_notifier(&page_alloc_nb);
1558 /*
1559 * setup_per_zone_pages_min - called when min_free_kbytes changes. Ensures
1560 * that the pages_{min,low,high} values for each zone are set correctly
1561 * with respect to min_free_kbytes.
1562 */
1563 void setup_per_zone_pages_min(void)
1565 unsigned long pages_min = min_free_kbytes >> (PAGE_SHIFT - 10);
1566 unsigned long lowmem_pages = 0;
1567 struct zone *zone;
1568 unsigned long flags;
1570 /* Calculate total number of !ZONE_HIGHMEM pages */
1571 for_each_zone(zone)
1572 if (!is_highmem(zone))
1573 lowmem_pages += zone->present_pages;
1575 for_each_zone(zone) {
1576 spin_lock_irqsave(&zone->lru_lock, flags);
1577 if (is_highmem(zone)) {
1579 * Often, highmem doesn't need to reserve any pages.
1580 * But the pages_min/low/high values are also used for
1581 * batching up page reclaim activity so we need a
1582 * decent value here.
1584 int min_pages;
1586 min_pages = zone->present_pages / 1024;
1587 if (min_pages < SWAP_CLUSTER_MAX)
1588 min_pages = SWAP_CLUSTER_MAX;
1589 if (min_pages > 128)
1590 min_pages = 128;
1591 zone->pages_min = min_pages;
1592 } else {
1593 /* if it's a lowmem zone, reserve a number of pages
1594 * proportionate to the zone's size.
1596 zone->pages_min = (pages_min * zone->present_pages) /
1597 lowmem_pages;
1600 zone->pages_low = zone->pages_min * 2;
1601 zone->pages_high = zone->pages_min * 3;
1602 spin_unlock_irqrestore(&zone->lru_lock, flags);
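/*
 * Worked example (illustrative, not from the original file): with the
 * default min_free_kbytes of 1024 and 4KB pages, pages_min totals
 * 1024 >> 2 = 256 pages, shared among the !highmem zones in proportion to
 * their size; each zone then gets pages_low = 2 * pages_min and
 * pages_high = 3 * pages_min.
 */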
1607 * min_free_kbytes_sysctl_handler - just a wrapper around proc_dointvec() so
1608 * that we can call setup_per_zone_pages_min() whenever min_free_kbytes
1609 * changes.
1611 int min_free_kbytes_sysctl_handler(ctl_table *table, int write,
1612 struct file *file, void *buffer, size_t *length)
1614 proc_dointvec(table, write, file, buffer, length);
1615 setup_per_zone_pages_min();
1616 return 0;