mm/vmstat.c

   1 /*
   2  *  linux/mm/vmstat.c
   3  *
   4  *  Manages VM statistics
   5  *  Copyright (C) 1991, 1992, 1993, 1994  Linus Torvalds
   6  *
   7  *  zoned VM statistics
   8  *  Copyright (C) 2006 Silicon Graphics, Inc.,
   9  *              Christoph Lameter <christoph@lameter.com>
  10  */
  11
  12 #include <linux/mm.h>
  13 #include <linux/module.h>
  14 #include <linux/cpu.h>
  15 #include <linux/sched.h>
  16
  17 #ifdef CONFIG_VM_EVENT_COUNTERS
/*
 * Per-cpu vm event counters, zero-initialized.  The per-cpu symbol is
 * exported; sum_vm_events() and vm_events_fold_cpu() in this file read it
 * through per_cpu().
 */
DEFINE_PER_CPU(struct vm_event_state, vm_event_states) = {{0}};
EXPORT_PER_CPU_SYMBOL(vm_event_states);
  20
  21 static void sum_vm_events(unsigned long *ret, cpumask_t *cpumask)
  22 {
  23         int cpu = 0;
  24         int i;
  25
  26         memset(ret, 0, NR_VM_EVENT_ITEMS * sizeof(unsigned long));
  27
  28         cpu = first_cpu(*cpumask);
  29         while (cpu < NR_CPUS) {
  30                 struct vm_event_state *this = &per_cpu(vm_event_states, cpu);
  31
  32                 cpu = next_cpu(cpu, *cpumask);
  33
  34                 if (cpu < NR_CPUS)
  35                         prefetch(&per_cpu(vm_event_states, cpu));
  36
  37
  38                 for (i = 0; i < NR_VM_EVENT_ITEMS; i++)
  39                         ret[i] += this->event[i];
  40         }
  41 }
  42
/*
 * Accumulate the vm event counters across all online CPUs
 * (cpu_online_map) into @ret, which must hold NR_VM_EVENT_ITEMS entries.
 * The result is unavoidably approximate - it can change
 * during and after execution of this function.
 */
void all_vm_events(unsigned long *ret)
{
        sum_vm_events(ret, &cpu_online_map);
}
EXPORT_SYMBOL_GPL(all_vm_events);
  53
  54 #ifdef CONFIG_HOTPLUG
  55 /*
  56  * Fold the foreign cpu events into our own.
  57  *
  58  * This is adding to the events on one processor
  59  * but keeps the global counts constant.
  60  */
  61 void vm_events_fold_cpu(int cpu)
  62 {
  63         struct vm_event_state *fold_state = &per_cpu(vm_event_states, cpu);
  64         int i;
  65
  66         for (i = 0; i < NR_VM_EVENT_ITEMS; i++) {
  67                 count_vm_events(i, fold_state->event[i]);
  68                 fold_state->event[i] = 0;
  69         }
  70 }
  71 #endif /* CONFIG_HOTPLUG */
  72
  73 #endif /* CONFIG_VM_EVENT_COUNTERS */
  74
/*
 * Manage combined zone based / global counters
 *
 * vm_stat contains the global counters, one atomic_long_t per
 * zone stat item (NR_VM_ZONE_STAT_ITEMS entries).
 */
atomic_long_t vm_stat[NR_VM_ZONE_STAT_ITEMS];
EXPORT_SYMBOL(vm_stat);
  82
  83 #ifdef CONFIG_SMP
  84
  85 static int calculate_threshold(struct zone *zone)
  86 {
  87         int threshold;
  88         int mem;        /* memory in 128 MB units */
  89
  90         /*
  91          * The threshold scales with the number of processors and the amount
  92          * of memory per zone. More memory means that we can defer updates for
  93          * longer, more processors could lead to more contention.
  94          * fls() is used to have a cheap way of logarithmic scaling.
  95          *
  96          * Some sample thresholds:
  97          *
  98          * Threshold    Processors      (fls)   Zonesize        fls(mem+1)
  99          * ------------------------------------------------------------------
 100          * 8            1               1       0.9-1 GB        4
 101          * 16           2               2       0.9-1 GB        4
 102          * 20           2               2       1-2 GB          5
 103          * 24           2               2       2-4 GB          6
 104          * 28           2               2       4-8 GB          7
 105          * 32           2               2       8-16 GB         8
 106          * 4            2               2       <128M           1
 107          * 30           4               3       2-4 GB          5
 108          * 48           4               3       8-16 GB         8
 109          * 32           8               4       1-2 GB          4
 110          * 32           8               4       0.9-1GB         4
 111          * 10           16              5       <128M           1
 112          * 40           16              5       900M            4
 113          * 70           64              7       2-4 GB          5
 114          * 84           64              7       4-8 GB          6
 115          * 108          512             9       4-8 GB          6
 116          * 125          1024            10      8-16 GB         8
 117          * 125          1024            10      16-32 GB        9
 118          */
 119
 120         mem = zone->present_pages >> (27 - PAGE_SHIFT);
 121
 122         threshold = 2 * fls(num_online_cpus()) * (1 + fls(mem));
 123
 124         /*
 125          * Maximum threshold is 125
 126          */
 127         threshold = min(125, threshold);
 128
 129         return threshold;
 130 }
 131
 132 /*
 133  * Refresh the thresholds for each zone.
 134  */
 135 static void refresh_zone_stat_thresholds(void)
 136 {
 137         struct zone *zone;
 138         int cpu;
 139         int threshold;
 140
 141         for_each_zone(zone) {
 142
 143                 if (!zone->present_pages)
 144                         continue;
 145
 146                 threshold = calculate_threshold(zone);
 147
 148                 for_each_online_cpu(cpu)
 149                         zone_pcp(zone, cpu)->stat_threshold = threshold;
 150         }
 151 }
 152
 153 /*
 154  * For use when we know that interrupts are disabled.
 155  */
 156 void __mod_zone_page_state(struct zone *zone, enum zone_stat_item item,
 157                                 int delta)
 158 {
 159         struct per_cpu_pageset *pcp = zone_pcp(zone, smp_processor_id());
 160         s8 *p = pcp->vm_stat_diff + item;
 161         long x;
 162
 163         x = delta + *p;
 164
 165         if (unlikely(x > pcp->stat_threshold || x < -pcp->stat_threshold)) {
 166                 zone_page_state_add(x, zone, item);
 167                 x = 0;
 168         }
 169         *p = x;
 170 }
 171 EXPORT_SYMBOL(__mod_zone_page_state);
 172
 173 /*
 174  * For an unknown interrupt state
 175  */
 176 void mod_zone_page_state(struct zone *zone, enum zone_stat_item item,
 177                                         int delta)
 178 {
 179         unsigned long flags;
 180
 181         local_irq_save(flags);
 182         __mod_zone_page_state(zone, item, delta);
 183         local_irq_restore(flags);
 184 }
 185 EXPORT_SYMBOL(mod_zone_page_state);
 186
 187 /*
 188  * Optimized increment and decrement functions.
 189  *
 190  * These are only for a single page and therefore can take a struct page *
 191  * argument instead of struct zone *. This allows the inclusion of the code
 192  * generated for page_zone(page) into the optimized functions.
 193  *
 194  * No overflow check is necessary and therefore the differential can be
 195  * incremented or decremented in place which may allow the compilers to
 196  * generate better code.
 197  * The increment or decrement is known and therefore one boundary check can
 198  * be omitted.
 199  *
 200  * NOTE: These functions are very performance sensitive. Change only
 201  * with care.
 202  *
 203  * Some processors have inc/dec instructions that are atomic vs an interrupt.
 204  * However, the code must first determine the differential location in a zone
 205  * based on the processor number and then inc/dec the counter. There is no
 206  * guarantee without disabling preemption that the processor will not change
 207  * in between and therefore the atomicity vs. interrupt cannot be exploited
 208  * in a useful way here.
 209  */
 210 void __inc_zone_state(struct zone *zone, enum zone_stat_item item)
 211 {
 212         struct per_cpu_pageset *pcp = zone_pcp(zone, smp_processor_id());
 213         s8 *p = pcp->vm_stat_diff + item;
 214
 215         (*p)++;
 216
 217         if (unlikely(*p > pcp->stat_threshold)) {
 218                 int overstep = pcp->stat_threshold / 2;
 219
 220                 zone_page_state_add(*p + overstep, zone, item);
 221                 *p = -overstep;
 222         }
 223 }
 224
/*
 * Increment counter @item of the zone that @page belongs to.
 * Calls __inc_zone_state() directly, without touching the interrupt state.
 */
void __inc_zone_page_state(struct page *page, enum zone_stat_item item)
{
        __inc_zone_state(page_zone(page), item);
}
EXPORT_SYMBOL(__inc_zone_page_state);
 230
 231 void __dec_zone_state(struct zone *zone, enum zone_stat_item item)
 232 {
 233         struct per_cpu_pageset *pcp = zone_pcp(zone, smp_processor_id());
 234         s8 *p = pcp->vm_stat_diff + item;
 235
 236         (*p)--;
 237
 238         if (unlikely(*p < - pcp->stat_threshold)) {
 239                 int overstep = pcp->stat_threshold / 2;
 240
 241                 zone_page_state_add(*p - overstep, zone, item);
 242                 *p = overstep;
 243         }
 244 }
 245
/*
 * Decrement counter @item of the zone that @page belongs to.
 * Calls __dec_zone_state() directly, without touching the interrupt state.
 */
void __dec_zone_page_state(struct page *page, enum zone_stat_item item)
{
        __dec_zone_state(page_zone(page), item);
}
EXPORT_SYMBOL(__dec_zone_page_state);
 251
 252 void inc_zone_state(struct zone *zone, enum zone_stat_item item)
 253 {
 254         unsigned long flags;
 255
 256         local_irq_save(flags);
 257         __inc_zone_state(zone, item);
 258         local_irq_restore(flags);
 259 }
 260
 261 void inc_zone_page_state(struct page *page, enum zone_stat_item item)
 262 {
 263         unsigned long flags;
 264         struct zone *zone;
 265
 266         zone = page_zone(page);
 267         local_irq_save(flags);
 268         __inc_zone_state(zone, item);
 269         local_irq_restore(flags);
 270 }
 271 EXPORT_SYMBOL(inc_zone_page_state);
 272
 273 void dec_zone_page_state(struct page *page, enum zone_stat_item item)
 274 {
 275         unsigned long flags;
 276
 277         local_irq_save(flags);
 278         __dec_zone_page_state(page, item);
 279         local_irq_restore(flags);
 280 }
 281 EXPORT_SYMBOL(dec_zone_page_state);
 282
 283 /*
 284  * Update the zone counters for one cpu.
 285  *
 286  * Note that refresh_cpu_vm_stats strives to only access
 287  * node local memory. The per cpu pagesets on remote zones are placed
 288  * in the memory local to the processor using that pageset. So the
 289  * loop over all zones will access a series of cachelines local to
 290  * the processor.
 291  *
 292  * The call to zone_page_state_add updates the cachelines with the
 293  * statistics in the remote zone struct as well as the global cachelines
 294  * with the global counters. These could cause remote node cache line
 295  * bouncing and will have to be only done when necessary.
 296  */
 297 void refresh_cpu_vm_stats(int cpu)
 298 {
 299         struct zone *zone;
 300         int i;
 301         unsigned long flags;
 302
 303         for_each_zone(zone) {
 304                 struct per_cpu_pageset *p;
 305
 306                 if (!populated_zone(zone))
 307                         continue;
 308
 309                 p = zone_pcp(zone, cpu);
 310
 311                 for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++)
 312                         if (p->vm_stat_diff[i]) {
 313                                 local_irq_save(flags);
 314                                 zone_page_state_add(p->vm_stat_diff[i],
 315                                         zone, i);
 316                                 p->vm_stat_diff[i] = 0;
 317 #ifdef CONFIG_NUMA
 318                                 /* 3 seconds idle till flush */
 319                                 p->expire = 3;
 320 #endif
 321                                 local_irq_restore(flags);
 322                         }
 323 #ifdef CONFIG_NUMA
 324                 /*
 325                  * Deal with draining the remote pageset of this
 326                  * processor
 327                  *
 328                  * Check if there are pages remaining in this pageset
 329                  * if not then there is nothing to expire.
 330                  */
 331                 if (!p->expire || (!p->pcp[0].count && !p->pcp[1].count))
 332                         continue;
 333
 334                 /*
 335                  * We never drain zones local to this processor.
 336                  */
 337                 if (zone_to_nid(zone) == numa_node_id()) {
 338                         p->expire = 0;
 339                         continue;
 340                 }
 341
 342                 p->expire--;
 343                 if (p->expire)
 344                         continue;
 345
 346                 if (p->pcp[0].count)
 347                         drain_zone_pages(zone, p->pcp + 0);
 348
 349                 if (p->pcp[1].count)
 350                         drain_zone_pages(zone, p->pcp + 1);
 351 #endif
 352         }
 353 }
 354
/*
 * Callback handed to on_each_cpu() by refresh_vm_stats(): refreshes the
 * counters of whichever cpu it runs on.  @dummy is unused.
 */
static void __refresh_cpu_vm_stats(void *dummy)
{
        refresh_cpu_vm_stats(smp_processor_id());
}
 359
/*
 * Consolidate all counters by running __refresh_cpu_vm_stats() on each
 * cpu via on_each_cpu().
 *
 * Note that the result is less inaccurate but still inaccurate
 * if concurrent processes are allowed to run.
 */
void refresh_vm_stats(void)
{
        /* NOTE(review): trailing 0, 1 are presumably retry=0, wait=1 - confirm against on_each_cpu(). */
        on_each_cpu(__refresh_cpu_vm_stats, NULL, 0, 1);
}
EXPORT_SYMBOL(refresh_vm_stats);
 371
 372 #endif
 373
 374 #ifdef CONFIG_NUMA
 375 /*
 376  * zonelist = the list of zones passed to the allocator
 377  * z        = the zone from which the allocation occurred.
 378  *
 379  * Must be called with interrupts disabled.
 380  */
 381 void zone_statistics(struct zonelist *zonelist, struct zone *z)
 382 {
 383         if (z->zone_pgdat == zonelist->zones[0]->zone_pgdat) {
 384                 __inc_zone_state(z, NUMA_HIT);
 385         } else {
 386                 __inc_zone_state(z, NUMA_MISS);
 387                 __inc_zone_state(zonelist->zones[0], NUMA_FOREIGN);
 388         }
 389         if (z->node == numa_node_id())
 390                 __inc_zone_state(z, NUMA_LOCAL);
 391         else
 392                 __inc_zone_state(z, NUMA_OTHER);
 393 }
 394 #endif
 395
 396 #ifdef CONFIG_PROC_FS
 397
 398 #include <linux/seq_file.h>
 399
 400 static void *frag_start(struct seq_file *m, loff_t *pos)
 401 {
 402         pg_data_t *pgdat;
 403         loff_t node = *pos;
 404         for (pgdat = first_online_pgdat();
 405              pgdat && node;
 406              pgdat = next_online_pgdat(pgdat))
 407                 --node;
 408
 409         return pgdat;
 410 }
 411
 412 static void *frag_next(struct seq_file *m, void *arg, loff_t *pos)
 413 {
 414         pg_data_t *pgdat = (pg_data_t *)arg;
 415
 416         (*pos)++;
 417         return next_online_pgdat(pgdat);
 418 }
 419
/*
 * seq_file ->stop: intentionally empty; frag_start()/frag_next() in this
 * file allocate nothing and take no locks that would need releasing.
 */
static void frag_stop(struct seq_file *m, void *arg)
{
}
 423
 424 /*
 425  * This walks the free areas for each zone.
 426  */
 427 static int frag_show(struct seq_file *m, void *arg)
 428 {
 429         pg_data_t *pgdat = (pg_data_t *)arg;
 430         struct zone *zone;
 431         struct zone *node_zones = pgdat->node_zones;
 432         unsigned long flags;
 433         int order;
 434
 435         for (zone = node_zones; zone - node_zones < MAX_NR_ZONES; ++zone) {
 436                 if (!populated_zone(zone))
 437                         continue;
 438
 439                 spin_lock_irqsave(&zone->lock, flags);
 440                 seq_printf(m, "Node %d, zone %8s ", pgdat->node_id, zone->name);
 441                 for (order = 0; order < MAX_ORDER; ++order)
 442                         seq_printf(m, "%6lu ", zone->free_area[order].nr_free);
 443                 spin_unlock_irqrestore(&zone->lock, flags);
 444                 seq_putc(m, '\n');
 445         }
 446         return 0;
 447 }
 448
/*
 * seq_file operations that iterate over online pgdats and print the
 * per-zone free area counts through frag_show().
 */
const struct seq_operations fragmentation_op = {
        .start  = frag_start,
        .next   = frag_next,
        .stop   = frag_stop,
        .show   = frag_show,
};
 455
/*
 * Helpers for building vmstat_text[]: each TEXT_FOR_<zone>(xx) expands to
 * the string literal xx with a zone suffix followed by a comma, or to
 * nothing when that zone type is not configured.
 */
#ifdef CONFIG_ZONE_DMA
#define TEXT_FOR_DMA(xx) xx "_dma",
#else
#define TEXT_FOR_DMA(xx)
#endif

#ifdef CONFIG_ZONE_DMA32
#define TEXT_FOR_DMA32(xx) xx "_dma32",
#else
#define TEXT_FOR_DMA32(xx)
#endif

#ifdef CONFIG_HIGHMEM
#define TEXT_FOR_HIGHMEM(xx) xx "_high",
#else
#define TEXT_FOR_HIGHMEM(xx)
#endif

/*
 * Expands to one comma-terminated name per zone type, in the order
 * dma, dma32, normal, high, movable ("_normal" and "_movable" are always
 * present).
 */
#define TEXTS_FOR_ZONES(xx) TEXT_FOR_DMA(xx) TEXT_FOR_DMA32(xx) xx "_normal", \
                                        TEXT_FOR_HIGHMEM(xx) xx "_movable",
 476
/*
 * Names printed for each statistic.
 *
 * zoneinfo_show() indexes this table with the zone stat item number, and
 * vmstat_show() indexes it with the offset into the buffer that
 * vmstat_start() fills with the zone counters followed by the event
 * counters - so the order of the entries must match that layout.
 */
static const char * const vmstat_text[] = {
        /* Zoned VM counters */
        "nr_free_pages",
        "nr_inactive",
        "nr_active",
        "nr_anon_pages",
        "nr_mapped",
        "nr_file_pages",
        "nr_dirty",
        "nr_writeback",
        "nr_slab_reclaimable",
        "nr_slab_unreclaimable",
        "nr_page_table_pages",
        "nr_unstable",
        "nr_bounce",
        "nr_vmscan_write",

#ifdef CONFIG_NUMA
        "numa_hit",
        "numa_miss",
        "numa_foreign",
        "numa_interleave",
        "numa_local",
        "numa_other",
#endif

#ifdef CONFIG_VM_EVENT_COUNTERS
        /* VM event counters */
        "pgpgin",
        "pgpgout",
        "pswpin",
        "pswpout",

        TEXTS_FOR_ZONES("pgalloc")

        "pgfree",
        "pgactivate",
        "pgdeactivate",

        "pgfault",
        "pgmajfault",

        TEXTS_FOR_ZONES("pgrefill")
        TEXTS_FOR_ZONES("pgsteal")
        TEXTS_FOR_ZONES("pgscan_kswapd")
        TEXTS_FOR_ZONES("pgscan_direct")

        "pginodesteal",
        "slabs_scanned",
        "kswapd_steal",
        "kswapd_inodesteal",
        "pageoutrun",
        "allocstall",

        "pgrotated",
#endif
};
 533
 534 /*
 535  * Output information about zones in @pgdat.
 536  */
 537 static int zoneinfo_show(struct seq_file *m, void *arg)
 538 {
 539         pg_data_t *pgdat = arg;
 540         struct zone *zone;
 541         struct zone *node_zones = pgdat->node_zones;
 542         unsigned long flags;
 543
 544         for (zone = node_zones; zone - node_zones < MAX_NR_ZONES; zone++) {
 545                 int i;
 546
 547                 if (!populated_zone(zone))
 548                         continue;
 549
 550                 spin_lock_irqsave(&zone->lock, flags);
 551                 seq_printf(m, "Node %d, zone %8s", pgdat->node_id, zone->name);
 552                 seq_printf(m,
 553                            "\n  pages free     %lu"
 554                            "\n        min      %lu"
 555                            "\n        low      %lu"
 556                            "\n        high     %lu"
 557                            "\n        scanned  %lu (a: %lu i: %lu)"
 558                            "\n        spanned  %lu"
 559                            "\n        present  %lu",
 560                            zone_page_state(zone, NR_FREE_PAGES),
 561                            zone->pages_min,
 562                            zone->pages_low,
 563                            zone->pages_high,
 564                            zone->pages_scanned,
 565                            zone->nr_scan_active, zone->nr_scan_inactive,
 566                            zone->spanned_pages,
 567                            zone->present_pages);
 568
 569                 for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++)
 570                         seq_printf(m, "\n    %-12s %lu", vmstat_text[i],
 571                                         zone_page_state(zone, i));
 572
 573                 seq_printf(m,
 574                            "\n        protection: (%lu",
 575                            zone->lowmem_reserve[0]);
 576                 for (i = 1; i < ARRAY_SIZE(zone->lowmem_reserve); i++)
 577                         seq_printf(m, ", %lu", zone->lowmem_reserve[i]);
 578                 seq_printf(m,
 579                            ")"
 580                            "\n  pagesets");
 581                 for_each_online_cpu(i) {
 582                         struct per_cpu_pageset *pageset;
 583                         int j;
 584
 585                         pageset = zone_pcp(zone, i);
 586                         for (j = 0; j < ARRAY_SIZE(pageset->pcp); j++) {
 587                                 seq_printf(m,
 588                                            "\n    cpu: %i pcp: %i"
 589                                            "\n              count: %i"
 590                                            "\n              high:  %i"
 591                                            "\n              batch: %i",
 592                                            i, j,
 593                                            pageset->pcp[j].count,
 594                                            pageset->pcp[j].high,
 595                                            pageset->pcp[j].batch);
 596                         }
 597 #ifdef CONFIG_SMP
 598                         seq_printf(m, "\n  vm stats threshold: %d",
 599                                         pageset->stat_threshold);
 600 #endif
 601                 }
 602                 seq_printf(m,
 603                            "\n  all_unreclaimable: %u"
 604                            "\n  prev_priority:     %i"
 605                            "\n  start_pfn:         %lu",
 606                            zone->all_unreclaimable,
 607                            zone->prev_priority,
 608                            zone->zone_start_pfn);
 609                 spin_unlock_irqrestore(&zone->lock, flags);
 610                 seq_putc(m, '\n');
 611         }
 612         return 0;
 613 }
 614
/*
 * seq_file operations for the zone information output.  Iteration is
 * shared with fragmentation_op; only ->show differs.
 */
const struct seq_operations zoneinfo_op = {
        .start  = frag_start, /* iterate over all zones. The same as in
                               * fragmentation. */
        .next   = frag_next,
        .stop   = frag_stop,
        .show   = zoneinfo_show,
};
 622
 623 static void *vmstat_start(struct seq_file *m, loff_t *pos)
 624 {
 625         unsigned long *v;
 626 #ifdef CONFIG_VM_EVENT_COUNTERS
 627         unsigned long *e;
 628 #endif
 629         int i;
 630
 631         if (*pos >= ARRAY_SIZE(vmstat_text))
 632                 return NULL;
 633
 634 #ifdef CONFIG_VM_EVENT_COUNTERS
 635         v = kmalloc(NR_VM_ZONE_STAT_ITEMS * sizeof(unsigned long)
 636                         + sizeof(struct vm_event_state), GFP_KERNEL);
 637 #else
 638         v = kmalloc(NR_VM_ZONE_STAT_ITEMS * sizeof(unsigned long),
 639                         GFP_KERNEL);
 640 #endif
 641         m->private = v;
 642         if (!v)
 643                 return ERR_PTR(-ENOMEM);
 644         for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++)
 645                 v[i] = global_page_state(i);
 646 #ifdef CONFIG_VM_EVENT_COUNTERS
 647         e = v + NR_VM_ZONE_STAT_ITEMS;
 648         all_vm_events(e);
 649         e[PGPGIN] /= 2;         /* sectors -> kbytes */
 650         e[PGPGOUT] /= 2;
 651 #endif
 652         return v + *pos;
 653 }
 654
 655 static void *vmstat_next(struct seq_file *m, void *arg, loff_t *pos)
 656 {
 657         (*pos)++;
 658         if (*pos >= ARRAY_SIZE(vmstat_text))
 659                 return NULL;
 660         return (unsigned long *)m->private + *pos;
 661 }
 662
 663 static int vmstat_show(struct seq_file *m, void *arg)
 664 {
 665         unsigned long *l = arg;
 666         unsigned long off = l - (unsigned long *)m->private;
 667
 668         seq_printf(m, "%s %lu\n", vmstat_text[off], *l);
 669         return 0;
 670 }
 671
/*
 * seq_file ->stop: release the snapshot buffer that vmstat_start() stored
 * in m->private and clear the pointer so it is not freed again.
 */
static void vmstat_stop(struct seq_file *m, void *arg)
{
        kfree(m->private);
        m->private = NULL;
}
 677
/*
 * seq_file operations that print one "<name> <value>" line per entry of
 * vmstat_text[] from a snapshot taken in vmstat_start().
 */
const struct seq_operations vmstat_op = {
        .start  = vmstat_start,
        .next   = vmstat_next,
        .stop   = vmstat_stop,
        .show   = vmstat_show,
};
 684
 685 #endif /* CONFIG_PROC_FS */
 686
 687 #ifdef CONFIG_SMP
/* Per-cpu delayed work item that runs vmstat_update() on its cpu. */
static DEFINE_PER_CPU(struct delayed_work, vmstat_work);
/* Delay, in jiffies, used by vmstat_update() when re-arming itself. */
int sysctl_stat_interval __read_mostly = HZ;
 690
/*
 * Work handler: refresh the counters of the cpu it runs on, then re-arm
 * this cpu's vmstat_work to run again after sysctl_stat_interval.
 * @w is not used; the per-cpu work item is looked up directly.
 */
static void vmstat_update(struct work_struct *w)
{
        refresh_cpu_vm_stats(smp_processor_id());
        schedule_delayed_work(&__get_cpu_var(vmstat_work),
                sysctl_stat_interval);
}
 697
 698 static void __devinit start_cpu_timer(int cpu)
 699 {
 700         struct delayed_work *vmstat_work = &per_cpu(vmstat_work, cpu);
 701
 702         INIT_DELAYED_WORK_DEFERRABLE(vmstat_work, vmstat_update);
 703         schedule_delayed_work_on(cpu, vmstat_work, HZ + cpu);
 704 }
 705
 706 /*
 707  * Use the cpu notifier to insure that the thresholds are recalculated
 708  * when necessary.
 709  */
 710 static int __cpuinit vmstat_cpuup_callback(struct notifier_block *nfb,
 711                 unsigned long action,
 712                 void *hcpu)
 713 {
 714         long cpu = (long)hcpu;
 715
 716         switch (action) {
 717         case CPU_ONLINE:
 718         case CPU_ONLINE_FROZEN:
 719                 start_cpu_timer(cpu);
 720                 break;
 721         case CPU_DOWN_PREPARE:
 722         case CPU_DOWN_PREPARE_FROZEN:
 723                 cancel_rearming_delayed_work(&per_cpu(vmstat_work, cpu));
 724                 per_cpu(vmstat_work, cpu).work.func = NULL;
 725                 break;
 726         case CPU_DOWN_FAILED:
 727         case CPU_DOWN_FAILED_FROZEN:
 728                 start_cpu_timer(cpu);
 729                 break;
 730         case CPU_DEAD:
 731         case CPU_DEAD_FROZEN:
 732                 refresh_zone_stat_thresholds();
 733                 break;
 734         default:
 735                 break;
 736         }
 737         return NOTIFY_OK;
 738 }
 739
 740 static struct notifier_block __cpuinitdata vmstat_notifier =
 741         { &vmstat_cpuup_callback, NULL, 0 };
 742
 743 int __init setup_vmstat(void)
 744 {
 745         int cpu;
 746
 747         refresh_zone_stat_thresholds();
 748         register_cpu_notifier(&vmstat_notifier);
 749
 750         for_each_online_cpu(cpu)
 751                 start_cpu_timer(cpu);
 752         return 0;
 753 }
 754 module_init(setup_vmstat)
 755 #endif