/*
 *  linux/mm/vmstat.c
 *
 *  Manages VM statistics
 *  Copyright (C) 1991, 1992, 1993, 1994  Linus Torvalds
 *
 *  zoned VM statistics
 *  Copyright (C) 2006 Silicon Graphics, Inc.,
 *              Christoph Lameter <christoph@lameter.com>
 */
#include <linux/fs.h>
#include <linux/mm.h>
#include <linux/err.h>
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/cpu.h>
#include <linux/vmstat.h>
#include <linux/sched.h>
#include <linux/math64.h>
#include <linux/writeback.h>
#include <linux/compaction.h>

#ifdef CONFIG_VM_EVENT_COUNTERS
DEFINE_PER_CPU(struct vm_event_state, vm_event_states) = {{0}};
EXPORT_PER_CPU_SYMBOL(vm_event_states);

static void sum_vm_events(unsigned long *ret)
{
        int cpu;
        int i;

        memset(ret, 0, NR_VM_EVENT_ITEMS * sizeof(unsigned long));

        for_each_online_cpu(cpu) {
                struct vm_event_state *this = &per_cpu(vm_event_states, cpu);

                for (i = 0; i < NR_VM_EVENT_ITEMS; i++)
                        ret[i] += this->event[i];
        }
}

/*
 * Accumulate the vm event counters across all CPUs.
 * The result is unavoidably approximate - it can change
 * during and after execution of this function.
 */
void all_vm_events(unsigned long *ret)
{
        get_online_cpus();
        sum_vm_events(ret);
        put_online_cpus();
}
EXPORT_SYMBOL_GPL(all_vm_events);

#ifdef CONFIG_HOTPLUG
/*
 * Fold the foreign cpu events into our own.
 *
 * This is adding to the events on one processor
 * but keeps the global counts constant.
 */
void vm_events_fold_cpu(int cpu)
{
        struct vm_event_state *fold_state = &per_cpu(vm_event_states, cpu);
        int i;

        for (i = 0; i < NR_VM_EVENT_ITEMS; i++) {
                count_vm_events(i, fold_state->event[i]);
                fold_state->event[i] = 0;
        }
}
#endif /* CONFIG_HOTPLUG */

#endif /* CONFIG_VM_EVENT_COUNTERS */

/*
 * Manage combined zone based / global counters
 *
 * vm_stat contains the global counters
 */
atomic_long_t vm_stat[NR_VM_ZONE_STAT_ITEMS];
EXPORT_SYMBOL(vm_stat);

#ifdef CONFIG_SMP

static int calculate_threshold(struct zone *zone)
{
        int threshold;
        int mem;        /* memory in 128 MB units */

        /*
         * The threshold scales with the number of processors and the amount
         * of memory per zone. More memory means that we can defer updates for
         * longer, more processors could lead to more contention.
         * fls() is used to have a cheap way of logarithmic scaling.
         *
         * Some sample thresholds:
         *
         * Threshold    Processors      (fls)   Zonesize        fls(mem+1)
         * ------------------------------------------------------------------
         * 8            1               1       0.9-1 GB        4
         * 16           2               2       0.9-1 GB        4
         * 20           2               2       1-2 GB          5
         * 24           2               2       2-4 GB          6
         * 28           2               2       4-8 GB          7
         * 32           2               2       8-16 GB         8
         * 4            2               2       <128M           1
         * 30           4               3       2-4 GB          5
         * 48           4               3       8-16 GB         8
         * 32           8               4       1-2 GB          4
         * 32           8               4       0.9-1GB         4
         * 10           16              5       <128M           1
         * 40           16              5       900M            4
         * 70           64              7       2-4 GB          5
         * 84           64              7       4-8 GB          6
         * 108          512             9       4-8 GB          6
         * 125          1024            10      8-16 GB         8
         * 125          1024            10      16-32 GB        9
         */

        mem = zone->present_pages >> (27 - PAGE_SHIFT);

        threshold = 2 * fls(num_online_cpus()) * (1 + fls(mem));

        /*
         * Maximum threshold is 125
         */
        threshold = min(125, threshold);

        return threshold;
}

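/*
 * Worked example of the scaling above (illustrative only, not an
 * additional code path): with 16 online CPUs, fls(16) = 5, and a ~900MB
 * zone, mem = 900MB / 128MB = 7 and fls(7) = 3, so the threshold is
 * 2 * 5 * (1 + 3) = 40, matching the sample table.
 */
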
/*
 * Refresh the thresholds for each zone.
 */
static void refresh_zone_stat_thresholds(void)
{
        struct zone *zone;
        int cpu;
        int threshold;

        for_each_populated_zone(zone) {
                unsigned long max_drift, tolerate_drift;

                threshold = calculate_threshold(zone);

                for_each_online_cpu(cpu)
                        per_cpu_ptr(zone->pageset, cpu)->stat_threshold
                                                        = threshold;

                /*
                 * Only set percpu_drift_mark if there is a danger that
                 * NR_FREE_PAGES reports the low watermark is ok when in fact
                 * the min watermark could be breached by an allocation
                 */
                tolerate_drift = low_wmark_pages(zone) - min_wmark_pages(zone);
                max_drift = num_online_cpus() * threshold;
                if (max_drift > tolerate_drift)
                        zone->percpu_drift_mark = high_wmark_pages(zone) +
                                        max_drift;
        }
}

/*
 * For use when we know that interrupts are disabled.
 */
void __mod_zone_page_state(struct zone *zone, enum zone_stat_item item,
                                int delta)
{
        struct per_cpu_pageset *pcp = this_cpu_ptr(zone->pageset);

        s8 *p = pcp->vm_stat_diff + item;
        long x;

        x = delta + *p;

        if (unlikely(x > pcp->stat_threshold || x < -pcp->stat_threshold)) {
                zone_page_state_add(x, zone, item);
                x = 0;
        }
        *p = x;
}
EXPORT_SYMBOL(__mod_zone_page_state);

/*
 * For an unknown interrupt state
 */
void mod_zone_page_state(struct zone *zone, enum zone_stat_item item,
                                        int delta)
{
        unsigned long flags;

        local_irq_save(flags);
        __mod_zone_page_state(zone, item, delta);
        local_irq_restore(flags);
}
EXPORT_SYMBOL(mod_zone_page_state);

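/*
 * Illustrative note (not an additional code path): with a stat_threshold
 * of, say, 32, __mod_zone_page_state() lets up to 32 pages worth of
 * updates accumulate in the per-cpu vm_stat_diff before folding them into
 * the zone and global atomic counters, trading a bounded per-cpu drift
 * for far fewer updates to shared cachelines.
 */
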
/*
 * Optimized increment and decrement functions.
 *
 * These are only for a single page and therefore can take a struct page *
 * argument instead of struct zone *. This allows the inclusion of the code
 * generated for page_zone(page) into the optimized functions.
 *
 * No overflow check is necessary and therefore the differential can be
 * incremented or decremented in place which may allow the compilers to
 * generate better code.
 * The increment or decrement is known and therefore one boundary check can
 * be omitted.
 *
 * NOTE: These functions are very performance sensitive. Change only
 * with care.
 *
 * Some processors have inc/dec instructions that are atomic vs an interrupt.
 * However, the code must first determine the differential location in a zone
 * based on the processor number and then inc/dec the counter. There is no
 * guarantee without disabling preemption that the processor will not change
 * in between and therefore the atomicity vs. interrupt cannot be exploited
 * in a useful way here.
 */
void __inc_zone_state(struct zone *zone, enum zone_stat_item item)
{
        struct per_cpu_pageset *pcp = this_cpu_ptr(zone->pageset);
        s8 *p = pcp->vm_stat_diff + item;

        (*p)++;

        if (unlikely(*p > pcp->stat_threshold)) {
                int overstep = pcp->stat_threshold / 2;

                zone_page_state_add(*p + overstep, zone, item);
                *p = -overstep;
        }
}

void __inc_zone_page_state(struct page *page, enum zone_stat_item item)
{
        __inc_zone_state(page_zone(page), item);
}
EXPORT_SYMBOL(__inc_zone_page_state);

void __dec_zone_state(struct zone *zone, enum zone_stat_item item)
{
        struct per_cpu_pageset *pcp = this_cpu_ptr(zone->pageset);
        s8 *p = pcp->vm_stat_diff + item;

        (*p)--;

        if (unlikely(*p < - pcp->stat_threshold)) {
                int overstep = pcp->stat_threshold / 2;

                zone_page_state_add(*p - overstep, zone, item);
                *p = overstep;
        }
}

void __dec_zone_page_state(struct page *page, enum zone_stat_item item)
{
        __dec_zone_state(page_zone(page), item);
}
EXPORT_SYMBOL(__dec_zone_page_state);

void inc_zone_state(struct zone *zone, enum zone_stat_item item)
{
        unsigned long flags;

        local_irq_save(flags);
        __inc_zone_state(zone, item);
        local_irq_restore(flags);
}

void inc_zone_page_state(struct page *page, enum zone_stat_item item)
{
        unsigned long flags;
        struct zone *zone;

        zone = page_zone(page);
        local_irq_save(flags);
        __inc_zone_state(zone, item);
        local_irq_restore(flags);
}
EXPORT_SYMBOL(inc_zone_page_state);

void dec_zone_page_state(struct page *page, enum zone_stat_item item)
{
        unsigned long flags;

        local_irq_save(flags);
        __dec_zone_page_state(page, item);
        local_irq_restore(flags);
}
EXPORT_SYMBOL(dec_zone_page_state);

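/*
 * Descriptive note: when the per-cpu differential crosses the threshold,
 * __inc_zone_state() and __dec_zone_state() fold an extra "threshold / 2"
 * (the overstep) into the zone counter and leave the differential at
 * -overstep or +overstep respectively. Restarting halfway back from the
 * boundary roughly halves how often a run of pure increments or pure
 * decrements has to touch the shared counters again.
 */
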
/*
 * Update the zone counters for one cpu.
 *
 * The cpu specified must be either the current cpu or a processor that
 * is not online. If it is the current cpu then the execution thread must
 * be pinned to the current cpu.
 *
 * Note that refresh_cpu_vm_stats strives to only access
 * node local memory. The per cpu pagesets on remote zones are placed
 * in the memory local to the processor using that pageset. So the
 * loop over all zones will access a series of cachelines local to
 * the processor.
 *
 * The call to zone_page_state_add updates the cachelines with the
 * statistics in the remote zone struct as well as the global cachelines
 * with the global counters. These could cause remote node cache line
 * bouncing and will have to be only done when necessary.
 */
void refresh_cpu_vm_stats(int cpu)
{
        struct zone *zone;
        int i;
        int global_diff[NR_VM_ZONE_STAT_ITEMS] = { 0, };

        for_each_populated_zone(zone) {
                struct per_cpu_pageset *p;

                p = per_cpu_ptr(zone->pageset, cpu);

                for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++)
                        if (p->vm_stat_diff[i]) {
                                unsigned long flags;
                                int v;

                                local_irq_save(flags);
                                v = p->vm_stat_diff[i];
                                p->vm_stat_diff[i] = 0;
                                local_irq_restore(flags);
                                atomic_long_add(v, &zone->vm_stat[i]);
                                global_diff[i] += v;
#ifdef CONFIG_NUMA
                                /* 3 seconds idle till flush */
                                p->expire = 3;
#endif
                        }
                cond_resched();
#ifdef CONFIG_NUMA
                /*
                 * Deal with draining the remote pageset of this
                 * processor
                 *
                 * Check if there are pages remaining in this pageset
                 * if not then there is nothing to expire.
                 */
                if (!p->expire || !p->pcp.count)
                        continue;

                /*
                 * We never drain zones local to this processor.
                 */
                if (zone_to_nid(zone) == numa_node_id()) {
                        p->expire = 0;
                        continue;
                }

                p->expire--;
                if (p->expire)
                        continue;

                if (p->pcp.count)
                        drain_zone_pages(zone, &p->pcp);
#endif
        }

        for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++)
                if (global_diff[i])
                        atomic_long_add(global_diff[i], &vm_stat[i]);
}

#endif

#ifdef CONFIG_NUMA
/*
 * preferred_zone = the first zone of the zonelist passed to the allocator
 * z = the zone from which the allocation occurred
 *
 * Must be called with interrupts disabled.
 */
void zone_statistics(struct zone *preferred_zone, struct zone *z)
{
        if (z->zone_pgdat == preferred_zone->zone_pgdat) {
                __inc_zone_state(z, NUMA_HIT);
        } else {
                __inc_zone_state(z, NUMA_MISS);
                __inc_zone_state(preferred_zone, NUMA_FOREIGN);
        }
        if (z->node == numa_node_id())
                __inc_zone_state(z, NUMA_LOCAL);
        else
                __inc_zone_state(z, NUMA_OTHER);
}
#endif

#ifdef CONFIG_COMPACTION

struct contig_page_info {
        unsigned long free_pages;
        unsigned long free_blocks_total;
        unsigned long free_blocks_suitable;
};

/*
 * Calculate the number of free pages in a zone, how many contiguous
 * pages are free and how many are large enough to satisfy an allocation of
 * the target size. Note that this function makes no attempt to estimate
 * how many suitable free blocks there *might* be if MOVABLE pages were
 * migrated. Calculating that is possible, but expensive and can be
 * figured out from userspace
 */
static void fill_contig_page_info(struct zone *zone,
                                unsigned int suitable_order,
                                struct contig_page_info *info)
{
        unsigned int order;

        info->free_pages = 0;
        info->free_blocks_total = 0;
        info->free_blocks_suitable = 0;

        for (order = 0; order < MAX_ORDER; order++) {
                unsigned long blocks;

                /* Count number of free blocks */
                blocks = zone->free_area[order].nr_free;
                info->free_blocks_total += blocks;

                /* Count free base pages */
                info->free_pages += blocks << order;

                /* Count the suitable free blocks */
                if (order >= suitable_order)
                        info->free_blocks_suitable += blocks <<
                                                (order - suitable_order);
        }
}

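/*
 * Worked example for fill_contig_page_info() (illustrative only): with
 * suitable_order = 2, one free block of order 4 counts as
 * 1 << (4 - 2) = 4 suitable blocks, since it could be split into four
 * order-2 blocks without migrating anything.
 */
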
/*
 * A fragmentation index only makes sense if an allocation of a requested
 * size would fail. If that is true, the fragmentation index indicates
 * whether external fragmentation or a lack of memory was the problem.
 * The value can be used to determine if page reclaim or compaction
 * should be used
 */
static int __fragmentation_index(unsigned int order, struct contig_page_info *info)
{
        unsigned long requested = 1UL << order;

        if (!info->free_blocks_total)
                return 0;

        /* Fragmentation index only makes sense when a request would fail */
        if (info->free_blocks_suitable)
                return -1000;

        /*
         * Index is between 0 and 1 so return within 3 decimal places
         *
         * 0 => allocation would fail due to lack of memory
         * 1 => allocation would fail due to fragmentation
         */
        return 1000 - div_u64( (1000+(div_u64(info->free_pages * 1000ULL, requested))), info->free_blocks_total);
}

/* Same as __fragmentation_index but allocs contig_page_info on stack */
int fragmentation_index(struct zone *zone, unsigned int order)
{
        struct contig_page_info info;

        fill_contig_page_info(zone, order, &info);
        return __fragmentation_index(order, &info);
}
#endif

#if defined(CONFIG_PROC_FS) || defined(CONFIG_COMPACTION)
#include <linux/proc_fs.h>
#include <linux/seq_file.h>

static char * const migratetype_names[MIGRATE_TYPES] = {
        "Unmovable",
        "Reclaimable",
        "Movable",
        "Reserve",
        "Isolate",
};

static void *frag_start(struct seq_file *m, loff_t *pos)
{
        pg_data_t *pgdat;
        loff_t node = *pos;
        for (pgdat = first_online_pgdat();
             pgdat && node;
             pgdat = next_online_pgdat(pgdat))
                --node;

        return pgdat;
}

static void *frag_next(struct seq_file *m, void *arg, loff_t *pos)
{
        pg_data_t *pgdat = (pg_data_t *)arg;

        (*pos)++;
        return next_online_pgdat(pgdat);
}

static void frag_stop(struct seq_file *m, void *arg)
{
}

/* Walk all the zones in a node and print using a callback */
static void walk_zones_in_node(struct seq_file *m, pg_data_t *pgdat,
                void (*print)(struct seq_file *m, pg_data_t *, struct zone *))
{
        struct zone *zone;
        struct zone *node_zones = pgdat->node_zones;
        unsigned long flags;

        for (zone = node_zones; zone - node_zones < MAX_NR_ZONES; ++zone) {
                if (!populated_zone(zone))
                        continue;

                spin_lock_irqsave(&zone->lock, flags);
                print(m, pgdat, zone);
                spin_unlock_irqrestore(&zone->lock, flags);
        }
}
#endif

#ifdef CONFIG_PROC_FS
static void frag_show_print(struct seq_file *m, pg_data_t *pgdat,
                                                struct zone *zone)
{
        int order;

        seq_printf(m, "Node %d, zone %8s ", pgdat->node_id, zone->name);
        for (order = 0; order < MAX_ORDER; ++order)
                seq_printf(m, "%6lu ", zone->free_area[order].nr_free);
        seq_putc(m, '\n');
}

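/*
 * Example /proc/buddyinfo line as produced by frag_show_print() (the
 * counts are illustrative):
 *
 *   Node 0, zone   Normal    145     62     30     12      4      1 ...
 *
 * one free-block count per order, from order 0 up to MAX_ORDER - 1.
 */
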
/*
 * This walks the free areas for each zone.
 */
static int frag_show(struct seq_file *m, void *arg)
{
        pg_data_t *pgdat = (pg_data_t *)arg;
        walk_zones_in_node(m, pgdat, frag_show_print);
        return 0;
}

static void pagetypeinfo_showfree_print(struct seq_file *m,
                                        pg_data_t *pgdat, struct zone *zone)
{
        int order, mtype;

        for (mtype = 0; mtype < MIGRATE_TYPES; mtype++) {
                seq_printf(m, "Node %4d, zone %8s, type %12s ",
                                        pgdat->node_id,
                                        zone->name,
                                        migratetype_names[mtype]);
                for (order = 0; order < MAX_ORDER; ++order) {
                        unsigned long freecount = 0;
                        struct free_area *area;
                        struct list_head *curr;

                        area = &(zone->free_area[order]);

                        list_for_each(curr, &area->free_list[mtype])
                                freecount++;
                        seq_printf(m, "%6lu ", freecount);
                }
                seq_putc(m, '\n');
        }
}

/* Print out the free pages at each order for each migratetype */
static int pagetypeinfo_showfree(struct seq_file *m, void *arg)
{
        int order;
        pg_data_t *pgdat = (pg_data_t *)arg;

        /* Print header */
        seq_printf(m, "%-43s ", "Free pages count per migrate type at order");
        for (order = 0; order < MAX_ORDER; ++order)
                seq_printf(m, "%6d ", order);
        seq_putc(m, '\n');

        walk_zones_in_node(m, pgdat, pagetypeinfo_showfree_print);

        return 0;
}

static void pagetypeinfo_showblockcount_print(struct seq_file *m,
                                        pg_data_t *pgdat, struct zone *zone)
{
        int mtype;
        unsigned long pfn;
        unsigned long start_pfn = zone->zone_start_pfn;
        unsigned long end_pfn = start_pfn + zone->spanned_pages;
        unsigned long count[MIGRATE_TYPES] = { 0, };

        for (pfn = start_pfn; pfn < end_pfn; pfn += pageblock_nr_pages) {
                struct page *page;

                if (!pfn_valid(pfn))
                        continue;

                page = pfn_to_page(pfn);

                /* Watch for unexpected holes punched in the memmap */
                if (!memmap_valid_within(pfn, page, zone))
                        continue;

                mtype = get_pageblock_migratetype(page);

                if (mtype < MIGRATE_TYPES)
                        count[mtype]++;
        }

        /* Print counts */
        seq_printf(m, "Node %d, zone %8s ", pgdat->node_id, zone->name);
        for (mtype = 0; mtype < MIGRATE_TYPES; mtype++)
                seq_printf(m, "%12lu ", count[mtype]);
        seq_putc(m, '\n');
}

/* Print out the number of pageblocks for each migratetype */
static int pagetypeinfo_showblockcount(struct seq_file *m, void *arg)
{
        int mtype;
        pg_data_t *pgdat = (pg_data_t *)arg;

        seq_printf(m, "\n%-23s", "Number of blocks type ");
        for (mtype = 0; mtype < MIGRATE_TYPES; mtype++)
                seq_printf(m, "%12s ", migratetype_names[mtype]);
        seq_putc(m, '\n');
        walk_zones_in_node(m, pgdat, pagetypeinfo_showblockcount_print);

        return 0;
}

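/*
 * Together, the two helpers above produce the tables shown in
 * /proc/pagetypeinfo: free block counts per (migratetype, order) and the
 * number of pageblocks currently marked with each migratetype.
 */
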
/*
 * This prints out statistics in relation to grouping pages by mobility.
 * It is expensive to collect so do not constantly read the file.
 */
static int pagetypeinfo_show(struct seq_file *m, void *arg)
{
        pg_data_t *pgdat = (pg_data_t *)arg;

        /* check memoryless node */
        if (!node_state(pgdat->node_id, N_HIGH_MEMORY))
                return 0;

        seq_printf(m, "Page block order: %d\n", pageblock_order);
        seq_printf(m, "Pages per block:  %lu\n", pageblock_nr_pages);
        seq_putc(m, '\n');
        pagetypeinfo_showfree(m, pgdat);
        pagetypeinfo_showblockcount(m, pgdat);

        return 0;
}

static const struct seq_operations fragmentation_op = {
        .start  = frag_start,
        .next   = frag_next,
        .stop   = frag_stop,
        .show   = frag_show,
};

static int fragmentation_open(struct inode *inode, struct file *file)
{
        return seq_open(file, &fragmentation_op);
}

static const struct file_operations fragmentation_file_operations = {
        .open           = fragmentation_open,
        .read           = seq_read,
        .llseek         = seq_lseek,
        .release        = seq_release,
};

static const struct seq_operations pagetypeinfo_op = {
        .start  = frag_start,
        .next   = frag_next,
        .stop   = frag_stop,
        .show   = pagetypeinfo_show,
};

static int pagetypeinfo_open(struct inode *inode, struct file *file)
{
        return seq_open(file, &pagetypeinfo_op);
}

static const struct file_operations pagetypeinfo_file_ops = {
        .open           = pagetypeinfo_open,
        .read           = seq_read,
        .llseek         = seq_lseek,
        .release        = seq_release,
};

#ifdef CONFIG_ZONE_DMA
#define TEXT_FOR_DMA(xx) xx "_dma",
#else
#define TEXT_FOR_DMA(xx)
#endif

#ifdef CONFIG_ZONE_DMA32
#define TEXT_FOR_DMA32(xx) xx "_dma32",
#else
#define TEXT_FOR_DMA32(xx)
#endif

#ifdef CONFIG_HIGHMEM
#define TEXT_FOR_HIGHMEM(xx) xx "_high",
#else
#define TEXT_FOR_HIGHMEM(xx)
#endif

#define TEXTS_FOR_ZONES(xx) TEXT_FOR_DMA(xx) TEXT_FOR_DMA32(xx) xx "_normal", \
                                        TEXT_FOR_HIGHMEM(xx) xx "_movable",

static const char * const vmstat_text[] = {
        /* Zoned VM counters */
        "nr_free_pages",
        "nr_inactive_anon",
        "nr_active_anon",
        "nr_inactive_file",
        "nr_active_file",
        "nr_unevictable",
        "nr_mlock",
        "nr_anon_pages",
        "nr_mapped",
        "nr_file_pages",
        "nr_dirty",
        "nr_writeback",
        "nr_slab_reclaimable",
        "nr_slab_unreclaimable",
        "nr_page_table_pages",
        "nr_kernel_stack",
        "nr_unstable",
        "nr_bounce",
        "nr_vmscan_write",
        "nr_writeback_temp",
        "nr_isolated_anon",
        "nr_isolated_file",
        "nr_shmem",
        "nr_dirtied",
        "nr_written",

#ifdef CONFIG_NUMA
        "numa_hit",
        "numa_miss",
        "numa_foreign",
        "numa_interleave",
        "numa_local",
        "numa_other",
#endif
        "nr_dirty_threshold",
        "nr_dirty_background_threshold",

#ifdef CONFIG_VM_EVENT_COUNTERS
        "pgpgin",
        "pgpgout",
        "pswpin",
        "pswpout",

        TEXTS_FOR_ZONES("pgalloc")

        "pgfree",
        "pgactivate",
        "pgdeactivate",

        "pgfault",
        "pgmajfault",

        TEXTS_FOR_ZONES("pgrefill")
        TEXTS_FOR_ZONES("pgsteal")
        TEXTS_FOR_ZONES("pgscan_kswapd")
        TEXTS_FOR_ZONES("pgscan_direct")

#ifdef CONFIG_NUMA
        "zone_reclaim_failed",
#endif
        "pginodesteal",
        "slabs_scanned",
        "kswapd_steal",
        "kswapd_inodesteal",
        "kswapd_low_wmark_hit_quickly",
        "kswapd_high_wmark_hit_quickly",
        "kswapd_skip_congestion_wait",
        "pageoutrun",
        "allocstall",

        "pgrotated",

#ifdef CONFIG_COMPACTION
        "compact_blocks_moved",
        "compact_pages_moved",
        "compact_pagemigrate_failed",
        "compact_stall",
        "compact_fail",
        "compact_success",
#endif

#ifdef CONFIG_HUGETLB_PAGE
        "htlb_buddy_alloc_success",
        "htlb_buddy_alloc_fail",
#endif
        "unevictable_pgs_culled",
        "unevictable_pgs_scanned",
        "unevictable_pgs_rescued",
        "unevictable_pgs_mlocked",
        "unevictable_pgs_munlocked",
        "unevictable_pgs_cleared",
        "unevictable_pgs_stranded",
        "unevictable_pgs_mlockfreed",
#endif
};

static void zoneinfo_show_print(struct seq_file *m, pg_data_t *pgdat,
                                                        struct zone *zone)
{
        int i;
        seq_printf(m, "Node %d, zone %8s", pgdat->node_id, zone->name);
        seq_printf(m,
                   "\n  pages free     %lu"
                   "\n        min      %lu"
                   "\n        low      %lu"
                   "\n        high     %lu"
                   "\n        scanned  %lu"
                   "\n        spanned  %lu"
                   "\n        present  %lu",
                   zone_nr_free_pages(zone),
                   min_wmark_pages(zone),
                   low_wmark_pages(zone),
                   high_wmark_pages(zone),
                   zone->pages_scanned,
                   zone->spanned_pages,
                   zone->present_pages);

        for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++)
                seq_printf(m, "\n    %-12s %lu", vmstat_text[i],
                                zone_page_state(zone, i));

        seq_printf(m,
                   "\n        protection: (%lu",
                   zone->lowmem_reserve[0]);
        for (i = 1; i < ARRAY_SIZE(zone->lowmem_reserve); i++)
                seq_printf(m, ", %lu", zone->lowmem_reserve[i]);
        seq_printf(m,
                   ")"
                   "\n  pagesets");
        for_each_online_cpu(i) {
                struct per_cpu_pageset *pageset;

                pageset = per_cpu_ptr(zone->pageset, i);
                seq_printf(m,
                           "\n    cpu: %i"
                           "\n              count: %i"
                           "\n              high:  %i"
                           "\n              batch: %i",
                           i,
                           pageset->pcp.count,
                           pageset->pcp.high,
                           pageset->pcp.batch);
#ifdef CONFIG_SMP
                seq_printf(m, "\n  vm stats threshold: %d",
                                pageset->stat_threshold);
#endif
        }
        seq_printf(m,
                   "\n  all_unreclaimable: %u"
                   "\n  start_pfn:         %lu"
                   "\n  inactive_ratio:    %u",
                   zone->all_unreclaimable,
                   zone->zone_start_pfn,
                   zone->inactive_ratio);
        seq_putc(m, '\n');
}

/*
 * Output information about zones in @pgdat.
 */
static int zoneinfo_show(struct seq_file *m, void *arg)
{
        pg_data_t *pgdat = (pg_data_t *)arg;
        walk_zones_in_node(m, pgdat, zoneinfo_show_print);
        return 0;
}

static const struct seq_operations zoneinfo_op = {
        .start  = frag_start, /* iterate over all zones. The same as in
                               * fragmentation. */
        .next   = frag_next,
        .stop   = frag_stop,
        .show   = zoneinfo_show,
};

static int zoneinfo_open(struct inode *inode, struct file *file)
{
        return seq_open(file, &zoneinfo_op);
}

static const struct file_operations proc_zoneinfo_file_operations = {
        .open           = zoneinfo_open,
        .read           = seq_read,
        .llseek         = seq_lseek,
        .release        = seq_release,
};

enum writeback_stat_item {
        NR_DIRTY_THRESHOLD,
        NR_DIRTY_BG_THRESHOLD,
        NR_VM_WRITEBACK_STAT_ITEMS,
};

static void *vmstat_start(struct seq_file *m, loff_t *pos)
{
        unsigned long *v;
        int i, stat_items_size;

        if (*pos >= ARRAY_SIZE(vmstat_text))
                return NULL;
        stat_items_size = NR_VM_ZONE_STAT_ITEMS * sizeof(unsigned long) +
                          NR_VM_WRITEBACK_STAT_ITEMS * sizeof(unsigned long);

#ifdef CONFIG_VM_EVENT_COUNTERS
        stat_items_size += sizeof(struct vm_event_state);
#endif

        v = kmalloc(stat_items_size, GFP_KERNEL);
        m->private = v;
        if (!v)
                return ERR_PTR(-ENOMEM);
        for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++)
                v[i] = global_page_state(i);
        v += NR_VM_ZONE_STAT_ITEMS;

        global_dirty_limits(v + NR_DIRTY_BG_THRESHOLD,
                            v + NR_DIRTY_THRESHOLD);
        v += NR_VM_WRITEBACK_STAT_ITEMS;

#ifdef CONFIG_VM_EVENT_COUNTERS
        all_vm_events(v);
        v[PGPGIN] /= 2;         /* sectors -> kbytes */
        v[PGPGOUT] /= 2;
#endif
        return (unsigned long *)m->private + *pos;
}

static void *vmstat_next(struct seq_file *m, void *arg, loff_t *pos)
{
        (*pos)++;
        if (*pos >= ARRAY_SIZE(vmstat_text))
                return NULL;
        return (unsigned long *)m->private + *pos;
}

static int vmstat_show(struct seq_file *m, void *arg)
{
        unsigned long *l = arg;
        unsigned long off = l - (unsigned long *)m->private;

        seq_printf(m, "%s %lu\n", vmstat_text[off], *l);
        return 0;
}

static void vmstat_stop(struct seq_file *m, void *arg)
{
        kfree(m->private);
        m->private = NULL;
}

static const struct seq_operations vmstat_op = {
        .start  = vmstat_start,
        .next   = vmstat_next,
        .stop   = vmstat_stop,
        .show   = vmstat_show,
};

static int vmstat_open(struct inode *inode, struct file *file)
{
        return seq_open(file, &vmstat_op);
}

static const struct file_operations proc_vmstat_file_operations = {
        .open           = vmstat_open,
        .read           = seq_read,
        .llseek         = seq_lseek,
        .release        = seq_release,
};
#endif /* CONFIG_PROC_FS */

#ifdef CONFIG_SMP
static DEFINE_PER_CPU(struct delayed_work, vmstat_work);
int sysctl_stat_interval __read_mostly = HZ;

static void vmstat_update(struct work_struct *w)
{
        refresh_cpu_vm_stats(smp_processor_id());
        schedule_delayed_work(&__get_cpu_var(vmstat_work),
                round_jiffies_relative(sysctl_stat_interval));
}

static void __cpuinit start_cpu_timer(int cpu)
{
        struct delayed_work *work = &per_cpu(vmstat_work, cpu);

        INIT_DELAYED_WORK_DEFERRABLE(work, vmstat_update);
        schedule_delayed_work_on(cpu, work, __round_jiffies_relative(HZ, cpu));
}

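/*
 * Descriptive note: each CPU folds its own differentials from a deferrable
 * delayed work item, so an otherwise idle CPU is not woken up just to
 * flush statistics, and __round_jiffies_relative(HZ, cpu) staggers the
 * per-cpu timers so they do not all fire in the same tick.
 */
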
/*
 * Use the cpu notifier to ensure that the thresholds are recalculated
 * when necessary.
 */
static int __cpuinit vmstat_cpuup_callback(struct notifier_block *nfb,
                unsigned long action,
                void *hcpu)
{
        long cpu = (long)hcpu;

        switch (action) {
        case CPU_ONLINE:
        case CPU_ONLINE_FROZEN:
                refresh_zone_stat_thresholds();
                start_cpu_timer(cpu);
                node_set_state(cpu_to_node(cpu), N_CPU);
                break;
        case CPU_DOWN_PREPARE:
        case CPU_DOWN_PREPARE_FROZEN:
                cancel_rearming_delayed_work(&per_cpu(vmstat_work, cpu));
                per_cpu(vmstat_work, cpu).work.func = NULL;
                break;
        case CPU_DOWN_FAILED:
        case CPU_DOWN_FAILED_FROZEN:
                start_cpu_timer(cpu);
                break;
        case CPU_DEAD:
        case CPU_DEAD_FROZEN:
                refresh_zone_stat_thresholds();
                break;
        default:
                break;
        }
        return NOTIFY_OK;
}

static struct notifier_block __cpuinitdata vmstat_notifier =
        { &vmstat_cpuup_callback, NULL, 0 };
#endif

static int __init setup_vmstat(void)
{
#ifdef CONFIG_SMP
        int cpu;

        refresh_zone_stat_thresholds();
        register_cpu_notifier(&vmstat_notifier);

        for_each_online_cpu(cpu)
                start_cpu_timer(cpu);
#endif
#ifdef CONFIG_PROC_FS
        proc_create("buddyinfo", S_IRUGO, NULL, &fragmentation_file_operations);
        proc_create("pagetypeinfo", S_IRUGO, NULL, &pagetypeinfo_file_ops);
        proc_create("vmstat", S_IRUGO, NULL, &proc_vmstat_file_operations);
        proc_create("zoneinfo", S_IRUGO, NULL, &proc_zoneinfo_file_operations);
#endif
        return 0;
}
module_init(setup_vmstat)

#if defined(CONFIG_DEBUG_FS) && defined(CONFIG_COMPACTION)
#include <linux/debugfs.h>

static struct dentry *extfrag_debug_root;

/*
 * Return an index indicating how much of the available free memory is
 * unusable for an allocation of the requested size.
 */
static int unusable_free_index(unsigned int order,
                                struct contig_page_info *info)
{
        /* No free memory is interpreted as all free memory is unusable */
        if (info->free_pages == 0)
                return 1000;

        /*
         * Index should be a value between 0 and 1. Return a value to 3
         * decimal places.
         *
         * 0 => no fragmentation
         * 1 => high fragmentation
         */
        return div_u64((info->free_pages - (info->free_blocks_suitable << order)) * 1000ULL, info->free_pages);

}

static void unusable_show_print(struct seq_file *m,
                                        pg_data_t *pgdat, struct zone *zone)
{
        unsigned int order;
        int index;
        struct contig_page_info info;

        seq_printf(m, "Node %d, zone %8s ",
                                pgdat->node_id,
                                zone->name);
        for (order = 0; order < MAX_ORDER; ++order) {
                fill_contig_page_info(zone, order, &info);
                index = unusable_free_index(order, &info);
                seq_printf(m, "%d.%03d ", index / 1000, index % 1000);
        }

        seq_putc(m, '\n');
}

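/*
 * Worked example for unusable_free_index() (illustrative only): if a zone
 * has 1000 free pages of which 200 lie in blocks of at least the requested
 * order, the index is (1000 - 200) * 1000 / 1000 = 800, printed as 0.800,
 * i.e. 80% of the free memory is unusable for that allocation size.
 */
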
/*
 * Display unusable free space index
 *
 * The unusable free space index measures how much of the available free
 * memory cannot be used to satisfy an allocation of a given size and is a
 * value between 0 and 1. The higher the value, the more of free memory is
 * unusable and by implication, the worse the external fragmentation is. This
 * can be expressed as a percentage by multiplying by 100.
 */
static int unusable_show(struct seq_file *m, void *arg)
{
        pg_data_t *pgdat = (pg_data_t *)arg;

        /* check memoryless node */
        if (!node_state(pgdat->node_id, N_HIGH_MEMORY))
                return 0;

        walk_zones_in_node(m, pgdat, unusable_show_print);

        return 0;
}

static const struct seq_operations unusable_op = {
        .start  = frag_start,
        .next   = frag_next,
        .stop   = frag_stop,
        .show   = unusable_show,
};

static int unusable_open(struct inode *inode, struct file *file)
{
        return seq_open(file, &unusable_op);
}

static const struct file_operations unusable_file_ops = {
        .open           = unusable_open,
        .read           = seq_read,
        .llseek         = seq_lseek,
        .release        = seq_release,
};

static void extfrag_show_print(struct seq_file *m,
                                        pg_data_t *pgdat, struct zone *zone)
{
        unsigned int order;
        int index;

        /* Alloc on stack as interrupts are disabled for zone walk */
        struct contig_page_info info;

        seq_printf(m, "Node %d, zone %8s ",
                                pgdat->node_id,
                                zone->name);
        for (order = 0; order < MAX_ORDER; ++order) {
                fill_contig_page_info(zone, order, &info);
                index = __fragmentation_index(order, &info);
                seq_printf(m, "%d.%03d ", index / 1000, index % 1000);
        }

        seq_putc(m, '\n');
}

/*
 * Display fragmentation index for orders that allocations would fail for
 */
static int extfrag_show(struct seq_file *m, void *arg)
{
        pg_data_t *pgdat = (pg_data_t *)arg;

        walk_zones_in_node(m, pgdat, extfrag_show_print);

        return 0;
}

static const struct seq_operations extfrag_op = {
        .start  = frag_start,
        .next   = frag_next,
        .stop   = frag_stop,
        .show   = extfrag_show,
};

static int extfrag_open(struct inode *inode, struct file *file)
{
        return seq_open(file, &extfrag_op);
}

static const struct file_operations extfrag_file_ops = {
        .open           = extfrag_open,
        .read           = seq_read,
        .llseek         = seq_lseek,
        .release        = seq_release,
};

static int __init extfrag_debug_init(void)
{
        extfrag_debug_root = debugfs_create_dir("extfrag", NULL);
        if (!extfrag_debug_root)
                return -ENOMEM;

        if (!debugfs_create_file("unusable_index", 0444,
                        extfrag_debug_root, NULL, &unusable_file_ops))
                return -ENOMEM;

        if (!debugfs_create_file("extfrag_index", 0444,
                        extfrag_debug_root, NULL, &extfrag_file_ops))
                return -ENOMEM;

        return 0;
}

module_init(extfrag_debug_init);
#endif