#include <linux/mm.h>
#include <linux/vmacache.h>
#include <linux/hugetlb.h>
#include <linux/huge_mm.h>
#include <linux/mount.h>
#include <linux/seq_file.h>
#include <linux/highmem.h>
#include <linux/ptrace.h>
#include <linux/slab.h>
#include <linux/pagemap.h>
#include <linux/mempolicy.h>
#include <linux/rmap.h>
#include <linux/swap.h>
#include <linux/swapops.h>
#include <linux/mmu_notifier.h>
#include <linux/page_idle.h>

#include <asm/elf.h>
#include <asm/uaccess.h>
#include <asm/tlbflush.h>
#include "internal.h"

void task_mem(struct seq_file *m, struct mm_struct *mm)
{
	unsigned long data, text, lib, swap, ptes, pmds;
	unsigned long hiwater_vm, total_vm, hiwater_rss, total_rss;

	/*
	 * Note: to minimize their overhead, mm maintains hiwater_vm and
	 * hiwater_rss only when about to *lower* total_vm or rss.  Any
	 * collector of these hiwater stats must therefore get total_vm
	 * and rss too, which will usually be the higher.  Barriers? not
	 * worth the effort, such snapshots can always be inconsistent.
	 */
	hiwater_vm = total_vm = mm->total_vm;
	if (hiwater_vm < mm->hiwater_vm)
		hiwater_vm = mm->hiwater_vm;
	hiwater_rss = total_rss = get_mm_rss(mm);
	if (hiwater_rss < mm->hiwater_rss)
		hiwater_rss = mm->hiwater_rss;

	data = mm->total_vm - mm->shared_vm - mm->stack_vm;
	text = (PAGE_ALIGN(mm->end_code) - (mm->start_code & PAGE_MASK)) >> 10;
	lib = (mm->exec_vm << (PAGE_SHIFT-10)) - text;
	swap = get_mm_counter(mm, MM_SWAPENTS);
	ptes = PTRS_PER_PTE * sizeof(pte_t) * atomic_long_read(&mm->nr_ptes);
	pmds = PTRS_PER_PMD * sizeof(pmd_t) * mm_nr_pmds(mm);
	seq_printf(m,
		"VmPeak:\t%8lu kB\n"
		"VmSize:\t%8lu kB\n"
		"VmLck:\t%8lu kB\n"
		"VmPin:\t%8lu kB\n"
		"VmHWM:\t%8lu kB\n"
		"VmRSS:\t%8lu kB\n"
		"VmData:\t%8lu kB\n"
		"VmStk:\t%8lu kB\n"
		"VmExe:\t%8lu kB\n"
		"VmLib:\t%8lu kB\n"
		"VmPTE:\t%8lu kB\n"
		"VmPMD:\t%8lu kB\n"
		"VmSwap:\t%8lu kB\n",
		hiwater_vm << (PAGE_SHIFT-10),
		total_vm << (PAGE_SHIFT-10),
		mm->locked_vm << (PAGE_SHIFT-10),
		mm->pinned_vm << (PAGE_SHIFT-10),
		hiwater_rss << (PAGE_SHIFT-10),
		total_rss << (PAGE_SHIFT-10),
		data << (PAGE_SHIFT-10),
		mm->stack_vm << (PAGE_SHIFT-10), text, lib,
		ptes >> 10,
		pmds >> 10,
		swap << (PAGE_SHIFT-10));
	hugetlb_report_usage(m, mm);
}

unsigned long task_vsize(struct mm_struct *mm)
{
	return PAGE_SIZE * mm->total_vm;
}

unsigned long task_statm(struct mm_struct *mm,
			 unsigned long *shared, unsigned long *text,
			 unsigned long *data, unsigned long *resident)
{
	*shared = get_mm_counter(mm, MM_FILEPAGES);
	*text = (PAGE_ALIGN(mm->end_code) - (mm->start_code & PAGE_MASK))
								>> PAGE_SHIFT;
	*data = mm->total_vm - mm->shared_vm;
	*resident = *shared + get_mm_counter(mm, MM_ANONPAGES);
	return mm->total_vm;
}

#ifdef CONFIG_NUMA
/*
 * Save get_task_policy() for show_numa_map().
 */
static void hold_task_mempolicy(struct proc_maps_private *priv)
{
	struct task_struct *task = priv->task;

	task_lock(task);
	priv->task_mempolicy = get_task_policy(task);
	mpol_get(priv->task_mempolicy);
	task_unlock(task);
}
static void release_task_mempolicy(struct proc_maps_private *priv)
{
	mpol_put(priv->task_mempolicy);
}
#else
static void hold_task_mempolicy(struct proc_maps_private *priv)
{
}
static void release_task_mempolicy(struct proc_maps_private *priv)
{
}
#endif

static void vma_stop(struct proc_maps_private *priv)
{
	struct mm_struct *mm = priv->mm;

	release_task_mempolicy(priv);
	up_read(&mm->mmap_sem);
	mmput(mm);
}

static struct vm_area_struct *
m_next_vma(struct proc_maps_private *priv, struct vm_area_struct *vma)
{
	if (vma == priv->tail_vma)
		return NULL;
	return vma->vm_next ?: priv->tail_vma;
}

static void m_cache_vma(struct seq_file *m, struct vm_area_struct *vma)
{
	if (m->count < m->size)	/* vma is copied successfully */
		m->version = m_next_vma(m->private, vma) ? vma->vm_start : -1UL;
}

static void *m_start(struct seq_file *m, loff_t *ppos)
{
	struct proc_maps_private *priv = m->private;
	unsigned long last_addr = m->version;
	struct mm_struct *mm;
	struct vm_area_struct *vma;
	unsigned int pos = *ppos;

	/* See m_cache_vma(). Zero at the start or after lseek. */
	if (last_addr == -1UL)
		return NULL;

	priv->task = get_proc_task(priv->inode);
	if (!priv->task)
		return ERR_PTR(-ESRCH);

	mm = priv->mm;
	if (!mm || !atomic_inc_not_zero(&mm->mm_users))
		return NULL;

	down_read(&mm->mmap_sem);
	hold_task_mempolicy(priv);
	priv->tail_vma = get_gate_vma(mm);

	if (last_addr) {
		vma = find_vma(mm, last_addr);
		if (vma && (vma = m_next_vma(priv, vma)))
			return vma;
	}

	m->version = 0;
	if (pos < mm->map_count) {
		for (vma = mm->mmap; pos; pos--) {
			m->version = vma->vm_start;
			vma = vma->vm_next;
		}
		return vma;
	}

	/* we do not bother to update m->version in this case */
	if (pos == mm->map_count && priv->tail_vma)
		return priv->tail_vma;

	vma_stop(priv);
	return NULL;
}

static void *m_next(struct seq_file *m, void *v, loff_t *pos)
{
	struct proc_maps_private *priv = m->private;
	struct vm_area_struct *next;

	(*pos)++;
	next = m_next_vma(priv, v);
	if (!next)
		vma_stop(priv);
	return next;
}

static void m_stop(struct seq_file *m, void *v)
{
	struct proc_maps_private *priv = m->private;

	if (!IS_ERR_OR_NULL(v))
		vma_stop(priv);
	if (priv->task) {
		put_task_struct(priv->task);
		priv->task = NULL;
	}
}

static int proc_maps_open(struct inode *inode, struct file *file,
			const struct seq_operations *ops, int psize)
{
	struct proc_maps_private *priv = __seq_open_private(file, ops, psize);

	if (!priv)
		return -ENOMEM;

	priv->inode = inode;
	priv->mm = proc_mem_open(inode, PTRACE_MODE_READ);
	if (IS_ERR(priv->mm)) {
		int err = PTR_ERR(priv->mm);

		seq_release_private(inode, file);
		return err;
	}

	return 0;
}

static int proc_map_release(struct inode *inode, struct file *file)
{
	struct seq_file *seq = file->private_data;
	struct proc_maps_private *priv = seq->private;

	if (priv->mm)
		mmdrop(priv->mm);

	return seq_release_private(inode, file);
}

static int do_maps_open(struct inode *inode, struct file *file,
			const struct seq_operations *ops)
{
	return proc_maps_open(inode, file, ops,
				sizeof(struct proc_maps_private));
}

static pid_t pid_of_stack(struct proc_maps_private *priv,
				struct vm_area_struct *vma, bool is_pid)
{
	struct inode *inode = priv->inode;
	struct task_struct *task;
	pid_t ret = 0;

	rcu_read_lock();
	task = pid_task(proc_pid(inode), PIDTYPE_PID);
	if (task) {
		task = task_of_stack(task, vma, is_pid);
		if (task)
			ret = task_pid_nr_ns(task, inode->i_sb->s_fs_info);
	}
	rcu_read_unlock();

	return ret;
}

static void
show_map_vma(struct seq_file *m, struct vm_area_struct *vma, int is_pid)
{
	struct mm_struct *mm = vma->vm_mm;
	struct file *file = vma->vm_file;
	struct proc_maps_private *priv = m->private;
	vm_flags_t flags = vma->vm_flags;
	unsigned long ino = 0;
	unsigned long long pgoff = 0;
	unsigned long start, end;
	dev_t dev = 0;
	const char *name = NULL;

	if (file) {
		struct inode *inode = file_inode(vma->vm_file);
		dev = inode->i_sb->s_dev;
		ino = inode->i_ino;
		pgoff = ((loff_t)vma->vm_pgoff) << PAGE_SHIFT;
	}

	/* We don't show the stack guard page in /proc/maps */
	start = vma->vm_start;
	if (stack_guard_page_start(vma, start))
		start += PAGE_SIZE;
	end = vma->vm_end;
	if (stack_guard_page_end(vma, end))
		end -= PAGE_SIZE;

	seq_setwidth(m, 25 + sizeof(void *) * 6 - 1);
	seq_printf(m, "%08lx-%08lx %c%c%c%c %08llx %02x:%02x %lu ",
			start,
			end,
			flags & VM_READ ? 'r' : '-',
			flags & VM_WRITE ? 'w' : '-',
			flags & VM_EXEC ? 'x' : '-',
			flags & VM_MAYSHARE ? 's' : 'p',
			pgoff,
			MAJOR(dev), MINOR(dev), ino);

	/*
	 * Print the dentry name for named mappings, and a
	 * special [heap] marker for the heap:
	 */
	if (file) {
		seq_pad(m, ' ');
		seq_file_path(m, file, "\n");
		goto done;
	}

	if (vma->vm_ops && vma->vm_ops->name) {
		name = vma->vm_ops->name(vma);
		if (name)
			goto done;
	}

	name = arch_vma_name(vma);
	if (!name) {
		pid_t tid;

		if (!mm) {
			name = "[vdso]";
			goto done;
		}

		if (vma->vm_start <= mm->brk &&
		    vma->vm_end >= mm->start_brk) {
			name = "[heap]";
			goto done;
		}

		tid = pid_of_stack(priv, vma, is_pid);
		if (tid != 0) {
			/*
			 * Thread stack in /proc/PID/task/TID/maps or
			 * the main process stack.
			 */
			if (!is_pid || (vma->vm_start <= mm->start_stack &&
			    vma->vm_end >= mm->start_stack)) {
				name = "[stack]";
			} else {
				/* Thread stack in /proc/PID/maps */
				seq_pad(m, ' ');
				seq_printf(m, "[stack:%d]", tid);
			}
		}
	}

done:
	if (name) {
		seq_pad(m, ' ');
		seq_puts(m, name);
	}
	seq_putc(m, '\n');
}

static int show_map(struct seq_file *m, void *v, int is_pid)
{
	show_map_vma(m, v, is_pid);
	m_cache_vma(m, v);
	return 0;
}

static int show_pid_map(struct seq_file *m, void *v)
{
	return show_map(m, v, 1);
}

static int show_tid_map(struct seq_file *m, void *v)
{
	return show_map(m, v, 0);
}

static const struct seq_operations proc_pid_maps_op = {
	.start	= m_start,
	.next	= m_next,
	.stop	= m_stop,
	.show	= show_pid_map
};

static const struct seq_operations proc_tid_maps_op = {
	.start	= m_start,
	.next	= m_next,
	.stop	= m_stop,
	.show	= show_tid_map
};

static int pid_maps_open(struct inode *inode, struct file *file)
{
	return do_maps_open(inode, file, &proc_pid_maps_op);
}

static int tid_maps_open(struct inode *inode, struct file *file)
{
	return do_maps_open(inode, file, &proc_tid_maps_op);
}

const struct file_operations proc_pid_maps_operations = {
	.open		= pid_maps_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= proc_map_release,
};

const struct file_operations proc_tid_maps_operations = {
	.open		= tid_maps_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= proc_map_release,
};

/*
 * Proportional Set Size (PSS): my share of RSS.
 *
 * PSS of a process is the count of pages it has in memory, where each
 * page is divided by the number of processes sharing it.  So if a
 * process has 1000 pages all to itself, and 1000 shared with one other
 * process, its PSS will be 1500.
 *
 * To keep (accumulated) division errors low, we use a 64-bit
 * fixed-point pss counter, so (pss >> PSS_SHIFT) is the real byte count.
 *
 * A shift of 12 before division means (assuming 4K page size):
 * 	- 1M 3-user-pages add up to 8KB errors;
 * 	- supports mapcount up to 2^24, or 16M;
 * 	- supports PSS up to 2^52 bytes, or 4PB.
 */
#define PSS_SHIFT 12
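
/*
 * Worked example of the fixed-point accounting above (illustrative note
 * added for clarity, not from the original source): for one 4096-byte
 * page shared by three processes, each sharer accumulates
 *
 *	pss_delta = ((u64)4096 << PSS_SHIFT) / 3 = 16777216 / 3 = 5592405
 *
 * in units of 1/4096 byte, i.e. 1365.33 bytes.  Truncation in these
 * fine-grained units costs each page far less than a byte, and the
 * shift by PSS_SHIFT is applied only once to the final sum, which is
 * how the error bounds listed above are met.
 */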

#ifdef CONFIG_PROC_PAGE_MONITOR
struct mem_size_stats {
	unsigned long resident;
	unsigned long shared_clean;
	unsigned long shared_dirty;
	unsigned long private_clean;
	unsigned long private_dirty;
	unsigned long referenced;
	unsigned long anonymous;
	unsigned long anonymous_thp;
	unsigned long swap;
	unsigned long shared_hugetlb;
	unsigned long private_hugetlb;
	u64 pss;
	u64 swap_pss;
};

static void smaps_account(struct mem_size_stats *mss, struct page *page,
		unsigned long size, bool young, bool dirty)
{
	int mapcount;

	if (PageAnon(page))
		mss->anonymous += size;

	mss->resident += size;
	/* Accumulate the size in pages that have been accessed. */
	if (young || page_is_young(page) || PageReferenced(page))
		mss->referenced += size;
	mapcount = page_mapcount(page);
	if (mapcount >= 2) {
		u64 pss_delta;

		if (dirty || PageDirty(page))
			mss->shared_dirty += size;
		else
			mss->shared_clean += size;
		pss_delta = (u64)size << PSS_SHIFT;
		do_div(pss_delta, mapcount);
		mss->pss += pss_delta;
	} else {
		if (dirty || PageDirty(page))
			mss->private_dirty += size;
		else
			mss->private_clean += size;
		mss->pss += (u64)size << PSS_SHIFT;
	}
}

static void smaps_pte_entry(pte_t *pte, unsigned long addr,
		struct mm_walk *walk)
{
	struct mem_size_stats *mss = walk->private;
	struct vm_area_struct *vma = walk->vma;
	struct page *page = NULL;

	if (pte_present(*pte)) {
		page = vm_normal_page(vma, addr, *pte);
	} else if (is_swap_pte(*pte)) {
		swp_entry_t swpent = pte_to_swp_entry(*pte);

		if (!non_swap_entry(swpent)) {
			int mapcount;

			mss->swap += PAGE_SIZE;
			mapcount = swp_swapcount(swpent);
			if (mapcount >= 2) {
				u64 pss_delta = (u64)PAGE_SIZE << PSS_SHIFT;

				do_div(pss_delta, mapcount);
				mss->swap_pss += pss_delta;
			} else {
				mss->swap_pss += (u64)PAGE_SIZE << PSS_SHIFT;
			}
		} else if (is_migration_entry(swpent))
			page = migration_entry_to_page(swpent);
	}

	if (!page)
		return;
	smaps_account(mss, page, PAGE_SIZE, pte_young(*pte), pte_dirty(*pte));
}

#ifdef CONFIG_TRANSPARENT_HUGEPAGE
static void smaps_pmd_entry(pmd_t *pmd, unsigned long addr,
		struct mm_walk *walk)
{
	struct mem_size_stats *mss = walk->private;
	struct vm_area_struct *vma = walk->vma;
	struct page *page;

	/* FOLL_DUMP will return -EFAULT on huge zero page */
	page = follow_trans_huge_pmd(vma, addr, pmd, FOLL_DUMP);
	if (IS_ERR_OR_NULL(page))
		return;
	mss->anonymous_thp += HPAGE_PMD_SIZE;
	smaps_account(mss, page, HPAGE_PMD_SIZE,
			pmd_young(*pmd), pmd_dirty(*pmd));
}
#else
static void smaps_pmd_entry(pmd_t *pmd, unsigned long addr,
		struct mm_walk *walk)
{
}
#endif

static int smaps_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
			   struct mm_walk *walk)
{
	struct vm_area_struct *vma = walk->vma;
	pte_t *pte;
	spinlock_t *ptl;

	if (pmd_trans_huge_lock(pmd, vma, &ptl) == 1) {
		smaps_pmd_entry(pmd, addr, walk);
		spin_unlock(ptl);
		return 0;
	}

	if (pmd_trans_unstable(pmd))
		return 0;
	/*
	 * The mmap_sem held all the way back in m_start() is what
	 * keeps khugepaged out of here and from collapsing things
	 * in here.
	 */
	pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl);
	for (; addr != end; pte++, addr += PAGE_SIZE)
		smaps_pte_entry(pte, addr, walk);
	pte_unmap_unlock(pte - 1, ptl);
	cond_resched();
	return 0;
}

static void show_smap_vma_flags(struct seq_file *m, struct vm_area_struct *vma)
{
	/*
	 * Don't forget to update Documentation/ on changes.
	 */
	static const char mnemonics[BITS_PER_LONG][2] = {
		/*
		 * In case we meet a flag we don't know about.
		 */
		[0 ... (BITS_PER_LONG-1)] = "??",

		[ilog2(VM_READ)]	= "rd",
		[ilog2(VM_WRITE)]	= "wr",
		[ilog2(VM_EXEC)]	= "ex",
		[ilog2(VM_SHARED)]	= "sh",
		[ilog2(VM_MAYREAD)]	= "mr",
		[ilog2(VM_MAYWRITE)]	= "mw",
		[ilog2(VM_MAYEXEC)]	= "me",
		[ilog2(VM_MAYSHARE)]	= "ms",
		[ilog2(VM_GROWSDOWN)]	= "gd",
		[ilog2(VM_PFNMAP)]	= "pf",
		[ilog2(VM_DENYWRITE)]	= "dw",
#ifdef CONFIG_X86_INTEL_MPX
		[ilog2(VM_MPX)]		= "mp",
#endif
		[ilog2(VM_LOCKED)]	= "lo",
		[ilog2(VM_IO)]		= "io",
		[ilog2(VM_SEQ_READ)]	= "sr",
		[ilog2(VM_RAND_READ)]	= "rr",
		[ilog2(VM_DONTCOPY)]	= "dc",
		[ilog2(VM_DONTEXPAND)]	= "de",
		[ilog2(VM_ACCOUNT)]	= "ac",
		[ilog2(VM_NORESERVE)]	= "nr",
		[ilog2(VM_HUGETLB)]	= "ht",
		[ilog2(VM_ARCH_1)]	= "ar",
		[ilog2(VM_DONTDUMP)]	= "dd",
#ifdef CONFIG_MEM_SOFT_DIRTY
		[ilog2(VM_SOFTDIRTY)]	= "sd",
#endif
		[ilog2(VM_MIXEDMAP)]	= "mm",
		[ilog2(VM_HUGEPAGE)]	= "hg",
		[ilog2(VM_NOHUGEPAGE)]	= "nh",
		[ilog2(VM_MERGEABLE)]	= "mg",
		[ilog2(VM_UFFD_MISSING)]= "um",
		[ilog2(VM_UFFD_WP)]	= "uw",
	};
	size_t i;

	seq_puts(m, "VmFlags: ");
	for (i = 0; i < BITS_PER_LONG; i++) {
		if (vma->vm_flags & (1UL << i)) {
			seq_printf(m, "%c%c ",
				   mnemonics[i][0], mnemonics[i][1]);
		}
	}
	seq_putc(m, '\n');
}

#ifdef CONFIG_HUGETLB_PAGE
static int smaps_hugetlb_range(pte_t *pte, unsigned long hmask,
				 unsigned long addr, unsigned long end,
				 struct mm_walk *walk)
{
	struct mem_size_stats *mss = walk->private;
	struct vm_area_struct *vma = walk->vma;
	struct page *page = NULL;

	if (pte_present(*pte)) {
		page = vm_normal_page(vma, addr, *pte);
	} else if (is_swap_pte(*pte)) {
		swp_entry_t swpent = pte_to_swp_entry(*pte);

		if (is_migration_entry(swpent))
			page = migration_entry_to_page(swpent);
	}
	if (page) {
		int mapcount = page_mapcount(page);

		if (mapcount >= 2)
			mss->shared_hugetlb += huge_page_size(hstate_vma(vma));
		else
			mss->private_hugetlb += huge_page_size(hstate_vma(vma));
	}
	return 0;
}
#endif /* HUGETLB_PAGE */

static int show_smap(struct seq_file *m, void *v, int is_pid)
{
	struct vm_area_struct *vma = v;
	struct mem_size_stats mss;
	struct mm_walk smaps_walk = {
		.pmd_entry = smaps_pte_range,
#ifdef CONFIG_HUGETLB_PAGE
		.hugetlb_entry = smaps_hugetlb_range,
#endif
		.mm = vma->vm_mm,
		.private = &mss,
	};

	memset(&mss, 0, sizeof mss);
	/* mmap_sem is held in m_start */
	walk_page_vma(vma, &smaps_walk);

	show_map_vma(m, vma, is_pid);

	seq_printf(m,
		   "Size:           %8lu kB\n"
		   "Rss:            %8lu kB\n"
		   "Pss:            %8lu kB\n"
		   "Shared_Clean:   %8lu kB\n"
		   "Shared_Dirty:   %8lu kB\n"
		   "Private_Clean:  %8lu kB\n"
		   "Private_Dirty:  %8lu kB\n"
		   "Referenced:     %8lu kB\n"
		   "Anonymous:      %8lu kB\n"
		   "AnonHugePages:  %8lu kB\n"
		   "Shared_Hugetlb: %8lu kB\n"
		   "Private_Hugetlb: %7lu kB\n"
		   "Swap:           %8lu kB\n"
		   "SwapPss:        %8lu kB\n"
		   "KernelPageSize: %8lu kB\n"
		   "MMUPageSize:    %8lu kB\n"
		   "Locked:         %8lu kB\n",
		   (vma->vm_end - vma->vm_start) >> 10,
		   mss.resident >> 10,
		   (unsigned long)(mss.pss >> (10 + PSS_SHIFT)),
		   mss.shared_clean >> 10,
		   mss.shared_dirty >> 10,
		   mss.private_clean >> 10,
		   mss.private_dirty >> 10,
		   mss.referenced >> 10,
		   mss.anonymous >> 10,
		   mss.anonymous_thp >> 10,
		   mss.shared_hugetlb >> 10,
		   mss.private_hugetlb >> 10,
		   mss.swap >> 10,
		   (unsigned long)(mss.swap_pss >> (10 + PSS_SHIFT)),
		   vma_kernel_pagesize(vma) >> 10,
		   vma_mmu_pagesize(vma) >> 10,
		   (vma->vm_flags & VM_LOCKED) ?
			(unsigned long)(mss.pss >> (10 + PSS_SHIFT)) : 0);

	show_smap_vma_flags(m, vma);
	m_cache_vma(m, vma);
	return 0;
}

static int show_pid_smap(struct seq_file *m, void *v)
{
	return show_smap(m, v, 1);
}

static int show_tid_smap(struct seq_file *m, void *v)
{
	return show_smap(m, v, 0);
}

static const struct seq_operations proc_pid_smaps_op = {
	.start	= m_start,
	.next	= m_next,
	.stop	= m_stop,
	.show	= show_pid_smap
};

static const struct seq_operations proc_tid_smaps_op = {
	.start	= m_start,
	.next	= m_next,
	.stop	= m_stop,
	.show	= show_tid_smap
};

static int pid_smaps_open(struct inode *inode, struct file *file)
{
	return do_maps_open(inode, file, &proc_pid_smaps_op);
}

static int tid_smaps_open(struct inode *inode, struct file *file)
{
	return do_maps_open(inode, file, &proc_tid_smaps_op);
}

const struct file_operations proc_pid_smaps_operations = {
	.open		= pid_smaps_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= proc_map_release,
};

const struct file_operations proc_tid_smaps_operations = {
	.open		= tid_smaps_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= proc_map_release,
};

enum clear_refs_types {
	CLEAR_REFS_ALL = 1,
	CLEAR_REFS_ANON,
	CLEAR_REFS_MAPPED,
	CLEAR_REFS_SOFT_DIRTY,
	CLEAR_REFS_MM_HIWATER_RSS,
	CLEAR_REFS_LAST,
};

struct clear_refs_private {
	enum clear_refs_types type;
};

#ifdef CONFIG_MEM_SOFT_DIRTY
static inline void clear_soft_dirty(struct vm_area_struct *vma,
		unsigned long addr, pte_t *pte)
{
	/*
	 * The soft-dirty tracker uses #PF-s to catch writes
	 * to pages, so write-protect the pte as well. See the
	 * Documentation/vm/soft-dirty.txt for full description
	 * of how soft-dirty works.
	 */
	pte_t ptent = *pte;

	if (pte_present(ptent)) {
		ptent = ptep_modify_prot_start(vma->vm_mm, addr, pte);
		ptent = pte_wrprotect(ptent);
		ptent = pte_clear_soft_dirty(ptent);
		ptep_modify_prot_commit(vma->vm_mm, addr, pte, ptent);
	} else if (is_swap_pte(ptent)) {
		ptent = pte_swp_clear_soft_dirty(ptent);
		set_pte_at(vma->vm_mm, addr, pte, ptent);
	}
}
#else
static inline void clear_soft_dirty(struct vm_area_struct *vma,
		unsigned long addr, pte_t *pte)
{
}
#endif

#if defined(CONFIG_MEM_SOFT_DIRTY) && defined(CONFIG_TRANSPARENT_HUGEPAGE)
static inline void clear_soft_dirty_pmd(struct vm_area_struct *vma,
		unsigned long addr, pmd_t *pmdp)
{
	pmd_t pmd = pmdp_huge_get_and_clear(vma->vm_mm, addr, pmdp);

	pmd = pmd_wrprotect(pmd);
	pmd = pmd_clear_soft_dirty(pmd);

	if (vma->vm_flags & VM_SOFTDIRTY)
		vma->vm_flags &= ~VM_SOFTDIRTY;

	set_pmd_at(vma->vm_mm, addr, pmdp, pmd);
}
#else
static inline void clear_soft_dirty_pmd(struct vm_area_struct *vma,
		unsigned long addr, pmd_t *pmdp)
{
}
#endif

static int clear_refs_pte_range(pmd_t *pmd, unsigned long addr,
				unsigned long end, struct mm_walk *walk)
{
	struct clear_refs_private *cp = walk->private;
	struct vm_area_struct *vma = walk->vma;
	pte_t *pte, ptent;
	spinlock_t *ptl;
	struct page *page;

	if (pmd_trans_huge_lock(pmd, vma, &ptl) == 1) {
		if (cp->type == CLEAR_REFS_SOFT_DIRTY) {
			clear_soft_dirty_pmd(vma, addr, pmd);
			goto out;
		}

		page = pmd_page(*pmd);

		/* Clear accessed and referenced bits. */
		pmdp_test_and_clear_young(vma, addr, pmd);
		test_and_clear_page_young(page);
		ClearPageReferenced(page);
out:
		spin_unlock(ptl);
		return 0;
	}

	if (pmd_trans_unstable(pmd))
		return 0;

	pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl);
	for (; addr != end; pte++, addr += PAGE_SIZE) {
		ptent = *pte;

		if (cp->type == CLEAR_REFS_SOFT_DIRTY) {
			clear_soft_dirty(vma, addr, pte);
			continue;
		}

		if (!pte_present(ptent))
			continue;

		page = vm_normal_page(vma, addr, ptent);
		if (!page)
			continue;

		/* Clear accessed and referenced bits. */
		ptep_test_and_clear_young(vma, addr, pte);
		test_and_clear_page_young(page);
		ClearPageReferenced(page);
	}
	pte_unmap_unlock(pte - 1, ptl);
	cond_resched();
	return 0;
}

static int clear_refs_test_walk(unsigned long start, unsigned long end,
				struct mm_walk *walk)
{
	struct clear_refs_private *cp = walk->private;
	struct vm_area_struct *vma = walk->vma;

	if (vma->vm_flags & VM_PFNMAP)
		return 1;

	/*
	 * Writing 1 to /proc/pid/clear_refs affects all pages.
	 * Writing 2 to /proc/pid/clear_refs only affects anonymous pages.
	 * Writing 3 to /proc/pid/clear_refs only affects file mapped pages.
	 * Writing 4 to /proc/pid/clear_refs affects all pages.
	 */
	if (cp->type == CLEAR_REFS_ANON && vma->vm_file)
		return 1;
	if (cp->type == CLEAR_REFS_MAPPED && !vma->vm_file)
		return 1;
	return 0;
}

static ssize_t clear_refs_write(struct file *file, const char __user *buf,
				size_t count, loff_t *ppos)
{
	struct task_struct *task;
	char buffer[PROC_NUMBUF];
	struct mm_struct *mm;
	struct vm_area_struct *vma;
	enum clear_refs_types type;
	int itype;
	int rv;

	memset(buffer, 0, sizeof(buffer));
	if (count > sizeof(buffer) - 1)
		count = sizeof(buffer) - 1;
	if (copy_from_user(buffer, buf, count))
		return -EFAULT;
	rv = kstrtoint(strstrip(buffer), 10, &itype);
	if (rv < 0)
		return rv;
	type = (enum clear_refs_types)itype;
	if (type < CLEAR_REFS_ALL || type >= CLEAR_REFS_LAST)
		return -EINVAL;

	task = get_proc_task(file_inode(file));
	if (!task)
		return -ESRCH;
	mm = get_task_mm(task);
	if (mm) {
		struct clear_refs_private cp = {
			.type = type,
		};
		struct mm_walk clear_refs_walk = {
			.pmd_entry = clear_refs_pte_range,
			.test_walk = clear_refs_test_walk,
			.mm = mm,
			.private = &cp,
		};

		if (type == CLEAR_REFS_MM_HIWATER_RSS) {
			/*
			 * Writing 5 to /proc/pid/clear_refs resets the peak
			 * resident set size to this mm's current rss value.
			 */
			down_write(&mm->mmap_sem);
			reset_mm_hiwater_rss(mm);
			up_write(&mm->mmap_sem);
			goto out_mm;
		}

		down_read(&mm->mmap_sem);
		if (type == CLEAR_REFS_SOFT_DIRTY) {
			for (vma = mm->mmap; vma; vma = vma->vm_next) {
				if (!(vma->vm_flags & VM_SOFTDIRTY))
					continue;
				up_read(&mm->mmap_sem);
				down_write(&mm->mmap_sem);
				for (vma = mm->mmap; vma; vma = vma->vm_next) {
					vma->vm_flags &= ~VM_SOFTDIRTY;
					vma_set_page_prot(vma);
				}
				downgrade_write(&mm->mmap_sem);
				break;
			}
			mmu_notifier_invalidate_range_start(mm, 0, -1);
		}
		walk_page_range(0, ~0UL, &clear_refs_walk);
		if (type == CLEAR_REFS_SOFT_DIRTY)
			mmu_notifier_invalidate_range_end(mm, 0, -1);
		flush_tlb_mm(mm);
		up_read(&mm->mmap_sem);
out_mm:
		mmput(mm);
	}
	put_task_struct(task);

	return count;
}

const struct file_operations proc_clear_refs_operations = {
	.write		= clear_refs_write,
	.llseek		= noop_llseek,
};
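
/*
 * Illustrative userspace sketch of driving this interface (an example
 * added for clarity, not part of the original file; the helper name is
 * made up): write one of the clear_refs_types values as text, e.g. "4"
 * to start a soft-dirty measurement pass or "5" to reset the peak RSS.
 *
 *	#include <stdio.h>
 *	#include <sys/types.h>
 *
 *	static int write_clear_refs(pid_t pid, int value)
 *	{
 *		char path[64];
 *		FILE *f;
 *
 *		snprintf(path, sizeof(path), "/proc/%d/clear_refs", (int)pid);
 *		f = fopen(path, "w");
 *		if (!f)
 *			return -1;
 *		fprintf(f, "%d\n", value);	// 1..5, see clear_refs_types
 *		return fclose(f);
 *	}
 *
 * After write_clear_refs(pid, 4), pages the task writes to show up
 * again as soft-dirty in /proc/<pid>/pagemap (bit 55).
 */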

typedef struct {
	u64 pme;
} pagemap_entry_t;

struct pagemapread {
	int pos, len;		/* units: PM_ENTRY_BYTES, not bytes */
	pagemap_entry_t *buffer;
	bool show_pfn;
};

#define PAGEMAP_WALK_SIZE	(PMD_SIZE)
#define PAGEMAP_WALK_MASK	(PMD_MASK)

#define PM_ENTRY_BYTES		sizeof(pagemap_entry_t)
#define PM_PFRAME_BITS		55
#define PM_PFRAME_MASK		GENMASK_ULL(PM_PFRAME_BITS - 1, 0)
#define PM_SOFT_DIRTY		BIT_ULL(55)
#define PM_MMAP_EXCLUSIVE	BIT_ULL(56)
#define PM_FILE			BIT_ULL(61)
#define PM_SWAP			BIT_ULL(62)
#define PM_PRESENT		BIT_ULL(63)

#define PM_END_OF_BUFFER	1

static inline pagemap_entry_t make_pme(u64 frame, u64 flags)
{
	return (pagemap_entry_t) { .pme = (frame & PM_PFRAME_MASK) | flags };
}

static int add_to_pagemap(unsigned long addr, pagemap_entry_t *pme,
			  struct pagemapread *pm)
{
	pm->buffer[pm->pos++] = *pme;
	if (pm->pos >= pm->len)
		return PM_END_OF_BUFFER;
	return 0;
}

static int pagemap_pte_hole(unsigned long start, unsigned long end,
				struct mm_walk *walk)
{
	struct pagemapread *pm = walk->private;
	unsigned long addr = start;
	int err = 0;

	while (addr < end) {
		struct vm_area_struct *vma = find_vma(walk->mm, addr);
		pagemap_entry_t pme = make_pme(0, 0);
		/* End of address space hole, which we mark as non-present. */
		unsigned long hole_end;

		if (vma)
			hole_end = min(end, vma->vm_start);
		else
			hole_end = end;

		for (; addr < hole_end; addr += PAGE_SIZE) {
			err = add_to_pagemap(addr, &pme, pm);
			if (err)
				goto out;
		}

		if (!vma)
			break;

		/* Addresses in the VMA. */
		if (vma->vm_flags & VM_SOFTDIRTY)
			pme = make_pme(0, PM_SOFT_DIRTY);
		for (; addr < min(end, vma->vm_end); addr += PAGE_SIZE) {
			err = add_to_pagemap(addr, &pme, pm);
			if (err)
				goto out;
		}
	}
out:
	return err;
}

static pagemap_entry_t pte_to_pagemap_entry(struct pagemapread *pm,
		struct vm_area_struct *vma, unsigned long addr, pte_t pte)
{
	u64 frame = 0, flags = 0;
	struct page *page = NULL;

	if (pte_present(pte)) {
		if (pm->show_pfn)
			frame = pte_pfn(pte);
		flags |= PM_PRESENT;
		page = vm_normal_page(vma, addr, pte);
		if (pte_soft_dirty(pte))
			flags |= PM_SOFT_DIRTY;
	} else if (is_swap_pte(pte)) {
		swp_entry_t entry;
		if (pte_swp_soft_dirty(pte))
			flags |= PM_SOFT_DIRTY;
		entry = pte_to_swp_entry(pte);
		frame = swp_type(entry) |
			(swp_offset(entry) << MAX_SWAPFILES_SHIFT);
		flags |= PM_SWAP;
		if (is_migration_entry(entry))
			page = migration_entry_to_page(entry);
	}

	if (page && !PageAnon(page))
		flags |= PM_FILE;
	if (page && page_mapcount(page) == 1)
		flags |= PM_MMAP_EXCLUSIVE;
	if (vma->vm_flags & VM_SOFTDIRTY)
		flags |= PM_SOFT_DIRTY;

	return make_pme(frame, flags);
}

static int pagemap_pmd_range(pmd_t *pmdp, unsigned long addr, unsigned long end,
			     struct mm_walk *walk)
{
	struct vm_area_struct *vma = walk->vma;
	struct pagemapread *pm = walk->private;
	spinlock_t *ptl;
	pte_t *pte, *orig_pte;
	int err = 0;

#ifdef CONFIG_TRANSPARENT_HUGEPAGE
	if (pmd_trans_huge_lock(pmdp, vma, &ptl) == 1) {
		u64 flags = 0, frame = 0;
		pmd_t pmd = *pmdp;

		if ((vma->vm_flags & VM_SOFTDIRTY) || pmd_soft_dirty(pmd))
			flags |= PM_SOFT_DIRTY;

		/*
		 * Currently pmd for thp is always present because thp
		 * can not be swapped-out, migrated, or HWPOISONed
		 * (split in such cases instead.)
		 * This if-check is just to prepare for future implementation.
		 */
		if (pmd_present(pmd)) {
			struct page *page = pmd_page(pmd);

			if (page_mapcount(page) == 1)
				flags |= PM_MMAP_EXCLUSIVE;

			flags |= PM_PRESENT;
			if (pm->show_pfn)
				frame = pmd_pfn(pmd) +
					((addr & ~PMD_MASK) >> PAGE_SHIFT);
		}

		for (; addr != end; addr += PAGE_SIZE) {
			pagemap_entry_t pme = make_pme(frame, flags);

			err = add_to_pagemap(addr, &pme, pm);
			if (err)
				break;
			if (pm->show_pfn && (flags & PM_PRESENT))
				frame++;
		}
		spin_unlock(ptl);
		return err;
	}

	if (pmd_trans_unstable(pmdp))
		return 0;
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */

	/*
	 * We can assume that @vma always points to a valid one and @end never
	 * goes beyond vma->vm_end.
	 */
	orig_pte = pte = pte_offset_map_lock(walk->mm, pmdp, addr, &ptl);
	for (; addr < end; pte++, addr += PAGE_SIZE) {
		pagemap_entry_t pme;

		pme = pte_to_pagemap_entry(pm, vma, addr, *pte);
		err = add_to_pagemap(addr, &pme, pm);
		if (err)
			break;
	}
	pte_unmap_unlock(orig_pte, ptl);

	cond_resched();

	return err;
}

#ifdef CONFIG_HUGETLB_PAGE
/* This function walks within one hugetlb entry in the single call */
static int pagemap_hugetlb_range(pte_t *ptep, unsigned long hmask,
				 unsigned long addr, unsigned long end,
				 struct mm_walk *walk)
{
	struct pagemapread *pm = walk->private;
	struct vm_area_struct *vma = walk->vma;
	u64 flags = 0, frame = 0;
	int err = 0;
	pte_t pte;

	if (vma->vm_flags & VM_SOFTDIRTY)
		flags |= PM_SOFT_DIRTY;

	pte = huge_ptep_get(ptep);
	if (pte_present(pte)) {
		struct page *page = pte_page(pte);

		if (!PageAnon(page))
			flags |= PM_FILE;

		if (page_mapcount(page) == 1)
			flags |= PM_MMAP_EXCLUSIVE;

		flags |= PM_PRESENT;
		if (pm->show_pfn)
			frame = pte_pfn(pte) +
				((addr & ~hmask) >> PAGE_SHIFT);
	}

	for (; addr != end; addr += PAGE_SIZE) {
		pagemap_entry_t pme = make_pme(frame, flags);

		err = add_to_pagemap(addr, &pme, pm);
		if (err)
			return err;
		if (pm->show_pfn && (flags & PM_PRESENT))
			frame++;
	}

	cond_resched();

	return err;
}
#endif /* HUGETLB_PAGE */

/*
 * /proc/pid/pagemap - an array mapping virtual pages to pfns
 *
 * For each page in the address space, this file contains one 64-bit entry
 * consisting of the following:
 *
 * Bits 0-54  page frame number (PFN) if present
 * Bits 0-4   swap type if swapped
 * Bits 5-54  swap offset if swapped
 * Bit  55    pte is soft-dirty (see Documentation/vm/soft-dirty.txt)
 * Bit  56    page exclusively mapped
 * Bits 57-60 zero
 * Bit  61    page is file-page or shared-anon
 * Bit  62    page swapped
 * Bit  63    page present
 *
 * If the page is not present but in swap, then the PFN contains an
 * encoding of the swap file number and the page's offset into the
 * swap. Unmapped pages return a null PFN. This allows determining
 * precisely which pages are mapped (or in swap) and comparing mapped
 * pages between processes.
 *
 * Efficient users of this interface will use /proc/pid/maps to
 * determine which areas of memory are actually mapped and llseek to
 * skip over unmapped regions.
 */
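
/*
 * Illustrative userspace sketch of decoding one entry (an example added
 * for clarity, not part of the original file; the helper name is made
 * up): seek to the 8-byte slot for a virtual address and pull the bits
 * documented above apart.
 *
 *	#include <fcntl.h>
 *	#include <stdint.h>
 *	#include <stdio.h>
 *	#include <sys/types.h>
 *	#include <unistd.h>
 *
 *	static int dump_pagemap_entry(uintptr_t vaddr)
 *	{
 *		long pagesz = sysconf(_SC_PAGESIZE);
 *		off_t slot = (off_t)(vaddr / pagesz) * 8;
 *		int fd = open("/proc/self/pagemap", O_RDONLY);
 *		uint64_t ent;
 *
 *		if (fd < 0)
 *			return -1;
 *		if (pread(fd, &ent, sizeof(ent), slot) != sizeof(ent)) {
 *			close(fd);
 *			return -1;
 *		}
 *		close(fd);
 *		printf("present=%d swap=%d soft-dirty=%d exclusive=%d pfn=%llx\n",
 *		       (int)(ent >> 63 & 1), (int)(ent >> 62 & 1),
 *		       (int)(ent >> 55 & 1), (int)(ent >> 56 & 1),
 *		       (unsigned long long)(ent & ((1ULL << 55) - 1)));
 *		return 0;
 *	}
 *
 * The PFN field reads back as zero unless the caller has CAP_SYS_ADMIN,
 * matching the pm->show_pfn check in pagemap_read() below.
 */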
1249 */ 1250 static ssize_t pagemap_read(struct file *file, char __user *buf, 1251 size_t count, loff_t *ppos) 1252 { 1253 struct mm_struct *mm = file->private_data; 1254 struct pagemapread pm; 1255 struct mm_walk pagemap_walk = {}; 1256 unsigned long src; 1257 unsigned long svpfn; 1258 unsigned long start_vaddr; 1259 unsigned long end_vaddr; 1260 int ret = 0, copied = 0; 1261 1262 if (!mm || !atomic_inc_not_zero(&mm->mm_users)) 1263 goto out; 1264 1265 ret = -EINVAL; 1266 /* file position must be aligned */ 1267 if ((*ppos % PM_ENTRY_BYTES) || (count % PM_ENTRY_BYTES)) 1268 goto out_mm; 1269 1270 ret = 0; 1271 if (!count) 1272 goto out_mm; 1273 1274 /* do not disclose physical addresses: attack vector */ 1275 pm.show_pfn = file_ns_capable(file, &init_user_ns, CAP_SYS_ADMIN); 1276 1277 pm.len = (PAGEMAP_WALK_SIZE >> PAGE_SHIFT); 1278 pm.buffer = kmalloc(pm.len * PM_ENTRY_BYTES, GFP_TEMPORARY); 1279 ret = -ENOMEM; 1280 if (!pm.buffer) 1281 goto out_mm; 1282 1283 pagemap_walk.pmd_entry = pagemap_pmd_range; 1284 pagemap_walk.pte_hole = pagemap_pte_hole; 1285 #ifdef CONFIG_HUGETLB_PAGE 1286 pagemap_walk.hugetlb_entry = pagemap_hugetlb_range; 1287 #endif 1288 pagemap_walk.mm = mm; 1289 pagemap_walk.private = ± 1290 1291 src = *ppos; 1292 svpfn = src / PM_ENTRY_BYTES; 1293 start_vaddr = svpfn << PAGE_SHIFT; 1294 end_vaddr = mm->task_size; 1295 1296 /* watch out for wraparound */ 1297 if (svpfn > mm->task_size >> PAGE_SHIFT) 1298 start_vaddr = end_vaddr; 1299 1300 /* 1301 * The odds are that this will stop walking way 1302 * before end_vaddr, because the length of the 1303 * user buffer is tracked in "pm", and the walk 1304 * will stop when we hit the end of the buffer. 1305 */ 1306 ret = 0; 1307 while (count && (start_vaddr < end_vaddr)) { 1308 int len; 1309 unsigned long end; 1310 1311 pm.pos = 0; 1312 end = (start_vaddr + PAGEMAP_WALK_SIZE) & PAGEMAP_WALK_MASK; 1313 /* overflow ? 
		if (end < start_vaddr || end > end_vaddr)
			end = end_vaddr;
		down_read(&mm->mmap_sem);
		ret = walk_page_range(start_vaddr, end, &pagemap_walk);
		up_read(&mm->mmap_sem);
		start_vaddr = end;

		len = min(count, PM_ENTRY_BYTES * pm.pos);
		if (copy_to_user(buf, pm.buffer, len)) {
			ret = -EFAULT;
			goto out_free;
		}
		copied += len;
		buf += len;
		count -= len;
	}
	*ppos += copied;
	if (!ret || ret == PM_END_OF_BUFFER)
		ret = copied;

out_free:
	kfree(pm.buffer);
out_mm:
	mmput(mm);
out:
	return ret;
}

static int pagemap_open(struct inode *inode, struct file *file)
{
	struct mm_struct *mm;

	mm = proc_mem_open(inode, PTRACE_MODE_READ);
	if (IS_ERR(mm))
		return PTR_ERR(mm);
	file->private_data = mm;
	return 0;
}

static int pagemap_release(struct inode *inode, struct file *file)
{
	struct mm_struct *mm = file->private_data;

	if (mm)
		mmdrop(mm);
	return 0;
}

const struct file_operations proc_pagemap_operations = {
	.llseek		= mem_lseek, /* borrow this */
	.read		= pagemap_read,
	.open		= pagemap_open,
	.release	= pagemap_release,
};
#endif /* CONFIG_PROC_PAGE_MONITOR */

#ifdef CONFIG_NUMA

struct numa_maps {
	unsigned long pages;
	unsigned long anon;
	unsigned long active;
	unsigned long writeback;
	unsigned long mapcount_max;
	unsigned long dirty;
	unsigned long swapcache;
	unsigned long node[MAX_NUMNODES];
};

struct numa_maps_private {
	struct proc_maps_private proc_maps;
	struct numa_maps md;
};

static void gather_stats(struct page *page, struct numa_maps *md, int pte_dirty,
			unsigned long nr_pages)
{
	int count = page_mapcount(page);

	md->pages += nr_pages;
	if (pte_dirty || PageDirty(page))
		md->dirty += nr_pages;

	if (PageSwapCache(page))
		md->swapcache += nr_pages;

	if (PageActive(page) || PageUnevictable(page))
		md->active += nr_pages;

	if (PageWriteback(page))
		md->writeback += nr_pages;

	if (PageAnon(page))
		md->anon += nr_pages;

	if (count > md->mapcount_max)
		md->mapcount_max = count;

	md->node[page_to_nid(page)] += nr_pages;
}

static struct page *can_gather_numa_stats(pte_t pte, struct vm_area_struct *vma,
		unsigned long addr)
{
	struct page *page;
	int nid;

	if (!pte_present(pte))
		return NULL;

	page = vm_normal_page(vma, addr, pte);
	if (!page)
		return NULL;

	if (PageReserved(page))
		return NULL;

	nid = page_to_nid(page);
	if (!node_isset(nid, node_states[N_MEMORY]))
		return NULL;

	return page;
}

static int gather_pte_stats(pmd_t *pmd, unsigned long addr,
		unsigned long end, struct mm_walk *walk)
{
	struct numa_maps *md = walk->private;
	struct vm_area_struct *vma = walk->vma;
	spinlock_t *ptl;
	pte_t *orig_pte;
	pte_t *pte;

	if (pmd_trans_huge_lock(pmd, vma, &ptl) == 1) {
		pte_t huge_pte = *(pte_t *)pmd;
		struct page *page;

		page = can_gather_numa_stats(huge_pte, vma, addr);
		if (page)
			gather_stats(page, md, pte_dirty(huge_pte),
				     HPAGE_PMD_SIZE/PAGE_SIZE);
		spin_unlock(ptl);
		return 0;
	}

	if (pmd_trans_unstable(pmd))
		return 0;
	orig_pte = pte = pte_offset_map_lock(walk->mm, pmd, addr, &ptl);
	do {
		struct page *page = can_gather_numa_stats(*pte, vma, addr);
		if (!page)
			continue;
		gather_stats(page, md, pte_dirty(*pte), 1);

	} while (pte++, addr += PAGE_SIZE, addr != end);
	pte_unmap_unlock(orig_pte, ptl);
	return 0;
}
#ifdef CONFIG_HUGETLB_PAGE
static int gather_hugetlb_stats(pte_t *pte, unsigned long hmask,
		unsigned long addr, unsigned long end, struct mm_walk *walk)
{
	struct numa_maps *md;
	struct page *page;

	if (!pte_present(*pte))
		return 0;

	page = pte_page(*pte);
	if (!page)
		return 0;

	md = walk->private;
	gather_stats(page, md, pte_dirty(*pte), 1);
	return 0;
}

#else
static int gather_hugetlb_stats(pte_t *pte, unsigned long hmask,
		unsigned long addr, unsigned long end, struct mm_walk *walk)
{
	return 0;
}
#endif

/*
 * Display pages allocated per node and memory policy via /proc.
 */
static int show_numa_map(struct seq_file *m, void *v, int is_pid)
{
	struct numa_maps_private *numa_priv = m->private;
	struct proc_maps_private *proc_priv = &numa_priv->proc_maps;
	struct vm_area_struct *vma = v;
	struct numa_maps *md = &numa_priv->md;
	struct file *file = vma->vm_file;
	struct mm_struct *mm = vma->vm_mm;
	struct mm_walk walk = {
		.hugetlb_entry = gather_hugetlb_stats,
		.pmd_entry = gather_pte_stats,
		.private = md,
		.mm = mm,
	};
	struct mempolicy *pol;
	char buffer[64];
	int nid;

	if (!mm)
		return 0;

	/* Ensure we start with an empty set of numa_maps statistics. */
	memset(md, 0, sizeof(*md));

	pol = __get_vma_policy(vma, vma->vm_start);
	if (pol) {
		mpol_to_str(buffer, sizeof(buffer), pol);
		mpol_cond_put(pol);
	} else {
		mpol_to_str(buffer, sizeof(buffer), proc_priv->task_mempolicy);
	}

	seq_printf(m, "%08lx %s", vma->vm_start, buffer);

	if (file) {
		seq_puts(m, " file=");
		seq_file_path(m, file, "\n\t= ");
	} else if (vma->vm_start <= mm->brk && vma->vm_end >= mm->start_brk) {
		seq_puts(m, " heap");
	} else {
		pid_t tid = pid_of_stack(proc_priv, vma, is_pid);
		if (tid != 0) {
			/*
			 * Thread stack in /proc/PID/task/TID/maps or
			 * the main process stack.
			 */
			if (!is_pid || (vma->vm_start <= mm->start_stack &&
			    vma->vm_end >= mm->start_stack))
				seq_puts(m, " stack");
			else
				seq_printf(m, " stack:%d", tid);
		}
	}

	if (is_vm_hugetlb_page(vma))
		seq_puts(m, " huge");

	/* mmap_sem is held by m_start */
	walk_page_vma(vma, &walk);

	if (!md->pages)
		goto out;

	if (md->anon)
		seq_printf(m, " anon=%lu", md->anon);

	if (md->dirty)
		seq_printf(m, " dirty=%lu", md->dirty);

	if (md->pages != md->anon && md->pages != md->dirty)
		seq_printf(m, " mapped=%lu", md->pages);

	if (md->mapcount_max > 1)
		seq_printf(m, " mapmax=%lu", md->mapcount_max);

	if (md->swapcache)
		seq_printf(m, " swapcache=%lu", md->swapcache);

	if (md->active < md->pages && !is_vm_hugetlb_page(vma))
		seq_printf(m, " active=%lu", md->active);

	if (md->writeback)
		seq_printf(m, " writeback=%lu", md->writeback);

	for_each_node_state(nid, N_MEMORY)
		if (md->node[nid])
			seq_printf(m, " N%d=%lu", nid, md->node[nid]);

	seq_printf(m, " kernelpagesize_kB=%lu", vma_kernel_pagesize(vma) >> 10);
out:
	seq_putc(m, '\n');
	m_cache_vma(m, vma);
	return 0;
}

static int show_pid_numa_map(struct seq_file *m, void *v)
{
	return show_numa_map(m, v, 1);
}

static int show_tid_numa_map(struct seq_file *m, void *v)
{
	return show_numa_map(m, v, 0);
}

static const struct seq_operations proc_pid_numa_maps_op = {
	.start	= m_start,
	.next	= m_next,
	.stop	= m_stop,
	.show	= show_pid_numa_map,
};

static const struct seq_operations proc_tid_numa_maps_op = {
	.start	= m_start,
	.next	= m_next,
	.stop	= m_stop,
	.show	= show_tid_numa_map,
};

static int numa_maps_open(struct inode *inode, struct file *file,
			  const struct seq_operations *ops)
{
	return proc_maps_open(inode, file, ops,
				sizeof(struct numa_maps_private));
}

static int pid_numa_maps_open(struct inode *inode, struct file *file)
{
	return numa_maps_open(inode, file, &proc_pid_numa_maps_op);
}

static int tid_numa_maps_open(struct inode *inode, struct file *file)
{
	return numa_maps_open(inode, file, &proc_tid_numa_maps_op);
}

const struct file_operations proc_pid_numa_maps_operations = {
	.open		= pid_numa_maps_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= proc_map_release,
};

const struct file_operations proc_tid_numa_maps_operations = {
	.open		= tid_numa_maps_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= proc_map_release,
};
#endif /* CONFIG_NUMA */