// SPDX-License-Identifier: GPL-2.0
#include <linux/mm.h>
#include <linux/vmacache.h>
#include <linux/hugetlb.h>
#include <linux/huge_mm.h>
#include <linux/mount.h>
#include <linux/seq_file.h>
#include <linux/highmem.h>
#include <linux/ptrace.h>
#include <linux/slab.h>
#include <linux/pagemap.h>
#include <linux/mempolicy.h>
#include <linux/rmap.h>
#include <linux/swap.h>
#include <linux/sched/mm.h>
#include <linux/swapops.h>
#include <linux/mmu_notifier.h>
#include <linux/page_idle.h>
#include <linux/shmem_fs.h>
#include <linux/uaccess.h>

#include <asm/elf.h>
#include <asm/tlb.h>
#include <asm/tlbflush.h>
#include "internal.h"

void task_mem(struct seq_file *m, struct mm_struct *mm)
{
	unsigned long text, lib, swap, anon, file, shmem;
	unsigned long hiwater_vm, total_vm, hiwater_rss, total_rss;

	anon = get_mm_counter(mm, MM_ANONPAGES);
	file = get_mm_counter(mm, MM_FILEPAGES);
	shmem = get_mm_counter(mm, MM_SHMEMPAGES);

	/*
	 * Note: to minimize their overhead, mm maintains hiwater_vm and
	 * hiwater_rss only when about to *lower* total_vm or rss.  Any
	 * collector of these hiwater stats must therefore get total_vm
	 * and rss too, which will usually be the higher.  Barriers? not
	 * worth the effort, such snapshots can always be inconsistent.
	 */
	hiwater_vm = total_vm = mm->total_vm;
	if (hiwater_vm < mm->hiwater_vm)
		hiwater_vm = mm->hiwater_vm;
	hiwater_rss = total_rss = anon + file + shmem;
	if (hiwater_rss < mm->hiwater_rss)
		hiwater_rss = mm->hiwater_rss;

	/* split executable areas between text and lib */
	text = PAGE_ALIGN(mm->end_code) - (mm->start_code & PAGE_MASK);
	text = min(text, mm->exec_vm << PAGE_SHIFT);
	lib = (mm->exec_vm << PAGE_SHIFT) - text;

	swap = get_mm_counter(mm, MM_SWAPENTS);
	seq_printf(m,
		"VmPeak:\t%8lu kB\n"
		"VmSize:\t%8lu kB\n"
		"VmLck:\t%8lu kB\n"
		"VmPin:\t%8lu kB\n"
		"VmHWM:\t%8lu kB\n"
		"VmRSS:\t%8lu kB\n"
		"RssAnon:\t%8lu kB\n"
		"RssFile:\t%8lu kB\n"
		"RssShmem:\t%8lu kB\n"
		"VmData:\t%8lu kB\n"
		"VmStk:\t%8lu kB\n"
		"VmExe:\t%8lu kB\n"
		"VmLib:\t%8lu kB\n"
		"VmPTE:\t%8lu kB\n"
		"VmSwap:\t%8lu kB\n",
		hiwater_vm << (PAGE_SHIFT-10),
		total_vm << (PAGE_SHIFT-10),
		mm->locked_vm << (PAGE_SHIFT-10),
		mm->pinned_vm << (PAGE_SHIFT-10),
		hiwater_rss << (PAGE_SHIFT-10),
		total_rss << (PAGE_SHIFT-10),
		anon << (PAGE_SHIFT-10),
		file << (PAGE_SHIFT-10),
		shmem << (PAGE_SHIFT-10),
		mm->data_vm << (PAGE_SHIFT-10),
		mm->stack_vm << (PAGE_SHIFT-10),
		text >> 10,
		lib >> 10,
		mm_pgtables_bytes(mm) >> 10,
		swap << (PAGE_SHIFT-10));
	hugetlb_report_usage(m, mm);
}

unsigned long task_vsize(struct mm_struct *mm)
{
	return PAGE_SIZE * mm->total_vm;
}

unsigned long task_statm(struct mm_struct *mm,
			 unsigned long *shared, unsigned long *text,
			 unsigned long *data, unsigned long *resident)
{
	*shared = get_mm_counter(mm, MM_FILEPAGES) +
			get_mm_counter(mm, MM_SHMEMPAGES);
	*text = (PAGE_ALIGN(mm->end_code) - (mm->start_code & PAGE_MASK))
								>> PAGE_SHIFT;
	*data = mm->data_vm + mm->stack_vm;
	*resident = *shared + get_mm_counter(mm, MM_ANONPAGES);
	return mm->total_vm;
}

#ifdef CONFIG_NUMA
/*
 * Save get_task_policy() for show_numa_map().
 */
static void hold_task_mempolicy(struct proc_maps_private *priv)
{
	struct task_struct *task = priv->task;

	task_lock(task);
	priv->task_mempolicy = get_task_policy(task);
	mpol_get(priv->task_mempolicy);
	task_unlock(task);
}
static void release_task_mempolicy(struct proc_maps_private *priv)
{
	mpol_put(priv->task_mempolicy);
}
#else
static void hold_task_mempolicy(struct proc_maps_private *priv)
{
}
static void release_task_mempolicy(struct proc_maps_private *priv)
{
}
#endif

static void vma_stop(struct proc_maps_private *priv)
{
	struct mm_struct *mm = priv->mm;

	release_task_mempolicy(priv);
	up_read(&mm->mmap_sem);
	mmput(mm);
}

static struct vm_area_struct *
m_next_vma(struct proc_maps_private *priv, struct vm_area_struct *vma)
{
	if (vma == priv->tail_vma)
		return NULL;
	return vma->vm_next ?: priv->tail_vma;
}

static void m_cache_vma(struct seq_file *m, struct vm_area_struct *vma)
{
	if (m->count < m->size)	/* vma is copied successfully */
		m->version = m_next_vma(m->private, vma) ? vma->vm_end : -1UL;
}

static void *m_start(struct seq_file *m, loff_t *ppos)
{
	struct proc_maps_private *priv = m->private;
	unsigned long last_addr = m->version;
	struct mm_struct *mm;
	struct vm_area_struct *vma;
	unsigned int pos = *ppos;

	/* See m_cache_vma(). Zero at the start or after lseek. */
	if (last_addr == -1UL)
		return NULL;

	priv->task = get_proc_task(priv->inode);
	if (!priv->task)
		return ERR_PTR(-ESRCH);

	mm = priv->mm;
	if (!mm || !mmget_not_zero(mm))
		return NULL;

	down_read(&mm->mmap_sem);
	hold_task_mempolicy(priv);
	priv->tail_vma = get_gate_vma(mm);

	if (last_addr) {
		vma = find_vma(mm, last_addr - 1);
		if (vma && vma->vm_start <= last_addr)
			vma = m_next_vma(priv, vma);
		if (vma)
			return vma;
	}

	m->version = 0;
	if (pos < mm->map_count) {
		for (vma = mm->mmap; pos; pos--) {
			m->version = vma->vm_start;
			vma = vma->vm_next;
		}
		return vma;
	}

	/* we do not bother to update m->version in this case */
	if (pos == mm->map_count && priv->tail_vma)
		return priv->tail_vma;

	vma_stop(priv);
	return NULL;
}

static void *m_next(struct seq_file *m, void *v, loff_t *pos)
{
	struct proc_maps_private *priv = m->private;
	struct vm_area_struct *next;

	(*pos)++;
	next = m_next_vma(priv, v);
	if (!next)
		vma_stop(priv);
	return next;
}

static void m_stop(struct seq_file *m, void *v)
{
	struct proc_maps_private *priv = m->private;

	if (!IS_ERR_OR_NULL(v))
		vma_stop(priv);
	if (priv->task) {
		put_task_struct(priv->task);
		priv->task = NULL;
	}
}

static int proc_maps_open(struct inode *inode, struct file *file,
			const struct seq_operations *ops, int psize)
{
	struct proc_maps_private *priv = __seq_open_private(file, ops, psize);

	if (!priv)
		return -ENOMEM;

	priv->inode = inode;
	priv->mm = proc_mem_open(inode, PTRACE_MODE_READ);
	if (IS_ERR(priv->mm)) {
		int err = PTR_ERR(priv->mm);

		seq_release_private(inode, file);
		return err;
	}

	return 0;
}

static int proc_map_release(struct inode *inode, struct file *file)
{
	struct seq_file *seq = file->private_data;
	struct proc_maps_private *priv = seq->private;

	if (priv->mm)
		mmdrop(priv->mm);

	kfree(priv->rollup);
	return seq_release_private(inode, file);
}

static int do_maps_open(struct inode *inode, struct file *file,
			const struct seq_operations *ops)
{
	return proc_maps_open(inode, file, ops,
				sizeof(struct proc_maps_private));
}

/*
 * Indicate if the VMA is a stack for the given task; for
 * /proc/PID/maps that is the stack of the main task.
 */
static int is_stack(struct vm_area_struct *vma)
{
	/*
	 * We make no effort to guess what a given thread considers to be
	 * its "stack".  It's not even well-defined for programs written
	 * in languages like Go.
	 */
	return vma->vm_start <= vma->vm_mm->start_stack &&
		vma->vm_end >= vma->vm_mm->start_stack;
}

static void show_vma_header_prefix(struct seq_file *m,
				   unsigned long start, unsigned long end,
				   vm_flags_t flags, unsigned long long pgoff,
				   dev_t dev, unsigned long ino)
{
	seq_setwidth(m, 25 + sizeof(void *) * 6 - 1);
	seq_printf(m, "%08lx-%08lx %c%c%c%c %08llx %02x:%02x %lu ",
		   start,
		   end,
		   flags & VM_READ ? 'r' : '-',
		   flags & VM_WRITE ? 'w' : '-',
		   flags & VM_EXEC ? 'x' : '-',
		   flags & VM_MAYSHARE ? 's' : 'p',
		   pgoff,
		   MAJOR(dev), MINOR(dev), ino);
}

static void
show_map_vma(struct seq_file *m, struct vm_area_struct *vma, int is_pid)
{
	struct mm_struct *mm = vma->vm_mm;
	struct file *file = vma->vm_file;
	vm_flags_t flags = vma->vm_flags;
	unsigned long ino = 0;
	unsigned long long pgoff = 0;
	unsigned long start, end;
	dev_t dev = 0;
	const char *name = NULL;

	if (file) {
		struct inode *inode = file_inode(vma->vm_file);
		dev = inode->i_sb->s_dev;
		ino = inode->i_ino;
		pgoff = ((loff_t)vma->vm_pgoff) << PAGE_SHIFT;
	}

	start = vma->vm_start;
	end = vma->vm_end;
	show_vma_header_prefix(m, start, end, flags, pgoff, dev, ino);

	/*
	 * Print the dentry name for named mappings, and a
	 * special [heap] marker for the heap:
	 */
	if (file) {
		seq_pad(m, ' ');
		seq_file_path(m, file, "\n");
		goto done;
	}

	if (vma->vm_ops && vma->vm_ops->name) {
		name = vma->vm_ops->name(vma);
		if (name)
			goto done;
	}

	name = arch_vma_name(vma);
	if (!name) {
		if (!mm) {
			name = "[vdso]";
			goto done;
		}

		if (vma->vm_start <= mm->brk &&
		    vma->vm_end >= mm->start_brk) {
			name = "[heap]";
			goto done;
		}

		if (is_stack(vma))
			name = "[stack]";
	}

done:
	if (name) {
		seq_pad(m, ' ');
		seq_puts(m, name);
	}
	seq_putc(m, '\n');
}

static int show_map(struct seq_file *m, void *v, int is_pid)
{
	show_map_vma(m, v, is_pid);
	m_cache_vma(m, v);
	return 0;
}

static int show_pid_map(struct seq_file *m, void *v)
{
	return show_map(m, v, 1);
}

static int show_tid_map(struct seq_file *m, void *v)
{
	return show_map(m, v, 0);
}

static const struct seq_operations proc_pid_maps_op = {
	.start = m_start,
	.next = m_next,
	.stop = m_stop,
	.show = show_pid_map
};

static const struct seq_operations proc_tid_maps_op = {
	.start = m_start,
	.next = m_next,
	.stop = m_stop,
	.show = show_tid_map
};

static int pid_maps_open(struct inode *inode, struct file *file)
{
	return do_maps_open(inode, file, &proc_pid_maps_op);
}

static int tid_maps_open(struct inode *inode, struct file *file)
{
	return do_maps_open(inode, file, &proc_tid_maps_op);
}

const struct file_operations proc_pid_maps_operations = {
	.open = pid_maps_open,
	.read = seq_read,
	.llseek = seq_lseek,
	.release = proc_map_release,
};

const struct file_operations proc_tid_maps_operations = {
	.open = tid_maps_open,
	.read = seq_read,
	.llseek = seq_lseek,
	.release = proc_map_release,
};

/*
 * Proportional Set Size (PSS): my share of RSS.
 *
 * PSS of a process is the count of pages it has in memory, where each
 * page is divided by the number of processes sharing it.  So if a
 * process has 1000 pages all to itself, and 1000 shared with one other
 * process, its PSS will be 1500.
 *
 * To keep (accumulated) division errors low, we adopt a 64-bit
 * fixed-point pss counter: (pss >> PSS_SHIFT) is the real byte count.
 *
 * A shift of 12 before division means (assuming 4K page size):
 *	- 1M 3-user-pages add up to 8KB errors;
 *	- supports mapcount up to 2^24, or 16M;
 *	- supports PSS up to 2^52 bytes, or 4PB.
 */
#define PSS_SHIFT 12
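
/*
 * Illustrative worked example of the fixed-point arithmetic above (just
 * the numbers, not kernel code): for one 4K page mapped by three
 * processes, each process accumulates
 *
 *	pss += (PAGE_SIZE << PSS_SHIFT) / mapcount
 *	     = (4096 << 12) / 3 = 5592405
 *
 * and reports pss >> PSS_SHIFT = 1365 bytes, i.e. about a third of the
 * page; the sub-byte remainder stays in the low PSS_SHIFT bits instead
 * of being lost to integer division on every page.
 */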

#ifdef CONFIG_PROC_PAGE_MONITOR
struct mem_size_stats {
	bool first;
	unsigned long resident;
	unsigned long shared_clean;
	unsigned long shared_dirty;
	unsigned long private_clean;
	unsigned long private_dirty;
	unsigned long referenced;
	unsigned long anonymous;
	unsigned long lazyfree;
	unsigned long anonymous_thp;
	unsigned long shmem_thp;
	unsigned long swap;
	unsigned long shared_hugetlb;
	unsigned long private_hugetlb;
	unsigned long first_vma_start;
	u64 pss;
	u64 pss_locked;
	u64 swap_pss;
	bool check_shmem_swap;
};

static void smaps_account(struct mem_size_stats *mss, struct page *page,
		bool compound, bool young, bool dirty)
{
	int i, nr = compound ? 1 << compound_order(page) : 1;
	unsigned long size = nr * PAGE_SIZE;

	if (PageAnon(page)) {
		mss->anonymous += size;
		if (!PageSwapBacked(page) && !dirty && !PageDirty(page))
			mss->lazyfree += size;
	}

	mss->resident += size;
	/* Accumulate the size in pages that have been accessed. */
	if (young || page_is_young(page) || PageReferenced(page))
		mss->referenced += size;

	/*
	 * page_count(page) == 1 guarantees the page is mapped exactly once.
	 * If any subpage of the compound page is mapped with a PTE, it
	 * would elevate page_count().
	 */
	if (page_count(page) == 1) {
		if (dirty || PageDirty(page))
			mss->private_dirty += size;
		else
			mss->private_clean += size;
		mss->pss += (u64)size << PSS_SHIFT;
		return;
	}

	for (i = 0; i < nr; i++, page++) {
		int mapcount = page_mapcount(page);

		if (mapcount >= 2) {
			if (dirty || PageDirty(page))
				mss->shared_dirty += PAGE_SIZE;
			else
				mss->shared_clean += PAGE_SIZE;
			mss->pss += (PAGE_SIZE << PSS_SHIFT) / mapcount;
		} else {
			if (dirty || PageDirty(page))
				mss->private_dirty += PAGE_SIZE;
			else
				mss->private_clean += PAGE_SIZE;
			mss->pss += PAGE_SIZE << PSS_SHIFT;
		}
	}
}

#ifdef CONFIG_SHMEM
static int smaps_pte_hole(unsigned long addr, unsigned long end,
		struct mm_walk *walk)
{
	struct mem_size_stats *mss = walk->private;

	mss->swap += shmem_partial_swap_usage(
			walk->vma->vm_file->f_mapping, addr, end);

	return 0;
}
#endif

static void smaps_pte_entry(pte_t *pte, unsigned long addr,
		struct mm_walk *walk)
{
	struct mem_size_stats *mss = walk->private;
	struct vm_area_struct *vma = walk->vma;
	struct page *page = NULL;

	if (pte_present(*pte)) {
		page = vm_normal_page(vma, addr, *pte);
	} else if (is_swap_pte(*pte)) {
		swp_entry_t swpent = pte_to_swp_entry(*pte);

		if (!non_swap_entry(swpent)) {
			int mapcount;

			mss->swap += PAGE_SIZE;
			mapcount = swp_swapcount(swpent);
			if (mapcount >= 2) {
				u64 pss_delta = (u64)PAGE_SIZE << PSS_SHIFT;

				do_div(pss_delta, mapcount);
				mss->swap_pss += pss_delta;
			} else {
				mss->swap_pss += (u64)PAGE_SIZE << PSS_SHIFT;
			}
		} else if (is_migration_entry(swpent))
			page = migration_entry_to_page(swpent);
		else if (is_device_private_entry(swpent))
			page = device_private_entry_to_page(swpent);
	} else if (unlikely(IS_ENABLED(CONFIG_SHMEM) && mss->check_shmem_swap
							&& pte_none(*pte))) {
		page = find_get_entry(vma->vm_file->f_mapping,
						linear_page_index(vma, addr));
		if (!page)
			return;

		if (radix_tree_exceptional_entry(page))
			mss->swap += PAGE_SIZE;
		else
			put_page(page);

		return;
	}

	if (!page)
		return;

	smaps_account(mss, page, false, pte_young(*pte), pte_dirty(*pte));
}

#ifdef CONFIG_TRANSPARENT_HUGEPAGE
static void smaps_pmd_entry(pmd_t *pmd, unsigned long addr,
		struct mm_walk *walk)
{
	struct mem_size_stats *mss = walk->private;
	struct vm_area_struct *vma = walk->vma;
	struct page *page;

	/* FOLL_DUMP will return -EFAULT on huge zero page */
	page = follow_trans_huge_pmd(vma, addr, pmd, FOLL_DUMP);
	if (IS_ERR_OR_NULL(page))
		return;
	if (PageAnon(page))
		mss->anonymous_thp += HPAGE_PMD_SIZE;
	else if (PageSwapBacked(page))
		mss->shmem_thp += HPAGE_PMD_SIZE;
	else if (is_zone_device_page(page))
		/* pass */;
	else
		VM_BUG_ON_PAGE(1, page);
	smaps_account(mss, page, true, pmd_young(*pmd), pmd_dirty(*pmd));
}
#else
static void smaps_pmd_entry(pmd_t *pmd, unsigned long addr,
		struct mm_walk *walk)
{
}
#endif

static int smaps_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
			   struct mm_walk *walk)
{
	struct vm_area_struct *vma = walk->vma;
	pte_t *pte;
	spinlock_t *ptl;

	ptl = pmd_trans_huge_lock(pmd, vma);
	if (ptl) {
		if (pmd_present(*pmd))
			smaps_pmd_entry(pmd, addr, walk);
		spin_unlock(ptl);
		goto out;
	}

	if (pmd_trans_unstable(pmd))
		goto out;
	/*
	 * The mmap_sem held all the way back in m_start() is what
	 * keeps khugepaged out of here and from collapsing things
	 * in here.
	 */
	pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl);
	for (; addr != end; pte++, addr += PAGE_SIZE)
		smaps_pte_entry(pte, addr, walk);
	pte_unmap_unlock(pte - 1, ptl);
out:
	cond_resched();
	return 0;
}

static void show_smap_vma_flags(struct seq_file *m, struct vm_area_struct *vma)
{
	/*
	 * Don't forget to update Documentation/ on changes.
	 */
	static const char mnemonics[BITS_PER_LONG][2] = {
		/*
		 * In case we meet a flag we don't know about.
		 */
		[0 ... (BITS_PER_LONG-1)] = "??",

		[ilog2(VM_READ)] = "rd",
		[ilog2(VM_WRITE)] = "wr",
		[ilog2(VM_EXEC)] = "ex",
		[ilog2(VM_SHARED)] = "sh",
		[ilog2(VM_MAYREAD)] = "mr",
		[ilog2(VM_MAYWRITE)] = "mw",
		[ilog2(VM_MAYEXEC)] = "me",
		[ilog2(VM_MAYSHARE)] = "ms",
		[ilog2(VM_GROWSDOWN)] = "gd",
		[ilog2(VM_PFNMAP)] = "pf",
		[ilog2(VM_DENYWRITE)] = "dw",
#ifdef CONFIG_X86_INTEL_MPX
		[ilog2(VM_MPX)] = "mp",
#endif
		[ilog2(VM_LOCKED)] = "lo",
		[ilog2(VM_IO)] = "io",
		[ilog2(VM_SEQ_READ)] = "sr",
		[ilog2(VM_RAND_READ)] = "rr",
		[ilog2(VM_DONTCOPY)] = "dc",
		[ilog2(VM_DONTEXPAND)] = "de",
		[ilog2(VM_ACCOUNT)] = "ac",
		[ilog2(VM_NORESERVE)] = "nr",
		[ilog2(VM_HUGETLB)] = "ht",
		[ilog2(VM_SYNC)] = "sf",
		[ilog2(VM_ARCH_1)] = "ar",
		[ilog2(VM_WIPEONFORK)] = "wf",
		[ilog2(VM_DONTDUMP)] = "dd",
#ifdef CONFIG_MEM_SOFT_DIRTY
		[ilog2(VM_SOFTDIRTY)] = "sd",
#endif
		[ilog2(VM_MIXEDMAP)] = "mm",
		[ilog2(VM_HUGEPAGE)] = "hg",
		[ilog2(VM_NOHUGEPAGE)] = "nh",
		[ilog2(VM_MERGEABLE)] = "mg",
		[ilog2(VM_UFFD_MISSING)] = "um",
		[ilog2(VM_UFFD_WP)] = "uw",
#ifdef CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS
		/* These come out via ProtectionKey: */
		[ilog2(VM_PKEY_BIT0)] = "",
		[ilog2(VM_PKEY_BIT1)] = "",
		[ilog2(VM_PKEY_BIT2)] = "",
		[ilog2(VM_PKEY_BIT3)] = "",
#endif
	};
	size_t i;

	seq_puts(m, "VmFlags: ");
	for (i = 0; i < BITS_PER_LONG; i++) {
		if (!mnemonics[i][0])
			continue;
		if (vma->vm_flags & (1UL << i)) {
			seq_printf(m, "%c%c ",
				   mnemonics[i][0], mnemonics[i][1]);
		}
	}
	seq_putc(m, '\n');
}

#ifdef CONFIG_HUGETLB_PAGE
static int smaps_hugetlb_range(pte_t *pte, unsigned long hmask,
				 unsigned long addr, unsigned long end,
				 struct mm_walk *walk)
{
	struct mem_size_stats *mss = walk->private;
	struct vm_area_struct *vma = walk->vma;
	struct page *page = NULL;

	if (pte_present(*pte)) {
		page = vm_normal_page(vma, addr, *pte);
	} else if (is_swap_pte(*pte)) {
		swp_entry_t swpent = pte_to_swp_entry(*pte);

		if (is_migration_entry(swpent))
			page = migration_entry_to_page(swpent);
		else if (is_device_private_entry(swpent))
			page = device_private_entry_to_page(swpent);
	}
	if (page) {
		int mapcount = page_mapcount(page);

		if (mapcount >= 2)
			mss->shared_hugetlb += huge_page_size(hstate_vma(vma));
		else
			mss->private_hugetlb += huge_page_size(hstate_vma(vma));
	}
	return 0;
}
#endif /* HUGETLB_PAGE */

void __weak arch_show_smap(struct seq_file *m, struct vm_area_struct *vma)
{
}

static int show_smap(struct seq_file *m, void *v, int is_pid)
{
	struct proc_maps_private *priv = m->private;
	struct vm_area_struct *vma = v;
	struct mem_size_stats mss_stack;
	struct mem_size_stats *mss;
	struct mm_walk smaps_walk = {
		.pmd_entry = smaps_pte_range,
#ifdef CONFIG_HUGETLB_PAGE
		.hugetlb_entry = smaps_hugetlb_range,
#endif
		.mm = vma->vm_mm,
	};
	int ret = 0;
	bool rollup_mode;
	bool last_vma;

	if (priv->rollup) {
		rollup_mode = true;
		mss = priv->rollup;
		if (mss->first) {
			mss->first_vma_start = vma->vm_start;
			mss->first = false;
		}
		last_vma = !m_next_vma(priv, vma);
	} else {
		rollup_mode = false;
		memset(&mss_stack, 0, sizeof(mss_stack));
		mss = &mss_stack;
	}

	smaps_walk.private = mss;

#ifdef CONFIG_SHMEM
	if (vma->vm_file && shmem_mapping(vma->vm_file->f_mapping)) {
		/*
		 * For shared or readonly shmem mappings we know that all
		 * swapped out pages belong to the shmem object, and we can
		 * obtain the swap value much more efficiently. For private
		 * writable mappings, we might have COW pages that are
		 * not affected by the parent swapped out pages of the shmem
		 * object, so we have to distinguish them during the page walk.
		 * Unless we know that the shmem object (or the part mapped by
		 * our VMA) has no swapped out pages at all.
		 */
		unsigned long shmem_swapped = shmem_swap_usage(vma);

		if (!shmem_swapped || (vma->vm_flags & VM_SHARED) ||
					!(vma->vm_flags & VM_WRITE)) {
			mss->swap = shmem_swapped;
		} else {
			mss->check_shmem_swap = true;
			smaps_walk.pte_hole = smaps_pte_hole;
		}
	}
#endif

	/* mmap_sem is held in m_start */
	walk_page_vma(vma, &smaps_walk);
	if (vma->vm_flags & VM_LOCKED)
		mss->pss_locked += mss->pss;

	if (!rollup_mode) {
		show_map_vma(m, vma, is_pid);
	} else if (last_vma) {
		show_vma_header_prefix(
			m, mss->first_vma_start, vma->vm_end, 0, 0, 0, 0);
		seq_pad(m, ' ');
		seq_puts(m, "[rollup]\n");
	} else {
		ret = SEQ_SKIP;
	}

	if (!rollup_mode)
		seq_printf(m,
			   "Size:           %8lu kB\n"
			   "KernelPageSize: %8lu kB\n"
			   "MMUPageSize:    %8lu kB\n",
			   (vma->vm_end - vma->vm_start) >> 10,
			   vma_kernel_pagesize(vma) >> 10,
			   vma_mmu_pagesize(vma) >> 10);

	if (!rollup_mode || last_vma)
		seq_printf(m,
			   "Rss:            %8lu kB\n"
			   "Pss:            %8lu kB\n"
			   "Shared_Clean:   %8lu kB\n"
			   "Shared_Dirty:   %8lu kB\n"
			   "Private_Clean:  %8lu kB\n"
			   "Private_Dirty:  %8lu kB\n"
			   "Referenced:     %8lu kB\n"
			   "Anonymous:      %8lu kB\n"
			   "LazyFree:       %8lu kB\n"
			   "AnonHugePages:  %8lu kB\n"
			   "ShmemPmdMapped: %8lu kB\n"
			   "Shared_Hugetlb: %8lu kB\n"
			   "Private_Hugetlb: %7lu kB\n"
			   "Swap:           %8lu kB\n"
			   "SwapPss:        %8lu kB\n"
			   "Locked:         %8lu kB\n",
			   mss->resident >> 10,
			   (unsigned long)(mss->pss >> (10 + PSS_SHIFT)),
			   mss->shared_clean >> 10,
			   mss->shared_dirty >> 10,
			   mss->private_clean >> 10,
			   mss->private_dirty >> 10,
			   mss->referenced >> 10,
			   mss->anonymous >> 10,
			   mss->lazyfree >> 10,
			   mss->anonymous_thp >> 10,
			   mss->shmem_thp >> 10,
			   mss->shared_hugetlb >> 10,
			   mss->private_hugetlb >> 10,
			   mss->swap >> 10,
			   (unsigned long)(mss->swap_pss >> (10 + PSS_SHIFT)),
			   (unsigned long)(mss->pss >> (10 + PSS_SHIFT)));

	if (!rollup_mode) {
		arch_show_smap(m, vma);
		show_smap_vma_flags(m, vma);
	}
	m_cache_vma(m, vma);
	return ret;
}

static int show_pid_smap(struct seq_file *m, void *v)
{
	return show_smap(m, v, 1);
}

static int show_tid_smap(struct seq_file *m, void *v)
{
	return show_smap(m, v, 0);
}

static const struct seq_operations proc_pid_smaps_op = {
	.start = m_start,
	.next = m_next,
	.stop = m_stop,
	.show = show_pid_smap
};

static const struct seq_operations proc_tid_smaps_op = {
	.start = m_start,
	.next = m_next,
	.stop = m_stop,
	.show = show_tid_smap
};

static int pid_smaps_open(struct inode *inode, struct file *file)
{
	return do_maps_open(inode, file, &proc_pid_smaps_op);
}

static int pid_smaps_rollup_open(struct inode *inode, struct file *file)
{
	struct seq_file *seq;
	struct proc_maps_private *priv;
	int ret = do_maps_open(inode, file, &proc_pid_smaps_op);

	if (ret < 0)
		return ret;
	seq = file->private_data;
	priv = seq->private;
	priv->rollup = kzalloc(sizeof(*priv->rollup), GFP_KERNEL);
	if (!priv->rollup) {
		proc_map_release(inode, file);
		return -ENOMEM;
	}
	priv->rollup->first = true;
	return 0;
}

static int tid_smaps_open(struct inode *inode, struct file *file)
{
	return do_maps_open(inode, file, &proc_tid_smaps_op);
}

const struct file_operations proc_pid_smaps_operations = {
	.open = pid_smaps_open,
	.read = seq_read,
	.llseek = seq_lseek,
	.release = proc_map_release,
};

const struct file_operations proc_pid_smaps_rollup_operations = {
	.open = pid_smaps_rollup_open,
	.read = seq_read,
	.llseek = seq_lseek,
	.release = proc_map_release,
};

const struct file_operations proc_tid_smaps_operations = {
	.open = tid_smaps_open,
	.read = seq_read,
	.llseek = seq_lseek,
	.release = proc_map_release,
};

enum clear_refs_types {
	CLEAR_REFS_ALL = 1,
	CLEAR_REFS_ANON,
	CLEAR_REFS_MAPPED,
	CLEAR_REFS_SOFT_DIRTY,
	CLEAR_REFS_MM_HIWATER_RSS,
	CLEAR_REFS_LAST,
};

struct clear_refs_private {
	enum clear_refs_types type;
};

#ifdef CONFIG_MEM_SOFT_DIRTY
static inline void clear_soft_dirty(struct vm_area_struct *vma,
		unsigned long addr, pte_t *pte)
{
	/*
	 * The soft-dirty tracker uses #PF-s to catch writes
	 * to pages, so write-protect the pte as well. See the
	 * Documentation/vm/soft-dirty.txt for full description
	 * of how soft-dirty works.
	 */
	pte_t ptent = *pte;

	if (pte_present(ptent)) {
		ptent = ptep_modify_prot_start(vma->vm_mm, addr, pte);
		ptent = pte_wrprotect(ptent);
		ptent = pte_clear_soft_dirty(ptent);
		ptep_modify_prot_commit(vma->vm_mm, addr, pte, ptent);
	} else if (is_swap_pte(ptent)) {
		ptent = pte_swp_clear_soft_dirty(ptent);
		set_pte_at(vma->vm_mm, addr, pte, ptent);
	}
}
#else
static inline void clear_soft_dirty(struct vm_area_struct *vma,
		unsigned long addr, pte_t *pte)
{
}
#endif

#if defined(CONFIG_MEM_SOFT_DIRTY) && defined(CONFIG_TRANSPARENT_HUGEPAGE)
static inline void clear_soft_dirty_pmd(struct vm_area_struct *vma,
		unsigned long addr, pmd_t *pmdp)
{
	pmd_t old, pmd = *pmdp;

	if (pmd_present(pmd)) {
		/* See comment in change_huge_pmd() */
		old = pmdp_invalidate(vma, addr, pmdp);
		if (pmd_dirty(old))
			pmd = pmd_mkdirty(pmd);
		if (pmd_young(old))
			pmd = pmd_mkyoung(pmd);

		pmd = pmd_wrprotect(pmd);
		pmd = pmd_clear_soft_dirty(pmd);

		set_pmd_at(vma->vm_mm, addr, pmdp, pmd);
	} else if (is_migration_entry(pmd_to_swp_entry(pmd))) {
		pmd = pmd_swp_clear_soft_dirty(pmd);
		set_pmd_at(vma->vm_mm, addr, pmdp, pmd);
	}
}
#else
static inline void clear_soft_dirty_pmd(struct vm_area_struct *vma,
		unsigned long addr, pmd_t *pmdp)
{
}
#endif

static int clear_refs_pte_range(pmd_t *pmd, unsigned long addr,
				unsigned long end, struct mm_walk *walk)
{
	struct clear_refs_private *cp = walk->private;
	struct vm_area_struct *vma = walk->vma;
	pte_t *pte, ptent;
	spinlock_t *ptl;
	struct page *page;

	ptl = pmd_trans_huge_lock(pmd, vma);
	if (ptl) {
		if (cp->type == CLEAR_REFS_SOFT_DIRTY) {
			clear_soft_dirty_pmd(vma, addr, pmd);
			goto out;
		}

		if (!pmd_present(*pmd))
			goto out;

		page = pmd_page(*pmd);

		/* Clear accessed and referenced bits. */
		pmdp_test_and_clear_young(vma, addr, pmd);
		test_and_clear_page_young(page);
		ClearPageReferenced(page);
out:
		spin_unlock(ptl);
		return 0;
	}

	if (pmd_trans_unstable(pmd))
		return 0;

	pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl);
	for (; addr != end; pte++, addr += PAGE_SIZE) {
		ptent = *pte;

		if (cp->type == CLEAR_REFS_SOFT_DIRTY) {
			clear_soft_dirty(vma, addr, pte);
			continue;
		}

		if (!pte_present(ptent))
			continue;

		page = vm_normal_page(vma, addr, ptent);
		if (!page)
			continue;

		/* Clear accessed and referenced bits. */
		ptep_test_and_clear_young(vma, addr, pte);
		test_and_clear_page_young(page);
		ClearPageReferenced(page);
	}
	pte_unmap_unlock(pte - 1, ptl);
	cond_resched();
	return 0;
}

static int clear_refs_test_walk(unsigned long start, unsigned long end,
				struct mm_walk *walk)
{
	struct clear_refs_private *cp = walk->private;
	struct vm_area_struct *vma = walk->vma;

	if (vma->vm_flags & VM_PFNMAP)
		return 1;

	/*
	 * Writing 1 to /proc/pid/clear_refs affects all pages.
	 * Writing 2 to /proc/pid/clear_refs only affects anonymous pages.
	 * Writing 3 to /proc/pid/clear_refs only affects file mapped pages.
	 * Writing 4 to /proc/pid/clear_refs affects all pages.
	 */
	if (cp->type == CLEAR_REFS_ANON && vma->vm_file)
		return 1;
	if (cp->type == CLEAR_REFS_MAPPED && !vma->vm_file)
		return 1;
	return 0;
}

static ssize_t clear_refs_write(struct file *file, const char __user *buf,
				size_t count, loff_t *ppos)
{
	struct task_struct *task;
	char buffer[PROC_NUMBUF];
	struct mm_struct *mm;
	struct vm_area_struct *vma;
	enum clear_refs_types type;
	struct mmu_gather tlb;
	int itype;
	int rv;

	memset(buffer, 0, sizeof(buffer));
	if (count > sizeof(buffer) - 1)
		count = sizeof(buffer) - 1;
	if (copy_from_user(buffer, buf, count))
		return -EFAULT;
	rv = kstrtoint(strstrip(buffer), 10, &itype);
	if (rv < 0)
		return rv;
	type = (enum clear_refs_types)itype;
	if (type < CLEAR_REFS_ALL || type >= CLEAR_REFS_LAST)
		return -EINVAL;

	task = get_proc_task(file_inode(file));
	if (!task)
		return -ESRCH;
	mm = get_task_mm(task);
	if (mm) {
		struct clear_refs_private cp = {
			.type = type,
		};
		struct mm_walk clear_refs_walk = {
			.pmd_entry = clear_refs_pte_range,
			.test_walk = clear_refs_test_walk,
			.mm = mm,
			.private = &cp,
		};

		if (type == CLEAR_REFS_MM_HIWATER_RSS) {
			if (down_write_killable(&mm->mmap_sem)) {
				count = -EINTR;
				goto out_mm;
			}

			/*
			 * Writing 5 to /proc/pid/clear_refs resets the peak
			 * resident set size to this mm's current rss value.
			 */
			reset_mm_hiwater_rss(mm);
			up_write(&mm->mmap_sem);
			goto out_mm;
		}

		down_read(&mm->mmap_sem);
		tlb_gather_mmu(&tlb, mm, 0, -1);
		if (type == CLEAR_REFS_SOFT_DIRTY) {
			for (vma = mm->mmap; vma; vma = vma->vm_next) {
				if (!(vma->vm_flags & VM_SOFTDIRTY))
					continue;
				up_read(&mm->mmap_sem);
				if (down_write_killable(&mm->mmap_sem)) {
					count = -EINTR;
					goto out_mm;
				}
				for (vma = mm->mmap; vma; vma = vma->vm_next) {
					vma->vm_flags &= ~VM_SOFTDIRTY;
					vma_set_page_prot(vma);
				}
				downgrade_write(&mm->mmap_sem);
				break;
			}
			mmu_notifier_invalidate_range_start(mm, 0, -1);
		}
		walk_page_range(0, mm->highest_vm_end, &clear_refs_walk);
		if (type == CLEAR_REFS_SOFT_DIRTY)
			mmu_notifier_invalidate_range_end(mm, 0, -1);
		tlb_finish_mmu(&tlb, 0, -1);
		up_read(&mm->mmap_sem);
out_mm:
		mmput(mm);
	}
	put_task_struct(task);

	return count;
}

const struct file_operations proc_clear_refs_operations = {
	.write = clear_refs_write,
	.llseek = noop_llseek,
};
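
/*
 * Illustrative use of the interface above (a sketch only; "1234" is a
 * made-up pid):
 *
 *	echo 1 > /proc/1234/clear_refs	- clear referenced/young on all pages
 *	echo 2 > /proc/1234/clear_refs	- anonymous pages only
 *	echo 3 > /proc/1234/clear_refs	- file-mapped pages only
 *	echo 4 > /proc/1234/clear_refs	- clear soft-dirty bits, so later
 *					  writes show up as soft-dirty in
 *					  /proc/1234/pagemap (bit 55)
 *	echo 5 > /proc/1234/clear_refs	- reset VmHWM to the current RSS
 */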

typedef struct {
	u64 pme;
} pagemap_entry_t;

struct pagemapread {
	int pos, len;		/* units: PM_ENTRY_BYTES, not bytes */
	pagemap_entry_t *buffer;
	bool show_pfn;
};

#define PAGEMAP_WALK_SIZE	(PMD_SIZE)
#define PAGEMAP_WALK_MASK	(PMD_MASK)

#define PM_ENTRY_BYTES		sizeof(pagemap_entry_t)
#define PM_PFRAME_BITS		55
#define PM_PFRAME_MASK		GENMASK_ULL(PM_PFRAME_BITS - 1, 0)
#define PM_SOFT_DIRTY		BIT_ULL(55)
#define PM_MMAP_EXCLUSIVE	BIT_ULL(56)
#define PM_FILE			BIT_ULL(61)
#define PM_SWAP			BIT_ULL(62)
#define PM_PRESENT		BIT_ULL(63)

#define PM_END_OF_BUFFER	1

static inline pagemap_entry_t make_pme(u64 frame, u64 flags)
{
	return (pagemap_entry_t) { .pme = (frame & PM_PFRAME_MASK) | flags };
}

static int add_to_pagemap(unsigned long addr, pagemap_entry_t *pme,
			  struct pagemapread *pm)
{
	pm->buffer[pm->pos++] = *pme;
	if (pm->pos >= pm->len)
		return PM_END_OF_BUFFER;
	return 0;
}

static int pagemap_pte_hole(unsigned long start, unsigned long end,
				struct mm_walk *walk)
{
	struct pagemapread *pm = walk->private;
	unsigned long addr = start;
	int err = 0;

	while (addr < end) {
		struct vm_area_struct *vma = find_vma(walk->mm, addr);
		pagemap_entry_t pme = make_pme(0, 0);
		/* End of address space hole, which we mark as non-present. */
		unsigned long hole_end;

		if (vma)
			hole_end = min(end, vma->vm_start);
		else
			hole_end = end;

		for (; addr < hole_end; addr += PAGE_SIZE) {
			err = add_to_pagemap(addr, &pme, pm);
			if (err)
				goto out;
		}

		if (!vma)
			break;

		/* Addresses in the VMA. */
		if (vma->vm_flags & VM_SOFTDIRTY)
			pme = make_pme(0, PM_SOFT_DIRTY);
		for (; addr < min(end, vma->vm_end); addr += PAGE_SIZE) {
			err = add_to_pagemap(addr, &pme, pm);
			if (err)
				goto out;
		}
	}
out:
	return err;
}

static pagemap_entry_t pte_to_pagemap_entry(struct pagemapread *pm,
		struct vm_area_struct *vma, unsigned long addr, pte_t pte)
{
	u64 frame = 0, flags = 0;
	struct page *page = NULL;

	if (pte_present(pte)) {
		if (pm->show_pfn)
			frame = pte_pfn(pte);
		flags |= PM_PRESENT;
		page = _vm_normal_page(vma, addr, pte, true);
		if (pte_soft_dirty(pte))
			flags |= PM_SOFT_DIRTY;
	} else if (is_swap_pte(pte)) {
		swp_entry_t entry;
		if (pte_swp_soft_dirty(pte))
			flags |= PM_SOFT_DIRTY;
		entry = pte_to_swp_entry(pte);
		frame = swp_type(entry) |
			(swp_offset(entry) << MAX_SWAPFILES_SHIFT);
		flags |= PM_SWAP;
		if (is_migration_entry(entry))
			page = migration_entry_to_page(entry);

		if (is_device_private_entry(entry))
			page = device_private_entry_to_page(entry);
	}

	if (page && !PageAnon(page))
		flags |= PM_FILE;
	if (page && page_mapcount(page) == 1)
		flags |= PM_MMAP_EXCLUSIVE;
	if (vma->vm_flags & VM_SOFTDIRTY)
		flags |= PM_SOFT_DIRTY;

	return make_pme(frame, flags);
}

static int pagemap_pmd_range(pmd_t *pmdp, unsigned long addr, unsigned long end,
			     struct mm_walk *walk)
{
	struct vm_area_struct *vma = walk->vma;
	struct pagemapread *pm = walk->private;
	spinlock_t *ptl;
	pte_t *pte, *orig_pte;
	int err = 0;

#ifdef CONFIG_TRANSPARENT_HUGEPAGE
	ptl = pmd_trans_huge_lock(pmdp, vma);
	if (ptl) {
		u64 flags = 0, frame = 0;
		pmd_t pmd = *pmdp;
		struct page *page = NULL;

		if (vma->vm_flags & VM_SOFTDIRTY)
			flags |= PM_SOFT_DIRTY;

		if (pmd_present(pmd)) {
			page = pmd_page(pmd);

			flags |= PM_PRESENT;
			if (pmd_soft_dirty(pmd))
				flags |= PM_SOFT_DIRTY;
			if (pm->show_pfn)
				frame = pmd_pfn(pmd) +
					((addr & ~PMD_MASK) >> PAGE_SHIFT);
		}
#ifdef CONFIG_ARCH_ENABLE_THP_MIGRATION
		else if (is_swap_pmd(pmd)) {
			swp_entry_t entry = pmd_to_swp_entry(pmd);

			frame = swp_type(entry) |
				(swp_offset(entry) << MAX_SWAPFILES_SHIFT);
			flags |= PM_SWAP;
			if (pmd_swp_soft_dirty(pmd))
				flags |= PM_SOFT_DIRTY;
			VM_BUG_ON(!is_pmd_migration_entry(pmd));
			page = migration_entry_to_page(entry);
		}
#endif

		if (page && page_mapcount(page) == 1)
			flags |= PM_MMAP_EXCLUSIVE;

		for (; addr != end; addr += PAGE_SIZE) {
			pagemap_entry_t pme = make_pme(frame, flags);

			err = add_to_pagemap(addr, &pme, pm);
			if (err)
				break;
			if (pm->show_pfn && (flags & PM_PRESENT))
				frame++;
		}
		spin_unlock(ptl);
		return err;
	}

	if (pmd_trans_unstable(pmdp))
		return 0;
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */

	/*
	 * We can assume that @vma always points to a valid one and @end never
	 * goes beyond vma->vm_end.
	 */
	orig_pte = pte = pte_offset_map_lock(walk->mm, pmdp, addr, &ptl);
	for (; addr < end; pte++, addr += PAGE_SIZE) {
		pagemap_entry_t pme;

		pme = pte_to_pagemap_entry(pm, vma, addr, *pte);
		err = add_to_pagemap(addr, &pme, pm);
		if (err)
			break;
	}
	pte_unmap_unlock(orig_pte, ptl);

	cond_resched();

	return err;
}

#ifdef CONFIG_HUGETLB_PAGE
/* This function walks within one hugetlb entry in the single call */
static int pagemap_hugetlb_range(pte_t *ptep, unsigned long hmask,
				 unsigned long addr, unsigned long end,
				 struct mm_walk *walk)
{
	struct pagemapread *pm = walk->private;
	struct vm_area_struct *vma = walk->vma;
	u64 flags = 0, frame = 0;
	int err = 0;
	pte_t pte;

	if (vma->vm_flags & VM_SOFTDIRTY)
		flags |= PM_SOFT_DIRTY;

	pte = huge_ptep_get(ptep);
	if (pte_present(pte)) {
		struct page *page = pte_page(pte);

		if (!PageAnon(page))
			flags |= PM_FILE;

		if (page_mapcount(page) == 1)
			flags |= PM_MMAP_EXCLUSIVE;

		flags |= PM_PRESENT;
		if (pm->show_pfn)
			frame = pte_pfn(pte) +
				((addr & ~hmask) >> PAGE_SHIFT);
	}

	for (; addr != end; addr += PAGE_SIZE) {
		pagemap_entry_t pme = make_pme(frame, flags);

		err = add_to_pagemap(addr, &pme, pm);
		if (err)
			return err;
		if (pm->show_pfn && (flags & PM_PRESENT))
			frame++;
	}

	cond_resched();

	return err;
}
#endif /* HUGETLB_PAGE */

/*
 * /proc/pid/pagemap - an array mapping virtual pages to pfns
 *
 * For each page in the address space, this file contains one 64-bit entry
 * consisting of the following:
 *
 * Bits 0-54  page frame number (PFN) if present
 * Bits 0-4   swap type if swapped
 * Bits 5-54  swap offset if swapped
 * Bit  55    pte is soft-dirty (see Documentation/vm/soft-dirty.txt)
 * Bit  56    page exclusively mapped
 * Bits 57-60 zero
 * Bit  61    page is file-page or shared-anon
 * Bit  62    page swapped
 * Bit  63    page present
 *
 * If the page is not present but in swap, then the PFN contains an
 * encoding of the swap file number and the page's offset into the
 * swap. Unmapped pages return a null PFN. This allows determining
 * precisely which pages are mapped (or in swap) and comparing mapped
 * pages between processes.
 *
 * Efficient users of this interface will use /proc/pid/maps to
 * determine which areas of memory are actually mapped and llseek to
 * skip over unmapped regions.
 */
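
/*
 * Minimal userspace sketch of reading one entry (illustrative only, not
 * kernel code; error handling omitted, "vaddr" is an assumed input and
 * the PFN field reads as zero without CAP_SYS_ADMIN):
 *
 *	#include <fcntl.h>
 *	#include <stdint.h>
 *	#include <unistd.h>
 *
 *	uint64_t ent, pfn = 0;
 *	long psize = sysconf(_SC_PAGESIZE);
 *	int fd = open("/proc/self/pagemap", O_RDONLY);
 *
 *	pread(fd, &ent, sizeof(ent), (vaddr / psize) * sizeof(ent));
 *	if (ent & (1ULL << 63))				// bit 63: present
 *		pfn = ent & ((1ULL << 55) - 1);		// bits 0-54
 *	close(fd);
 */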
static ssize_t pagemap_read(struct file *file, char __user *buf,
			    size_t count, loff_t *ppos)
{
	struct mm_struct *mm = file->private_data;
	struct pagemapread pm;
	struct mm_walk pagemap_walk = {};
	unsigned long src;
	unsigned long svpfn;
	unsigned long start_vaddr;
	unsigned long end_vaddr;
	int ret = 0, copied = 0;

	if (!mm || !mmget_not_zero(mm))
		goto out;

	ret = -EINVAL;
	/* file position must be aligned */
	if ((*ppos % PM_ENTRY_BYTES) || (count % PM_ENTRY_BYTES))
		goto out_mm;

	ret = 0;
	if (!count)
		goto out_mm;

	/* do not disclose physical addresses: attack vector */
	pm.show_pfn = file_ns_capable(file, &init_user_ns, CAP_SYS_ADMIN);

	pm.len = (PAGEMAP_WALK_SIZE >> PAGE_SHIFT);
	pm.buffer = kmalloc(pm.len * PM_ENTRY_BYTES, GFP_KERNEL);
	ret = -ENOMEM;
	if (!pm.buffer)
		goto out_mm;

	pagemap_walk.pmd_entry = pagemap_pmd_range;
	pagemap_walk.pte_hole = pagemap_pte_hole;
#ifdef CONFIG_HUGETLB_PAGE
	pagemap_walk.hugetlb_entry = pagemap_hugetlb_range;
#endif
	pagemap_walk.mm = mm;
	pagemap_walk.private = &pm;

	src = *ppos;
	svpfn = src / PM_ENTRY_BYTES;
	start_vaddr = svpfn << PAGE_SHIFT;
	end_vaddr = mm->task_size;

	/* watch out for wraparound */
	if (svpfn > mm->task_size >> PAGE_SHIFT)
		start_vaddr = end_vaddr;

	/*
	 * The odds are that this will stop walking way
	 * before end_vaddr, because the length of the
	 * user buffer is tracked in "pm", and the walk
	 * will stop when we hit the end of the buffer.
	 */
	ret = 0;
	while (count && (start_vaddr < end_vaddr)) {
		int len;
		unsigned long end;

		pm.pos = 0;
		end = (start_vaddr + PAGEMAP_WALK_SIZE) & PAGEMAP_WALK_MASK;
		/* overflow ? */
		if (end < start_vaddr || end > end_vaddr)
			end = end_vaddr;
		down_read(&mm->mmap_sem);
		ret = walk_page_range(start_vaddr, end, &pagemap_walk);
		up_read(&mm->mmap_sem);
		start_vaddr = end;

		len = min(count, PM_ENTRY_BYTES * pm.pos);
		if (copy_to_user(buf, pm.buffer, len)) {
			ret = -EFAULT;
			goto out_free;
		}
		copied += len;
		buf += len;
		count -= len;
	}
	*ppos += copied;
	if (!ret || ret == PM_END_OF_BUFFER)
		ret = copied;

out_free:
	kfree(pm.buffer);
out_mm:
	mmput(mm);
out:
	return ret;
}

static int pagemap_open(struct inode *inode, struct file *file)
{
	struct mm_struct *mm;

	mm = proc_mem_open(inode, PTRACE_MODE_READ);
	if (IS_ERR(mm))
		return PTR_ERR(mm);
	file->private_data = mm;
	return 0;
}

static int pagemap_release(struct inode *inode, struct file *file)
{
	struct mm_struct *mm = file->private_data;

	if (mm)
		mmdrop(mm);
	return 0;
}

const struct file_operations proc_pagemap_operations = {
	.llseek = mem_lseek, /* borrow this */
	.read = pagemap_read,
	.open = pagemap_open,
	.release = pagemap_release,
};
#endif /* CONFIG_PROC_PAGE_MONITOR */

#ifdef CONFIG_NUMA

struct numa_maps {
	unsigned long pages;
	unsigned long anon;
	unsigned long active;
	unsigned long writeback;
	unsigned long mapcount_max;
	unsigned long dirty;
	unsigned long swapcache;
	unsigned long node[MAX_NUMNODES];
};

struct numa_maps_private {
	struct proc_maps_private proc_maps;
	struct numa_maps md;
};

static void gather_stats(struct page *page, struct numa_maps *md, int pte_dirty,
			unsigned long nr_pages)
{
	int count = page_mapcount(page);

	md->pages += nr_pages;
	if (pte_dirty || PageDirty(page))
		md->dirty += nr_pages;

	if (PageSwapCache(page))
		md->swapcache += nr_pages;

	if (PageActive(page) || PageUnevictable(page))
		md->active += nr_pages;

	if (PageWriteback(page))
		md->writeback += nr_pages;

	if (PageAnon(page))
		md->anon += nr_pages;

	if (count > md->mapcount_max)
		md->mapcount_max = count;

	md->node[page_to_nid(page)] += nr_pages;
}

static struct page *can_gather_numa_stats(pte_t pte, struct vm_area_struct *vma,
		unsigned long addr)
{
	struct page *page;
	int nid;

	if (!pte_present(pte))
		return NULL;

	page = vm_normal_page(vma, addr, pte);
	if (!page)
		return NULL;

	if (PageReserved(page))
		return NULL;

	nid = page_to_nid(page);
	if (!node_isset(nid, node_states[N_MEMORY]))
		return NULL;

	return page;
}

#ifdef CONFIG_TRANSPARENT_HUGEPAGE
static struct page *can_gather_numa_stats_pmd(pmd_t pmd,
					      struct vm_area_struct *vma,
					      unsigned long addr)
{
	struct page *page;
	int nid;

	if (!pmd_present(pmd))
		return NULL;

	page = vm_normal_page_pmd(vma, addr, pmd);
	if (!page)
		return NULL;

	if (PageReserved(page))
		return NULL;

	nid = page_to_nid(page);
	if (!node_isset(nid, node_states[N_MEMORY]))
		return NULL;

	return page;
}
#endif

static int gather_pte_stats(pmd_t *pmd, unsigned long addr,
		unsigned long end, struct mm_walk *walk)
{
	struct numa_maps *md = walk->private;
	struct vm_area_struct *vma = walk->vma;
	spinlock_t *ptl;
	pte_t *orig_pte;
	pte_t *pte;

#ifdef CONFIG_TRANSPARENT_HUGEPAGE
	ptl = pmd_trans_huge_lock(pmd, vma);
	if (ptl) {
		struct page *page;

		page = can_gather_numa_stats_pmd(*pmd, vma, addr);
		if (page)
			gather_stats(page, md, pmd_dirty(*pmd),
				     HPAGE_PMD_SIZE/PAGE_SIZE);
		spin_unlock(ptl);
		return 0;
	}

	if (pmd_trans_unstable(pmd))
		return 0;
#endif
	orig_pte = pte = pte_offset_map_lock(walk->mm, pmd, addr, &ptl);
	do {
		struct page *page = can_gather_numa_stats(*pte, vma, addr);
		if (!page)
			continue;
		gather_stats(page, md, pte_dirty(*pte), 1);

	} while (pte++, addr += PAGE_SIZE, addr != end);
	pte_unmap_unlock(orig_pte, ptl);
	cond_resched();
	return 0;
}
#ifdef CONFIG_HUGETLB_PAGE
static int gather_hugetlb_stats(pte_t *pte, unsigned long hmask,
		unsigned long addr, unsigned long end, struct mm_walk *walk)
{
	pte_t huge_pte = huge_ptep_get(pte);
	struct numa_maps *md;
	struct page *page;

	if (!pte_present(huge_pte))
		return 0;

	page = pte_page(huge_pte);
	if (!page)
		return 0;

	md = walk->private;
	gather_stats(page, md, pte_dirty(huge_pte), 1);
	return 0;
}

#else
static int gather_hugetlb_stats(pte_t *pte, unsigned long hmask,
		unsigned long addr, unsigned long end, struct mm_walk *walk)
{
	return 0;
}
#endif

/*
 * Display pages allocated per node and memory policy via /proc.
 */
static int show_numa_map(struct seq_file *m, void *v, int is_pid)
{
	struct numa_maps_private *numa_priv = m->private;
	struct proc_maps_private *proc_priv = &numa_priv->proc_maps;
	struct vm_area_struct *vma = v;
	struct numa_maps *md = &numa_priv->md;
	struct file *file = vma->vm_file;
	struct mm_struct *mm = vma->vm_mm;
	struct mm_walk walk = {
		.hugetlb_entry = gather_hugetlb_stats,
		.pmd_entry = gather_pte_stats,
		.private = md,
		.mm = mm,
	};
	struct mempolicy *pol;
	char buffer[64];
	int nid;

	if (!mm)
		return 0;

	/* Ensure we start with an empty set of numa_maps statistics. */
	memset(md, 0, sizeof(*md));

	pol = __get_vma_policy(vma, vma->vm_start);
	if (pol) {
		mpol_to_str(buffer, sizeof(buffer), pol);
		mpol_cond_put(pol);
	} else {
		mpol_to_str(buffer, sizeof(buffer), proc_priv->task_mempolicy);
	}

	seq_printf(m, "%08lx %s", vma->vm_start, buffer);

	if (file) {
		seq_puts(m, " file=");
		seq_file_path(m, file, "\n\t= ");
	} else if (vma->vm_start <= mm->brk && vma->vm_end >= mm->start_brk) {
		seq_puts(m, " heap");
	} else if (is_stack(vma)) {
		seq_puts(m, " stack");
	}

	if (is_vm_hugetlb_page(vma))
		seq_puts(m, " huge");

	/* mmap_sem is held by m_start */
	walk_page_vma(vma, &walk);

	if (!md->pages)
		goto out;

	if (md->anon)
		seq_printf(m, " anon=%lu", md->anon);

	if (md->dirty)
		seq_printf(m, " dirty=%lu", md->dirty);

	if (md->pages != md->anon && md->pages != md->dirty)
		seq_printf(m, " mapped=%lu", md->pages);

	if (md->mapcount_max > 1)
		seq_printf(m, " mapmax=%lu", md->mapcount_max);

	if (md->swapcache)
		seq_printf(m, " swapcache=%lu", md->swapcache);

	if (md->active < md->pages && !is_vm_hugetlb_page(vma))
		seq_printf(m, " active=%lu", md->active);

	if (md->writeback)
		seq_printf(m, " writeback=%lu", md->writeback);

	for_each_node_state(nid, N_MEMORY)
		if (md->node[nid])
			seq_printf(m, " N%d=%lu", nid, md->node[nid]);

	seq_printf(m, " kernelpagesize_kB=%lu", vma_kernel_pagesize(vma) >> 10);
out:
	seq_putc(m, '\n');
	m_cache_vma(m, vma);
	return 0;
}

static int show_pid_numa_map(struct seq_file *m, void *v)
{
	return show_numa_map(m, v, 1);
}

static int show_tid_numa_map(struct seq_file *m, void *v)
{
	return show_numa_map(m, v, 0);
}

static const struct seq_operations proc_pid_numa_maps_op = {
	.start = m_start,
	.next = m_next,
	.stop = m_stop,
	.show = show_pid_numa_map,
};

static const struct seq_operations proc_tid_numa_maps_op = {
	.start = m_start,
	.next = m_next,
	.stop = m_stop,
	.show = show_tid_numa_map,
};

static int numa_maps_open(struct inode *inode, struct file *file,
			  const struct seq_operations *ops)
{
	return proc_maps_open(inode, file, ops,
				sizeof(struct numa_maps_private));
}

static int pid_numa_maps_open(struct inode *inode, struct file *file)
{
	return numa_maps_open(inode, file, &proc_pid_numa_maps_op);
}

static int tid_numa_maps_open(struct inode *inode, struct file *file)
{
	return numa_maps_open(inode, file, &proc_tid_numa_maps_op);
}

const struct file_operations proc_pid_numa_maps_operations = {
	.open = pid_numa_maps_open,
	.read = seq_read,
	.llseek = seq_lseek,
	.release = proc_map_release,
};

const struct file_operations proc_tid_numa_maps_operations = {
	.open = tid_numa_maps_open,
	.read = seq_read,
	.llseek = seq_lseek,
	.release = proc_map_release,
};
#endif /* CONFIG_NUMA */