task_mmu.c: fs/proc/task_mmu.c, diff between commits 4af6600fd793023c01634cca5abfe4a2b707788f (old) and 64e455079e1bd7787cc47be30b7f601ce682a5f6 (new). Context lines are prefixed with a space, removed lines with "-", added lines with "+".
 #include <linux/mm.h>
 #include <linux/vmacache.h>
 #include <linux/hugetlb.h>
 #include <linux/huge_mm.h>
 #include <linux/mount.h>
 #include <linux/seq_file.h>
 #include <linux/highmem.h>
 #include <linux/ptrace.h>

--- 73 unchanged lines hidden ---

 			>> PAGE_SHIFT;
 	*data = mm->total_vm - mm->shared_vm;
 	*resident = *shared + get_mm_counter(mm, MM_ANONPAGES);
 	return mm->total_vm;
 }

 #ifdef CONFIG_NUMA
 /*
- * These functions are for numa_maps but called in generic **maps seq_file
- * ->start(), ->stop() ops.
- *
- * numa_maps scans all vmas under mmap_sem and checks their mempolicy.
- * Each mempolicy object is controlled by reference counting. The problem here
- * is how to avoid accessing dead mempolicy object.
- *
- * Because we're holding mmap_sem while reading seq_file, it's safe to access
- * each vma's mempolicy, no vma objects will never drop refs to mempolicy.
- *
- * A task's mempolicy (task->mempolicy) has different behavior. task->mempolicy
- * is set and replaced under mmap_sem but unrefed and cleared under task_lock().
- * So, without task_lock(), we cannot trust get_vma_policy() because we cannot
- * gurantee the task never exits under us. But taking task_lock() around
- * get_vma_plicy() causes lock order problem.
- *
- * To access task->mempolicy without lock, we hold a reference count of an
- * object pointed by task->mempolicy and remember it. This will guarantee
- * that task->mempolicy points to an alive object or NULL in numa_maps accesses.
+ * Save get_task_policy() for show_numa_map().
  */
 static void hold_task_mempolicy(struct proc_maps_private *priv)
 {
 	struct task_struct *task = priv->task;

 	task_lock(task);
-	priv->task_mempolicy = task->mempolicy;
+	priv->task_mempolicy = get_task_policy(task);
 	mpol_get(priv->task_mempolicy);
 	task_unlock(task);
 }
 static void release_task_mempolicy(struct proc_maps_private *priv)
 {
 	mpol_put(priv->task_mempolicy);
 }
 #else
 static void hold_task_mempolicy(struct proc_maps_private *priv)
 {
 }
 static void release_task_mempolicy(struct proc_maps_private *priv)
 {
 }
 #endif

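For orientation, the hold/release pair above pins whatever policy the task has at ->start() time so numa_maps can dereference it later without task_lock(). The new version saves get_task_policy(task) rather than the raw task->mempolicy pointer, so priv->task_mempolicy is the task policy or the system default instead of possibly NULL. A minimal sketch of the intended lifecycle around one seq_file pass (hypothetical caller, not part of either commit):

/* Sketch only: how one numa_maps pass uses the pinned policy. */
static void numa_pass_sketch(struct proc_maps_private *priv)
{
	hold_task_mempolicy(priv);	/* reference taken under task_lock() */

	/*
	 * ... walk VMAs; show_numa_map() falls back to
	 * priv->task_mempolicy for VMAs with no policy of their own ...
	 */

	release_task_mempolicy(priv);	/* mpol_put() drops the reference */
}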
-static void vma_stop(struct proc_maps_private *priv, struct vm_area_struct *vma)
+static void vma_stop(struct proc_maps_private *priv)
 {
-	if (vma && vma != priv->tail_vma) {
-		struct mm_struct *mm = vma->vm_mm;
-		release_task_mempolicy(priv);
-		up_read(&mm->mmap_sem);
-		mmput(mm);
-	}
+	struct mm_struct *mm = priv->mm;
+
+	release_task_mempolicy(priv);
+	up_read(&mm->mmap_sem);
+	mmput(mm);
 }

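These callbacks plug into seq_operations tables defined in this file (they fall inside the hidden hunks); the wiring, reconstructed here for reference, is what guarantees vma_stop() runs exactly once per pass: from m_next() when the walk reaches the end, or from m_stop() on early termination.

static const struct seq_operations proc_pid_maps_op = {
	.start	= m_start,
	.next	= m_next,
	.stop	= m_stop,
	.show	= show_pid_map,
};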
-static void *m_start(struct seq_file *m, loff_t *pos)
+static struct vm_area_struct *
+m_next_vma(struct proc_maps_private *priv, struct vm_area_struct *vma)
+{
+	if (vma == priv->tail_vma)
+		return NULL;
+	return vma->vm_next ?: priv->tail_vma;
+}
+
+static void m_cache_vma(struct seq_file *m, struct vm_area_struct *vma)
+{
+	if (m->count < m->size) /* vma is copied successfully */
+		m->version = m_next_vma(m->private, vma) ? vma->vm_start : -1UL;
+}
+
+static void *m_start(struct seq_file *m, loff_t *ppos)
 {
 	struct proc_maps_private *priv = m->private;
 	unsigned long last_addr = m->version;
 	struct mm_struct *mm;
-	struct vm_area_struct *vma, *tail_vma = NULL;
-	loff_t l = *pos;
-
-	/* Clear the per syscall fields in priv */
-	priv->task = NULL;
-	priv->tail_vma = NULL;
-
-	/*
-	 * We remember last_addr rather than next_addr to hit with
-	 * vmacache most of the time. We have zero last_addr at
-	 * the beginning and also after lseek. We will have -1 last_addr
-	 * after the end of the vmas.
-	 */
-
+	struct vm_area_struct *vma;
+	unsigned int pos = *ppos;
+
+	/* See m_cache_vma(). Zero at the start or after lseek. */
 	if (last_addr == -1UL)
 		return NULL;

-	priv->task = get_pid_task(priv->pid, PIDTYPE_PID);
+	priv->task = get_proc_task(priv->inode);
 	if (!priv->task)
 		return ERR_PTR(-ESRCH);

-	mm = mm_access(priv->task, PTRACE_MODE_READ);
-	if (!mm || IS_ERR(mm))
-		return mm;
-	down_read(&mm->mmap_sem);
-
-	tail_vma = get_gate_vma(priv->task->mm);
-	priv->tail_vma = tail_vma;
+	mm = priv->mm;
+	if (!mm || !atomic_inc_not_zero(&mm->mm_users))
+		return NULL;
+
+	down_read(&mm->mmap_sem);
 	hold_task_mempolicy(priv);
-	/* Start with last addr hint */
-	vma = find_vma(mm, last_addr);
-	if (last_addr && vma) {
-		vma = vma->vm_next;
-		goto out;
-	}
-
-	/*
-	 * Check the vma index is within the range and do
-	 * sequential scan until m_index.
-	 */
-	vma = NULL;
-	if ((unsigned long)l < mm->map_count) {
-		vma = mm->mmap;
-		while (l-- && vma)
-			vma = vma->vm_next;
-		goto out;
-	}
-
-	if (l != mm->map_count)
-		tail_vma = NULL; /* After gate vma */
-
-out:
-	if (vma)
-		return vma;
-
-	release_task_mempolicy(priv);
-	/* End of vmas has been reached */
-	m->version = (tail_vma != NULL)? 0: -1UL;
-	up_read(&mm->mmap_sem);
-	mmput(mm);
-	return tail_vma;
+	priv->tail_vma = get_gate_vma(mm);
+
+	if (last_addr) {
+		vma = find_vma(mm, last_addr);
+		if (vma && (vma = m_next_vma(priv, vma)))
+			return vma;
+	}
+
+	m->version = 0;
+	if (pos < mm->map_count) {
+		for (vma = mm->mmap; pos; pos--) {
+			m->version = vma->vm_start;
+			vma = vma->vm_next;
+		}
+		return vma;
+	}
+
+	/* we do not bother to update m->version in this case */
+	if (pos == mm->map_count && priv->tail_vma)
+		return priv->tail_vma;
+
+	vma_stop(priv);
+	return NULL;
 }

 static void *m_next(struct seq_file *m, void *v, loff_t *pos)
 {
 	struct proc_maps_private *priv = m->private;
-	struct vm_area_struct *vma = v;
-	struct vm_area_struct *tail_vma = priv->tail_vma;
+	struct vm_area_struct *next;

 	(*pos)++;
-	if (vma && (vma != tail_vma) && vma->vm_next)
-		return vma->vm_next;
-	vma_stop(priv, vma);
-	return (vma != tail_vma)? tail_vma: NULL;
+	next = m_next_vma(priv, v);
+	if (!next)
+		vma_stop(priv);
+	return next;
 }

 static void m_stop(struct seq_file *m, void *v)
 {
 	struct proc_maps_private *priv = m->private;
-	struct vm_area_struct *vma = v;
-
-	if (!IS_ERR(vma))
-		vma_stop(priv, vma);
-	if (priv->task)
+
+	if (!IS_ERR_OR_NULL(v))
+		vma_stop(priv);
+	if (priv->task) {
 		put_task_struct(priv->task);
+		priv->task = NULL;
+	}
 }

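The resume logic above is observable from userspace: reading with a buffer smaller than the full listing forces repeated ->start() calls, each resuming from m->version via find_vma() instead of rescanning the VMA list. A small demo (userspace, not kernel code):

#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	char buf[256];		/* deliberately small: forces many read()s */
	ssize_t n;
	int fd = open("/proc/self/maps", O_RDONLY);

	if (fd < 0)
		return 1;
	/* Each read() after the first re-enters m_start() with the
	 * previous pass's m->version as the last_addr hint. */
	while ((n = read(fd, buf, sizeof(buf))) > 0)
		fwrite(buf, 1, (size_t)n, stdout);
	close(fd);
	return 0;
}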
+static int proc_maps_open(struct inode *inode, struct file *file,
+			const struct seq_operations *ops, int psize)
+{
+	struct proc_maps_private *priv = __seq_open_private(file, ops, psize);
+
+	if (!priv)
+		return -ENOMEM;
+
+	priv->inode = inode;
+	priv->mm = proc_mem_open(inode, PTRACE_MODE_READ);
+	if (IS_ERR(priv->mm)) {
+		int err = PTR_ERR(priv->mm);
+
+		seq_release_private(inode, file);
+		return err;
+	}
+
+	return 0;
+}
+
+static int proc_map_release(struct inode *inode, struct file *file)
+{
+	struct seq_file *seq = file->private_data;
+	struct proc_maps_private *priv = seq->private;
+
+	if (priv->mm)
+		mmdrop(priv->mm);
+
+	return seq_release_private(inode, file);
+}
+
 static int do_maps_open(struct inode *inode, struct file *file,
 			const struct seq_operations *ops)
 {
-	struct proc_maps_private *priv;
-	int ret = -ENOMEM;
-	priv = kzalloc(sizeof(*priv), GFP_KERNEL);
-	if (priv) {
-		priv->pid = proc_pid(inode);
-		ret = seq_open(file, ops);
-		if (!ret) {
-			struct seq_file *m = file->private_data;
-			m->private = priv;
-		} else {
-			kfree(priv);
-		}
-	}
-	return ret;
+	return proc_maps_open(inode, file, ops,
+				sizeof(struct proc_maps_private));
 }

+static pid_t pid_of_stack(struct proc_maps_private *priv,
+				struct vm_area_struct *vma, bool is_pid)
+{
+	struct inode *inode = priv->inode;
+	struct task_struct *task;
+	pid_t ret = 0;
+
+	rcu_read_lock();
+	task = pid_task(proc_pid(inode), PIDTYPE_PID);
+	if (task) {
+		task = task_of_stack(task, vma, is_pid);
+		if (task)
+			ret = task_pid_nr_ns(task, inode->i_sb->s_fs_info);
+	}
+	rcu_read_unlock();
+
+	return ret;
+}
+
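The new open/release pair splits mm lifetime management in two. proc_mem_open() (introduced alongside this change in fs/proc/base.c; assumed here to return NULL, an ERR_PTR, or an mm_struct with its mm_count reference raised) pins only the struct itself for the lifetime of the open file; the address space is pinned per read pass. As a timeline (a summary of the code above, with the failure mode called out):

/*
 * open()  -> proc_maps_open()     priv->mm pinned via mm_count
 * read()  -> m_start()            atomic_inc_not_zero(&mm->mm_users);
 *                                 fails (returns NULL) if the task has
 *                                 already exited and torn down its
 *                                 address space
 *         -> vma_stop()           mmput(mm): mm_users dropped
 * close() -> proc_map_release()   mmdrop(mm): mm_count dropped
 */

So holding /proc/PID/maps open no longer keeps a dead task's address space alive between reads.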
 static void
 show_map_vma(struct seq_file *m, struct vm_area_struct *vma, int is_pid)
 {
 	struct mm_struct *mm = vma->vm_mm;
 	struct file *file = vma->vm_file;
 	struct proc_maps_private *priv = m->private;
-	struct task_struct *task = priv->task;
 	vm_flags_t flags = vma->vm_flags;
 	unsigned long ino = 0;
 	unsigned long long pgoff = 0;
 	unsigned long start, end;
 	dev_t dev = 0;
 	const char *name = NULL;

 	if (file) {

--- 48 unchanged lines hidden ---

 	}

 	if (vma->vm_start <= mm->brk &&
 	    vma->vm_end >= mm->start_brk) {
 		name = "[heap]";
 		goto done;
 	}

-	tid = vm_is_stack(task, vma, is_pid);
-
+	tid = pid_of_stack(priv, vma, is_pid);
 	if (tid != 0) {
 		/*
 		 * Thread stack in /proc/PID/task/TID/maps or
 		 * the main process stack.
 		 */
 		if (!is_pid || (vma->vm_start <= mm->start_stack &&
 		    vma->vm_end >= mm->start_stack)) {
 			name = "[stack]";

--- 10 unchanged lines hidden ---

 		seq_pad(m, ' ');
 		seq_puts(m, name);
 	}
 	seq_putc(m, '\n');
 }

 static int show_map(struct seq_file *m, void *v, int is_pid)
 {
-	struct vm_area_struct *vma = v;
-	struct proc_maps_private *priv = m->private;
-	struct task_struct *task = priv->task;
-
-	show_map_vma(m, vma, is_pid);
-
-	if (m->count < m->size) /* vma is copied successfully */
-		m->version = (vma != get_gate_vma(task->mm))
-			? vma->vm_start : 0;
+	show_map_vma(m, v, is_pid);
+	m_cache_vma(m, v);
 	return 0;
 }

 static int show_pid_map(struct seq_file *m, void *v)
 {
 	return show_map(m, v, 1);
 }

--- 25 unchanged lines hidden ---

 {
 	return do_maps_open(inode, file, &proc_tid_maps_op);
 }

 const struct file_operations proc_pid_maps_operations = {
 	.open		= pid_maps_open,
 	.read		= seq_read,
 	.llseek		= seq_lseek,
-	.release	= seq_release_private,
+	.release	= proc_map_release,
 };

 const struct file_operations proc_tid_maps_operations = {
 	.open		= tid_maps_open,
 	.read		= seq_read,
 	.llseek		= seq_lseek,
-	.release	= seq_release_private,
+	.release	= proc_map_release,
 };

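For reference, show_map_vma() emits one line per VMA in the familiar "start-end perms offset dev inode [path]" format; illustrative sample (addresses, device and inode numbers invented):

00400000-0040b000 r-xp 00000000 08:01 1572869                    /bin/cat
7ffc5e8a0000-7ffc5e8c1000 rw-p 00000000 00:00 0                  [stack]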
 /*
  * Proportional Set Size(PSS): my share of RSS.
  *
  * PSS of a process is the count of pages it has in memory, where each
  * page is divided by the number of processes sharing it. So if a
  * process has 1000 pages all to itself, and 1000 shared with one other

--- 160 unchanged lines hidden ---

 					mnemonics[i][0], mnemonics[i][1]);
 		}
 	}
 	seq_putc(m, '\n');
 }

 static int show_smap(struct seq_file *m, void *v, int is_pid)
 {
-	struct proc_maps_private *priv = m->private;
-	struct task_struct *task = priv->task;
 	struct vm_area_struct *vma = v;
 	struct mem_size_stats mss;
 	struct mm_walk smaps_walk = {
 		.pmd_entry = smaps_pte_range,
 		.mm = vma->vm_mm,
 		.private = &mss,
 	};

--- 36 unchanged lines hidden ---

 		   (vma->vm_flags & VM_LOCKED) ?
 			(unsigned long)(mss.pss >> (10 + PSS_SHIFT)) : 0);

 	if (vma->vm_flags & VM_NONLINEAR)
 		seq_printf(m, "Nonlinear:      %8lu kB\n",
 				mss.nonlinear >> 10);

 	show_smap_vma_flags(m, vma);
-
-	if (m->count < m->size) /* vma is copied successfully */
-		m->version = (vma != get_gate_vma(task->mm))
-			? vma->vm_start : 0;
+	m_cache_vma(m, vma);
 	return 0;
 }

 static int show_pid_smap(struct seq_file *m, void *v)
 {
 	return show_smap(m, v, 1);
 }

--- 25 unchanged lines hidden ---

 {
 	return do_maps_open(inode, file, &proc_tid_smaps_op);
 }

 const struct file_operations proc_pid_smaps_operations = {
 	.open		= pid_smaps_open,
 	.read		= seq_read,
 	.llseek		= seq_lseek,
-	.release	= seq_release_private,
+	.release	= proc_map_release,
 };

 const struct file_operations proc_tid_smaps_operations = {
 	.open		= tid_smaps_open,
 	.read		= seq_read,
 	.llseek		= seq_lseek,
-	.release	= seq_release_private,
+	.release	= proc_map_release,
 };
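The truncated PSS comment above carries the key arithmetic: each resident page contributes its size divided by the number of processes mapping it, accumulated in PSS_SHIFT fixed point (as the hidden smaps_pte_range code does for mss.pss). The comment's own example, worked through (illustrative helper, not part of the file):

/* Illustrative only: 1000 private pages + 1000 pages shared two ways. */
static unsigned long long pss_example(void)
{
	unsigned long long pss = 0;

	pss += 1000ULL * (PAGE_SIZE << PSS_SHIFT);		/* mapcount == 1 */
	pss += 1000ULL * ((PAGE_SIZE << PSS_SHIFT) / 2);	/* mapcount == 2 */

	return pss >> PSS_SHIFT;	/* 1500 pages' worth of bytes */
}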

 /*
  * We do not want to have constant page-shift bits sitting in
  * pagemap entries and are about to reuse them some time soon.
  *
  * Here's the "migration strategy":
  * 1. when the system boots these bits remain what they are,

--- 123 unchanged lines hidden ---

 		.type = type,
 	};
 	struct mm_walk clear_refs_walk = {
 		.pmd_entry = clear_refs_pte_range,
 		.mm = mm,
 		.private = &cp,
 	};
 	down_read(&mm->mmap_sem);
-	if (type == CLEAR_REFS_SOFT_DIRTY)
+	if (type == CLEAR_REFS_SOFT_DIRTY) {
+		for (vma = mm->mmap; vma; vma = vma->vm_next) {
+			if (!(vma->vm_flags & VM_SOFTDIRTY))
+				continue;
+			up_read(&mm->mmap_sem);
+			down_write(&mm->mmap_sem);
+			for (vma = mm->mmap; vma; vma = vma->vm_next) {
+				vma->vm_flags &= ~VM_SOFTDIRTY;
+				vma_set_page_prot(vma);
+			}
+			downgrade_write(&mm->mmap_sem);
+			break;
+		}
 		mmu_notifier_invalidate_range_start(mm, 0, -1);
+	}
 	for (vma = mm->mmap; vma; vma = vma->vm_next) {
 		cp.vma = vma;
 		if (is_vm_hugetlb_page(vma))
 			continue;
 		/*
 		 * Writing 1 to /proc/pid/clear_refs affects all pages.
 		 *
 		 * Writing 2 to /proc/pid/clear_refs only affects
 		 * Anonymous pages.
 		 *
 		 * Writing 3 to /proc/pid/clear_refs only affects file
 		 * mapped pages.
 		 *
 		 * Writing 4 to /proc/pid/clear_refs affects all pages.
 		 */
 		if (type == CLEAR_REFS_ANON && vma->vm_file)
 			continue;
 		if (type == CLEAR_REFS_MAPPED && !vma->vm_file)
 			continue;
-		if (type == CLEAR_REFS_SOFT_DIRTY) {
-			if (vma->vm_flags & VM_SOFTDIRTY)
-				vma->vm_flags &= ~VM_SOFTDIRTY;
-		}
 		walk_page_range(vma->vm_start, vma->vm_end,
 				&clear_refs_walk);
 	}
 	if (type == CLEAR_REFS_SOFT_DIRTY)
 		mmu_notifier_invalidate_range_end(mm, 0, -1);
 	flush_tlb_mm(mm);
 	up_read(&mm->mmap_sem);
 	mmput(mm);

--- 159 unchanged lines hidden ---

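The CLEAR_REFS_SOFT_DIRTY path above is driven from userspace by writing "4" to /proc/PID/clear_refs and then reading bit 55 of the corresponding /proc/PID/pagemap entry, per Documentation/vm/soft-dirty.txt. A minimal demo (userspace, kernels built with CONFIG_MEM_SOFT_DIRTY; error handling trimmed):

#include <fcntl.h>
#include <stdint.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	static char page[4096] __attribute__((aligned(4096)));
	uint64_t ent = 0;
	int cr = open("/proc/self/clear_refs", O_WRONLY);
	int pm = open("/proc/self/pagemap", O_RDONLY);

	if (cr < 0 || pm < 0)
		return 1;
	write(cr, "4", 1);			/* clear soft-dirty everywhere */
	page[0] = 1;				/* dirty one page again */
	pread(pm, &ent, sizeof(ent),
	      ((uintptr_t)page / 4096) * 8);	/* one u64 entry per page */
	printf("soft-dirty (bit 55) = %d\n", (int)((ent >> 55) & 1));
	return 0;
}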
 static int pagemap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
 			     struct mm_walk *walk)
 {
 	struct vm_area_struct *vma;
 	struct pagemapread *pm = walk->private;
 	spinlock_t *ptl;
 	pte_t *pte;
 	int err = 0;
-	pagemap_entry_t pme = make_pme(PM_NOT_PRESENT(pm->v2));

 	/* find the first VMA at or above 'addr' */
 	vma = find_vma(walk->mm, addr);
 	if (vma && pmd_trans_huge_lock(pmd, vma, &ptl) == 1) {
 		int pmd_flags2;

 		if ((vma->vm_flags & VM_SOFTDIRTY) || pmd_soft_dirty(*pmd))
 			pmd_flags2 = __PM_SOFT_DIRTY;
 		else
 			pmd_flags2 = 0;

 		for (; addr != end; addr += PAGE_SIZE) {
 			unsigned long offset;
+			pagemap_entry_t pme;

 			offset = (addr & ~PAGEMAP_WALK_MASK) >>
 					PAGE_SHIFT;
 			thp_pmd_to_pagemap_entry(&pme, pm, *pmd, offset, pmd_flags2);
 			err = add_to_pagemap(addr, &pme, pm);
 			if (err)
 				break;
 		}
 		spin_unlock(ptl);
 		return err;
 	}

 	if (pmd_trans_unstable(pmd))
 		return 0;
-	for (; addr != end; addr += PAGE_SIZE) {
-		int flags2;
-
-		/* check to see if we've left 'vma' behind
-		 * and need a new, higher one */
-		if (vma && (addr >= vma->vm_end)) {
-			vma = find_vma(walk->mm, addr);
-			if (vma && (vma->vm_flags & VM_SOFTDIRTY))
-				flags2 = __PM_SOFT_DIRTY;
-			else
-				flags2 = 0;
-			pme = make_pme(PM_NOT_PRESENT(pm->v2) | PM_STATUS2(pm->v2, flags2));
-		}
-
-		/* check that 'vma' actually covers this address,
-		 * and that it isn't a huge page vma */
-		if (vma && (vma->vm_start <= addr) &&
-		    !is_vm_hugetlb_page(vma)) {
+	while (1) {
+		/* End of address space hole, which we mark as non-present. */
+		unsigned long hole_end;
+
+		if (vma)
+			hole_end = min(end, vma->vm_start);
+		else
+			hole_end = end;
+
+		for (; addr < hole_end; addr += PAGE_SIZE) {
+			pagemap_entry_t pme = make_pme(PM_NOT_PRESENT(pm->v2));
+
+			err = add_to_pagemap(addr, &pme, pm);
+			if (err)
+				return err;
+		}
+
+		if (!vma || vma->vm_start >= end)
+			break;
+		/*
+		 * We can't possibly be in a hugetlb VMA. In general,
+		 * for a mm_walk with a pmd_entry and a hugetlb_entry,
+		 * the pmd_entry can only be called on addresses in a
+		 * hugetlb if the walk starts in a non-hugetlb VMA and
+		 * spans a hugepage VMA. Since pagemap_read walks are
+		 * PMD-sized and PMD-aligned, this will never be true.
+		 */
+		BUG_ON(is_vm_hugetlb_page(vma));
+
+		/* Addresses in the VMA. */
+		for (; addr < min(end, vma->vm_end); addr += PAGE_SIZE) {
+			pagemap_entry_t pme;
+
 			pte = pte_offset_map(pmd, addr);
 			pte_to_pagemap_entry(&pme, pm, vma, addr, *pte);
-			/* unmap before userspace copy */
 			pte_unmap(pte);
+			err = add_to_pagemap(addr, &pme, pm);
+			if (err)
+				return err;
 		}
-		err = add_to_pagemap(addr, &pme, pm);
-		if (err)
-			return err;
+
+		if (addr == end)
+			break;
+
+		vma = find_vma(walk->mm, addr);
 	}

 	cond_resched();

 	return err;
 }

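With the rewrite above, the gap between two VMAs is emitted explicitly as PM_NOT_PRESENT entries instead of reusing a pme value computed for an earlier address. This is observable from userspace: a pagemap entry read from a deliberately punched hole has the present bit (bit 63) clear (demo; error handling trimmed):

#include <fcntl.h>
#include <stdint.h>
#include <stdio.h>
#include <sys/mman.h>
#include <unistd.h>

int main(void)
{
	long psz = sysconf(_SC_PAGESIZE);
	char *p = mmap(NULL, 3 * psz, PROT_READ | PROT_WRITE,
		       MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
	uint64_t ent = 0;
	int pm = open("/proc/self/pagemap", O_RDONLY);

	if (p == MAP_FAILED || pm < 0)
		return 1;
	p[0] = p[2 * psz] = 1;			/* fault in the outer pages */
	munmap(p + psz, psz);			/* one-page hole in the middle */
	pread(pm, &ent, sizeof(ent),
	      ((uintptr_t)(p + psz) / psz) * 8);
	printf("hole entry present (bit 63) = %d\n", (int)(ent >> 63));
	return 0;
}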
 #ifdef CONFIG_HUGETLB_PAGE

--- 316 unchanged lines hidden ---

  */
 static int show_numa_map(struct seq_file *m, void *v, int is_pid)
 {
 	struct numa_maps_private *numa_priv = m->private;
 	struct proc_maps_private *proc_priv = &numa_priv->proc_maps;
 	struct vm_area_struct *vma = v;
 	struct numa_maps *md = &numa_priv->md;
 	struct file *file = vma->vm_file;
-	struct task_struct *task = proc_priv->task;
 	struct mm_struct *mm = vma->vm_mm;
 	struct mm_walk walk = {};
 	struct mempolicy *pol;
 	char buffer[64];
 	int nid;

 	if (!mm)
 		return 0;

 	/* Ensure we start with an empty set of numa_maps statistics. */
 	memset(md, 0, sizeof(*md));

 	md->vma = vma;

 	walk.hugetlb_entry = gather_hugetbl_stats;
 	walk.pmd_entry = gather_pte_stats;
 	walk.private = md;
 	walk.mm = mm;

-	pol = get_vma_policy(task, vma, vma->vm_start);
-	mpol_to_str(buffer, sizeof(buffer), pol);
-	mpol_cond_put(pol);
+	pol = __get_vma_policy(vma, vma->vm_start);
+	if (pol) {
+		mpol_to_str(buffer, sizeof(buffer), pol);
+		mpol_cond_put(pol);
+	} else {
+		mpol_to_str(buffer, sizeof(buffer), proc_priv->task_mempolicy);
+	}

 	seq_printf(m, "%08lx %s", vma->vm_start, buffer);

 	if (file) {
 		seq_puts(m, " file=");
 		seq_path(m, &file->f_path, "\n\t= ");
 	} else if (vma->vm_start <= mm->brk && vma->vm_end >= mm->start_brk) {
 		seq_puts(m, " heap");
 	} else {
-		pid_t tid = vm_is_stack(task, vma, is_pid);
+		pid_t tid = pid_of_stack(proc_priv, vma, is_pid);
 		if (tid != 0) {
 			/*
 			 * Thread stack in /proc/PID/task/TID/maps or
 			 * the main process stack.
 			 */
 			if (!is_pid || (vma->vm_start <= mm->start_stack &&
 			    vma->vm_end >= mm->start_stack))
 				seq_puts(m, " stack");

--- 31 unchanged lines hidden ---

 	if (md->writeback)
 		seq_printf(m, " writeback=%lu", md->writeback);

 	for_each_node_state(nid, N_MEMORY)
 		if (md->node[nid])
 			seq_printf(m, " N%d=%lu", nid, md->node[nid]);
 out:
 	seq_putc(m, '\n');
-
-	if (m->count < m->size)
-		m->version = (vma != proc_priv->tail_vma) ? vma->vm_start : 0;
+	m_cache_vma(m, vma);
 	return 0;
 }

 static int show_pid_numa_map(struct seq_file *m, void *v)
 {
 	return show_numa_map(m, v, 1);
 }

--- 14 unchanged lines hidden ---

 	.next	= m_next,
 	.stop	= m_stop,
 	.show	= show_tid_numa_map,
 };

 static int numa_maps_open(struct inode *inode, struct file *file,
 			const struct seq_operations *ops)
 {
-	struct numa_maps_private *priv;
-	int ret = -ENOMEM;
-	priv = kzalloc(sizeof(*priv), GFP_KERNEL);
-	if (priv) {
-		priv->proc_maps.pid = proc_pid(inode);
-		ret = seq_open(file, ops);
-		if (!ret) {
-			struct seq_file *m = file->private_data;
-			m->private = priv;
-		} else {
-			kfree(priv);
-		}
-	}
-	return ret;
+	return proc_maps_open(inode, file, ops,
+				sizeof(struct numa_maps_private));
 }

 static int pid_numa_maps_open(struct inode *inode, struct file *file)
 {
 	return numa_maps_open(inode, file, &proc_pid_numa_maps_op);
 }

 static int tid_numa_maps_open(struct inode *inode, struct file *file)
 {
 	return numa_maps_open(inode, file, &proc_tid_numa_maps_op);
 }

 const struct file_operations proc_pid_numa_maps_operations = {
 	.open		= pid_numa_maps_open,
 	.read		= seq_read,
 	.llseek		= seq_lseek,
-	.release	= seq_release_private,
+	.release	= proc_map_release,
 };

 const struct file_operations proc_tid_numa_maps_operations = {
 	.open		= tid_numa_maps_open,
 	.read		= seq_read,
 	.llseek		= seq_lseek,
-	.release	= seq_release_private,
+	.release	= proc_map_release,
 };
 #endif /* CONFIG_NUMA */