mempolicy.c (d498471133ff1f9586a06820beaeebc575fe2814) mempolicy.c (38e35860dbe6197a4b42eb6e8b47da940b7695dd)
1/*
2 * Simple NUMA memory policy for the Linux kernel.
3 *
4 * Copyright 2003,2004 Andi Kleen, SuSE Labs.
5 * (C) Copyright 2005 Christoph Lameter, Silicon Graphics, Inc.
6 * Subject to the GNU Public License, version 2.
7 *
8 * NUMA policy allows the user to give hints in which node(s) memory should

--- 74 unchanged lines hidden (view full) ---

83#include <linux/init.h>
84#include <linux/compat.h>
85#include <linux/mempolicy.h>
86#include <linux/swap.h>
87
88#include <asm/tlbflush.h>
89#include <asm/uaccess.h>
90
1/*
2 * Simple NUMA memory policy for the Linux kernel.
3 *
4 * Copyright 2003,2004 Andi Kleen, SuSE Labs.
5 * (C) Copyright 2005 Christoph Lameter, Silicon Graphics, Inc.
6 * Subject to the GNU Public License, version 2.
7 *
8 * NUMA policy allows the user to give hints in which node(s) memory should

--- 74 unchanged lines hidden (view full) ---

83#include <linux/init.h>
84#include <linux/compat.h>
85#include <linux/mempolicy.h>
86#include <linux/swap.h>
87
88#include <asm/tlbflush.h>
89#include <asm/uaccess.h>
90
91/* Internal MPOL_MF_xxx flags */
91/* Internal flags */
92#define MPOL_MF_DISCONTIG_OK (MPOL_MF_INTERNAL << 0) /* Skip checks for contiguous vmas */
92#define MPOL_MF_DISCONTIG_OK (MPOL_MF_INTERNAL << 0) /* Skip checks for contiguous vmas */
93#define MPOL_MF_INVERT (MPOL_MF_INTERNAL << 1) /* Invert check for nodemask */
93
94static kmem_cache_t *policy_cache;
95static kmem_cache_t *sn_cache;
96
97#define PDprintk(fmt...)
98
99/* Highest zone. A specific allocation for a zone below that is not
100 policied. */

--- 121 unchanged lines hidden (view full) ---

222 /*
223 * If the isolate attempt was not successful then we just
224 * encountered an unswappable page. Something must be wrong.
225 */
226 WARN_ON(rc == 0);
227 }
228}
229
94
95static kmem_cache_t *policy_cache;
96static kmem_cache_t *sn_cache;
97
98#define PDprintk(fmt...)
99
100/* Highest zone. A specific allocation for a zone below that is not
101 policied. */

--- 121 unchanged lines hidden (view full) ---

223 /*
224 * If the isolate attempt was not successful then we just
225 * encountered an unswappable page. Something must be wrong.
226 */
227 WARN_ON(rc == 0);
228 }
229}
230
230/* Ensure all existing pages follow the policy. */
231/* Scan through pages checking if pages follow certain conditions. */
231static int check_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
232 unsigned long addr, unsigned long end,
233 const nodemask_t *nodes, unsigned long flags,
232static int check_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
233 unsigned long addr, unsigned long end,
234 const nodemask_t *nodes, unsigned long flags,
234 struct list_head *pagelist)
235 void *private)
235{
236 pte_t *orig_pte;
237 pte_t *pte;
238 spinlock_t *ptl;
239
240 orig_pte = pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl);
241 do {
242 struct page *page;
243 unsigned int nid;
244
245 if (!pte_present(*pte))
246 continue;
247 page = vm_normal_page(vma, addr, *pte);
248 if (!page)
249 continue;
250 nid = page_to_nid(page);
236{
237 pte_t *orig_pte;
238 pte_t *pte;
239 spinlock_t *ptl;
240
241 orig_pte = pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl);
242 do {
243 struct page *page;
244 unsigned int nid;
245
246 if (!pte_present(*pte))
247 continue;
248 page = vm_normal_page(vma, addr, *pte);
249 if (!page)
250 continue;
251 nid = page_to_nid(page);
251 if (!node_isset(nid, *nodes)) {
252 if (pagelist)
253 migrate_page_add(vma, page, pagelist, flags);
254 else
255 break;
256 }
252 if (node_isset(nid, *nodes) == !!(flags & MPOL_MF_INVERT))
253 continue;
254
255 if (flags & (MPOL_MF_MOVE | MPOL_MF_MOVE_ALL))
256 migrate_page_add(vma, page, private, flags);
257 else
258 break;
257 } while (pte++, addr += PAGE_SIZE, addr != end);
258 pte_unmap_unlock(orig_pte, ptl);
259 return addr != end;
260}
261
262static inline int check_pmd_range(struct vm_area_struct *vma, pud_t *pud,
263 unsigned long addr, unsigned long end,
264 const nodemask_t *nodes, unsigned long flags,
259 } while (pte++, addr += PAGE_SIZE, addr != end);
260 pte_unmap_unlock(orig_pte, ptl);
261 return addr != end;
262}
263
264static inline int check_pmd_range(struct vm_area_struct *vma, pud_t *pud,
265 unsigned long addr, unsigned long end,
266 const nodemask_t *nodes, unsigned long flags,
265 struct list_head *pagelist)
267 void *private)
266{
267 pmd_t *pmd;
268 unsigned long next;
269
270 pmd = pmd_offset(pud, addr);
271 do {
272 next = pmd_addr_end(addr, end);
273 if (pmd_none_or_clear_bad(pmd))
274 continue;
275 if (check_pte_range(vma, pmd, addr, next, nodes,
268{
269 pmd_t *pmd;
270 unsigned long next;
271
272 pmd = pmd_offset(pud, addr);
273 do {
274 next = pmd_addr_end(addr, end);
275 if (pmd_none_or_clear_bad(pmd))
276 continue;
277 if (check_pte_range(vma, pmd, addr, next, nodes,
276 flags, pagelist))
278 flags, private))
277 return -EIO;
278 } while (pmd++, addr = next, addr != end);
279 return 0;
280}
281
282static inline int check_pud_range(struct vm_area_struct *vma, pgd_t *pgd,
283 unsigned long addr, unsigned long end,
284 const nodemask_t *nodes, unsigned long flags,
279 return -EIO;
280 } while (pmd++, addr = next, addr != end);
281 return 0;
282}
283
284static inline int check_pud_range(struct vm_area_struct *vma, pgd_t *pgd,
285 unsigned long addr, unsigned long end,
286 const nodemask_t *nodes, unsigned long flags,
285 struct list_head *pagelist)
287 void *private)
286{
287 pud_t *pud;
288 unsigned long next;
289
290 pud = pud_offset(pgd, addr);
291 do {
292 next = pud_addr_end(addr, end);
293 if (pud_none_or_clear_bad(pud))
294 continue;
295 if (check_pmd_range(vma, pud, addr, next, nodes,
288{
289 pud_t *pud;
290 unsigned long next;
291
292 pud = pud_offset(pgd, addr);
293 do {
294 next = pud_addr_end(addr, end);
295 if (pud_none_or_clear_bad(pud))
296 continue;
297 if (check_pmd_range(vma, pud, addr, next, nodes,
296 flags, pagelist))
298 flags, private))
297 return -EIO;
298 } while (pud++, addr = next, addr != end);
299 return 0;
300}
301
302static inline int check_pgd_range(struct vm_area_struct *vma,
303 unsigned long addr, unsigned long end,
304 const nodemask_t *nodes, unsigned long flags,
299 return -EIO;
300 } while (pud++, addr = next, addr != end);
301 return 0;
302}
303
304static inline int check_pgd_range(struct vm_area_struct *vma,
305 unsigned long addr, unsigned long end,
306 const nodemask_t *nodes, unsigned long flags,
305 struct list_head *pagelist)
307 void *private)
306{
307 pgd_t *pgd;
308 unsigned long next;
309
310 pgd = pgd_offset(vma->vm_mm, addr);
311 do {
312 next = pgd_addr_end(addr, end);
313 if (pgd_none_or_clear_bad(pgd))
314 continue;
315 if (check_pud_range(vma, pgd, addr, next, nodes,
308{
309 pgd_t *pgd;
310 unsigned long next;
311
312 pgd = pgd_offset(vma->vm_mm, addr);
313 do {
314 next = pgd_addr_end(addr, end);
315 if (pgd_none_or_clear_bad(pgd))
316 continue;
317 if (check_pud_range(vma, pgd, addr, next, nodes,
316 flags, pagelist))
318 flags, private))
317 return -EIO;
318 } while (pgd++, addr = next, addr != end);
319 return 0;
320}
321
322/* Check if a vma is migratable */
323static inline int vma_migratable(struct vm_area_struct *vma)
324{

--- 5 unchanged lines hidden (view full) ---

330
331/*
332 * Check if all pages in a range are on a set of nodes.
333 * If pagelist != NULL then isolate pages from the LRU and
334 * put them on the pagelist.
335 */
336static struct vm_area_struct *
337check_range(struct mm_struct *mm, unsigned long start, unsigned long end,
319 return -EIO;
320 } while (pgd++, addr = next, addr != end);
321 return 0;
322}
323
324/* Check if a vma is migratable */
325static inline int vma_migratable(struct vm_area_struct *vma)
326{

--- 5 unchanged lines hidden (view full) ---

332
333/*
334 * Check if all pages in a range are on a set of nodes.
335 * If MPOL_MF_MOVE or MPOL_MF_MOVE_ALL is set in flags then isolate pages
336 * from the LRU and put them on the pagelist passed in via private.
337 */
338static struct vm_area_struct *
339check_range(struct mm_struct *mm, unsigned long start, unsigned long end,
338 const nodemask_t *nodes, unsigned long flags,
339 struct list_head *pagelist)
340 const nodemask_t *nodes, unsigned long flags, void *private)
340{
341 int err;
342 struct vm_area_struct *first, *vma, *prev;
343
344 first = find_vma(mm, start);
345 if (!first)
346 return ERR_PTR(-EFAULT);
347 prev = NULL;

--- 10 unchanged lines hidden (view full) ---

358 vma_migratable(vma)))) {
359 unsigned long endvma = vma->vm_end;
360
361 if (endvma > end)
362 endvma = end;
363 if (vma->vm_start > start)
364 start = vma->vm_start;
365 err = check_pgd_range(vma, start, endvma, nodes,
341{
342 int err;
343 struct vm_area_struct *first, *vma, *prev;
344
345 first = find_vma(mm, start);
346 if (!first)
347 return ERR_PTR(-EFAULT);
348 prev = NULL;

--- 10 unchanged lines hidden (view full) ---

359 vma_migratable(vma)))) {
360 unsigned long endvma = vma->vm_end;
361
362 if (endvma > end)
363 endvma = end;
364 if (vma->vm_start > start)
365 start = vma->vm_start;
366 err = check_pgd_range(vma, start, endvma, nodes,
366 flags, pagelist);
367 flags, private);
367 if (err) {
368 first = ERR_PTR(err);
369 break;
370 }
371 }
372 prev = vma;
373 }
374 return first;

--- 72 unchanged lines hidden (view full) ---

447{
448 struct vm_area_struct *vma;
449 struct mm_struct *mm = current->mm;
450 struct mempolicy *new;
451 unsigned long end;
452 int err;
453 LIST_HEAD(pagelist);
454
368 if (err) {
369 first = ERR_PTR(err);
370 break;
371 }
372 }
373 prev = vma;
374 }
375 return first;

--- 72 unchanged lines hidden (view full) ---

448{
449 struct vm_area_struct *vma;
450 struct mm_struct *mm = current->mm;
451 struct mempolicy *new;
452 unsigned long end;
453 int err;
454 LIST_HEAD(pagelist);
455
455 if ((flags & ~(unsigned long)(MPOL_MF_STRICT|MPOL_MF_MOVE|MPOL_MF_MOVE_ALL))
456 if ((flags & ~(unsigned long)(MPOL_MF_STRICT |
457 MPOL_MF_MOVE | MPOL_MF_MOVE_ALL))
456 || mode > MPOL_MAX)
457 return -EINVAL;
458 if ((flags & MPOL_MF_MOVE_ALL) && !capable(CAP_SYS_RESOURCE))
459 return -EPERM;
460
461 if (start & ~PAGE_MASK)
462 return -EINVAL;
463

--- 21 unchanged lines hidden (view full) ---

485 */
486 if (!new)
487 flags |= MPOL_MF_DISCONTIG_OK;
488
489 PDprintk("mbind %lx-%lx mode:%ld nodes:%lx\n",start,start+len,
490 mode,nodes_addr(nodes)[0]);
491
492 down_write(&mm->mmap_sem);
458 || mode > MPOL_MAX)
459 return -EINVAL;
460 if ((flags & MPOL_MF_MOVE_ALL) && !capable(CAP_SYS_RESOURCE))
461 return -EPERM;
462
463 if (start & ~PAGE_MASK)
464 return -EINVAL;
465

--- 21 unchanged lines hidden (view full) ---

487 */
488 if (!new)
489 flags |= MPOL_MF_DISCONTIG_OK;
490
491 PDprintk("mbind %lx-%lx mode:%ld nodes:%lx\n",start,start+len,
492 mode,nodes_addr(nodes)[0]);
493
494 down_write(&mm->mmap_sem);
493 vma = check_range(mm, start, end, nmask, flags,
494 (flags & (MPOL_MF_MOVE | MPOL_MF_MOVE_ALL)) ? &pagelist : NULL);
495 vma = check_range(mm, start, end, nmask,
496 flags | MPOL_MF_INVERT, &pagelist);
497
495 err = PTR_ERR(vma);
496 if (!IS_ERR(vma)) {
497 int nr_failed = 0;
498
499 err = mbind_range(vma, start, end, new);
500 if (!list_empty(&pagelist))
501 nr_failed = swap_pages(&pagelist);
502

--- 138 unchanged lines hidden (view full) ---

641int do_migrate_pages(struct mm_struct *mm,
642 const nodemask_t *from_nodes, const nodemask_t *to_nodes, int flags)
643{
644 LIST_HEAD(pagelist);
645 int count = 0;
646 nodemask_t nodes;
647
648 nodes_andnot(nodes, *from_nodes, *to_nodes);
498 err = PTR_ERR(vma);
499 if (!IS_ERR(vma)) {
500 int nr_failed = 0;
501
502 err = mbind_range(vma, start, end, new);
503 if (!list_empty(&pagelist))
504 nr_failed = swap_pages(&pagelist);
505

--- 138 unchanged lines hidden (view full) ---

644int do_migrate_pages(struct mm_struct *mm,
645 const nodemask_t *from_nodes, const nodemask_t *to_nodes, int flags)
646{
647 LIST_HEAD(pagelist);
648 int count = 0;
649 nodemask_t nodes;
650
651 nodes_andnot(nodes, *from_nodes, *to_nodes);
649 nodes_complement(nodes, nodes);
650
651 down_read(&mm->mmap_sem);
652 check_range(mm, mm->mmap->vm_start, TASK_SIZE, &nodes,
653 flags | MPOL_MF_DISCONTIG_OK, &pagelist);
654
655 if (!list_empty(&pagelist)) {
656 count = swap_pages(&pagelist);
657 putback_lru_pages(&pagelist);

--- 803 unchanged lines hidden ---
652
653 down_read(&mm->mmap_sem);
654 check_range(mm, mm->mmap->vm_start, TASK_SIZE, &nodes,
655 flags | MPOL_MF_DISCONTIG_OK, &pagelist);
656
657 if (!list_empty(&pagelist)) {
658 count = swap_pages(&pagelist);
659 putback_lru_pages(&pagelist);

--- 803 unchanged lines hidden ---