mempolicy.c: d498471133ff1f9586a06820beaeebc575fe2814 (old) vs. 38e35860dbe6197a4b42eb6e8b47da940b7695dd (new)
 /*
  * Simple NUMA memory policy for the Linux kernel.
  *
  * Copyright 2003,2004 Andi Kleen, SuSE Labs.
  * (C) Copyright 2005 Christoph Lameter, Silicon Graphics, Inc.
  * Subject to the GNU Public License, version 2.
  *
  * NUMA policy allows the user to give hints in which node(s) memory should
--- 74 unchanged lines hidden ---
 #include <linux/init.h>
 #include <linux/compat.h>
 #include <linux/mempolicy.h>
 #include <linux/swap.h>

 #include <asm/tlbflush.h>
 #include <asm/uaccess.h>

-/* Internal MPOL_MF_xxx flags */
+/* Internal flags */
 #define MPOL_MF_DISCONTIG_OK (MPOL_MF_INTERNAL << 0)	/* Skip checks for continuous vmas */
+#define MPOL_MF_INVERT (MPOL_MF_INTERNAL << 1)		/* Invert check for nodemask */

 static kmem_cache_t *policy_cache;
 static kmem_cache_t *sn_cache;

 #define PDprintk(fmt...)

 /* Highest zone. An specific allocation for a zone below that is not
    policied. */
--- 121 unchanged lines hidden ---
                 /*
                  * If the isolate attempt was not successful then we just
                  * encountered an unswappable page. Something must be wrong.
                  */
                 WARN_ON(rc == 0);
         }
 }

-/* Ensure all existing pages follow the policy. */
+/* Scan through pages checking if pages follow certain conditions. */
 static int check_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
                 unsigned long addr, unsigned long end,
                 const nodemask_t *nodes, unsigned long flags,
-                struct list_head *pagelist)
+                void *private)
 {
         pte_t *orig_pte;
         pte_t *pte;
         spinlock_t *ptl;

         orig_pte = pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl);
         do {
                 struct page *page;
                 unsigned int nid;

                 if (!pte_present(*pte))
                         continue;
                 page = vm_normal_page(vma, addr, *pte);
                 if (!page)
                         continue;
                 nid = page_to_nid(page);
-                if (!node_isset(nid, *nodes)) {
-                        if (pagelist)
-                                migrate_page_add(vma, page, pagelist, flags);
-                        else
-                                break;
-                }
+                if (node_isset(nid, *nodes) == !!(flags & MPOL_MF_INVERT))
+                        continue;
+
+                if (flags & (MPOL_MF_MOVE | MPOL_MF_MOVE_ALL))
+                        migrate_page_add(vma, page, private, flags);
+                else
+                        break;
         } while (pte++, addr += PAGE_SIZE, addr != end);
         pte_unmap_unlock(orig_pte, ptl);
         return addr != end;
 }

 static inline int check_pmd_range(struct vm_area_struct *vma, pud_t *pud,
                 unsigned long addr, unsigned long end,
                 const nodemask_t *nodes, unsigned long flags,
-                struct list_head *pagelist)
+                void *private)
 {
         pmd_t *pmd;
         unsigned long next;

         pmd = pmd_offset(pud, addr);
         do {
                 next = pmd_addr_end(addr, end);
                 if (pmd_none_or_clear_bad(pmd))
                         continue;
                 if (check_pte_range(vma, pmd, addr, next, nodes,
-                                    flags, pagelist))
+                                    flags, private))
                         return -EIO;
         } while (pmd++, addr = next, addr != end);
         return 0;
 }

 static inline int check_pud_range(struct vm_area_struct *vma, pgd_t *pgd,
                 unsigned long addr, unsigned long end,
                 const nodemask_t *nodes, unsigned long flags,
-                struct list_head *pagelist)
+                void *private)
 {
         pud_t *pud;
         unsigned long next;

         pud = pud_offset(pgd, addr);
         do {
                 next = pud_addr_end(addr, end);
                 if (pud_none_or_clear_bad(pud))
                         continue;
                 if (check_pmd_range(vma, pud, addr, next, nodes,
-                                    flags, pagelist))
+                                    flags, private))
                         return -EIO;
         } while (pud++, addr = next, addr != end);
         return 0;
 }

 static inline int check_pgd_range(struct vm_area_struct *vma,
                 unsigned long addr, unsigned long end,
                 const nodemask_t *nodes, unsigned long flags,
-                struct list_head *pagelist)
+                void *private)
 {
         pgd_t *pgd;
         unsigned long next;

         pgd = pgd_offset(vma->vm_mm, addr);
         do {
                 next = pgd_addr_end(addr, end);
                 if (pgd_none_or_clear_bad(pgd))
                         continue;
                 if (check_pud_range(vma, pgd, addr, next, nodes,
-                                    flags, pagelist))
+                                    flags, private))
                         return -EIO;
         } while (pgd++, addr = next, addr != end);
         return 0;
 }

 /* Check if a vma is migratable */
 static inline int vma_migratable(struct vm_area_struct *vma)
 {
--- 5 unchanged lines hidden ---

 /*
  * Check if all pages in a range are on a set of nodes.
  * If pagelist != NULL then isolate pages from the LRU and
  * put them on the pagelist.
  */
 static struct vm_area_struct *
 check_range(struct mm_struct *mm, unsigned long start, unsigned long end,
-                const nodemask_t *nodes, unsigned long flags,
-                struct list_head *pagelist)
+                const nodemask_t *nodes, unsigned long flags, void *private)
 {
         int err;
         struct vm_area_struct *first, *vma, *prev;

         first = find_vma(mm, start);
         if (!first)
                 return ERR_PTR(-EFAULT);
         prev = NULL;
--- 10 unchanged lines hidden ---
                     vma_migratable(vma)))) {
                         unsigned long endvma = vma->vm_end;

                         if (endvma > end)
                                 endvma = end;
                         if (vma->vm_start > start)
                                 start = vma->vm_start;
                         err = check_pgd_range(vma, start, endvma, nodes,
-                                                flags, pagelist);
+                                                flags, private);
                         if (err) {
                                 first = ERR_PTR(err);
                                 break;
                         }
                 }
                 prev = vma;
         }
         return first;
--- 72 unchanged lines hidden ---
 {
         struct vm_area_struct *vma;
         struct mm_struct *mm = current->mm;
         struct mempolicy *new;
         unsigned long end;
         int err;
         LIST_HEAD(pagelist);

-        if ((flags & ~(unsigned long)(MPOL_MF_STRICT|MPOL_MF_MOVE|MPOL_MF_MOVE_ALL))
+        if ((flags & ~(unsigned long)(MPOL_MF_STRICT |
+                        MPOL_MF_MOVE | MPOL_MF_MOVE_ALL))
             || mode > MPOL_MAX)
                 return -EINVAL;
         if ((flags & MPOL_MF_MOVE_ALL) && !capable(CAP_SYS_RESOURCE))
                 return -EPERM;

         if (start & ~PAGE_MASK)
                 return -EINVAL;

--- 21 unchanged lines hidden ---
          */
         if (!new)
                 flags |= MPOL_MF_DISCONTIG_OK;

         PDprintk("mbind %lx-%lx mode:%ld nodes:%lx\n",start,start+len,
                  mode,nodes_addr(nodes)[0]);

         down_write(&mm->mmap_sem);
-        vma = check_range(mm, start, end, nmask, flags,
-              (flags & (MPOL_MF_MOVE | MPOL_MF_MOVE_ALL)) ? &pagelist : NULL);
+        vma = check_range(mm, start, end, nmask,
+                          flags | MPOL_MF_INVERT, &pagelist);
+
         err = PTR_ERR(vma);
         if (!IS_ERR(vma)) {
                 int nr_failed = 0;

                 err = mbind_range(vma, start, end, new);
                 if (!list_empty(&pagelist))
                         nr_failed = swap_pages(&pagelist);

--- 138 unchanged lines hidden ---
 int do_migrate_pages(struct mm_struct *mm,
         const nodemask_t *from_nodes, const nodemask_t *to_nodes, int flags)
 {
         LIST_HEAD(pagelist);
         int count = 0;
         nodemask_t nodes;

         nodes_andnot(nodes, *from_nodes, *to_nodes);
-        nodes_complement(nodes, nodes);

         down_read(&mm->mmap_sem);
         check_range(mm, mm->mmap->vm_start, TASK_SIZE, &nodes,
                         flags | MPOL_MF_DISCONTIG_OK, &pagelist);

         if (!list_empty(&pagelist)) {
                 count = swap_pages(&pagelist);
                 putback_lru_pages(&pagelist);
--- 803 unchanged lines hidden ---
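The behavioral core of this change is the new skip test in check_pte_range(): node_isset(nid, *nodes) == !!(flags & MPOL_MF_INVERT). do_mbind() now passes MPOL_MF_INVERT, so the scan acts on pages whose node is not in the policy nodemask, while do_migrate_pages() leaves the flag clear and acts on pages that are on the source nodes, which is why its nodes_complement() call could be dropped. Below is a minimal userspace sketch of that predicate only, under stated assumptions: the MPOL_MF_INVERT value, the plain bit mask standing in for nodemask_t, and the node_isset()/skip_page() helpers are illustrative, not the kernel definitions.

#include <stdio.h>

/* Illustrative stand-in; the real flag value and nodemask_t live in kernel headers. */
#define MPOL_MF_INVERT 0x1

/* Simplified nodemask: one bit per node, so node_isset() becomes a bit test. */
static int node_isset(unsigned int nid, unsigned long nodes)
{
        return (nodes >> nid) & 1;
}

/*
 * Mirrors the skip test added to check_pte_range(): returns 1 when the page
 * is left alone, 0 when it is a candidate for migration (or a strict-check
 * failure).
 */
static int skip_page(unsigned int nid, unsigned long nodes, unsigned long flags)
{
        return node_isset(nid, nodes) == !!(flags & MPOL_MF_INVERT);
}

int main(void)
{
        unsigned long nodes = 0x1;      /* nodemask containing only node 0 */

        /* mbind()-style scan: INVERT set, pages outside the mask are acted on. */
        printf("invert: node0 skipped=%d, node1 skipped=%d\n",
               skip_page(0, nodes, MPOL_MF_INVERT),
               skip_page(1, nodes, MPOL_MF_INVERT));

        /* migrate_pages()-style scan: flag clear, pages inside the mask are acted on. */
        printf("plain:  node0 skipped=%d, node1 skipped=%d\n",
               skip_page(0, nodes, 0),
               skip_page(1, nodes, 0));
        return 0;
}

Compiled and run, it reports that with the flag set only the page off node 0 is a candidate, and with the flag clear only the page on node 0 is, matching the mbind() versus migrate_pages() call sites in the diff above.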