H A D | mempolicy.c | diff e26a51148f3ebd859bca8bf2e0f212839b447f62 Wed Dec 28 17:57:11 CST 2011 KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com> mm/mempolicy.c: refix mbind_range() vma issue
commit 8aacc9f550 ("mm/mempolicy.c: fix pgoff in mbind vma merge") is the slightly incorrect fix.
Why? Think following case.
1. map 4 pages of a file at offset 0
[0123]
2. map 2 pages just after the first mapping of the same file but with page offset 2
[0123][23]
3. mbind() 2 pages from the first mapping at offset 2. mbind_range() should treat new vma is,
[0123][23] |23| mbind vma
but it does
[0123][23] |01| mbind vma
Oops. then, it makes wrong vma merge and splitting ([01][0123] or similar).
This patch fixes it.
[testcase] test result - before the patch
case4: 126: test failed. expect '2,4', actual '2,2,2' case5: passed case6: passed case7: passed case8: passed case_n: 246: test failed. expect '4,2', actual '1,4'
------------[ cut here ]------------ kernel BUG at mm/filemap.c:135! invalid opcode: 0000 [#4] SMP DEBUG_PAGEALLOC
(snip long bug on messages)
test result - after the patch
case4: passed case5: passed case6: passed case7: passed case8: passed case_n: passed
source: mbind_vma_test.c ============================================================ #include <numaif.h> #include <numa.h> #include <sys/mman.h> #include <stdio.h> #include <unistd.h> #include <stdlib.h> #include <string.h>
static unsigned long pagesize; void* mmap_addr; struct bitmask *nmask; char buf[1024]; FILE *file; char retbuf[10240] = ""; int mapped_fd;
char *rubysrc = "ruby -e '\ pid = %d; \ vstart = 0x%llx; \ vend = 0x%llx; \ s = `pmap -q #{pid}`; \ rary = []; \ s.each_line {|line|; \ ary=line.split(\" \"); \ addr = ary[0].to_i(16); \ if(vstart <= addr && addr < vend) then \ rary.push(ary[1].to_i()/4); \ end; \ }; \ print rary.join(\",\"); \ '";
void init(void) { void* addr; char buf[128];
nmask = numa_allocate_nodemask(); numa_bitmask_setbit(nmask, 0);
pagesize = getpagesize();
sprintf(buf, "%s", "mbind_vma_XXXXXX"); mapped_fd = mkstemp(buf); if (mapped_fd == -1) perror("mkstemp "), exit(1); unlink(buf);
if (lseek(mapped_fd, pagesize*8, SEEK_SET) < 0) perror("lseek "), exit(1); if (write(mapped_fd, "\0", 1) < 0) perror("write "), exit(1);
addr = mmap(NULL, pagesize*8, PROT_NONE, MAP_SHARED, mapped_fd, 0); if (addr == MAP_FAILED) perror("mmap "), exit(1);
if (mprotect(addr+pagesize, pagesize*6, PROT_READ|PROT_WRITE) < 0) perror("mprotect "), exit(1);
mmap_addr = addr + pagesize;
/* make page populate */ memset(mmap_addr, 0, pagesize*6); }
void fin(void) { void* addr = mmap_addr - pagesize; munmap(addr, pagesize*8);
memset(buf, 0, sizeof(buf)); memset(retbuf, 0, sizeof(retbuf)); }
void mem_bind(int index, int len) { int err;
err = mbind(mmap_addr+pagesize*index, pagesize*len, MPOL_BIND, nmask->maskp, nmask->size, 0); if (err) perror("mbind "), exit(err); }
void mem_interleave(int index, int len) { int err;
err = mbind(mmap_addr+pagesize*index, pagesize*len, MPOL_INTERLEAVE, nmask->maskp, nmask->size, 0); if (err) perror("mbind "), exit(err); }
void mem_unbind(int index, int len) { int err;
err = mbind(mmap_addr+pagesize*index, pagesize*len, MPOL_DEFAULT, NULL, 0, 0); if (err) perror("mbind "), exit(err); }
void Assert(char *expected, char *value, char *name, int line) { if (strcmp(expected, value) == 0) { fprintf(stderr, "%s: passed\n", name); return; } else { fprintf(stderr, "%s: %d: test failed. expect '%s', actual '%s'\n", name, line, expected, value); // exit(1); } }
/* AAAA PPPPPPNNNNNN might become PPNNNNNNNNNN case 4 below */ void case4(void) { init(); sprintf(buf, rubysrc, getpid(), mmap_addr, mmap_addr+pagesize*6);
mem_bind(0, 4); mem_unbind(2, 2);
file = popen(buf, "r"); fread(retbuf, sizeof(retbuf), 1, file); Assert("2,4", retbuf, "case4", __LINE__);
fin(); }
/* AAAA PPPPPPNNNNNN might become PPPPPPPPPPNN case 5 below */ void case5(void) { init(); sprintf(buf, rubysrc, getpid(), mmap_addr, mmap_addr+pagesize*6);
mem_bind(0, 2); mem_bind(2, 2);
file = popen(buf, "r"); fread(retbuf, sizeof(retbuf), 1, file); Assert("4,2", retbuf, "case5", __LINE__);
fin(); }
/* AAAA PPPPNNNNXXXX might become PPPPPPPPPPPP 6 */ void case6(void) { init(); sprintf(buf, rubysrc, getpid(), mmap_addr, mmap_addr+pagesize*6);
mem_bind(0, 2); mem_bind(4, 2); mem_bind(2, 2);
file = popen(buf, "r"); fread(retbuf, sizeof(retbuf), 1, file); Assert("6", retbuf, "case6", __LINE__);
fin(); }
/* AAAA PPPPNNNNXXXX might become PPPPPPPPXXXX 7 */ void case7(void) { init(); sprintf(buf, rubysrc, getpid(), mmap_addr, mmap_addr+pagesize*6);
mem_bind(0, 2); mem_interleave(4, 2); mem_bind(2, 2);
file = popen(buf, "r"); fread(retbuf, sizeof(retbuf), 1, file); Assert("4,2", retbuf, "case7", __LINE__);
fin(); }
/* AAAA PPPPNNNNXXXX might become PPPPNNNNNNNN 8 */ void case8(void) { init(); sprintf(buf, rubysrc, getpid(), mmap_addr, mmap_addr+pagesize*6);
mem_bind(0, 2); mem_interleave(4, 2); mem_interleave(2, 2);
file = popen(buf, "r"); fread(retbuf, sizeof(retbuf), 1, file); Assert("2,4", retbuf, "case8", __LINE__);
fin(); }
void case_n(void) { init(); sprintf(buf, rubysrc, getpid(), mmap_addr, mmap_addr+pagesize*6);
/* make redundunt mappings [0][1234][34][7] */ mmap(mmap_addr + pagesize*4, pagesize*2, PROT_READ|PROT_WRITE, MAP_FIXED|MAP_SHARED, mapped_fd, pagesize*3);
/* Expect to do nothing. */ mem_unbind(2, 2);
file = popen(buf, "r"); fread(retbuf, sizeof(retbuf), 1, file); Assert("4,2", retbuf, "case_n", __LINE__);
fin(); }
int main(int argc, char** argv) { case4(); case5(); case6(); case7(); case8(); case_n();
return 0; } =============================================================
Signed-off-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com> Acked-by: Johannes Weiner <hannes@cmpxchg.org> Cc: Minchan Kim <minchan.kim@gmail.com> Cc: Caspar Zhang <caspar@casparzhang.com> Cc: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com> Cc: Christoph Lameter <cl@linux.com> Cc: Hugh Dickins <hugh.dickins@tiscali.co.uk> Cc: Mel Gorman <mel@csn.ul.ie> Cc: Lee Schermerhorn <lee.schermerhorn@hp.com> Cc: <stable@vger.kernel.org> [3.1.x] Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
|