memory-failure.c (271ecc5253e2b317d729d366560789cd7f93836c) | memory-failure.c (1170532bb49f9468aedabdc1d5a560e2521a2bcc) |
---|---|
1/* 2 * Copyright (C) 2008, 2009 Intel Corporation 3 * Authors: Andi Kleen, Fengguang Wu 4 * 5 * This software may be redistributed and/or modified under the terms of 6 * the GNU General Public License ("GPL") version 2 only as published by the 7 * Free Software Foundation. 8 * --- 170 unchanged lines hidden (view full) --- 179 * ``action required'' if error happened in current execution context 180 */ 181static int kill_proc(struct task_struct *t, unsigned long addr, int trapno, 182 unsigned long pfn, struct page *page, int flags) 183{ 184 struct siginfo si; 185 int ret; 186 | 1/* 2 * Copyright (C) 2008, 2009 Intel Corporation 3 * Authors: Andi Kleen, Fengguang Wu 4 * 5 * This software may be redistributed and/or modified under the terms of 6 * the GNU General Public License ("GPL") version 2 only as published by the 7 * Free Software Foundation. 8 * --- 170 unchanged lines hidden (view full) --- 179 * ``action required'' if error happened in current execution context 180 */ 181static int kill_proc(struct task_struct *t, unsigned long addr, int trapno, 182 unsigned long pfn, struct page *page, int flags) 183{ 184 struct siginfo si; 185 int ret; 186 |
187 printk(KERN_ERR 188 "MCE %#lx: Killing %s:%d due to hardware memory corruption\n", 189 pfn, t->comm, t->pid); | 187 pr_err("MCE %#lx: Killing %s:%d due to hardware memory corruption\n", 188 pfn, t->comm, t->pid); |
190 si.si_signo = SIGBUS; 191 si.si_errno = 0; 192 si.si_addr = (void *)addr; 193#ifdef __ARCH_SI_TRAPNO 194 si.si_trapno = trapno; 195#endif 196 si.si_addr_lsb = compound_order(compound_head(page)) + PAGE_SHIFT; 197 --- 6 unchanged lines hidden (view full) --- 204 * can be temporarily blocked. 205 * This could cause a loop when the user sets SIGBUS 206 * to SIG_IGN, but hopefully no one will do that? 207 */ 208 si.si_code = BUS_MCEERR_AO; 209 ret = send_sig_info(SIGBUS, &si, t); /* synchronous? */ 210 } 211 if (ret < 0) | 189 si.si_signo = SIGBUS; 190 si.si_errno = 0; 191 si.si_addr = (void *)addr; 192#ifdef __ARCH_SI_TRAPNO 193 si.si_trapno = trapno; 194#endif 195 si.si_addr_lsb = compound_order(compound_head(page)) + PAGE_SHIFT; 196 --- 6 unchanged lines hidden (view full) --- 203 * can be temporarily blocked. 204 * This could cause a loop when the user sets SIGBUS 205 * to SIG_IGN, but hopefully no one will do that? 206 */ 207 si.si_code = BUS_MCEERR_AO; 208 ret = send_sig_info(SIGBUS, &si, t); /* synchronous? */ 209 } 210 if (ret < 0) |
212 printk(KERN_INFO "MCE: Error sending signal to %s:%d: %d\n", 213 t->comm, t->pid, ret); | 211 pr_info("MCE: Error sending signal to %s:%d: %d\n", 212 t->comm, t->pid, ret); |
214 return ret; 215} 216 217/* 218 * When a unknown page type is encountered drain as many buffers as possible 219 * in the hope to turn the page into a LRU or free page, which we can handle. 220 */ 221void shake_page(struct page *p, int access) --- 63 unchanged lines hidden (view full) --- 285 struct to_kill *tk; 286 287 if (*tkc) { 288 tk = *tkc; 289 *tkc = NULL; 290 } else { 291 tk = kmalloc(sizeof(struct to_kill), GFP_ATOMIC); 292 if (!tk) { | 213 return ret; 214} 215 216/* 217 * When a unknown page type is encountered drain as many buffers as possible 218 * in the hope to turn the page into a LRU or free page, which we can handle. 219 */ 220void shake_page(struct page *p, int access) --- 63 unchanged lines hidden (view full) --- 284 struct to_kill *tk; 285 286 if (*tkc) { 287 tk = *tkc; 288 *tkc = NULL; 289 } else { 290 tk = kmalloc(sizeof(struct to_kill), GFP_ATOMIC); 291 if (!tk) { |
293 printk(KERN_ERR 294 "MCE: Out of memory while machine check handling\n"); | 292 pr_err("MCE: Out of memory while machine check handling\n"); |
295 return; 296 } 297 } 298 tk->addr = page_address_in_vma(p, vma); 299 tk->addr_valid = 1; 300 301 /* 302 * In theory we don't have to kill when the page was --- 28 unchanged lines hidden (view full) --- 331 list_for_each_entry_safe (tk, next, to_kill, nd) { 332 if (forcekill) { 333 /* 334 * In case something went wrong with munmapping 335 * make sure the process doesn't catch the 336 * signal and then access the memory. Just kill it. 337 */ 338 if (fail || tk->addr_valid == 0) { | 293 return; 294 } 295 } 296 tk->addr = page_address_in_vma(p, vma); 297 tk->addr_valid = 1; 298 299 /* 300 * In theory we don't have to kill when the page was --- 28 unchanged lines hidden (view full) --- 329 list_for_each_entry_safe (tk, next, to_kill, nd) { 330 if (forcekill) { 331 /* 332 * In case something went wrong with munmapping 333 * make sure the process doesn't catch the 334 * signal and then access the memory. Just kill it. 335 */ 336 if (fail || tk->addr_valid == 0) { |
339 printk(KERN_ERR 340 "MCE %#lx: forcibly killing %s:%d because of failure to unmap corrupted page\n", 341 pfn, tk->tsk->comm, tk->tsk->pid); | 337 pr_err("MCE %#lx: forcibly killing %s:%d because of failure to unmap corrupted page\n", 338 pfn, tk->tsk->comm, tk->tsk->pid); |
342 force_sig(SIGKILL, tk->tsk); 343 } 344 345 /* 346 * In theory the process could have mapped 347 * something else on the address in-between. We could 348 * check for that, but we need to tell the 349 * process anyways. 350 */ 351 else if (kill_proc(tk->tsk, tk->addr, trapno, 352 pfn, page, flags) < 0) | 339 force_sig(SIGKILL, tk->tsk); 340 } 341 342 /* 343 * In theory the process could have mapped 344 * something else on the address in-between. We could 345 * check for that, but we need to tell the 346 * process anyways. 347 */ 348 else if (kill_proc(tk->tsk, tk->addr, trapno, 349 pfn, page, flags) < 0) |
353 printk(KERN_ERR 354 "MCE %#lx: Cannot send advisory machine check signal to %s:%d\n", 355 pfn, tk->tsk->comm, tk->tsk->pid); | 350 pr_err("MCE %#lx: Cannot send advisory machine check signal to %s:%d\n", 351 pfn, tk->tsk->comm, tk->tsk->pid); |
356 } 357 put_task_struct(tk->tsk); 358 kfree(tk); 359 } 360} 361 362/* 363 * Find a dedicated thread which is supposed to handle SIGBUS(BUS_MCEERR_AO) --- 194 unchanged lines hidden (view full) --- 558 return MF_IGNORED; 559} 560 561/* 562 * Page in unknown state. Do nothing. 563 */ 564static int me_unknown(struct page *p, unsigned long pfn) 565{ | 352 } 353 put_task_struct(tk->tsk); 354 kfree(tk); 355 } 356} 357 358/* 359 * Find a dedicated thread which is supposed to handle SIGBUS(BUS_MCEERR_AO) --- 194 unchanged lines hidden (view full) --- 554 return MF_IGNORED; 555} 556 557/* 558 * Page in unknown state. Do nothing. 559 */ 560static int me_unknown(struct page *p, unsigned long pfn) 561{ |
566 printk(KERN_ERR "MCE %#lx: Unknown page state\n", pfn); | 562 pr_err("MCE %#lx: Unknown page state\n", pfn); |
567 return MF_FAILED; 568} 569 570/* 571 * Clean (or cleaned) page cache page. 572 */ 573static int me_pagecache_clean(struct page *p, unsigned long pfn) 574{ --- 28 unchanged lines hidden (view full) --- 603 /* 604 * Truncation is a bit tricky. Enable it per file system for now. 605 * 606 * Open: to take i_mutex or not for this? Right now we don't. 607 */ 608 if (mapping->a_ops->error_remove_page) { 609 err = mapping->a_ops->error_remove_page(mapping, p); 610 if (err != 0) { | 563 return MF_FAILED; 564} 565 566/* 567 * Clean (or cleaned) page cache page. 568 */ 569static int me_pagecache_clean(struct page *p, unsigned long pfn) 570{ --- 28 unchanged lines hidden (view full) --- 599 /* 600 * Truncation is a bit tricky. Enable it per file system for now. 601 * 602 * Open: to take i_mutex or not for this? Right now we don't. 603 */ 604 if (mapping->a_ops->error_remove_page) { 605 err = mapping->a_ops->error_remove_page(mapping, p); 606 if (err != 0) { |
611 printk(KERN_INFO "MCE %#lx: Failed to punch page: %d\n", 612 pfn, err); | 607 pr_info("MCE %#lx: Failed to punch page: %d\n", 608 pfn, err); |
613 } else if (page_has_private(p) && 614 !try_to_release_page(p, GFP_NOIO)) { 615 pr_info("MCE %#lx: failed to release buffers\n", pfn); 616 } else { 617 ret = MF_RECOVERED; 618 } 619 } else { 620 /* 621 * If the file system doesn't support it just invalidate 622 * This fails on dirty or anything with private pages 623 */ 624 if (invalidate_inode_page(p)) 625 ret = MF_RECOVERED; 626 else | 609 } else if (page_has_private(p) && 610 !try_to_release_page(p, GFP_NOIO)) { 611 pr_info("MCE %#lx: failed to release buffers\n", pfn); 612 } else { 613 ret = MF_RECOVERED; 614 } 615 } else { 616 /* 617 * If the file system doesn't support it just invalidate 618 * This fails on dirty or anything with private pages 619 */ 620 if (invalidate_inode_page(p)) 621 ret = MF_RECOVERED; 622 else |
627 printk(KERN_INFO "MCE %#lx: Failed to invalidate\n", 628 pfn); | 623 pr_info("MCE %#lx: Failed to invalidate\n", pfn); |
629 } 630 return ret; 631} 632 633/* 634 * Dirty pagecache page 635 * Issues: when the error hit a hole page the error is not properly 636 * propagated. --- 212 unchanged lines hidden (view full) --- 849 int count; 850 851 result = ps->action(p, pfn); 852 853 count = page_count(p) - 1; 854 if (ps->action == me_swapcache_dirty && result == MF_DELAYED) 855 count--; 856 if (count != 0) { | 624 } 625 return ret; 626} 627 628/* 629 * Dirty pagecache page 630 * Issues: when the error hit a hole page the error is not properly 631 * propagated. --- 212 unchanged lines hidden (view full) --- 844 int count; 845 846 result = ps->action(p, pfn); 847 848 count = page_count(p) - 1; 849 if (ps->action == me_swapcache_dirty && result == MF_DELAYED) 850 count--; 851 if (count != 0) { |
857 printk(KERN_ERR 858 "MCE %#lx: %s still referenced by %d users\n", | 852 pr_err("MCE %#lx: %s still referenced by %d users\n", |
859 pfn, action_page_types[ps->type], count); 860 result = MF_FAILED; 861 } 862 action_result(pfn, ps->type, result); 863 864 /* Could do more checks here if page looks ok */ 865 /* 866 * Could adjust zone counters here to correct for the missing page. --- 62 unchanged lines hidden (view full) --- 929 return SWAP_SUCCESS; 930 931 if (PageKsm(p)) { 932 pr_err("MCE %#lx: can't handle KSM pages.\n", pfn); 933 return SWAP_FAIL; 934 } 935 936 if (PageSwapCache(p)) { | 853 pfn, action_page_types[ps->type], count); 854 result = MF_FAILED; 855 } 856 action_result(pfn, ps->type, result); 857 858 /* Could do more checks here if page looks ok */ 859 /* 860 * Could adjust zone counters here to correct for the missing page. --- 62 unchanged lines hidden (view full) --- 923 return SWAP_SUCCESS; 924 925 if (PageKsm(p)) { 926 pr_err("MCE %#lx: can't handle KSM pages.\n", pfn); 927 return SWAP_FAIL; 928 } 929 930 if (PageSwapCache(p)) { |
937 printk(KERN_ERR 938 "MCE %#lx: keeping poisoned page in swap cache\n", pfn); | 931 pr_err("MCE %#lx: keeping poisoned page in swap cache\n", pfn); |
939 ttu |= TTU_IGNORE_HWPOISON; 940 } 941 942 /* 943 * Propagate the dirty bit from PTEs to struct page first, because we 944 * need this to decide if we should kill or just drop the page. 945 * XXX: the dirty test could be racy: set_page_dirty() may not always 946 * be called inside page lock (it's recommended but not enforced). 947 */ 948 mapping = page_mapping(hpage); 949 if (!(flags & MF_MUST_KILL) && !PageDirty(hpage) && mapping && 950 mapping_cap_writeback_dirty(mapping)) { 951 if (page_mkclean(hpage)) { 952 SetPageDirty(hpage); 953 } else { 954 kill = 0; 955 ttu |= TTU_IGNORE_HWPOISON; | 932 ttu |= TTU_IGNORE_HWPOISON; 933 } 934 935 /* 936 * Propagate the dirty bit from PTEs to struct page first, because we 937 * need this to decide if we should kill or just drop the page. 938 * XXX: the dirty test could be racy: set_page_dirty() may not always 939 * be called inside page lock (it's recommended but not enforced). 940 */ 941 mapping = page_mapping(hpage); 942 if (!(flags & MF_MUST_KILL) && !PageDirty(hpage) && mapping && 943 mapping_cap_writeback_dirty(mapping)) { 944 if (page_mkclean(hpage)) { 945 SetPageDirty(hpage); 946 } else { 947 kill = 0; 948 ttu |= TTU_IGNORE_HWPOISON; |
956 printk(KERN_INFO 957 "MCE %#lx: corrupted page was clean: dropped without side effects\n", | 949 pr_info("MCE %#lx: corrupted page was clean: dropped without side effects\n", |
958 pfn); 959 } 960 } 961 962 /* 963 * First collect all the processes that have the page 964 * mapped in dirty form. This has to be done before try_to_unmap, 965 * because ttu takes the rmap data structures down. 966 * 967 * Error handling: We ignore errors here because 968 * there's nothing that can be done. 969 */ 970 if (kill) 971 collect_procs(hpage, &tokill, flags & MF_ACTION_REQUIRED); 972 973 ret = try_to_unmap(hpage, ttu); 974 if (ret != SWAP_SUCCESS) | 950 pfn); 951 } 952 } 953 954 /* 955 * First collect all the processes that have the page 956 * mapped in dirty form. This has to be done before try_to_unmap, 957 * because ttu takes the rmap data structures down. 958 * 959 * Error handling: We ignore errors here because 960 * there's nothing that can be done. 961 */ 962 if (kill) 963 collect_procs(hpage, &tokill, flags & MF_ACTION_REQUIRED); 964 965 ret = try_to_unmap(hpage, ttu); 966 if (ret != SWAP_SUCCESS) |
975 printk(KERN_ERR "MCE %#lx: failed to unmap page (mapcount=%d)\n", 976 pfn, page_mapcount(hpage)); | 967 pr_err("MCE %#lx: failed to unmap page (mapcount=%d)\n", 968 pfn, page_mapcount(hpage)); |
977 978 /* 979 * Now that the dirty bit has been propagated to the 980 * struct page and all unmaps done we can decide if 981 * killing is needed or not. Only kill when the page 982 * was dirty or the process is not restartable, 983 * otherwise the tokill list is merely 984 * freed. When there was a problem unmapping earlier --- 50 unchanged lines hidden (view full) --- 1035 int res; 1036 unsigned int nr_pages; 1037 unsigned long page_flags; 1038 1039 if (!sysctl_memory_failure_recovery) 1040 panic("Memory failure from trap %d on page %lx", trapno, pfn); 1041 1042 if (!pfn_valid(pfn)) { | 969 970 /* 971 * Now that the dirty bit has been propagated to the 972 * struct page and all unmaps done we can decide if 973 * killing is needed or not. Only kill when the page 974 * was dirty or the process is not restartable, 975 * otherwise the tokill list is merely 976 * freed. When there was a problem unmapping earlier --- 50 unchanged lines hidden (view full) --- 1027 int res; 1028 unsigned int nr_pages; 1029 unsigned long page_flags; 1030 1031 if (!sysctl_memory_failure_recovery) 1032 panic("Memory failure from trap %d on page %lx", trapno, pfn); 1033 1034 if (!pfn_valid(pfn)) { |
1043 printk(KERN_ERR 1044 "MCE %#lx: memory outside kernel control\n", 1045 pfn); | 1035 pr_err("MCE %#lx: memory outside kernel control\n", pfn); |
1046 return -ENXIO; 1047 } 1048 1049 p = pfn_to_page(pfn); 1050 orig_head = hpage = compound_head(p); 1051 if (TestSetPageHWPoison(p)) { | 1036 return -ENXIO; 1037 } 1038 1039 p = pfn_to_page(pfn); 1040 orig_head = hpage = compound_head(p); 1041 if (TestSetPageHWPoison(p)) { |
1052 printk(KERN_ERR "MCE %#lx: already hardware poisoned\n", pfn); | 1042 pr_err("MCE %#lx: already hardware poisoned\n", pfn); |
1053 return 0; 1054 } 1055 1056 /* 1057 * Currently errors on hugetlbfs pages are measured in hugepage units, 1058 * so nr_pages should be 1 << compound_order. OTOH when errors are on 1059 * transparent hugepages, they are supposed to be split and error 1060 * measurement is done in normal page units. So nr_pages should be one --- 114 unchanged lines hidden (view full) --- 1175 * correctly, we save a copy of the page flags at this time. 1176 */ 1177 page_flags = p->flags; 1178 1179 /* 1180 * unpoison always clear PG_hwpoison inside page lock 1181 */ 1182 if (!PageHWPoison(p)) { | 1043 return 0; 1044 } 1045 1046 /* 1047 * Currently errors on hugetlbfs pages are measured in hugepage units, 1048 * so nr_pages should be 1 << compound_order. OTOH when errors are on 1049 * transparent hugepages, they are supposed to be split and error 1050 * measurement is done in normal page units. So nr_pages should be one --- 114 unchanged lines hidden (view full) --- 1165 * correctly, we save a copy of the page flags at this time. 1166 */ 1167 page_flags = p->flags; 1168 1169 /* 1170 * unpoison always clear PG_hwpoison inside page lock 1171 */ 1172 if (!PageHWPoison(p)) { |
1183 printk(KERN_ERR "MCE %#lx: just unpoisoned\n", pfn); | 1173 pr_err("MCE %#lx: just unpoisoned\n", pfn); |
1184 num_poisoned_pages_sub(nr_pages); 1185 unlock_page(hpage); 1186 put_hwpoison_page(hpage); 1187 return 0; 1188 } 1189 if (hwpoison_filter(p)) { 1190 if (TestClearPageHWPoison(p)) 1191 num_poisoned_pages_sub(nr_pages); --- 581 unchanged lines hidden --- | 1174 num_poisoned_pages_sub(nr_pages); 1175 unlock_page(hpage); 1176 put_hwpoison_page(hpage); 1177 return 0; 1178 } 1179 if (hwpoison_filter(p)) { 1180 if (TestClearPageHWPoison(p)) 1181 num_poisoned_pages_sub(nr_pages); --- 581 unchanged lines hidden --- |