memory-failure.c (271ecc5253e2b317d729d366560789cd7f93836c) memory-failure.c (1170532bb49f9468aedabdc1d5a560e2521a2bcc)
1/*
2 * Copyright (C) 2008, 2009 Intel Corporation
3 * Authors: Andi Kleen, Fengguang Wu
4 *
5 * This software may be redistributed and/or modified under the terms of
6 * the GNU General Public License ("GPL") version 2 only as published by the
7 * Free Software Foundation.
8 *

--- 170 unchanged lines hidden (view full) ---

179 * ``action required'' if error happened in current execution context
180 */
181static int kill_proc(struct task_struct *t, unsigned long addr, int trapno,
182 unsigned long pfn, struct page *page, int flags)
183{
184 struct siginfo si;
185 int ret;
186
1/*
2 * Copyright (C) 2008, 2009 Intel Corporation
3 * Authors: Andi Kleen, Fengguang Wu
4 *
5 * This software may be redistributed and/or modified under the terms of
6 * the GNU General Public License ("GPL") version 2 only as published by the
7 * Free Software Foundation.
8 *

--- 170 unchanged lines hidden (view full) ---

179 * ``action required'' if error happened in current execution context
180 */
181static int kill_proc(struct task_struct *t, unsigned long addr, int trapno,
182 unsigned long pfn, struct page *page, int flags)
183{
184 struct siginfo si;
185 int ret;
186
187 printk(KERN_ERR
188 "MCE %#lx: Killing %s:%d due to hardware memory corruption\n",
189 pfn, t->comm, t->pid);
187 pr_err("MCE %#lx: Killing %s:%d due to hardware memory corruption\n",
188 pfn, t->comm, t->pid);
190 si.si_signo = SIGBUS;
191 si.si_errno = 0;
192 si.si_addr = (void *)addr;
193#ifdef __ARCH_SI_TRAPNO
194 si.si_trapno = trapno;
195#endif
196 si.si_addr_lsb = compound_order(compound_head(page)) + PAGE_SHIFT;
197

--- 6 unchanged lines hidden (view full) ---

204 * can be temporarily blocked.
205 * This could cause a loop when the user sets SIGBUS
206 * to SIG_IGN, but hopefully no one will do that?
207 */
208 si.si_code = BUS_MCEERR_AO;
209 ret = send_sig_info(SIGBUS, &si, t); /* synchronous? */
210 }
211 if (ret < 0)
189 si.si_signo = SIGBUS;
190 si.si_errno = 0;
191 si.si_addr = (void *)addr;
192#ifdef __ARCH_SI_TRAPNO
193 si.si_trapno = trapno;
194#endif
195 si.si_addr_lsb = compound_order(compound_head(page)) + PAGE_SHIFT;
196

--- 6 unchanged lines hidden (view full) ---

203 * can be temporarily blocked.
204 * This could cause a loop when the user sets SIGBUS
205 * to SIG_IGN, but hopefully no one will do that?
206 */
207 si.si_code = BUS_MCEERR_AO;
208 ret = send_sig_info(SIGBUS, &si, t); /* synchronous? */
209 }
210 if (ret < 0)
212 printk(KERN_INFO "MCE: Error sending signal to %s:%d: %d\n",
213 t->comm, t->pid, ret);
211 pr_info("MCE: Error sending signal to %s:%d: %d\n",
212 t->comm, t->pid, ret);
214 return ret;
215}
216
217/*
218 * When an unknown page type is encountered, drain as many buffers as possible
219 * in the hope of turning the page into an LRU or free page, which we can handle.
220 */
221void shake_page(struct page *p, int access)

--- 63 unchanged lines hidden (view full) ---

285 struct to_kill *tk;
286
287 if (*tkc) {
288 tk = *tkc;
289 *tkc = NULL;
290 } else {
291 tk = kmalloc(sizeof(struct to_kill), GFP_ATOMIC);
292 if (!tk) {
213 return ret;
214}
215
216/*
217 * When an unknown page type is encountered, drain as many buffers as possible
218 * in the hope of turning the page into an LRU or free page, which we can handle.
219 */
220void shake_page(struct page *p, int access)

--- 63 unchanged lines hidden (view full) ---

284 struct to_kill *tk;
285
286 if (*tkc) {
287 tk = *tkc;
288 *tkc = NULL;
289 } else {
290 tk = kmalloc(sizeof(struct to_kill), GFP_ATOMIC);
291 if (!tk) {
293 printk(KERN_ERR
294 "MCE: Out of memory while machine check handling\n");
292 pr_err("MCE: Out of memory while machine check handling\n");
295 return;
296 }
297 }
298 tk->addr = page_address_in_vma(p, vma);
299 tk->addr_valid = 1;
300
301 /*
302 * In theory we don't have to kill when the page was

--- 28 unchanged lines hidden (view full) ---

331 list_for_each_entry_safe (tk, next, to_kill, nd) {
332 if (forcekill) {
333 /*
334 * In case something went wrong with munmapping
335 * make sure the process doesn't catch the
336 * signal and then access the memory. Just kill it.
337 */
338 if (fail || tk->addr_valid == 0) {
293 return;
294 }
295 }
296 tk->addr = page_address_in_vma(p, vma);
297 tk->addr_valid = 1;
298
299 /*
300 * In theory we don't have to kill when the page was

--- 28 unchanged lines hidden (view full) ---

329 list_for_each_entry_safe (tk, next, to_kill, nd) {
330 if (forcekill) {
331 /*
332 * In case something went wrong with munmapping
333 * make sure the process doesn't catch the
334 * signal and then access the memory. Just kill it.
335 */
336 if (fail || tk->addr_valid == 0) {
339 printk(KERN_ERR
340 "MCE %#lx: forcibly killing %s:%d because of failure to unmap corrupted page\n",
341 pfn, tk->tsk->comm, tk->tsk->pid);
337 pr_err("MCE %#lx: forcibly killing %s:%d because of failure to unmap corrupted page\n",
338 pfn, tk->tsk->comm, tk->tsk->pid);
342 force_sig(SIGKILL, tk->tsk);
343 }
344
345 /*
346 * In theory the process could have mapped
347 * something else on the address in-between. We could
348 * check for that, but we need to tell the
349 * process anyways.
350 */
351 else if (kill_proc(tk->tsk, tk->addr, trapno,
352 pfn, page, flags) < 0)
339 force_sig(SIGKILL, tk->tsk);
340 }
341
342 /*
343 * In theory the process could have mapped
344 * something else on the address in-between. We could
345 * check for that, but we need to tell the
346 * process anyways.
347 */
348 else if (kill_proc(tk->tsk, tk->addr, trapno,
349 pfn, page, flags) < 0)
353 printk(KERN_ERR
354 "MCE %#lx: Cannot send advisory machine check signal to %s:%d\n",
355 pfn, tk->tsk->comm, tk->tsk->pid);
350 pr_err("MCE %#lx: Cannot send advisory machine check signal to %s:%d\n",
351 pfn, tk->tsk->comm, tk->tsk->pid);
356 }
357 put_task_struct(tk->tsk);
358 kfree(tk);
359 }
360}
361
362/*
363 * Find a dedicated thread which is supposed to handle SIGBUS(BUS_MCEERR_AO)

--- 194 unchanged lines hidden (view full) ---

558 return MF_IGNORED;
559}
560
561/*
562 * Page in unknown state. Do nothing.
563 */
564static int me_unknown(struct page *p, unsigned long pfn)
565{
352 }
353 put_task_struct(tk->tsk);
354 kfree(tk);
355 }
356}
357
358/*
359 * Find a dedicated thread which is supposed to handle SIGBUS(BUS_MCEERR_AO)

--- 194 unchanged lines hidden (view full) ---

554 return MF_IGNORED;
555}
556
557/*
558 * Page in unknown state. Do nothing.
559 */
560static int me_unknown(struct page *p, unsigned long pfn)
561{
566 printk(KERN_ERR "MCE %#lx: Unknown page state\n", pfn);
562 pr_err("MCE %#lx: Unknown page state\n", pfn);
567 return MF_FAILED;
568}
569
570/*
571 * Clean (or cleaned) page cache page.
572 */
573static int me_pagecache_clean(struct page *p, unsigned long pfn)
574{

--- 28 unchanged lines hidden (view full) ---

603 /*
604 * Truncation is a bit tricky. Enable it per file system for now.
605 *
606 * Open: to take i_mutex or not for this? Right now we don't.
607 */
608 if (mapping->a_ops->error_remove_page) {
609 err = mapping->a_ops->error_remove_page(mapping, p);
610 if (err != 0) {
563 return MF_FAILED;
564}
565
566/*
567 * Clean (or cleaned) page cache page.
568 */
569static int me_pagecache_clean(struct page *p, unsigned long pfn)
570{

--- 28 unchanged lines hidden (view full) ---

599 /*
600 * Truncation is a bit tricky. Enable it per file system for now.
601 *
602 * Open: to take i_mutex or not for this? Right now we don't.
603 */
604 if (mapping->a_ops->error_remove_page) {
605 err = mapping->a_ops->error_remove_page(mapping, p);
606 if (err != 0) {
611 printk(KERN_INFO "MCE %#lx: Failed to punch page: %d\n",
612 pfn, err);
607 pr_info("MCE %#lx: Failed to punch page: %d\n",
608 pfn, err);
613 } else if (page_has_private(p) &&
614 !try_to_release_page(p, GFP_NOIO)) {
615 pr_info("MCE %#lx: failed to release buffers\n", pfn);
616 } else {
617 ret = MF_RECOVERED;
618 }
619 } else {
620 /*
621 * If the file system doesn't support it just invalidate
622 * This fails on dirty or anything with private pages
623 */
624 if (invalidate_inode_page(p))
625 ret = MF_RECOVERED;
626 else
609 } else if (page_has_private(p) &&
610 !try_to_release_page(p, GFP_NOIO)) {
611 pr_info("MCE %#lx: failed to release buffers\n", pfn);
612 } else {
613 ret = MF_RECOVERED;
614 }
615 } else {
616 /*
617 * If the file system doesn't support it just invalidate
618 * This fails on dirty or anything with private pages
619 */
620 if (invalidate_inode_page(p))
621 ret = MF_RECOVERED;
622 else
627 printk(KERN_INFO "MCE %#lx: Failed to invalidate\n",
628 pfn);
623 pr_info("MCE %#lx: Failed to invalidate\n", pfn);
629 }
630 return ret;
631}
632
633/*
634 * Dirty pagecache page
635 * Issues: when the error hits a hole page the error is not properly
636 * propagated.

--- 212 unchanged lines hidden (view full) ---

849 int count;
850
851 result = ps->action(p, pfn);
852
853 count = page_count(p) - 1;
854 if (ps->action == me_swapcache_dirty && result == MF_DELAYED)
855 count--;
856 if (count != 0) {
624 }
625 return ret;
626}
627
628/*
629 * Dirty pagecache page
630 * Issues: when the error hits a hole page the error is not properly
631 * propagated.

--- 212 unchanged lines hidden (view full) ---

844 int count;
845
846 result = ps->action(p, pfn);
847
848 count = page_count(p) - 1;
849 if (ps->action == me_swapcache_dirty && result == MF_DELAYED)
850 count--;
851 if (count != 0) {
857 printk(KERN_ERR
858 "MCE %#lx: %s still referenced by %d users\n",
852 pr_err("MCE %#lx: %s still referenced by %d users\n",
859 pfn, action_page_types[ps->type], count);
860 result = MF_FAILED;
861 }
862 action_result(pfn, ps->type, result);
863
864 /* Could do more checks here if page looks ok */
865 /*
866 * Could adjust zone counters here to correct for the missing page.

--- 62 unchanged lines hidden (view full) ---

929 return SWAP_SUCCESS;
930
931 if (PageKsm(p)) {
932 pr_err("MCE %#lx: can't handle KSM pages.\n", pfn);
933 return SWAP_FAIL;
934 }
935
936 if (PageSwapCache(p)) {
853 pfn, action_page_types[ps->type], count);
854 result = MF_FAILED;
855 }
856 action_result(pfn, ps->type, result);
857
858 /* Could do more checks here if page looks ok */
859 /*
860 * Could adjust zone counters here to correct for the missing page.

--- 62 unchanged lines hidden (view full) ---

923 return SWAP_SUCCESS;
924
925 if (PageKsm(p)) {
926 pr_err("MCE %#lx: can't handle KSM pages.\n", pfn);
927 return SWAP_FAIL;
928 }
929
930 if (PageSwapCache(p)) {
937 printk(KERN_ERR
938 "MCE %#lx: keeping poisoned page in swap cache\n", pfn);
931 pr_err("MCE %#lx: keeping poisoned page in swap cache\n", pfn);
939 ttu |= TTU_IGNORE_HWPOISON;
940 }
941
942 /*
943 * Propagate the dirty bit from PTEs to struct page first, because we
944 * need this to decide if we should kill or just drop the page.
945 * XXX: the dirty test could be racy: set_page_dirty() may not always
946 * be called inside page lock (it's recommended but not enforced).
947 */
948 mapping = page_mapping(hpage);
949 if (!(flags & MF_MUST_KILL) && !PageDirty(hpage) && mapping &&
950 mapping_cap_writeback_dirty(mapping)) {
951 if (page_mkclean(hpage)) {
952 SetPageDirty(hpage);
953 } else {
954 kill = 0;
955 ttu |= TTU_IGNORE_HWPOISON;
932 ttu |= TTU_IGNORE_HWPOISON;
933 }
934
935 /*
936 * Propagate the dirty bit from PTEs to struct page first, because we
937 * need this to decide if we should kill or just drop the page.
938 * XXX: the dirty test could be racy: set_page_dirty() may not always
939 * be called inside page lock (it's recommended but not enforced).
940 */
941 mapping = page_mapping(hpage);
942 if (!(flags & MF_MUST_KILL) && !PageDirty(hpage) && mapping &&
943 mapping_cap_writeback_dirty(mapping)) {
944 if (page_mkclean(hpage)) {
945 SetPageDirty(hpage);
946 } else {
947 kill = 0;
948 ttu |= TTU_IGNORE_HWPOISON;
956 printk(KERN_INFO
957 "MCE %#lx: corrupted page was clean: dropped without side effects\n",
949 pr_info("MCE %#lx: corrupted page was clean: dropped without side effects\n",
958 pfn);
959 }
960 }
961
962 /*
963 * First collect all the processes that have the page
964 * mapped in dirty form. This has to be done before try_to_unmap,
965 * because ttu takes the rmap data structures down.
966 *
967 * Error handling: We ignore errors here because
968 * there's nothing that can be done.
969 */
970 if (kill)
971 collect_procs(hpage, &tokill, flags & MF_ACTION_REQUIRED);
972
973 ret = try_to_unmap(hpage, ttu);
974 if (ret != SWAP_SUCCESS)
950 pfn);
951 }
952 }
953
954 /*
955 * First collect all the processes that have the page
956 * mapped in dirty form. This has to be done before try_to_unmap,
957 * because ttu takes the rmap data structures down.
958 *
959 * Error handling: We ignore errors here because
960 * there's nothing that can be done.
961 */
962 if (kill)
963 collect_procs(hpage, &tokill, flags & MF_ACTION_REQUIRED);
964
965 ret = try_to_unmap(hpage, ttu);
966 if (ret != SWAP_SUCCESS)
975 printk(KERN_ERR "MCE %#lx: failed to unmap page (mapcount=%d)\n",
976 pfn, page_mapcount(hpage));
967 pr_err("MCE %#lx: failed to unmap page (mapcount=%d)\n",
968 pfn, page_mapcount(hpage));
977
978 /*
979 * Now that the dirty bit has been propagated to the
980 * struct page and all unmaps done we can decide if
981 * killing is needed or not. Only kill when the page
982 * was dirty or the process is not restartable,
983 * otherwise the tokill list is merely
984 * freed. When there was a problem unmapping earlier

--- 50 unchanged lines hidden (view full) ---

1035 int res;
1036 unsigned int nr_pages;
1037 unsigned long page_flags;
1038
1039 if (!sysctl_memory_failure_recovery)
1040 panic("Memory failure from trap %d on page %lx", trapno, pfn);
1041
1042 if (!pfn_valid(pfn)) {
969
970 /*
971 * Now that the dirty bit has been propagated to the
972 * struct page and all unmaps done we can decide if
973 * killing is needed or not. Only kill when the page
974 * was dirty or the process is not restartable,
975 * otherwise the tokill list is merely
976 * freed. When there was a problem unmapping earlier

--- 50 unchanged lines hidden (view full) ---

1027 int res;
1028 unsigned int nr_pages;
1029 unsigned long page_flags;
1030
1031 if (!sysctl_memory_failure_recovery)
1032 panic("Memory failure from trap %d on page %lx", trapno, pfn);
1033
1034 if (!pfn_valid(pfn)) {
1043 printk(KERN_ERR
1044 "MCE %#lx: memory outside kernel control\n",
1045 pfn);
1035 pr_err("MCE %#lx: memory outside kernel control\n", pfn);
1046 return -ENXIO;
1047 }
1048
1049 p = pfn_to_page(pfn);
1050 orig_head = hpage = compound_head(p);
1051 if (TestSetPageHWPoison(p)) {
1036 return -ENXIO;
1037 }
1038
1039 p = pfn_to_page(pfn);
1040 orig_head = hpage = compound_head(p);
1041 if (TestSetPageHWPoison(p)) {
1052 printk(KERN_ERR "MCE %#lx: already hardware poisoned\n", pfn);
1042 pr_err("MCE %#lx: already hardware poisoned\n", pfn);
1053 return 0;
1054 }
1055
1056 /*
1057 * Currently errors on hugetlbfs pages are measured in hugepage units,
1058 * so nr_pages should be 1 << compound_order. OTOH when errors are on
1059 * transparent hugepages, they are supposed to be split and error
1060 * measurement is done in normal page units. So nr_pages should be one

--- 114 unchanged lines hidden (view full) ---

1175 * correctly, we save a copy of the page flags at this time.
1176 */
1177 page_flags = p->flags;
1178
1179 /*
1180 * unpoison always clears PG_hwpoison inside the page lock
1181 */
1182 if (!PageHWPoison(p)) {
1043 return 0;
1044 }
1045
1046 /*
1047 * Currently errors on hugetlbfs pages are measured in hugepage units,
1048 * so nr_pages should be 1 << compound_order. OTOH when errors are on
1049 * transparent hugepages, they are supposed to be split and error
1050 * measurement is done in normal page units. So nr_pages should be one

--- 114 unchanged lines hidden (view full) ---

1165 * correctly, we save a copy of the page flags at this time.
1166 */
1167 page_flags = p->flags;
1168
1169 /*
1170 * unpoison always clears PG_hwpoison inside the page lock
1171 */
1172 if (!PageHWPoison(p)) {
1183 printk(KERN_ERR "MCE %#lx: just unpoisoned\n", pfn);
1173 pr_err("MCE %#lx: just unpoisoned\n", pfn);
1184 num_poisoned_pages_sub(nr_pages);
1185 unlock_page(hpage);
1186 put_hwpoison_page(hpage);
1187 return 0;
1188 }
1189 if (hwpoison_filter(p)) {
1190 if (TestClearPageHWPoison(p))
1191 num_poisoned_pages_sub(nr_pages);

--- 581 unchanged lines hidden ---
1174 num_poisoned_pages_sub(nr_pages);
1175 unlock_page(hpage);
1176 put_hwpoison_page(hpage);
1177 return 0;
1178 }
1179 if (hwpoison_filter(p)) {
1180 if (TestClearPageHWPoison(p))
1181 num_poisoned_pages_sub(nr_pages);

--- 581 unchanged lines hidden ---