// SPDX-License-Identifier: GPL-2.0
/*
 * This file contains common generic and tag-based KASAN code.
 *
 * Copyright (c) 2014 Samsung Electronics Co., Ltd.
 * Author: Andrey Ryabinin <ryabinin.a.a@gmail.com>
 *
 * Some code borrowed from https://github.com/xairy/kasan-prototype by
 *        Andrey Konovalov <andreyknvl@gmail.com>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 *
 */

#include <linux/export.h>
#include <linux/interrupt.h>
#include <linux/init.h>
#include <linux/kasan.h>
#include <linux/kernel.h>
#include <linux/kmemleak.h>
#include <linux/linkage.h>
#include <linux/memblock.h>
#include <linux/memory.h>
#include <linux/mm.h>
#include <linux/module.h>
#include <linux/printk.h>
#include <linux/sched.h>
#include <linux/sched/task_stack.h>
#include <linux/slab.h>
#include <linux/stacktrace.h>
#include <linux/string.h>
#include <linux/types.h>
#include <linux/vmalloc.h>
#include <linux/bug.h>
#include <linux/uaccess.h>

#include <asm/cacheflush.h>
#include <asm/tlbflush.h>

#include "kasan.h"
#include "../slab.h"

static inline int in_irqentry_text(unsigned long ptr)
{
	return (ptr >= (unsigned long)&__irqentry_text_start &&
		ptr < (unsigned long)&__irqentry_text_end) ||
		(ptr >= (unsigned long)&__softirqentry_text_start &&
		 ptr < (unsigned long)&__softirqentry_text_end);
}

static inline unsigned int filter_irq_stacks(unsigned long *entries,
					     unsigned int nr_entries)
{
	unsigned int i;

	for (i = 0; i < nr_entries; i++) {
		if (in_irqentry_text(entries[i])) {
			/* Include the irqentry function into the stack. */
			return i + 1;
		}
	}
	return nr_entries;
}
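/*
 * Capture the current call chain for later reporting. The trace is cut off
 * at the first IRQ entry function (so the unrelated interrupted context is
 * not recorded) and deduplicated in the stack depot, leaving only a compact
 * depot handle to be stored per allocation or free.
 */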
static inline depot_stack_handle_t save_stack(gfp_t flags)
{
	unsigned long entries[KASAN_STACK_DEPTH];
	unsigned int nr_entries;

	nr_entries = stack_trace_save(entries, ARRAY_SIZE(entries), 0);
	nr_entries = filter_irq_stacks(entries, nr_entries);
	return stack_depot_save(entries, nr_entries, flags);
}

static inline void set_track(struct kasan_track *track, gfp_t flags)
{
	track->pid = current->pid;
	track->stack = save_stack(flags);
}

void kasan_enable_current(void)
{
	current->kasan_depth++;
}

void kasan_disable_current(void)
{
	current->kasan_depth--;
}

bool __kasan_check_read(const volatile void *p, unsigned int size)
{
	return check_memory_region((unsigned long)p, size, false, _RET_IP_);
}
EXPORT_SYMBOL(__kasan_check_read);

bool __kasan_check_write(const volatile void *p, unsigned int size)
{
	return check_memory_region((unsigned long)p, size, true, _RET_IP_);
}
EXPORT_SYMBOL(__kasan_check_write);

#undef memset
void *memset(void *addr, int c, size_t len)
{
	check_memory_region((unsigned long)addr, len, true, _RET_IP_);

	return __memset(addr, c, len);
}

#ifdef __HAVE_ARCH_MEMMOVE
#undef memmove
void *memmove(void *dest, const void *src, size_t len)
{
	check_memory_region((unsigned long)src, len, false, _RET_IP_);
	check_memory_region((unsigned long)dest, len, true, _RET_IP_);

	return __memmove(dest, src, len);
}
#endif

#undef memcpy
void *memcpy(void *dest, const void *src, size_t len)
{
	check_memory_region((unsigned long)src, len, false, _RET_IP_);
	check_memory_region((unsigned long)dest, len, true, _RET_IP_);

	return __memcpy(dest, src, len);
}

/*
 * Poisons the shadow memory for 'size' bytes starting from 'addr'.
 * Memory addresses should be aligned to KASAN_SHADOW_SCALE_SIZE.
 */
void kasan_poison_shadow(const void *address, size_t size, u8 value)
{
	void *shadow_start, *shadow_end;

	/*
	 * Perform shadow offset calculation based on untagged address, as
	 * some of the callers (e.g. kasan_poison_object_data) pass tagged
	 * addresses to this function.
	 */
	address = reset_tag(address);

	shadow_start = kasan_mem_to_shadow(address);
	shadow_end = kasan_mem_to_shadow(address + size);

	__memset(shadow_start, value, shadow_end - shadow_start);
}
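/*
 * Example (generic KASAN, KASAN_SHADOW_SCALE_SIZE == 8): unpoisoning a
 * 100-byte object clears twelve shadow bytes to 0 (fully accessible 8-byte
 * granules) and writes 4 (100 % 8) into the thirteenth, meaning only the
 * first four bytes of that last granule are valid. With software tag-based
 * KASAN the partial granule's shadow byte holds the pointer tag instead.
 */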
void kasan_unpoison_shadow(const void *address, size_t size)
{
	u8 tag = get_tag(address);

	/*
	 * Perform shadow offset calculation based on untagged address, as
	 * some of the callers (e.g. kasan_unpoison_object_data) pass tagged
	 * addresses to this function.
	 */
	address = reset_tag(address);

	kasan_poison_shadow(address, size, tag);

	if (size & KASAN_SHADOW_MASK) {
		u8 *shadow = (u8 *)kasan_mem_to_shadow(address + size);

		if (IS_ENABLED(CONFIG_KASAN_SW_TAGS))
			*shadow = tag;
		else
			*shadow = size & KASAN_SHADOW_MASK;
	}
}

static void __kasan_unpoison_stack(struct task_struct *task, const void *sp)
{
	void *base = task_stack_page(task);
	size_t size = sp - base;

	kasan_unpoison_shadow(base, size);
}

/* Unpoison the entire stack for a task. */
void kasan_unpoison_task_stack(struct task_struct *task)
{
	__kasan_unpoison_stack(task, task_stack_page(task) + THREAD_SIZE);
}

/* Unpoison the stack for the current task beyond a watermark sp value. */
asmlinkage void kasan_unpoison_task_stack_below(const void *watermark)
{
	/*
	 * Calculate the task stack base address. Avoid using 'current'
	 * because this function is called by early resume code which hasn't
	 * yet set up the percpu register (%gs).
	 */
	void *base = (void *)((unsigned long)watermark & ~(THREAD_SIZE - 1));

	kasan_unpoison_shadow(base, watermark - base);
}

/*
 * Clear all poison for the region between the current SP and a provided
 * watermark value, as is sometimes required prior to hand-crafted asm function
 * returns in the middle of functions.
 */
void kasan_unpoison_stack_above_sp_to(const void *watermark)
{
	const void *sp = __builtin_frame_address(0);
	size_t size = watermark - sp;

	if (WARN_ON(sp > watermark))
		return;
	kasan_unpoison_shadow(sp, size);
}

void kasan_alloc_pages(struct page *page, unsigned int order)
{
	u8 tag;
	unsigned long i;

	if (unlikely(PageHighMem(page)))
		return;

	tag = random_tag();
	for (i = 0; i < (1 << order); i++)
		page_kasan_tag_set(page + i, tag);
	kasan_unpoison_shadow(page_address(page), PAGE_SIZE << order);
}

void kasan_free_pages(struct page *page, unsigned int order)
{
	if (likely(!PageHighMem(page)))
		kasan_poison_shadow(page_address(page),
				PAGE_SIZE << order,
				KASAN_FREE_PAGE);
}

/*
 * Adaptive redzone policy taken from the userspace AddressSanitizer runtime.
 * For larger allocations larger redzones are used.
 */
static inline unsigned int optimal_redzone(unsigned int object_size)
{
	if (IS_ENABLED(CONFIG_KASAN_SW_TAGS))
		return 0;

	return
		object_size <= 64        - 16   ? 16 :
		object_size <= 128       - 32   ? 32 :
		object_size <= 512       - 64   ? 64 :
		object_size <= 4096      - 128  ? 128 :
		object_size <= (1 << 14) - 256  ? 256 :
		object_size <= (1 << 15) - 512  ? 512 :
		object_size <= (1 << 16) - 1024 ? 1024 : 2048;
}
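/*
 * Allocation metadata is placed immediately after the in-slab object area
 * (at alloc_meta_offset). With generic KASAN, free metadata is appended as
 * well only when it cannot be stored inside the freed object itself
 * (SLAB_TYPESAFE_BY_RCU caches, caches with constructors, or objects smaller
 * than struct kasan_free_meta); otherwise free_meta_offset stays 0 and the
 * free metadata overlays the dead object. The size is then padded so the
 * redzone is at least optimal_redzone(object_size) bytes, capped at
 * KMALLOC_MAX_SIZE.
 */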
void kasan_cache_create(struct kmem_cache *cache, unsigned int *size,
			slab_flags_t *flags)
{
	unsigned int orig_size = *size;
	unsigned int redzone_size;
	int redzone_adjust;

	/* Add alloc meta. */
	cache->kasan_info.alloc_meta_offset = *size;
	*size += sizeof(struct kasan_alloc_meta);

	/* Add free meta. */
	if (IS_ENABLED(CONFIG_KASAN_GENERIC) &&
	    (cache->flags & SLAB_TYPESAFE_BY_RCU || cache->ctor ||
	     cache->object_size < sizeof(struct kasan_free_meta))) {
		cache->kasan_info.free_meta_offset = *size;
		*size += sizeof(struct kasan_free_meta);
	}

	redzone_size = optimal_redzone(cache->object_size);
	redzone_adjust = redzone_size - (*size - cache->object_size);
	if (redzone_adjust > 0)
		*size += redzone_adjust;

	*size = min_t(unsigned int, KMALLOC_MAX_SIZE,
			max(*size, cache->object_size + redzone_size));

	/*
	 * If the metadata doesn't fit, don't enable KASAN at all.
	 */
	if (*size <= cache->kasan_info.alloc_meta_offset ||
			*size <= cache->kasan_info.free_meta_offset) {
		cache->kasan_info.alloc_meta_offset = 0;
		cache->kasan_info.free_meta_offset = 0;
		*size = orig_size;
		return;
	}

	*flags |= SLAB_KASAN;
}

size_t kasan_metadata_size(struct kmem_cache *cache)
{
	return (cache->kasan_info.alloc_meta_offset ?
		sizeof(struct kasan_alloc_meta) : 0) +
		(cache->kasan_info.free_meta_offset ?
		sizeof(struct kasan_free_meta) : 0);
}

struct kasan_alloc_meta *get_alloc_info(struct kmem_cache *cache,
					const void *object)
{
	return (void *)object + cache->kasan_info.alloc_meta_offset;
}

struct kasan_free_meta *get_free_info(struct kmem_cache *cache,
				      const void *object)
{
	BUILD_BUG_ON(sizeof(struct kasan_free_meta) > 32);
	return (void *)object + cache->kasan_info.free_meta_offset;
}

static void kasan_set_free_info(struct kmem_cache *cache,
				void *object, u8 tag)
{
	struct kasan_alloc_meta *alloc_meta;
	u8 idx = 0;

	alloc_meta = get_alloc_info(cache, object);

#ifdef CONFIG_KASAN_SW_TAGS_IDENTIFY
	idx = alloc_meta->free_track_idx;
	alloc_meta->free_pointer_tag[idx] = tag;
	alloc_meta->free_track_idx = (idx + 1) % KASAN_NR_FREE_STACKS;
#endif

	set_track(&alloc_meta->free_track[idx], GFP_NOWAIT);
}
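/*
 * A new slab page is poisoned wholesale as redzone; objects are unpoisoned
 * individually as they are handed out and re-poisoned on free. The per-page
 * KASAN tags are reset to the match-all value so that accesses through
 * page_address() are not reported as tag mismatches under software
 * tag-based KASAN.
 */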
void kasan_poison_slab(struct page *page)
{
	unsigned long i;

	for (i = 0; i < compound_nr(page); i++)
		page_kasan_tag_reset(page + i);
	kasan_poison_shadow(page_address(page), page_size(page),
			KASAN_KMALLOC_REDZONE);
}

void kasan_unpoison_object_data(struct kmem_cache *cache, void *object)
{
	kasan_unpoison_shadow(object, cache->object_size);
}

void kasan_poison_object_data(struct kmem_cache *cache, void *object)
{
	kasan_poison_shadow(object,
			round_up(cache->object_size, KASAN_SHADOW_SCALE_SIZE),
			KASAN_KMALLOC_REDZONE);
}

/*
 * This function assigns a tag to an object considering the following:
 * 1. A cache might have a constructor, which might save a pointer to a slab
 *    object somewhere (e.g. in the object itself). We preassign a tag for
 *    each object in caches with constructors during slab creation and reuse
 *    the same tag each time a particular object is allocated.
 * 2. A cache might be SLAB_TYPESAFE_BY_RCU, which means objects can be
 *    accessed after being freed. We preassign tags for objects in these
 *    caches as well.
 * 3. For the SLAB allocator we can't preassign tags randomly since the
 *    freelist is stored as an array of indexes instead of a linked list.
 *    Assign tags based on object indexes, so that objects that are next to
 *    each other get different tags.
 */
static u8 assign_tag(struct kmem_cache *cache, const void *object,
			bool init, bool keep_tag)
{
	/*
	 * 1. When an object is kmalloc()'ed, two hooks are called:
	 *    kasan_slab_alloc() and kasan_kmalloc(). We assign the
	 *    tag only in the first one.
	 * 2. We reuse the same tag for krealloc'ed objects.
	 */
	if (keep_tag)
		return get_tag(object);

	/*
	 * If the cache neither has a constructor nor has SLAB_TYPESAFE_BY_RCU
	 * set, assign a tag when the object is being allocated (init == false).
	 */
	if (!cache->ctor && !(cache->flags & SLAB_TYPESAFE_BY_RCU))
		return init ? KASAN_TAG_KERNEL : random_tag();

	/* For caches that either have a constructor or SLAB_TYPESAFE_BY_RCU: */
#ifdef CONFIG_SLAB
	/* For SLAB assign tags based on the object index in the freelist. */
	return (u8)obj_to_index(cache, virt_to_page(object), (void *)object);
#else
	/*
	 * For SLUB assign a random tag during slab creation, otherwise reuse
	 * the already assigned tag.
	 */
	return init ? random_tag() : get_tag(object);
#endif
}

void * __must_check kasan_init_slab_obj(struct kmem_cache *cache,
						const void *object)
{
	struct kasan_alloc_meta *alloc_info;

	if (!(cache->flags & SLAB_KASAN))
		return (void *)object;

	alloc_info = get_alloc_info(cache, object);
	__memset(alloc_info, 0, sizeof(*alloc_info));

	if (IS_ENABLED(CONFIG_KASAN_SW_TAGS))
		object = set_tag(object,
				assign_tag(cache, object, true, false));

	return (void *)object;
}
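/*
 * Free-time sanity checks: the freed pointer must point at the start of an
 * object, and the first shadow byte must describe an accessible object
 * (generic KASAN) or carry a matching tag (software tag-based KASAN).
 * Anything else is reported as an invalid or double free.
 */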
static inline bool shadow_invalid(u8 tag, s8 shadow_byte)
{
	if (IS_ENABLED(CONFIG_KASAN_GENERIC))
		return shadow_byte < 0 ||
			shadow_byte >= KASAN_SHADOW_SCALE_SIZE;

	/* else CONFIG_KASAN_SW_TAGS: */
	if ((u8)shadow_byte == KASAN_TAG_INVALID)
		return true;
	if ((tag != KASAN_TAG_KERNEL) && (tag != (u8)shadow_byte))
		return true;

	return false;
}

static bool __kasan_slab_free(struct kmem_cache *cache, void *object,
			      unsigned long ip, bool quarantine)
{
	s8 shadow_byte;
	u8 tag;
	void *tagged_object;
	unsigned long rounded_up_size;

	tag = get_tag(object);
	tagged_object = object;
	object = reset_tag(object);

	if (unlikely(nearest_obj(cache, virt_to_head_page(object), object) !=
	    object)) {
		kasan_report_invalid_free(tagged_object, ip);
		return true;
	}

	/* RCU slabs could be legally used after free within the RCU period */
	if (unlikely(cache->flags & SLAB_TYPESAFE_BY_RCU))
		return false;

	shadow_byte = READ_ONCE(*(s8 *)kasan_mem_to_shadow(object));
	if (shadow_invalid(tag, shadow_byte)) {
		kasan_report_invalid_free(tagged_object, ip);
		return true;
	}

	rounded_up_size = round_up(cache->object_size, KASAN_SHADOW_SCALE_SIZE);
	kasan_poison_shadow(object, rounded_up_size, KASAN_KMALLOC_FREE);

	if ((IS_ENABLED(CONFIG_KASAN_GENERIC) && !quarantine) ||
			unlikely(!(cache->flags & SLAB_KASAN)))
		return false;

	kasan_set_free_info(cache, object, tag);

	quarantine_put(get_free_info(cache, object), cache);

	return IS_ENABLED(CONFIG_KASAN_GENERIC);
}

bool kasan_slab_free(struct kmem_cache *cache, void *object, unsigned long ip)
{
	return __kasan_slab_free(cache, object, ip, true);
}

static void *__kasan_kmalloc(struct kmem_cache *cache, const void *object,
				size_t size, gfp_t flags, bool keep_tag)
{
	unsigned long redzone_start;
	unsigned long redzone_end;
	u8 tag = 0xff;

	if (gfpflags_allow_blocking(flags))
		quarantine_reduce();

	if (unlikely(object == NULL))
		return NULL;

	redzone_start = round_up((unsigned long)(object + size),
				KASAN_SHADOW_SCALE_SIZE);
	redzone_end = round_up((unsigned long)object + cache->object_size,
				KASAN_SHADOW_SCALE_SIZE);

	if (IS_ENABLED(CONFIG_KASAN_SW_TAGS))
		tag = assign_tag(cache, object, false, keep_tag);

	/* Tag is ignored in set_tag without CONFIG_KASAN_SW_TAGS */
	kasan_unpoison_shadow(set_tag(object, tag), size);
	kasan_poison_shadow((void *)redzone_start, redzone_end - redzone_start,
		KASAN_KMALLOC_REDZONE);

	if (cache->flags & SLAB_KASAN)
		set_track(&get_alloc_info(cache, object)->alloc_track, flags);

	return set_tag(object, tag);
}

void * __must_check kasan_slab_alloc(struct kmem_cache *cache, void *object,
					gfp_t flags)
{
	return __kasan_kmalloc(cache, object, cache->object_size, flags, false);
}

void * __must_check kasan_kmalloc(struct kmem_cache *cache, const void *object,
				size_t size, gfp_t flags)
{
	return __kasan_kmalloc(cache, object, size, flags, true);
}
EXPORT_SYMBOL(kasan_kmalloc);
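/*
 * Large kmalloc() allocations bypass the slab caches and are backed directly
 * by the page allocator. There is no per-object metadata here; only the tail
 * of the mapping past the requested size is poisoned as a page redzone.
 */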
void * __must_check kasan_kmalloc_large(const void *ptr, size_t size,
						gfp_t flags)
{
	struct page *page;
	unsigned long redzone_start;
	unsigned long redzone_end;

	if (gfpflags_allow_blocking(flags))
		quarantine_reduce();

	if (unlikely(ptr == NULL))
		return NULL;

	page = virt_to_page(ptr);
	redzone_start = round_up((unsigned long)(ptr + size),
				KASAN_SHADOW_SCALE_SIZE);
	redzone_end = (unsigned long)ptr + page_size(page);

	kasan_unpoison_shadow(ptr, size);
	kasan_poison_shadow((void *)redzone_start, redzone_end - redzone_start,
		KASAN_PAGE_REDZONE);

	return (void *)ptr;
}

void * __must_check kasan_krealloc(const void *object, size_t size, gfp_t flags)
{
	struct page *page;

	if (unlikely(object == ZERO_SIZE_PTR))
		return (void *)object;

	page = virt_to_head_page(object);

	if (unlikely(!PageSlab(page)))
		return kasan_kmalloc_large(object, size, flags);
	else
		return __kasan_kmalloc(page->slab_cache, object, size,
						flags, true);
}

void kasan_poison_kfree(void *ptr, unsigned long ip)
{
	struct page *page;

	page = virt_to_head_page(ptr);

	if (unlikely(!PageSlab(page))) {
		if (ptr != page_address(page)) {
			kasan_report_invalid_free(ptr, ip);
			return;
		}
		kasan_poison_shadow(ptr, page_size(page), KASAN_FREE_PAGE);
	} else {
		__kasan_slab_free(page->slab_cache, ptr, ip, false);
	}
}

void kasan_kfree_large(void *ptr, unsigned long ip)
{
	if (ptr != page_address(virt_to_head_page(ptr)))
		kasan_report_invalid_free(ptr, ip);
	/* The object will be poisoned by page_alloc. */
}

#ifndef CONFIG_KASAN_VMALLOC
int kasan_module_alloc(void *addr, size_t size)
{
	void *ret;
	size_t scaled_size;
	size_t shadow_size;
	unsigned long shadow_start;

	shadow_start = (unsigned long)kasan_mem_to_shadow(addr);
	scaled_size = (size + KASAN_SHADOW_MASK) >> KASAN_SHADOW_SCALE_SHIFT;
	shadow_size = round_up(scaled_size, PAGE_SIZE);

	if (WARN_ON(!PAGE_ALIGNED(shadow_start)))
		return -EINVAL;

	ret = __vmalloc_node_range(shadow_size, 1, shadow_start,
			shadow_start + shadow_size,
			GFP_KERNEL,
			PAGE_KERNEL, VM_NO_GUARD, NUMA_NO_NODE,
			__builtin_return_address(0));

	if (ret) {
		__memset(ret, KASAN_SHADOW_INIT, shadow_size);
		find_vm_area(addr)->flags |= VM_KASAN;
		kmemleak_ignore(ret);
		return 0;
	}

	return -ENOMEM;
}

void kasan_free_shadow(const struct vm_struct *vm)
{
	if (vm->flags & VM_KASAN)
		vfree(kasan_mem_to_shadow(vm->addr));
}
#endif
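/*
 * kasan_report() wraps the actual reporting code in report.c.
 * user_access_save()/user_access_restore() temporarily close any open
 * user-space access window (e.g. SMAP on x86) so the report can be printed
 * safely even when the bad access was detected inside a user-copy routine.
 */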
extern void __kasan_report(unsigned long addr, size_t size, bool is_write, unsigned long ip);

void kasan_report(unsigned long addr, size_t size, bool is_write, unsigned long ip)
{
	unsigned long flags = user_access_save();
	__kasan_report(addr, size, is_write, ip);
	user_access_restore(flags);
}

#ifdef CONFIG_MEMORY_HOTPLUG
static bool shadow_mapped(unsigned long addr)
{
	pgd_t *pgd = pgd_offset_k(addr);
	p4d_t *p4d;
	pud_t *pud;
	pmd_t *pmd;
	pte_t *pte;

	if (pgd_none(*pgd))
		return false;
	p4d = p4d_offset(pgd, addr);
	if (p4d_none(*p4d))
		return false;
	pud = pud_offset(p4d, addr);
	if (pud_none(*pud))
		return false;

	/*
	 * We can't use pud_large() or pud_huge(), the first one is
	 * arch-specific, the last one depends on HUGETLB_PAGE. So let's abuse
	 * pud_bad(), if pud is bad then it's bad because it's huge.
	 */
	if (pud_bad(*pud))
		return true;
	pmd = pmd_offset(pud, addr);
	if (pmd_none(*pmd))
		return false;

	if (pmd_bad(*pmd))
		return true;
	pte = pte_offset_kernel(pmd, addr);
	return !pte_none(*pte);
}

static int __meminit kasan_mem_notifier(struct notifier_block *nb,
			unsigned long action, void *data)
{
	struct memory_notify *mem_data = data;
	unsigned long nr_shadow_pages, start_kaddr, shadow_start;
	unsigned long shadow_end, shadow_size;

	nr_shadow_pages = mem_data->nr_pages >> KASAN_SHADOW_SCALE_SHIFT;
	start_kaddr = (unsigned long)pfn_to_kaddr(mem_data->start_pfn);
	shadow_start = (unsigned long)kasan_mem_to_shadow((void *)start_kaddr);
	shadow_size = nr_shadow_pages << PAGE_SHIFT;
	shadow_end = shadow_start + shadow_size;

	if (WARN_ON(mem_data->nr_pages % KASAN_SHADOW_SCALE_SIZE) ||
		WARN_ON(start_kaddr % (KASAN_SHADOW_SCALE_SIZE << PAGE_SHIFT)))
		return NOTIFY_BAD;

	switch (action) {
	case MEM_GOING_ONLINE: {
		void *ret;

		/*
		 * If the shadow is already mapped, then it must have been
		 * mapped during boot. This could happen if we are onlining
		 * previously offlined memory.
		 */
		if (shadow_mapped(shadow_start))
			return NOTIFY_OK;

		ret = __vmalloc_node_range(shadow_size, PAGE_SIZE, shadow_start,
					shadow_end, GFP_KERNEL,
					PAGE_KERNEL, VM_NO_GUARD,
					pfn_to_nid(mem_data->start_pfn),
					__builtin_return_address(0));
		if (!ret)
			return NOTIFY_BAD;

		kmemleak_ignore(ret);
		return NOTIFY_OK;
	}
	case MEM_CANCEL_ONLINE:
	case MEM_OFFLINE: {
		struct vm_struct *vm;

		/*
		 * shadow_start was either mapped during boot by kasan_init()
		 * or during memory online by __vmalloc_node_range().
		 * In the latter case we can use vfree() to free shadow.
		 * Non-NULL result of the find_vm_area() will tell us if
		 * that was the second case.
		 *
		 * Currently it's not possible to free shadow mapped
		 * during boot by kasan_init(). It's because the code
		 * to do that hasn't been written yet. So we'll just
		 * leak the memory.
		 */
		vm = find_vm_area((void *)shadow_start);
		if (vm)
			vfree((void *)shadow_start);
	}
	}

	return NOTIFY_OK;
}

static int __init kasan_memhotplug_init(void)
{
	hotplug_memory_notifier(kasan_mem_notifier, 0);

	return 0;
}

core_initcall(kasan_memhotplug_init);
#endif
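/*
 * With CONFIG_KASAN_VMALLOC the shadow for vmalloc space is not allocated up
 * front. kasan_populate_vmalloc() installs shadow pages on demand when a
 * vmalloc region is set up, and kasan_release_vmalloc() tears them down
 * lazily once the covering free region is purged.
 */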
#ifdef CONFIG_KASAN_VMALLOC
static int kasan_populate_vmalloc_pte(pte_t *ptep, unsigned long addr,
				      void *unused)
{
	unsigned long page;
	pte_t pte;

	if (likely(!pte_none(*ptep)))
		return 0;

	page = __get_free_page(GFP_KERNEL);
	if (!page)
		return -ENOMEM;

	memset((void *)page, KASAN_VMALLOC_INVALID, PAGE_SIZE);
	pte = pfn_pte(PFN_DOWN(__pa(page)), PAGE_KERNEL);

	spin_lock(&init_mm.page_table_lock);
	if (likely(pte_none(*ptep))) {
		set_pte_at(&init_mm, addr, ptep, pte);
		page = 0;
	}
	spin_unlock(&init_mm.page_table_lock);
	if (page)
		free_page(page);
	return 0;
}

int kasan_populate_vmalloc(unsigned long addr, unsigned long size)
{
	unsigned long shadow_start, shadow_end;
	int ret;

	if (!is_vmalloc_or_module_addr((void *)addr))
		return 0;

	shadow_start = (unsigned long)kasan_mem_to_shadow((void *)addr);
	shadow_start = ALIGN_DOWN(shadow_start, PAGE_SIZE);
	shadow_end = (unsigned long)kasan_mem_to_shadow((void *)addr + size);
	shadow_end = ALIGN(shadow_end, PAGE_SIZE);

	ret = apply_to_page_range(&init_mm, shadow_start,
				  shadow_end - shadow_start,
				  kasan_populate_vmalloc_pte, NULL);
	if (ret)
		return ret;

	flush_cache_vmap(shadow_start, shadow_end);

	/*
	 * We need to be careful about inter-cpu effects here. Consider:
	 *
	 *   CPU#0				CPU#1
	 * WRITE_ONCE(p, vmalloc(100));		while (x = READ_ONCE(p)) ;
	 *					p[99] = 1;
	 *
	 * With compiler instrumentation, that ends up looking like this:
	 *
	 *   CPU#0				CPU#1
	 * // vmalloc() allocates memory
	 * // let a = area->addr
	 * // we reach kasan_populate_vmalloc
	 * // and call kasan_unpoison_shadow:
	 * STORE shadow(a), unpoison_val
	 * ...
	 * STORE shadow(a+99), unpoison_val	x = LOAD p
	 * // rest of vmalloc process		<data dependency>
	 * STORE p, a				LOAD shadow(x+99)
	 *
	 * If there is no barrier between the end of unpoisoning the shadow
	 * and the store of the result to p, the stores could be committed
	 * in a different order by CPU#0, and CPU#1 could erroneously observe
	 * poison in the shadow.
	 *
	 * We need some sort of barrier between the stores.
	 *
	 * In the vmalloc() case, this is provided by a smp_wmb() in
	 * clear_vm_uninitialized_flag(). In the per-cpu allocator and in
	 * get_vm_area() and friends, the caller gets shadow allocated but
	 * doesn't have any pages mapped into the virtual address space that
	 * has been reserved. Mapping those pages in will involve taking and
	 * releasing a page-table lock, which will provide the barrier.
	 */

	return 0;
}

/*
 * Poison the shadow for a vmalloc region. Called as part of the
 * freeing process at the time the region is freed.
 */
void kasan_poison_vmalloc(const void *start, unsigned long size)
{
	if (!is_vmalloc_or_module_addr(start))
		return;

	size = round_up(size, KASAN_SHADOW_SCALE_SIZE);
	kasan_poison_shadow(start, size, KASAN_VMALLOC_INVALID);
}

void kasan_unpoison_vmalloc(const void *start, unsigned long size)
{
	if (!is_vmalloc_or_module_addr(start))
		return;

	kasan_unpoison_shadow(start, size);
}
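/*
 * Called for each shadow PTE in the range being released: the PTE is cleared
 * and its backing page freed under init_mm's page_table_lock, which also
 * serializes against concurrent kasan_populate_vmalloc_pte() callers.
 */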
static int kasan_depopulate_vmalloc_pte(pte_t *ptep, unsigned long addr,
					void *unused)
{
	unsigned long page;

	page = (unsigned long)__va(pte_pfn(*ptep) << PAGE_SHIFT);

	spin_lock(&init_mm.page_table_lock);

	if (likely(!pte_none(*ptep))) {
		pte_clear(&init_mm, addr, ptep);
		free_page(page);
	}
	spin_unlock(&init_mm.page_table_lock);

	return 0;
}

/*
 * Release the backing for the vmalloc region [start, end), which
 * lies within the free region [free_region_start, free_region_end).
 *
 * This can be run lazily, long after the region was freed. It runs
 * under vmap_area_lock, so it's not safe to interact with the vmalloc/vmap
 * infrastructure.
 *
 * How does this work?
 * -------------------
 *
 * We have a region that is page aligned, labelled as A.
 * That might not map onto the shadow in a way that is page-aligned:
 *
 *                    start                     end
 *                    v                         v
 * |????????|????????|AAAAAAAA|AA....AA|AAAAAAAA|????????| < vmalloc
 *  --------  --------  --------         --------  --------
 *      |         |         |               |          |
 *      |         |         |       /-------/          |
 *      \-------\ | /------/        |  /---------------/
 *               |||                ||
 *     |??AAAAAA|AAAAAAAA|AA??????|                < shadow
 *         (1)      (2)      (3)
 *
 * First we align the start upwards and the end downwards, so that the
 * shadow of the region aligns with shadow page boundaries. In the
 * example, this gives us the shadow page (2). This is the shadow entirely
 * covered by this allocation.
 *
 * Then we have the tricky bits. We want to know if we can free the
 * partially covered shadow pages - (1) and (3) in the example. For this,
 * we are given the start and end of the free region that contains this
 * allocation. Extending our previous example, we could have:
 *
 * free_region_start                                    free_region_end
 *      |              start                     end        |
 *      v              v                         v          v
 * |FFFFFFFF|FFFFFFFF|AAAAAAAA|AA....AA|AAAAAAAA|FFFFFFFF| < vmalloc
 *  --------  --------  --------         --------  --------
 *      |         |         |               |          |
 *      |         |         |       /-------/          |
 *      \-------\ | /------/        |  /---------------/
 *               |||                ||
 *     |FFAAAAAA|AAAAAAAA|AAF?????|                < shadow
 *         (1)      (2)      (3)
 *
 * Once again, we align the start of the free region up, and the end of
 * the free region down so that the shadow is page aligned. So we can free
 * page (1) - we know no allocation currently uses anything in that page,
 * because all of it is in the vmalloc free region. But we cannot free
 * page (3), because we can't be sure that the rest of it is unused.
 *
 * We only consider pages that contain part of the original region for
 * freeing: we don't try to free other pages from the free region or we'd
 * end up trying to free huge chunks of virtual address space.
 *
 * Concurrency
 * -----------
 *
 * How do we know that we're not freeing a page that is simultaneously
 * being used for a fresh allocation in kasan_populate_vmalloc(_pte)?
 *
 * We _can_ have kasan_release_vmalloc and kasan_populate_vmalloc running
 * at the same time. While we run under free_vmap_area_lock, the population
 * code does not.
 *
 * free_vmap_area_lock instead operates to ensure that the larger range
 * [free_region_start, free_region_end) is safe: because __alloc_vmap_area and
 * the per-cpu region-finding algorithm both run under free_vmap_area_lock,
 * no space identified as free will become used while we are running. This
 * means that so long as we are careful with alignment and only free shadow
 * pages entirely covered by the free region, we will not run into any
 * trouble - any simultaneous allocations will be for disjoint regions.
 */
void kasan_release_vmalloc(unsigned long start, unsigned long end,
			   unsigned long free_region_start,
			   unsigned long free_region_end)
{
	void *shadow_start, *shadow_end;
	unsigned long region_start, region_end;
	unsigned long size;

	region_start = ALIGN(start, PAGE_SIZE * KASAN_SHADOW_SCALE_SIZE);
	region_end = ALIGN_DOWN(end, PAGE_SIZE * KASAN_SHADOW_SCALE_SIZE);

	free_region_start = ALIGN(free_region_start,
				  PAGE_SIZE * KASAN_SHADOW_SCALE_SIZE);

	if (start != region_start &&
	    free_region_start < region_start)
		region_start -= PAGE_SIZE * KASAN_SHADOW_SCALE_SIZE;

	free_region_end = ALIGN_DOWN(free_region_end,
				     PAGE_SIZE * KASAN_SHADOW_SCALE_SIZE);

	if (end != region_end &&
	    free_region_end > region_end)
		region_end += PAGE_SIZE * KASAN_SHADOW_SCALE_SIZE;

	shadow_start = kasan_mem_to_shadow((void *)region_start);
	shadow_end = kasan_mem_to_shadow((void *)region_end);

	if (shadow_end > shadow_start) {
		size = shadow_end - shadow_start;
		apply_to_existing_page_range(&init_mm,
					     (unsigned long)shadow_start,
					     size, kasan_depopulate_vmalloc_pte,
					     NULL);
		flush_tlb_kernel_range((unsigned long)shadow_start,
				       (unsigned long)shadow_end);
	}
}
#endif