dax.c @ 62b31a045757eac81fed94b19df47418a0818528 (old) vs. dax.c @ 694565356c2e06224d94774a42709cc8dfab49ee (new)
1// SPDX-License-Identifier: GPL-2.0-only
2/*
3 * fs/dax.c - Direct Access filesystem code
4 * Copyright (c) 2013-2014 Intel Corporation
5 * Author: Matthew Wilcox <matthew.r.wilcox@intel.com>
6 * Author: Ross Zwisler <ross.zwisler@linux.intel.com>
7 */
8

--- 545 unchanged lines hidden (view full) ---

554 return xa_mk_internal(VM_FAULT_SIGBUS);
555 return entry;
556fallback:
557 xas_unlock_irq(xas);
558 return xa_mk_internal(VM_FAULT_FALLBACK);
559}
560
561/**
562 * dax_layout_busy_page - find first pinned page in @mapping
562 * dax_layout_busy_page_range - find first pinned page in @mapping
563 * @mapping: address space to scan for a page with ref count > 1
564 * @start: Starting offset. Page containing 'start' is included.
565 * @end: End offset. Page containing 'end' is included. If 'end' is LLONG_MAX,
566 * pages from 'start' till the end of file are included.
567 *
568 * DAX requires ZONE_DEVICE mapped pages. These pages are never
569 * 'onlined' to the page allocator so they are considered idle when
570 * page->count == 1. A filesystem uses this interface to determine if
571 * any page in the mapping is busy, i.e. for DMA, or other
572 * get_user_pages() usages.
573 *
574 * It is expected that the filesystem is holding locks to block the
575 * establishment of new mappings in this address_space. I.e. it expects
576 * to be able to run unmap_mapping_range() and subsequently not race
577 * mapping_mapped() becoming true.
578 */
576struct page *dax_layout_busy_page(struct address_space *mapping)
577{
578 XA_STATE(xas, &mapping->i_pages, 0);
579 void *entry;
580 unsigned int scanned = 0;
581 struct page *page = NULL;
579struct page *dax_layout_busy_page_range(struct address_space *mapping,
580 loff_t start, loff_t end)
581{
582 void *entry;
583 unsigned int scanned = 0;
584 struct page *page = NULL;
585 pgoff_t start_idx = start >> PAGE_SHIFT;
586 pgoff_t end_idx;
587 XA_STATE(xas, &mapping->i_pages, start_idx);
588
589 /*
590 * In the 'limited' case get_user_pages() for dax is disabled.
591 */
592 if (IS_ENABLED(CONFIG_FS_DAX_LIMITED))
593 return NULL;
594
595 if (!dax_mapping(mapping) || !mapping_mapped(mapping))
596 return NULL;
597
598 /* If end == LLONG_MAX, all pages from start to till end of file */
599 if (end == LLONG_MAX)
600 end_idx = ULONG_MAX;
601 else
602 end_idx = end >> PAGE_SHIFT;
603 /*
604 * If we race get_user_pages_fast() here either we'll see the
605 * elevated page count in the iteration and wait, or
606 * get_user_pages_fast() will see that the page it took a reference
607 * against is no longer mapped in the page tables and bail to the
608 * get_user_pages() slow path. The slow path is protected by
609 * pte_lock() and pmd_lock(). New references are not taken without
599 * holding those locks, and unmap_mapping_range() will not zero the
610 * holding those locks, and unmap_mapping_pages() will not zero the
611 * pte or pmd without holding the respective lock, so we are
612 * guaranteed to either see new references or prevent new
613 * references from being established.
614 */
604 unmap_mapping_range(mapping, 0, 0, 0);
615 unmap_mapping_pages(mapping, start_idx, end_idx - start_idx + 1, 0);
616
617 xas_lock_irq(&xas);
607 xas_for_each(&xas, entry, ULONG_MAX) {
618 xas_for_each(&xas, entry, end_idx) {
619 if (WARN_ON_ONCE(!xa_is_value(entry)))
620 continue;
621 if (unlikely(dax_is_locked(entry)))
622 entry = get_unlocked_entry(&xas, 0);
623 if (entry)
624 page = dax_busy_page(entry);
625 put_unlocked_entry(&xas, entry);
626 if (page)

--- 4 unchanged lines hidden (view full) ---

631 xas_pause(&xas);
632 xas_unlock_irq(&xas);
633 cond_resched();
634 xas_lock_irq(&xas);
635 }
636 xas_unlock_irq(&xas);
637 return page;
638}
639EXPORT_SYMBOL_GPL(dax_layout_busy_page_range);
640
641struct page *dax_layout_busy_page(struct address_space *mapping)
642{
643 return dax_layout_busy_page_range(mapping, 0, LLONG_MAX);
644}
645EXPORT_SYMBOL_GPL(dax_layout_busy_page);
646
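
The new range interface lets a filesystem wait for pinned DAX pages in only the part of the file it is about to truncate or hole-punch, rather than scanning the whole mapping. As a rough illustration of the contract spelled out in the comment above (the caller holds locks that block new mappings, finds a busy page, then waits for its extra reference to drop), here is a hedged sketch. example_break_dax_layouts() is hypothetical; real callers such as XFS drop and re-take their locks around the wait and then retry.

#include <linux/dax.h>
#include <linux/pagemap.h>
#include <linux/wait_bit.h>

/*
 * Sketch only: wait until no page in [start, end] is pinned (e.g. by
 * get_user_pages()/DMA) before modifying that part of the layout.
 * Assumes the caller already holds the locks that prevent new mappings,
 * as dax_layout_busy_page_range() requires.
 */
static int example_break_dax_layouts(struct address_space *mapping,
				     loff_t start, loff_t end)
{
	struct page *page;

	for (;;) {
		page = dax_layout_busy_page_range(mapping, start, end);
		if (!page)
			return 0;	/* nothing in the range is pinned */

		/* Sleep until the last extra reference is dropped. */
		if (wait_var_event_killable(&page->_refcount,
					    page_ref_count(page) == 1))
			return -ERESTARTSYS;
	}
}
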
647static int __dax_invalidate_entry(struct address_space *mapping,
648 pgoff_t index, bool trunc)
649{
650 XA_STATE(xas, &mapping->i_pages, index);
651 int ret = 0;
652 void *entry;

--- 396 unchanged lines hidden (view full) ---

1049 *entry = dax_insert_entry(xas, mapping, vmf, *entry, pfn,
1050 DAX_ZERO_PAGE, false);
1051
1052 ret = vmf_insert_mixed(vmf->vma, vaddr, pfn);
1053 trace_dax_load_hole(inode, vmf, ret);
1054 return ret;
1055}
1056
1040int dax_iomap_zero(loff_t pos, unsigned offset, unsigned size,
1041 struct iomap *iomap)
1057s64 dax_iomap_zero(loff_t pos, u64 length, struct iomap *iomap)
1058{
1059 sector_t sector = iomap_sector(iomap, pos & PAGE_MASK);
1060 pgoff_t pgoff;
1061 long rc, id;
1062 void *kaddr;
1063 bool page_aligned = false;
1064 unsigned offset = offset_in_page(pos);
1065 unsigned size = min_t(u64, PAGE_SIZE - offset, length);
1066
1067 if (IS_ALIGNED(sector << SECTOR_SHIFT, PAGE_SIZE) &&
1051 IS_ALIGNED(size, PAGE_SIZE))
1068 (size == PAGE_SIZE))
1069 page_aligned = true;
1070
1071 rc = bdev_dax_pgoff(iomap->bdev, sector, PAGE_SIZE, &pgoff);
1072 if (rc)
1073 return rc;
1074
1075 id = dax_read_lock();
1076
1077 if (page_aligned)
1061 rc = dax_zero_page_range(iomap->dax_dev, pgoff,
1062 size >> PAGE_SHIFT);
1078 rc = dax_zero_page_range(iomap->dax_dev, pgoff, 1);
1079 else
1080 rc = dax_direct_access(iomap->dax_dev, pgoff, 1, &kaddr, NULL);
1081 if (rc < 0) {
1082 dax_read_unlock(id);
1083 return rc;
1084 }
1085
1086 if (!page_aligned) {
1087 memset(kaddr + offset, 0, size);
1088 dax_flush(iomap->dax_dev, kaddr + offset, size);
1089 }
1090 dax_read_unlock(id);
1075 return 0;
1091 return size;
1092}
1093
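
With the reworked prototype, dax_iomap_zero() takes a byte count and returns how many bytes it zeroed (at most one page's worth per call) instead of being handed a pre-computed per-page offset/size pair, so a caller can simply advance by the return value. The sketch below is illustrative only: example_zero_range() is a hypothetical wrapper, and the real consumer is the iomap zero-range path.

#include <linux/dax.h>
#include <linux/iomap.h>

/*
 * Sketch only: zero [pos, pos + length) a page at a time, advancing by
 * the number of bytes dax_iomap_zero() reports.  Negative return values
 * are error codes and are propagated to the caller.
 */
static loff_t example_zero_range(loff_t pos, loff_t length, struct iomap *iomap)
{
	loff_t done = 0;

	while (length > 0) {
		s64 bytes = dax_iomap_zero(pos, length, iomap);

		if (bytes < 0)
			return bytes;
		pos += bytes;
		length -= bytes;
		done += bytes;
	}
	return done;
}
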
1094static loff_t
1095dax_iomap_actor(struct inode *inode, loff_t pos, loff_t length, void *data,
1096 struct iomap *iomap, struct iomap *srcmap)
1097{
1098 struct block_device *bdev = iomap->bdev;
1099 struct dax_device *dax_dev = iomap->dax_dev;

--- 633 unchanged lines hidden ---