// SPDX-License-Identifier: GPL-2.0
/*
 * mm/fadvise.c
 *
 * Copyright (C) 2002, Linus Torvalds
 *
 * 11Jan2003	Andrew Morton
 *		Initial version.
 */

#include <linux/kernel.h>
#include <linux/file.h>
#include <linux/fs.h>
#include <linux/mm.h>
#include <linux/pagemap.h>
#include <linux/backing-dev.h>
#include <linux/pagevec.h>
#include <linux/fadvise.h>
#include <linux/writeback.h>
#include <linux/syscalls.h>
#include <linux/swap.h>

#include <asm/unistd.h>

/*
 * POSIX_FADV_WILLNEED could set PG_Referenced, and POSIX_FADV_NOREUSE could
 * deactivate the pages and clear PG_Referenced.
 */
SYSCALL_DEFINE4(fadvise64_64, int, fd, loff_t, offset, loff_t, len, int, advice)
{
	struct fd f = fdget(fd);
	struct inode *inode;
	struct address_space *mapping;
	struct backing_dev_info *bdi;
	loff_t endbyte;			/* inclusive */
	pgoff_t start_index;
	pgoff_t end_index;
	unsigned long nrpages;
	int ret = 0;

	if (!f.file)
		return -EBADF;

	inode = file_inode(f.file);
	if (S_ISFIFO(inode->i_mode)) {
		ret = -ESPIPE;
		goto out;
	}

	mapping = f.file->f_mapping;
	if (!mapping || len < 0) {
		ret = -EINVAL;
		goto out;
	}

	bdi = inode_to_bdi(mapping->host);

	if (IS_DAX(inode) || (bdi == &noop_backing_dev_info)) {
		switch (advice) {
		case POSIX_FADV_NORMAL:
		case POSIX_FADV_RANDOM:
		case POSIX_FADV_SEQUENTIAL:
		case POSIX_FADV_WILLNEED:
		case POSIX_FADV_NOREUSE:
		case POSIX_FADV_DONTNEED:
			/* no bad return value, but ignore advice */
			break;
		default:
			ret = -EINVAL;
		}
		goto out;
	}

	/* Careful about overflows. Len == 0 means "as much as possible" */
	endbyte = offset + len;
	if (!len || endbyte < len)
		endbyte = -1;
	else
		endbyte--;		/* inclusive */

	switch (advice) {
	case POSIX_FADV_NORMAL:
		f.file->f_ra.ra_pages = bdi->ra_pages;
		spin_lock(&f.file->f_lock);
		f.file->f_mode &= ~FMODE_RANDOM;
		spin_unlock(&f.file->f_lock);
		break;
	case POSIX_FADV_RANDOM:
		spin_lock(&f.file->f_lock);
		f.file->f_mode |= FMODE_RANDOM;
		spin_unlock(&f.file->f_lock);
		break;
	case POSIX_FADV_SEQUENTIAL:
		f.file->f_ra.ra_pages = bdi->ra_pages * 2;
		spin_lock(&f.file->f_lock);
		f.file->f_mode &= ~FMODE_RANDOM;
		spin_unlock(&f.file->f_lock);
		break;
	case POSIX_FADV_WILLNEED:
		/* First and last PARTIAL page! */
		start_index = offset >> PAGE_SHIFT;
		end_index = endbyte >> PAGE_SHIFT;

		/* Careful about overflow on the "+1" */
		nrpages = end_index - start_index + 1;
		if (!nrpages)
			nrpages = ~0UL;

		/*
		 * Ignore the return value because fadvise() shall return
		 * success even if the filesystem can't retrieve a hint.
		 */
		force_page_cache_readahead(mapping, f.file, start_index,
					   nrpages);
		break;
	case POSIX_FADV_NOREUSE:
		break;
	case POSIX_FADV_DONTNEED:
		if (!inode_write_congested(mapping->host))
			__filemap_fdatawrite_range(mapping, offset, endbyte,
						   WB_SYNC_NONE);

		/*
		 * First and last FULL page! Partial pages are deliberately
		 * preserved on the expectation that it is better to preserve
		 * needed memory than to discard unneeded memory.
		 */
		start_index = (offset + (PAGE_SIZE - 1)) >> PAGE_SHIFT;
		end_index = (endbyte >> PAGE_SHIFT);
		/*
		 * The page at end_index will be inclusively discarded by
		 * invalidate_mapping_pages(), so subtracting 1 from
		 * end_index means we will skip the last page.  But if endbyte
		 * is page aligned or is at the end of file, we should not skip
		 * that page - discarding the last page is safe enough.
		 */
		if ((endbyte & ~PAGE_MASK) != ~PAGE_MASK &&
				endbyte != inode->i_size - 1) {
			/* First page is tricky as 0 - 1 = -1, but pgoff_t
			 * is unsigned, so the end_index >= start_index
			 * check below would be true and we'll discard the whole
			 * file cache, which is not what was asked.
			 */
			if (end_index == 0)
				break;

			end_index--;
		}

		if (end_index >= start_index) {
			unsigned long count;

			/*
			 * It's common to FADV_DONTNEED right after
			 * the read or write that instantiates the
			 * pages, in which case there will be some
			 * sitting on the local LRU cache. Try to
			 * avoid the expensive remote drain and the
			 * second cache tree walk below by flushing
			 * them out right away.
			 */
			lru_add_drain();

			count = invalidate_mapping_pages(mapping,
						start_index, end_index);

			/*
			 * If fewer pages were invalidated than expected then
			 * it is possible that some of the pages were on
			 * a per-cpu pagevec for a remote CPU. Drain all
			 * pagevecs and try again.
			 */
			if (count < (end_index - start_index + 1)) {
				lru_add_drain_all();
				invalidate_mapping_pages(mapping, start_index,
						end_index);
			}
		}
		break;
	default:
		ret = -EINVAL;
	}
out:
	fdput(f);
	return ret;
}

#ifdef __ARCH_WANT_SYS_FADVISE64

SYSCALL_DEFINE4(fadvise64, int, fd, loff_t, offset, size_t, len, int, advice)
{
	return sys_fadvise64_64(fd, offset, len, advice);
}

#endif
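For context only, and not part of this kernel source file: below is a minimal userspace sketch of how the syscall implemented above is normally reached through the glibc posix_fadvise() wrapper, first hinting sequential access (which, in the code above, doubles the per-file readahead window) and then dropping the clean page cache with POSIX_FADV_DONTNEED (len == 0 meaning "to the end of the file", as handled above). The file path is a hypothetical example; posix_fadvise() returns an error number directly rather than setting errno.

	/* Illustrative userspace sketch, not part of mm/fadvise.c. */
	#define _POSIX_C_SOURCE 200112L
	#include <fcntl.h>
	#include <stdio.h>
	#include <string.h>
	#include <unistd.h>

	int main(void)
	{
		int fd = open("/var/log/example.log", O_RDONLY);  /* hypothetical path */
		if (fd < 0) {
			perror("open");
			return 1;
		}

		/* Hint sequential access; the kernel doubles readahead for this file. */
		int err = posix_fadvise(fd, 0, 0, POSIX_FADV_SEQUENTIAL);
		if (err)
			fprintf(stderr, "posix_fadvise: %s\n", strerror(err));

		/* ... read the file sequentially here ... */

		/* Ask the kernel to drop the now-unneeded clean page cache. */
		err = posix_fadvise(fd, 0, 0, POSIX_FADV_DONTNEED);
		if (err)
			fprintf(stderr, "posix_fadvise: %s\n", strerror(err));

		close(fd);
		return 0;
	}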