// SPDX-License-Identifier: GPL-2.0
/*
 * mm/fadvise.c
 *
 * Copyright (C) 2002, Linus Torvalds
 *
 * 11Jan2003	Andrew Morton
 *		Initial version.
 */

#include <linux/kernel.h>
#include <linux/file.h>
#include <linux/fs.h>
#include <linux/mm.h>
#include <linux/pagemap.h>
#include <linux/backing-dev.h>
#include <linux/pagevec.h>
#include <linux/fadvise.h>
#include <linux/writeback.h>
#include <linux/syscalls.h>
#include <linux/swap.h>

#include <asm/unistd.h>

/*
 * POSIX_FADV_WILLNEED could set PG_Referenced, and POSIX_FADV_NOREUSE could
 * deactivate the pages and clear PG_Referenced.
 */

int ksys_fadvise64_64(int fd, loff_t offset, loff_t len, int advice)
{
	struct fd f = fdget(fd);
	struct inode *inode;
	struct address_space *mapping;
	struct backing_dev_info *bdi;
	loff_t endbyte;			/* inclusive */
	pgoff_t start_index;
	pgoff_t end_index;
	unsigned long nrpages;
	int ret = 0;

	if (!f.file)
		return -EBADF;

	inode = file_inode(f.file);
	if (S_ISFIFO(inode->i_mode)) {
		ret = -ESPIPE;
		goto out;
	}

	mapping = f.file->f_mapping;
	if (!mapping || len < 0) {
		ret = -EINVAL;
		goto out;
	}

	bdi = inode_to_bdi(mapping->host);

	if (IS_DAX(inode) || (bdi == &noop_backing_dev_info)) {
		switch (advice) {
		case POSIX_FADV_NORMAL:
		case POSIX_FADV_RANDOM:
		case POSIX_FADV_SEQUENTIAL:
		case POSIX_FADV_WILLNEED:
		case POSIX_FADV_NOREUSE:
		case POSIX_FADV_DONTNEED:
			/* no bad return value, but ignore advice */
			break;
		default:
			ret = -EINVAL;
		}
		goto out;
	}

	/*
	 * Careful about overflows. Len == 0 means "as much as possible". Use
	 * unsigned math because signed overflows are undefined and UBSan
	 * complains.
	 */
	endbyte = (u64)offset + (u64)len;
	if (!len || endbyte < len)
		endbyte = -1;
	else
		endbyte--;		/* inclusive */

	switch (advice) {
	case POSIX_FADV_NORMAL:
		f.file->f_ra.ra_pages = bdi->ra_pages;
		spin_lock(&f.file->f_lock);
		f.file->f_mode &= ~FMODE_RANDOM;
		spin_unlock(&f.file->f_lock);
		break;
	case POSIX_FADV_RANDOM:
		spin_lock(&f.file->f_lock);
		f.file->f_mode |= FMODE_RANDOM;
		spin_unlock(&f.file->f_lock);
		break;
	case POSIX_FADV_SEQUENTIAL:
		f.file->f_ra.ra_pages = bdi->ra_pages * 2;
		spin_lock(&f.file->f_lock);
		f.file->f_mode &= ~FMODE_RANDOM;
		spin_unlock(&f.file->f_lock);
		break;
	case POSIX_FADV_WILLNEED:
		/* First and last PARTIAL page! */
		start_index = offset >> PAGE_SHIFT;
		end_index = endbyte >> PAGE_SHIFT;

		/* Careful about overflow on the "+1" */
		nrpages = end_index - start_index + 1;
		if (!nrpages)
			nrpages = ~0UL;

		/*
		 * Ignore return value because fadvise() shall return
		 * success even if the filesystem can't retrieve a hint.
		 */
		force_page_cache_readahead(mapping, f.file, start_index,
					   nrpages);
		break;
	case POSIX_FADV_NOREUSE:
		break;
	case POSIX_FADV_DONTNEED:
		if (!inode_write_congested(mapping->host))
			__filemap_fdatawrite_range(mapping, offset, endbyte,
						   WB_SYNC_NONE);

		/*
		 * First and last FULL page! Partial pages are deliberately
		 * preserved on the expectation that it is better to preserve
		 * needed memory than to discard unneeded memory.
		 */
		start_index = (offset + (PAGE_SIZE - 1)) >> PAGE_SHIFT;
		end_index = (endbyte >> PAGE_SHIFT);
		/*
		 * The page at end_index will be inclusively discarded
		 * by invalidate_mapping_pages(), so subtracting 1 from
		 * end_index means we will skip the last page. But if endbyte
		 * is page aligned or is at the end of file, we should not skip
		 * that page - discarding the last page is safe enough.
		 */
		if ((endbyte & ~PAGE_MASK) != ~PAGE_MASK &&
				endbyte != inode->i_size - 1) {
			/*
			 * The first page is tricky: 0 - 1 = -1, but pgoff_t
			 * is unsigned, so the end_index >= start_index
			 * check below would be true and we'd discard the
			 * whole file cache, which is not what was asked.
			 */
			if (end_index == 0)
				break;

			end_index--;
		}

		if (end_index >= start_index) {
			unsigned long count;

			/*
			 * It's common to FADV_DONTNEED right after
			 * the read or write that instantiates the
			 * pages, in which case there will be some
			 * sitting on the local LRU cache. Try to
			 * avoid the expensive remote drain and the
			 * second cache tree walk below by flushing
			 * them out right away.
			 */
			lru_add_drain();

			count = invalidate_mapping_pages(mapping,
						start_index, end_index);

			/*
			 * If fewer pages were invalidated than expected then
			 * it is possible that some of the pages were on
			 * a per-cpu pagevec for a remote CPU. Drain all
			 * pagevecs and try again.
			 */
			if (count < (end_index - start_index + 1)) {
				lru_add_drain_all();
				invalidate_mapping_pages(mapping, start_index,
						end_index);
			}
		}
		break;
	default:
		ret = -EINVAL;
	}
out:
	fdput(f);
	return ret;
}

SYSCALL_DEFINE4(fadvise64_64, int, fd, loff_t, offset, loff_t, len, int, advice)
{
	return ksys_fadvise64_64(fd, offset, len, advice);
}

#ifdef __ARCH_WANT_SYS_FADVISE64

SYSCALL_DEFINE4(fadvise64, int, fd, loff_t, offset, size_t, len, int, advice)
{
	return ksys_fadvise64_64(fd, offset, len, advice);
}

#endif
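
For reference, a minimal userspace sketch of how this code path is typically reached: glibc's posix_fadvise() wrapper issues the fadvise64/fadvise64_64 syscall handled above, with len == 0 meaning "to the end of the file". The file path and program structure are illustrative only, not part of the kernel source.

/* Hypothetical userspace example; the path below is illustrative. */
#define _XOPEN_SOURCE 600
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <unistd.h>

int main(int argc, char **argv)
{
	const char *path = argc > 1 ? argv[1] : "/tmp/example.dat";
	int fd = open(path, O_RDONLY);
	int err;

	if (fd < 0) {
		perror("open");
		return EXIT_FAILURE;
	}

	/*
	 * Hint that the whole file will be needed soon; for ordinary
	 * (non-DAX, writeback-backed) files this triggers readahead
	 * in the POSIX_FADV_WILLNEED case above.
	 */
	err = posix_fadvise(fd, 0, 0, POSIX_FADV_WILLNEED);
	if (err)
		fprintf(stderr, "posix_fadvise(WILLNEED): %s\n", strerror(err));

	/* ... read and process the file here ... */

	/*
	 * Drop cached pages once done; note that partial head/tail pages
	 * are deliberately preserved by the POSIX_FADV_DONTNEED handling.
	 * posix_fadvise() returns an error number directly, not -1/errno.
	 */
	err = posix_fadvise(fd, 0, 0, POSIX_FADV_DONTNEED);
	if (err)
		fprintf(stderr, "posix_fadvise(DONTNEED): %s\n", strerror(err));

	close(fd);
	return EXIT_SUCCESS;
}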