/*
 * mm/fadvise.c
 *
 * Copyright (C) 2002, Linus Torvalds
 *
 * 11Jan2003	Andrew Morton
 *		Initial version.
 */

#include <linux/kernel.h>
#include <linux/file.h>
#include <linux/fs.h>
#include <linux/mm.h>
#include <linux/pagemap.h>
#include <linux/backing-dev.h>
#include <linux/pagevec.h>
#include <linux/fadvise.h>
#include <linux/writeback.h>
#include <linux/syscalls.h>
#include <linux/swap.h>

#include <asm/unistd.h>

/*
 * POSIX_FADV_WILLNEED could set PG_Referenced, and POSIX_FADV_NOREUSE could
 * deactivate the pages and clear PG_Referenced.
 */
SYSCALL_DEFINE4(fadvise64_64, int, fd, loff_t, offset, loff_t, len, int, advice)
{
	struct fd f = fdget(fd);
	struct inode *inode;
	struct address_space *mapping;
	struct backing_dev_info *bdi;
	loff_t endbyte;			/* inclusive */
	pgoff_t start_index;
	pgoff_t end_index;
	unsigned long nrpages;
	int ret = 0;

	if (!f.file)
		return -EBADF;

	inode = file_inode(f.file);
	if (S_ISFIFO(inode->i_mode)) {
		ret = -ESPIPE;
		goto out;
	}

	mapping = f.file->f_mapping;
	if (!mapping || len < 0) {
		ret = -EINVAL;
		goto out;
	}

	bdi = inode_to_bdi(mapping->host);

	if (IS_DAX(inode) || (bdi == &noop_backing_dev_info)) {
		switch (advice) {
		case POSIX_FADV_NORMAL:
		case POSIX_FADV_RANDOM:
		case POSIX_FADV_SEQUENTIAL:
		case POSIX_FADV_WILLNEED:
		case POSIX_FADV_NOREUSE:
		case POSIX_FADV_DONTNEED:
			/* no bad return value, but ignore advice */
			break;
		default:
			ret = -EINVAL;
		}
		goto out;
	}

	/* Careful about overflows. Len == 0 means "as much as possible" */
	endbyte = offset + len;
	if (!len || endbyte < len)
		endbyte = -1;
	else
		endbyte--;		/* inclusive */

	switch (advice) {
	case POSIX_FADV_NORMAL:
		f.file->f_ra.ra_pages = bdi->ra_pages;
		spin_lock(&f.file->f_lock);
		f.file->f_mode &= ~FMODE_RANDOM;
		spin_unlock(&f.file->f_lock);
		break;
	case POSIX_FADV_RANDOM:
		spin_lock(&f.file->f_lock);
		f.file->f_mode |= FMODE_RANDOM;
		spin_unlock(&f.file->f_lock);
		break;
	case POSIX_FADV_SEQUENTIAL:
		f.file->f_ra.ra_pages = bdi->ra_pages * 2;
		spin_lock(&f.file->f_lock);
		f.file->f_mode &= ~FMODE_RANDOM;
		spin_unlock(&f.file->f_lock);
		break;
	case POSIX_FADV_WILLNEED:
		/* First and last PARTIAL page! */
		start_index = offset >> PAGE_SHIFT;
		end_index = endbyte >> PAGE_SHIFT;

		/* Careful about overflow on the "+1" */
		nrpages = end_index - start_index + 1;
		if (!nrpages)
			nrpages = ~0UL;

		/*
		 * Ignore the return value because fadvise() shall return
		 * success even if the filesystem can't retrieve a hint.
		 */
		force_page_cache_readahead(mapping, f.file, start_index,
					   nrpages);
		break;
	case POSIX_FADV_NOREUSE:
		break;
	case POSIX_FADV_DONTNEED:
		if (!inode_write_congested(mapping->host))
			__filemap_fdatawrite_range(mapping, offset, endbyte,
						   WB_SYNC_NONE);

		/*
		 * First and last FULL page! Partial pages are deliberately
		 * preserved on the expectation that it is better to preserve
		 * needed memory than to discard unneeded memory.
		 */
		start_index = (offset+(PAGE_SIZE-1)) >> PAGE_SHIFT;
		end_index = (endbyte >> PAGE_SHIFT);
		if ((endbyte & ~PAGE_MASK) != ~PAGE_MASK) {
			/* First page is tricky as 0 - 1 = -1, but pgoff_t
			 * is unsigned, so the end_index >= start_index
			 * check below would be true and we'll discard the
			 * whole file cache which is not what was asked.
			 */
			if (end_index == 0)
				break;

			end_index--;
		}

		if (end_index >= start_index) {
			unsigned long count;

			/*
			 * It's common to FADV_DONTNEED right after
			 * the read or write that instantiates the
			 * pages, in which case there will be some
			 * sitting on the local LRU cache. Try to
			 * avoid the expensive remote drain and the
			 * second cache tree walk below by flushing
			 * them out right away.
			 */
			lru_add_drain();

			count = invalidate_mapping_pages(mapping,
						start_index, end_index);

			/*
			 * If fewer pages were invalidated than expected then
			 * it is possible that some of the pages were on
			 * a per-cpu pagevec for a remote CPU. Drain all
			 * pagevecs and try again.
			 */
			if (count < (end_index - start_index + 1)) {
				lru_add_drain_all();
				invalidate_mapping_pages(mapping, start_index,
						end_index);
			}
		}
		break;
	default:
		ret = -EINVAL;
	}
out:
	fdput(f);
	return ret;
}

#ifdef __ARCH_WANT_SYS_FADVISE64

SYSCALL_DEFINE4(fadvise64, int, fd, loff_t, offset, size_t, len, int, advice)
{
	return sys_fadvise64_64(fd, offset, len, advice);
}

#endif
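
/*
 * Usage sketch (editor's addition, kept in a comment so it is not part of
 * the kernel build): from userspace this syscall is normally reached
 * through the posix_fadvise(3) wrapper.  A minimal, hypothetical example
 * that reads a file once and then tells the kernel the cached pages will
 * not be needed again, exercising the POSIX_FADV_DONTNEED path above,
 * might look like this (read_once_and_drop is a made-up helper name):
 *
 *	#include <fcntl.h>
 *	#include <unistd.h>
 *
 *	int read_once_and_drop(const char *path)
 *	{
 *		char buf[4096];
 *		int fd = open(path, O_RDONLY);
 *
 *		if (fd < 0)
 *			return -1;
 *		while (read(fd, buf, sizeof(buf)) > 0)
 *			;
 *		posix_fadvise(fd, 0, 0, POSIX_FADV_DONTNEED);
 *		return close(fd);
 *	}
 *
 * Passing len == 0 asks for "as much as possible", i.e. to the end of the
 * file, matching the convention handled in sys_fadvise64_64() above.
 */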