// SPDX-License-Identifier: GPL-2.0-or-later
/* Network filesystem high-level buffered read support.
 *
 * Copyright (C) 2021 Red Hat, Inc. All Rights Reserved.
 * Written by David Howells (dhowells@redhat.com)
 */

#include <linux/export.h>
#include <linux/task_io_accounting_ops.h>
#include "internal.h"

/*
 * Unlock the folios in a read operation. We need to set PG_fscache on any
 * folios we're going to write back before we unlock them.
 */
void netfs_rreq_unlock_folios(struct netfs_io_request *rreq)
{
        struct netfs_io_subrequest *subreq;
        struct folio *folio;
        unsigned int iopos, account = 0;
        pgoff_t start_page = rreq->start / PAGE_SIZE;
        pgoff_t last_page = ((rreq->start + rreq->len) / PAGE_SIZE) - 1;
        bool subreq_failed = false;

        XA_STATE(xas, &rreq->mapping->i_pages, start_page);

        if (test_bit(NETFS_RREQ_FAILED, &rreq->flags)) {
                __clear_bit(NETFS_RREQ_COPY_TO_CACHE, &rreq->flags);
                list_for_each_entry(subreq, &rreq->subrequests, rreq_link) {
                        __clear_bit(NETFS_SREQ_COPY_TO_CACHE, &subreq->flags);
                }
        }

        /* Walk through the pagecache and the I/O request lists simultaneously.
         * We may have a mixture of cached and uncached sections and we only
         * really want to write out the uncached sections. This is slightly
         * complicated by the possibility that we might have huge pages with a
         * mixture inside.
         */
        subreq = list_first_entry(&rreq->subrequests,
                                  struct netfs_io_subrequest, rreq_link);
        iopos = 0;
        subreq_failed = (subreq->error < 0);

        trace_netfs_rreq(rreq, netfs_rreq_trace_unlock);

        rcu_read_lock();
        xas_for_each(&xas, folio, last_page) {
                unsigned int pgpos = (folio_index(folio) - start_page) * PAGE_SIZE;
                unsigned int pgend = pgpos + folio_size(folio);
                bool pg_failed = false;

                for (;;) {
                        if (!subreq) {
                                pg_failed = true;
                                break;
                        }
                        if (test_bit(NETFS_SREQ_COPY_TO_CACHE, &subreq->flags))
                                folio_start_fscache(folio);
                        pg_failed |= subreq_failed;
                        if (pgend < iopos + subreq->len)
                                break;

                        account += subreq->transferred;
                        iopos += subreq->len;
                        if (!list_is_last(&subreq->rreq_link, &rreq->subrequests)) {
                                subreq = list_next_entry(subreq, rreq_link);
                                subreq_failed = (subreq->error < 0);
                        } else {
                                subreq = NULL;
                                subreq_failed = false;
                        }
                        if (pgend == iopos)
                                break;
                }

                if (!pg_failed) {
                        flush_dcache_folio(folio);
                        folio_mark_uptodate(folio);
                }

                if (!test_bit(NETFS_RREQ_DONT_UNLOCK_FOLIOS, &rreq->flags)) {
                        if (folio_index(folio) == rreq->no_unlock_folio &&
                            test_bit(NETFS_RREQ_NO_UNLOCK_FOLIO, &rreq->flags))
                                _debug("no unlock");
                        else
                                folio_unlock(folio);
                }
        }
        rcu_read_unlock();

        task_io_account_read(account);
        if (rreq->netfs_ops->done)
                rreq->netfs_ops->done(rreq);
}
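
/* Worked example of the walk above (the figures are purely illustrative):
 * with 4KiB pages, a 16KiB folio at the start of the request and two 8KiB
 * subrequests, the inner loop sees pgpos = 0 and pgend = 16384. The first
 * subrequest advances iopos to 8192; pgend still exceeds iopos, so the loop
 * moves on to the second subrequest, and only once iopos reaches 16384 does
 * it break out. The folio is marked uptodate only if neither subrequest
 * reported an error.
 */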

static void netfs_cache_expand_readahead(struct netfs_io_request *rreq,
                                         loff_t *_start, size_t *_len, loff_t i_size)
{
        struct netfs_cache_resources *cres = &rreq->cache_resources;

        if (cres->ops && cres->ops->expand_readahead)
                cres->ops->expand_readahead(cres, _start, _len, i_size);
}

static void netfs_rreq_expand(struct netfs_io_request *rreq,
                              struct readahead_control *ractl)
{
        /* Give the cache a chance to change the request parameters. The
         * resultant request must contain the original region.
         */
        netfs_cache_expand_readahead(rreq, &rreq->start, &rreq->len, rreq->i_size);

        /* Give the netfs a chance to change the request parameters. The
         * resultant request must contain the original region.
         */
        if (rreq->netfs_ops->expand_readahead)
                rreq->netfs_ops->expand_readahead(rreq);

        /* Expand the request if the cache wants it to start earlier. Note
         * that the expansion may get further extended if the VM wishes to
         * insert THPs and the preferred start and/or end wind up in the middle
         * of THPs.
         *
         * If this is the case, however, the THP size should be an integer
         * multiple of the cache granule size, so we get a whole number of
         * granules to deal with.
         */
        if (rreq->start != readahead_pos(ractl) ||
            rreq->len != readahead_length(ractl)) {
                readahead_expand(ractl, rreq->start, rreq->len);
                rreq->start = readahead_pos(ractl);
                rreq->len = readahead_length(ractl);

                trace_netfs_read(rreq, readahead_pos(ractl), readahead_length(ractl),
                                 netfs_read_trace_expanded);
        }
}

/**
 * netfs_readahead - Helper to manage a read request
 * @ractl: The description of the readahead request
 *
 * Fulfil a readahead request by drawing data from the cache if possible, or
 * the netfs if not. Space beyond the EOF is zero-filled. Multiple I/O
 * requests from different sources will get munged together. If necessary, the
 * readahead window can be expanded in either direction to a more convenient
 * alignment for RPC efficiency or to make storage in the cache feasible.
 *
 * The calling netfs must initialise a netfs context contiguous to the vfs
 * inode before calling this.
 *
 * This is usable whether or not caching is enabled.
 */
void netfs_readahead(struct readahead_control *ractl)
{
        struct netfs_io_request *rreq;
        struct netfs_i_context *ctx = netfs_i_context(ractl->mapping->host);
        int ret;

        _enter("%lx,%x", readahead_index(ractl), readahead_count(ractl));

        if (readahead_count(ractl) == 0)
                return;

        rreq = netfs_alloc_request(ractl->mapping, ractl->file,
                                   readahead_pos(ractl),
                                   readahead_length(ractl),
                                   NETFS_READAHEAD);
        if (IS_ERR(rreq))
                return;

        if (ctx->ops->begin_cache_operation) {
                ret = ctx->ops->begin_cache_operation(rreq);
                if (ret == -ENOMEM || ret == -EINTR || ret == -ERESTARTSYS)
                        goto cleanup_free;
        }

        netfs_stat(&netfs_n_rh_readahead);
        trace_netfs_read(rreq, readahead_pos(ractl), readahead_length(ractl),
                         netfs_read_trace_readahead);

        netfs_rreq_expand(rreq, ractl);

        /* Drop the refs on the folios here rather than in the cache or
         * filesystem. The locks will be dropped in netfs_rreq_unlock_folios().
         */
        while (readahead_folio(ractl))
                ;

        netfs_begin_read(rreq, false);
        return;

cleanup_free:
        netfs_put_request(rreq, false, netfs_rreq_trace_put_failed);
        return;
}
EXPORT_SYMBOL(netfs_readahead);
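
/* For illustration only: because the prototypes match the corresponding
 * address_space_operations methods, a network filesystem can normally plug
 * these helpers straight into its aops table. "myfs" below is a hypothetical
 * filesystem used purely as an example (netfs_readpage is defined further
 * down in this file):
 *
 *	const struct address_space_operations myfs_aops = {
 *		.readahead	= netfs_readahead,
 *		.readpage	= netfs_readpage,
 *		...
 *	};
 */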

/**
 * netfs_readpage - Helper to manage a readpage request
 * @file: The file to read from
 * @subpage: A subpage of the folio to read
 *
 * Fulfil a readpage request by drawing data from the cache if possible, or the
 * netfs if not. Space beyond the EOF is zero-filled. Multiple I/O requests
 * from different sources will get munged together.
 *
 * The calling netfs must initialise a netfs context contiguous to the vfs
 * inode before calling this.
 *
 * This is usable whether or not caching is enabled.
 */
int netfs_readpage(struct file *file, struct page *subpage)
{
        struct folio *folio = page_folio(subpage);
        struct address_space *mapping = folio_file_mapping(folio);
        struct netfs_io_request *rreq;
        struct netfs_i_context *ctx = netfs_i_context(mapping->host);
        int ret;

        _enter("%lx", folio_index(folio));

        rreq = netfs_alloc_request(mapping, file,
                                   folio_file_pos(folio), folio_size(folio),
                                   NETFS_READPAGE);
        if (IS_ERR(rreq)) {
                ret = PTR_ERR(rreq);
                goto alloc_error;
        }

        if (ctx->ops->begin_cache_operation) {
                ret = ctx->ops->begin_cache_operation(rreq);
                if (ret == -ENOMEM || ret == -EINTR || ret == -ERESTARTSYS)
                        goto discard;
        }

        netfs_stat(&netfs_n_rh_readpage);
        trace_netfs_read(rreq, rreq->start, rreq->len, netfs_read_trace_readpage);
        return netfs_begin_read(rreq, true);

discard:
        netfs_put_request(rreq, false, netfs_rreq_trace_put_discard);
alloc_error:
        folio_unlock(folio);
        return ret;
}
EXPORT_SYMBOL(netfs_readpage);

/*
 * Prepare a folio for writing without reading first
 * @folio: The folio being prepared
 * @pos: starting position for the write
 * @len: length of write
 * @always_fill: T if the folio should always be completely filled/cleared
 *
 * In some cases, write_begin doesn't need to read at all:
 * - full folio write
 * - write that lies in a folio that is completely beyond EOF
 * - write that covers the folio from start to EOF or beyond it
 *
 * If any of these criteria are met, then zero out the unwritten parts
 * of the folio and return true. Otherwise, return false.
 */
static bool netfs_skip_folio_read(struct folio *folio, loff_t pos, size_t len,
                                  bool always_fill)
{
        struct inode *inode = folio_inode(folio);
        loff_t i_size = i_size_read(inode);
        size_t offset = offset_in_folio(folio, pos);
        size_t plen = folio_size(folio);

        if (unlikely(always_fill)) {
                if (pos - offset + len <= i_size)
                        return false; /* Page entirely before EOF */
                zero_user_segment(&folio->page, 0, plen);
                folio_mark_uptodate(folio);
                return true;
        }

        /* Full folio write */
        if (offset == 0 && len >= plen)
                return true;

        /* Page entirely beyond the end of the file */
        if (pos - offset >= i_size)
                goto zero_out;

        /* Write that covers from the start of the folio to EOF or beyond */
        if (offset == 0 && (pos + len) >= i_size)
                goto zero_out;

        return false;
zero_out:
        zero_user_segments(&folio->page, 0, offset, offset + len, plen);
        return true;
}
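
/* For example (figures are illustrative): with an 8KiB folio lying entirely
 * beyond EOF and a 2KiB write starting 1KiB into it, the zero_out path above
 * clears bytes [0, 1KiB) and [3KiB, 8KiB), leaving only the region that the
 * write itself is about to fill.
 */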

/**
 * netfs_write_begin - Helper to prepare for writing
 * @file: The file to read from
 * @mapping: The mapping to read from
 * @pos: File position at which the write will begin
 * @len: The length of the write (may extend beyond the end of the folio chosen)
 * @aop_flags: AOP_* flags
 * @_folio: Where to put the resultant folio
 * @_fsdata: Place for the netfs to store a cookie
 *
 * Pre-read data for a write-begin request by drawing data from the cache if
 * possible, or the netfs if not. Space beyond the EOF is zero-filled.
 * Multiple I/O requests from different sources will get munged together. If
 * necessary, the readahead window can be expanded in either direction to a
 * more convenient alignment for RPC efficiency or to make storage in the cache
 * feasible.
 *
 * The calling netfs must provide a table of operations, only one of which,
 * issue_op, is mandatory.
 *
 * The check_write_begin() operation can be provided to check for and flush
 * conflicting writes once the folio is grabbed and locked. It is passed a
 * pointer to the fsdata cookie that gets returned to the VM to be passed to
 * write_end. It is permitted to sleep. It should return 0 if the request
 * should go ahead; unlock the folio and return -EAGAIN to cause the folio to
 * be re-acquired; or return an error.
 *
 * The calling netfs must initialise a netfs context contiguous to the vfs
 * inode before calling this.
 *
 * This is usable whether or not caching is enabled.
 */
int netfs_write_begin(struct file *file, struct address_space *mapping,
                      loff_t pos, unsigned int len, unsigned int aop_flags,
                      struct folio **_folio, void **_fsdata)
{
        struct netfs_io_request *rreq;
        struct netfs_i_context *ctx = netfs_i_context(file_inode(file));
        struct folio *folio;
        unsigned int fgp_flags;
        pgoff_t index = pos >> PAGE_SHIFT;
        int ret;

        DEFINE_READAHEAD(ractl, file, NULL, mapping, index);

retry:
        fgp_flags = FGP_LOCK | FGP_WRITE | FGP_CREAT | FGP_STABLE;
        if (aop_flags & AOP_FLAG_NOFS)
                fgp_flags |= FGP_NOFS;
        folio = __filemap_get_folio(mapping, index, fgp_flags,
                                    mapping_gfp_mask(mapping));
        if (!folio)
                return -ENOMEM;

        if (ctx->ops->check_write_begin) {
                /* Allow the netfs (e.g. ceph) to flush conflicts. */
                ret = ctx->ops->check_write_begin(file, pos, len, folio, _fsdata);
                if (ret < 0) {
                        trace_netfs_failure(NULL, NULL, ret, netfs_fail_check_write_begin);
                        if (ret == -EAGAIN)
                                goto retry;
                        goto error;
                }
        }

        if (folio_test_uptodate(folio))
                goto have_folio;

        /* If the page is beyond the EOF, we want to clear it - unless it's
         * within the cache granule containing the EOF, in which case we need
         * to preload the granule.
         */
        if (!netfs_is_cache_enabled(ctx) &&
            netfs_skip_folio_read(folio, pos, len, false)) {
                netfs_stat(&netfs_n_rh_write_zskip);
                goto have_folio_no_wait;
        }

        rreq = netfs_alloc_request(mapping, file,
                                   folio_file_pos(folio), folio_size(folio),
                                   NETFS_READ_FOR_WRITE);
        if (IS_ERR(rreq)) {
                ret = PTR_ERR(rreq);
                goto error;
        }
        rreq->no_unlock_folio = folio_index(folio);
        __set_bit(NETFS_RREQ_NO_UNLOCK_FOLIO, &rreq->flags);
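
        /* The target folio must come back to the caller still locked, as
         * ->write_begin() is expected to return a locked folio. The flag set
         * above tells netfs_rreq_unlock_folios() to leave this one folio
         * locked when the read completes.
         */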

        if (ctx->ops->begin_cache_operation) {
                ret = ctx->ops->begin_cache_operation(rreq);
                if (ret == -ENOMEM || ret == -EINTR || ret == -ERESTARTSYS)
                        goto error_put;
        }

        netfs_stat(&netfs_n_rh_write_begin);
        trace_netfs_read(rreq, pos, len, netfs_read_trace_write_begin);

        /* Expand the request to meet caching requirements and download
         * preferences.
         */
        ractl._nr_pages = folio_nr_pages(folio);
        netfs_rreq_expand(rreq, &ractl);

        /* We hold the folio locks, so we can drop the references */
        folio_get(folio);
        while (readahead_folio(&ractl))
                ;

        ret = netfs_begin_read(rreq, true);
        if (ret < 0)
                goto error;

have_folio:
        ret = folio_wait_fscache_killable(folio);
        if (ret < 0)
                goto error;
have_folio_no_wait:
        *_folio = folio;
        _leave(" = 0");
        return 0;

error_put:
        netfs_put_request(rreq, false, netfs_rreq_trace_put_failed);
error:
        folio_unlock(folio);
        folio_put(folio);
        _leave(" = %d", ret);
        return ret;
}
EXPORT_SYMBOL(netfs_write_begin);
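
/* For illustration only: a filesystem's ->write_begin() implementation would
 * typically wrap this helper and convert the returned folio into the page
 * that the VM expects back. "myfs" is a hypothetical example, not part of
 * this API:
 *
 *	static int myfs_write_begin(struct file *file, struct address_space *mapping,
 *				    loff_t pos, unsigned len, unsigned flags,
 *				    struct page **pagep, void **fsdata)
 *	{
 *		struct folio *folio;
 *		int ret;
 *
 *		ret = netfs_write_begin(file, mapping, pos, len, flags,
 *					&folio, fsdata);
 *		if (ret == 0)
 *			*pagep = folio_file_page(folio, pos / PAGE_SIZE);
 *		return ret;
 *	}
 */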