1 // SPDX-License-Identifier: GPL-2.0-or-later 2 /* Network filesystem high-level buffered read support. 3 * 4 * Copyright (C) 2021 Red Hat, Inc. All Rights Reserved. 5 * Written by David Howells (dhowells@redhat.com) 6 */ 7 8 #include <linux/export.h> 9 #include <linux/task_io_accounting_ops.h> 10 #include "internal.h" 11 12 /* 13 * Unlock the folios in a read operation. We need to set PG_fscache on any 14 * folios we're going to write back before we unlock them. 15 */ 16 void netfs_rreq_unlock_folios(struct netfs_io_request *rreq) 17 { 18 struct netfs_io_subrequest *subreq; 19 struct folio *folio; 20 unsigned int iopos, account = 0; 21 pgoff_t start_page = rreq->start / PAGE_SIZE; 22 pgoff_t last_page = ((rreq->start + rreq->len) / PAGE_SIZE) - 1; 23 bool subreq_failed = false; 24 25 XA_STATE(xas, &rreq->mapping->i_pages, start_page); 26 27 if (test_bit(NETFS_RREQ_FAILED, &rreq->flags)) { 28 __clear_bit(NETFS_RREQ_COPY_TO_CACHE, &rreq->flags); 29 list_for_each_entry(subreq, &rreq->subrequests, rreq_link) { 30 __clear_bit(NETFS_SREQ_COPY_TO_CACHE, &subreq->flags); 31 } 32 } 33 34 /* Walk through the pagecache and the I/O request lists simultaneously. 35 * We may have a mixture of cached and uncached sections and we only 36 * really want to write out the uncached sections. This is slightly 37 * complicated by the possibility that we might have huge pages with a 38 * mixture inside. 39 */ 40 subreq = list_first_entry(&rreq->subrequests, 41 struct netfs_io_subrequest, rreq_link); 42 iopos = 0; 43 subreq_failed = (subreq->error < 0); 44 45 trace_netfs_rreq(rreq, netfs_rreq_trace_unlock); 46 47 rcu_read_lock(); 48 xas_for_each(&xas, folio, last_page) { 49 unsigned int pgpos = (folio_index(folio) - start_page) * PAGE_SIZE; 50 unsigned int pgend = pgpos + folio_size(folio); 51 bool pg_failed = false; 52 53 for (;;) { 54 if (!subreq) { 55 pg_failed = true; 56 break; 57 } 58 if (test_bit(NETFS_SREQ_COPY_TO_CACHE, &subreq->flags)) 59 folio_start_fscache(folio); 60 pg_failed |= subreq_failed; 61 if (pgend < iopos + subreq->len) 62 break; 63 64 account += subreq->transferred; 65 iopos += subreq->len; 66 if (!list_is_last(&subreq->rreq_link, &rreq->subrequests)) { 67 subreq = list_next_entry(subreq, rreq_link); 68 subreq_failed = (subreq->error < 0); 69 } else { 70 subreq = NULL; 71 subreq_failed = false; 72 } 73 if (pgend == iopos) 74 break; 75 } 76 77 if (!pg_failed) { 78 flush_dcache_folio(folio); 79 folio_mark_uptodate(folio); 80 } 81 82 if (!test_bit(NETFS_RREQ_DONT_UNLOCK_FOLIOS, &rreq->flags)) { 83 if (folio_index(folio) == rreq->no_unlock_folio && 84 test_bit(NETFS_RREQ_NO_UNLOCK_FOLIO, &rreq->flags)) 85 _debug("no unlock"); 86 else 87 folio_unlock(folio); 88 } 89 } 90 rcu_read_unlock(); 91 92 task_io_account_read(account); 93 if (rreq->netfs_ops->done) 94 rreq->netfs_ops->done(rreq); 95 } 96 97 static void netfs_cache_expand_readahead(struct netfs_io_request *rreq, 98 loff_t *_start, size_t *_len, loff_t i_size) 99 { 100 struct netfs_cache_resources *cres = &rreq->cache_resources; 101 102 if (cres->ops && cres->ops->expand_readahead) 103 cres->ops->expand_readahead(cres, _start, _len, i_size); 104 } 105 106 static void netfs_rreq_expand(struct netfs_io_request *rreq, 107 struct readahead_control *ractl) 108 { 109 /* Give the cache a chance to change the request parameters. The 110 * resultant request must contain the original region. 111 */ 112 netfs_cache_expand_readahead(rreq, &rreq->start, &rreq->len, rreq->i_size); 113 114 /* Give the netfs a chance to change the request parameters. The 115 * resultant request must contain the original region. 116 */ 117 if (rreq->netfs_ops->expand_readahead) 118 rreq->netfs_ops->expand_readahead(rreq); 119 120 /* Expand the request if the cache wants it to start earlier. Note 121 * that the expansion may get further extended if the VM wishes to 122 * insert THPs and the preferred start and/or end wind up in the middle 123 * of THPs. 124 * 125 * If this is the case, however, the THP size should be an integer 126 * multiple of the cache granule size, so we get a whole number of 127 * granules to deal with. 128 */ 129 if (rreq->start != readahead_pos(ractl) || 130 rreq->len != readahead_length(ractl)) { 131 readahead_expand(ractl, rreq->start, rreq->len); 132 rreq->start = readahead_pos(ractl); 133 rreq->len = readahead_length(ractl); 134 135 trace_netfs_read(rreq, readahead_pos(ractl), readahead_length(ractl), 136 netfs_read_trace_expanded); 137 } 138 } 139 140 /** 141 * netfs_readahead - Helper to manage a read request 142 * @ractl: The description of the readahead request 143 * 144 * Fulfil a readahead request by drawing data from the cache if possible, or 145 * the netfs if not. Space beyond the EOF is zero-filled. Multiple I/O 146 * requests from different sources will get munged together. If necessary, the 147 * readahead window can be expanded in either direction to a more convenient 148 * alighment for RPC efficiency or to make storage in the cache feasible. 149 * 150 * The calling netfs must initialise a netfs context contiguous to the vfs 151 * inode before calling this. 152 * 153 * This is usable whether or not caching is enabled. 154 */ 155 void netfs_readahead(struct readahead_control *ractl) 156 { 157 struct netfs_io_request *rreq; 158 struct netfs_inode *ctx = netfs_inode(ractl->mapping->host); 159 int ret; 160 161 _enter("%lx,%x", readahead_index(ractl), readahead_count(ractl)); 162 163 if (readahead_count(ractl) == 0) 164 return; 165 166 rreq = netfs_alloc_request(ractl->mapping, ractl->file, 167 readahead_pos(ractl), 168 readahead_length(ractl), 169 NETFS_READAHEAD); 170 if (IS_ERR(rreq)) 171 return; 172 173 if (ctx->ops->begin_cache_operation) { 174 ret = ctx->ops->begin_cache_operation(rreq); 175 if (ret == -ENOMEM || ret == -EINTR || ret == -ERESTARTSYS) 176 goto cleanup_free; 177 } 178 179 netfs_stat(&netfs_n_rh_readahead); 180 trace_netfs_read(rreq, readahead_pos(ractl), readahead_length(ractl), 181 netfs_read_trace_readahead); 182 183 netfs_rreq_expand(rreq, ractl); 184 185 /* Drop the refs on the folios here rather than in the cache or 186 * filesystem. The locks will be dropped in netfs_rreq_unlock(). 187 */ 188 while (readahead_folio(ractl)) 189 ; 190 191 netfs_begin_read(rreq, false); 192 return; 193 194 cleanup_free: 195 netfs_put_request(rreq, false, netfs_rreq_trace_put_failed); 196 return; 197 } 198 EXPORT_SYMBOL(netfs_readahead); 199 200 /** 201 * netfs_read_folio - Helper to manage a read_folio request 202 * @file: The file to read from 203 * @folio: The folio to read 204 * 205 * Fulfil a read_folio request by drawing data from the cache if 206 * possible, or the netfs if not. Space beyond the EOF is zero-filled. 207 * Multiple I/O requests from different sources will get munged together. 208 * 209 * The calling netfs must initialise a netfs context contiguous to the vfs 210 * inode before calling this. 211 * 212 * This is usable whether or not caching is enabled. 213 */ 214 int netfs_read_folio(struct file *file, struct folio *folio) 215 { 216 struct address_space *mapping = folio_file_mapping(folio); 217 struct netfs_io_request *rreq; 218 struct netfs_inode *ctx = netfs_inode(mapping->host); 219 int ret; 220 221 _enter("%lx", folio_index(folio)); 222 223 rreq = netfs_alloc_request(mapping, file, 224 folio_file_pos(folio), folio_size(folio), 225 NETFS_READPAGE); 226 if (IS_ERR(rreq)) { 227 ret = PTR_ERR(rreq); 228 goto alloc_error; 229 } 230 231 if (ctx->ops->begin_cache_operation) { 232 ret = ctx->ops->begin_cache_operation(rreq); 233 if (ret == -ENOMEM || ret == -EINTR || ret == -ERESTARTSYS) 234 goto discard; 235 } 236 237 netfs_stat(&netfs_n_rh_readpage); 238 trace_netfs_read(rreq, rreq->start, rreq->len, netfs_read_trace_readpage); 239 return netfs_begin_read(rreq, true); 240 241 discard: 242 netfs_put_request(rreq, false, netfs_rreq_trace_put_discard); 243 alloc_error: 244 folio_unlock(folio); 245 return ret; 246 } 247 EXPORT_SYMBOL(netfs_read_folio); 248 249 /* 250 * Prepare a folio for writing without reading first 251 * @folio: The folio being prepared 252 * @pos: starting position for the write 253 * @len: length of write 254 * @always_fill: T if the folio should always be completely filled/cleared 255 * 256 * In some cases, write_begin doesn't need to read at all: 257 * - full folio write 258 * - write that lies in a folio that is completely beyond EOF 259 * - write that covers the folio from start to EOF or beyond it 260 * 261 * If any of these criteria are met, then zero out the unwritten parts 262 * of the folio and return true. Otherwise, return false. 263 */ 264 static bool netfs_skip_folio_read(struct folio *folio, loff_t pos, size_t len, 265 bool always_fill) 266 { 267 struct inode *inode = folio_inode(folio); 268 loff_t i_size = i_size_read(inode); 269 size_t offset = offset_in_folio(folio, pos); 270 size_t plen = folio_size(folio); 271 272 if (unlikely(always_fill)) { 273 if (pos - offset + len <= i_size) 274 return false; /* Page entirely before EOF */ 275 zero_user_segment(&folio->page, 0, plen); 276 folio_mark_uptodate(folio); 277 return true; 278 } 279 280 /* Full folio write */ 281 if (offset == 0 && len >= plen) 282 return true; 283 284 /* Page entirely beyond the end of the file */ 285 if (pos - offset >= i_size) 286 goto zero_out; 287 288 /* Write that covers from the start of the folio to EOF or beyond */ 289 if (offset == 0 && (pos + len) >= i_size) 290 goto zero_out; 291 292 return false; 293 zero_out: 294 zero_user_segments(&folio->page, 0, offset, offset + len, plen); 295 return true; 296 } 297 298 /** 299 * netfs_write_begin - Helper to prepare for writing 300 * @file: The file to read from 301 * @mapping: The mapping to read from 302 * @pos: File position at which the write will begin 303 * @len: The length of the write (may extend beyond the end of the folio chosen) 304 * @_folio: Where to put the resultant folio 305 * @_fsdata: Place for the netfs to store a cookie 306 * 307 * Pre-read data for a write-begin request by drawing data from the cache if 308 * possible, or the netfs if not. Space beyond the EOF is zero-filled. 309 * Multiple I/O requests from different sources will get munged together. If 310 * necessary, the readahead window can be expanded in either direction to a 311 * more convenient alighment for RPC efficiency or to make storage in the cache 312 * feasible. 313 * 314 * The calling netfs must provide a table of operations, only one of which, 315 * issue_op, is mandatory. 316 * 317 * The check_write_begin() operation can be provided to check for and flush 318 * conflicting writes once the folio is grabbed and locked. It is passed a 319 * pointer to the fsdata cookie that gets returned to the VM to be passed to 320 * write_end. It is permitted to sleep. It should return 0 if the request 321 * should go ahead; unlock the folio and return -EAGAIN to cause the folio to 322 * be regot; or return an error. 323 * 324 * The calling netfs must initialise a netfs context contiguous to the vfs 325 * inode before calling this. 326 * 327 * This is usable whether or not caching is enabled. 328 */ 329 int netfs_write_begin(struct file *file, struct address_space *mapping, 330 loff_t pos, unsigned int len, struct folio **_folio, 331 void **_fsdata) 332 { 333 struct netfs_io_request *rreq; 334 struct netfs_inode *ctx = netfs_inode(file_inode(file )); 335 struct folio *folio; 336 unsigned int fgp_flags = FGP_LOCK | FGP_WRITE | FGP_CREAT | FGP_STABLE; 337 pgoff_t index = pos >> PAGE_SHIFT; 338 int ret; 339 340 DEFINE_READAHEAD(ractl, file, NULL, mapping, index); 341 342 retry: 343 folio = __filemap_get_folio(mapping, index, fgp_flags, 344 mapping_gfp_mask(mapping)); 345 if (!folio) 346 return -ENOMEM; 347 348 if (ctx->ops->check_write_begin) { 349 /* Allow the netfs (eg. ceph) to flush conflicts. */ 350 ret = ctx->ops->check_write_begin(file, pos, len, folio, _fsdata); 351 if (ret < 0) { 352 trace_netfs_failure(NULL, NULL, ret, netfs_fail_check_write_begin); 353 if (ret == -EAGAIN) 354 goto retry; 355 goto error; 356 } 357 } 358 359 if (folio_test_uptodate(folio)) 360 goto have_folio; 361 362 /* If the page is beyond the EOF, we want to clear it - unless it's 363 * within the cache granule containing the EOF, in which case we need 364 * to preload the granule. 365 */ 366 if (!netfs_is_cache_enabled(ctx) && 367 netfs_skip_folio_read(folio, pos, len, false)) { 368 netfs_stat(&netfs_n_rh_write_zskip); 369 goto have_folio_no_wait; 370 } 371 372 rreq = netfs_alloc_request(mapping, file, 373 folio_file_pos(folio), folio_size(folio), 374 NETFS_READ_FOR_WRITE); 375 if (IS_ERR(rreq)) { 376 ret = PTR_ERR(rreq); 377 goto error; 378 } 379 rreq->no_unlock_folio = folio_index(folio); 380 __set_bit(NETFS_RREQ_NO_UNLOCK_FOLIO, &rreq->flags); 381 382 if (ctx->ops->begin_cache_operation) { 383 ret = ctx->ops->begin_cache_operation(rreq); 384 if (ret == -ENOMEM || ret == -EINTR || ret == -ERESTARTSYS) 385 goto error_put; 386 } 387 388 netfs_stat(&netfs_n_rh_write_begin); 389 trace_netfs_read(rreq, pos, len, netfs_read_trace_write_begin); 390 391 /* Expand the request to meet caching requirements and download 392 * preferences. 393 */ 394 ractl._nr_pages = folio_nr_pages(folio); 395 netfs_rreq_expand(rreq, &ractl); 396 397 /* We hold the folio locks, so we can drop the references */ 398 folio_get(folio); 399 while (readahead_folio(&ractl)) 400 ; 401 402 ret = netfs_begin_read(rreq, true); 403 if (ret < 0) 404 goto error; 405 406 have_folio: 407 ret = folio_wait_fscache_killable(folio); 408 if (ret < 0) 409 goto error; 410 have_folio_no_wait: 411 *_folio = folio; 412 _leave(" = 0"); 413 return 0; 414 415 error_put: 416 netfs_put_request(rreq, false, netfs_rreq_trace_put_failed); 417 error: 418 folio_unlock(folio); 419 folio_put(folio); 420 _leave(" = %d", ret); 421 return ret; 422 } 423 EXPORT_SYMBOL(netfs_write_begin); 424