1 // SPDX-License-Identifier: GPL-2.0-or-later 2 /* kiocb-using read/write 3 * 4 * Copyright (C) 2021 Red Hat, Inc. All Rights Reserved. 5 * Written by David Howells (dhowells@redhat.com) 6 */ 7 8 #include <linux/mount.h> 9 #include <linux/slab.h> 10 #include <linux/file.h> 11 #include <linux/uio.h> 12 #include <linux/falloc.h> 13 #include <linux/sched/mm.h> 14 #include <trace/events/fscache.h> 15 #include "internal.h" 16 17 struct cachefiles_kiocb { 18 struct kiocb iocb; 19 refcount_t ki_refcnt; 20 loff_t start; 21 union { 22 size_t skipped; 23 size_t len; 24 }; 25 struct cachefiles_object *object; 26 netfs_io_terminated_t term_func; 27 void *term_func_priv; 28 bool was_async; 29 unsigned int inval_counter; /* Copy of cookie->inval_counter */ 30 u64 b_writing; 31 }; 32 33 static inline void cachefiles_put_kiocb(struct cachefiles_kiocb *ki) 34 { 35 if (refcount_dec_and_test(&ki->ki_refcnt)) { 36 cachefiles_put_object(ki->object, cachefiles_obj_put_ioreq); 37 fput(ki->iocb.ki_filp); 38 kfree(ki); 39 } 40 } 41 42 /* 43 * Handle completion of a read from the cache. 44 */ 45 static void cachefiles_read_complete(struct kiocb *iocb, long ret) 46 { 47 struct cachefiles_kiocb *ki = container_of(iocb, struct cachefiles_kiocb, iocb); 48 struct inode *inode = file_inode(ki->iocb.ki_filp); 49 50 _enter("%ld", ret); 51 52 if (ret < 0) 53 trace_cachefiles_io_error(ki->object, inode, ret, 54 cachefiles_trace_read_error); 55 56 if (ki->term_func) { 57 if (ret >= 0) { 58 if (ki->object->cookie->inval_counter == ki->inval_counter) 59 ki->skipped += ret; 60 else 61 ret = -ESTALE; 62 } 63 64 ki->term_func(ki->term_func_priv, ret, ki->was_async); 65 } 66 67 cachefiles_put_kiocb(ki); 68 } 69 70 /* 71 * Initiate a read from the cache. 72 */ 73 static int cachefiles_read(struct netfs_cache_resources *cres, 74 loff_t start_pos, 75 struct iov_iter *iter, 76 enum netfs_read_from_hole read_hole, 77 netfs_io_terminated_t term_func, 78 void *term_func_priv) 79 { 80 struct cachefiles_object *object; 81 struct cachefiles_kiocb *ki; 82 struct file *file; 83 unsigned int old_nofs; 84 ssize_t ret = -ENOBUFS; 85 size_t len = iov_iter_count(iter), skipped = 0; 86 87 if (!fscache_wait_for_operation(cres, FSCACHE_WANT_READ)) 88 goto presubmission_error; 89 90 fscache_count_read(); 91 object = cachefiles_cres_object(cres); 92 file = cachefiles_cres_file(cres); 93 94 _enter("%pD,%li,%llx,%zx/%llx", 95 file, file_inode(file)->i_ino, start_pos, len, 96 i_size_read(file_inode(file))); 97 98 /* If the caller asked us to seek for data before doing the read, then 99 * we should do that now. If we find a gap, we fill it with zeros. 100 */ 101 if (read_hole != NETFS_READ_HOLE_IGNORE) { 102 loff_t off = start_pos, off2; 103 104 off2 = cachefiles_inject_read_error(); 105 if (off2 == 0) 106 off2 = vfs_llseek(file, off, SEEK_DATA); 107 if (off2 < 0 && off2 >= (loff_t)-MAX_ERRNO && off2 != -ENXIO) { 108 skipped = 0; 109 ret = off2; 110 goto presubmission_error; 111 } 112 113 if (off2 == -ENXIO || off2 >= start_pos + len) { 114 /* The region is beyond the EOF or there's no more data 115 * in the region, so clear the rest of the buffer and 116 * return success. 117 */ 118 ret = -ENODATA; 119 if (read_hole == NETFS_READ_HOLE_FAIL) 120 goto presubmission_error; 121 122 iov_iter_zero(len, iter); 123 skipped = len; 124 ret = 0; 125 goto presubmission_error; 126 } 127 128 skipped = off2 - off; 129 iov_iter_zero(skipped, iter); 130 } 131 132 ret = -ENOMEM; 133 ki = kzalloc(sizeof(struct cachefiles_kiocb), GFP_KERNEL); 134 if (!ki) 135 goto presubmission_error; 136 137 refcount_set(&ki->ki_refcnt, 2); 138 ki->iocb.ki_filp = file; 139 ki->iocb.ki_pos = start_pos + skipped; 140 ki->iocb.ki_flags = IOCB_DIRECT; 141 ki->iocb.ki_hint = ki_hint_validate(file_write_hint(file)); 142 ki->iocb.ki_ioprio = get_current_ioprio(); 143 ki->skipped = skipped; 144 ki->object = object; 145 ki->inval_counter = cres->inval_counter; 146 ki->term_func = term_func; 147 ki->term_func_priv = term_func_priv; 148 ki->was_async = true; 149 150 if (ki->term_func) 151 ki->iocb.ki_complete = cachefiles_read_complete; 152 153 get_file(ki->iocb.ki_filp); 154 cachefiles_grab_object(object, cachefiles_obj_get_ioreq); 155 156 trace_cachefiles_read(object, file_inode(file), ki->iocb.ki_pos, len - skipped); 157 old_nofs = memalloc_nofs_save(); 158 ret = cachefiles_inject_read_error(); 159 if (ret == 0) 160 ret = vfs_iocb_iter_read(file, &ki->iocb, iter); 161 memalloc_nofs_restore(old_nofs); 162 switch (ret) { 163 case -EIOCBQUEUED: 164 goto in_progress; 165 166 case -ERESTARTSYS: 167 case -ERESTARTNOINTR: 168 case -ERESTARTNOHAND: 169 case -ERESTART_RESTARTBLOCK: 170 /* There's no easy way to restart the syscall since other AIO's 171 * may be already running. Just fail this IO with EINTR. 172 */ 173 ret = -EINTR; 174 fallthrough; 175 default: 176 ki->was_async = false; 177 cachefiles_read_complete(&ki->iocb, ret); 178 if (ret > 0) 179 ret = 0; 180 break; 181 } 182 183 in_progress: 184 cachefiles_put_kiocb(ki); 185 _leave(" = %zd", ret); 186 return ret; 187 188 presubmission_error: 189 if (term_func) 190 term_func(term_func_priv, ret < 0 ? ret : skipped, false); 191 return ret; 192 } 193 194 /* 195 * Handle completion of a write to the cache. 196 */ 197 static void cachefiles_write_complete(struct kiocb *iocb, long ret) 198 { 199 struct cachefiles_kiocb *ki = container_of(iocb, struct cachefiles_kiocb, iocb); 200 struct cachefiles_object *object = ki->object; 201 struct inode *inode = file_inode(ki->iocb.ki_filp); 202 203 _enter("%ld", ret); 204 205 /* Tell lockdep we inherited freeze protection from submission thread */ 206 __sb_writers_acquired(inode->i_sb, SB_FREEZE_WRITE); 207 __sb_end_write(inode->i_sb, SB_FREEZE_WRITE); 208 209 if (ret < 0) 210 trace_cachefiles_io_error(object, inode, ret, 211 cachefiles_trace_write_error); 212 213 atomic_long_sub(ki->b_writing, &object->volume->cache->b_writing); 214 set_bit(FSCACHE_COOKIE_HAVE_DATA, &object->cookie->flags); 215 if (ki->term_func) 216 ki->term_func(ki->term_func_priv, ret, ki->was_async); 217 cachefiles_put_kiocb(ki); 218 } 219 220 /* 221 * Initiate a write to the cache. 222 */ 223 static int cachefiles_write(struct netfs_cache_resources *cres, 224 loff_t start_pos, 225 struct iov_iter *iter, 226 netfs_io_terminated_t term_func, 227 void *term_func_priv) 228 { 229 struct cachefiles_object *object; 230 struct cachefiles_cache *cache; 231 struct cachefiles_kiocb *ki; 232 struct inode *inode; 233 struct file *file; 234 unsigned int old_nofs; 235 ssize_t ret = -ENOBUFS; 236 size_t len = iov_iter_count(iter); 237 238 if (!fscache_wait_for_operation(cres, FSCACHE_WANT_WRITE)) 239 goto presubmission_error; 240 fscache_count_write(); 241 object = cachefiles_cres_object(cres); 242 cache = object->volume->cache; 243 file = cachefiles_cres_file(cres); 244 245 _enter("%pD,%li,%llx,%zx/%llx", 246 file, file_inode(file)->i_ino, start_pos, len, 247 i_size_read(file_inode(file))); 248 249 ret = -ENOMEM; 250 ki = kzalloc(sizeof(struct cachefiles_kiocb), GFP_KERNEL); 251 if (!ki) 252 goto presubmission_error; 253 254 refcount_set(&ki->ki_refcnt, 2); 255 ki->iocb.ki_filp = file; 256 ki->iocb.ki_pos = start_pos; 257 ki->iocb.ki_flags = IOCB_DIRECT | IOCB_WRITE; 258 ki->iocb.ki_hint = ki_hint_validate(file_write_hint(file)); 259 ki->iocb.ki_ioprio = get_current_ioprio(); 260 ki->object = object; 261 ki->inval_counter = cres->inval_counter; 262 ki->start = start_pos; 263 ki->len = len; 264 ki->term_func = term_func; 265 ki->term_func_priv = term_func_priv; 266 ki->was_async = true; 267 ki->b_writing = (len + (1 << cache->bshift)) >> cache->bshift; 268 269 if (ki->term_func) 270 ki->iocb.ki_complete = cachefiles_write_complete; 271 atomic_long_add(ki->b_writing, &cache->b_writing); 272 273 /* Open-code file_start_write here to grab freeze protection, which 274 * will be released by another thread in aio_complete_rw(). Fool 275 * lockdep by telling it the lock got released so that it doesn't 276 * complain about the held lock when we return to userspace. 277 */ 278 inode = file_inode(file); 279 __sb_start_write(inode->i_sb, SB_FREEZE_WRITE); 280 __sb_writers_release(inode->i_sb, SB_FREEZE_WRITE); 281 282 get_file(ki->iocb.ki_filp); 283 cachefiles_grab_object(object, cachefiles_obj_get_ioreq); 284 285 trace_cachefiles_write(object, inode, ki->iocb.ki_pos, len); 286 old_nofs = memalloc_nofs_save(); 287 ret = cachefiles_inject_write_error(); 288 if (ret == 0) 289 ret = vfs_iocb_iter_write(file, &ki->iocb, iter); 290 memalloc_nofs_restore(old_nofs); 291 switch (ret) { 292 case -EIOCBQUEUED: 293 goto in_progress; 294 295 case -ERESTARTSYS: 296 case -ERESTARTNOINTR: 297 case -ERESTARTNOHAND: 298 case -ERESTART_RESTARTBLOCK: 299 /* There's no easy way to restart the syscall since other AIO's 300 * may be already running. Just fail this IO with EINTR. 301 */ 302 ret = -EINTR; 303 fallthrough; 304 default: 305 ki->was_async = false; 306 cachefiles_write_complete(&ki->iocb, ret); 307 if (ret > 0) 308 ret = 0; 309 break; 310 } 311 312 in_progress: 313 cachefiles_put_kiocb(ki); 314 _leave(" = %zd", ret); 315 return ret; 316 317 presubmission_error: 318 if (term_func) 319 term_func(term_func_priv, ret, false); 320 return ret; 321 } 322 323 /* 324 * Prepare a read operation, shortening it to a cached/uncached 325 * boundary as appropriate. 326 */ 327 static enum netfs_read_source cachefiles_prepare_read(struct netfs_read_subrequest *subreq, 328 loff_t i_size) 329 { 330 enum cachefiles_prepare_read_trace why; 331 struct netfs_read_request *rreq = subreq->rreq; 332 struct netfs_cache_resources *cres = &rreq->cache_resources; 333 struct cachefiles_object *object; 334 struct cachefiles_cache *cache; 335 struct fscache_cookie *cookie = fscache_cres_cookie(cres); 336 const struct cred *saved_cred; 337 struct file *file = cachefiles_cres_file(cres); 338 enum netfs_read_source ret = NETFS_DOWNLOAD_FROM_SERVER; 339 loff_t off, to; 340 ino_t ino = file ? file_inode(file)->i_ino : 0; 341 342 _enter("%zx @%llx/%llx", subreq->len, subreq->start, i_size); 343 344 if (subreq->start >= i_size) { 345 ret = NETFS_FILL_WITH_ZEROES; 346 why = cachefiles_trace_read_after_eof; 347 goto out_no_object; 348 } 349 350 if (test_bit(FSCACHE_COOKIE_NO_DATA_TO_READ, &cookie->flags)) { 351 __set_bit(NETFS_SREQ_WRITE_TO_CACHE, &subreq->flags); 352 why = cachefiles_trace_read_no_data; 353 goto out_no_object; 354 } 355 356 /* The object and the file may be being created in the background. */ 357 if (!file) { 358 why = cachefiles_trace_read_no_file; 359 if (!fscache_wait_for_operation(cres, FSCACHE_WANT_READ)) 360 goto out_no_object; 361 file = cachefiles_cres_file(cres); 362 if (!file) 363 goto out_no_object; 364 ino = file_inode(file)->i_ino; 365 } 366 367 object = cachefiles_cres_object(cres); 368 cache = object->volume->cache; 369 cachefiles_begin_secure(cache, &saved_cred); 370 371 off = cachefiles_inject_read_error(); 372 if (off == 0) 373 off = vfs_llseek(file, subreq->start, SEEK_DATA); 374 if (off < 0 && off >= (loff_t)-MAX_ERRNO) { 375 if (off == (loff_t)-ENXIO) { 376 why = cachefiles_trace_read_seek_nxio; 377 goto download_and_store; 378 } 379 trace_cachefiles_io_error(object, file_inode(file), off, 380 cachefiles_trace_seek_error); 381 why = cachefiles_trace_read_seek_error; 382 goto out; 383 } 384 385 if (off >= subreq->start + subreq->len) { 386 why = cachefiles_trace_read_found_hole; 387 goto download_and_store; 388 } 389 390 if (off > subreq->start) { 391 off = round_up(off, cache->bsize); 392 subreq->len = off - subreq->start; 393 why = cachefiles_trace_read_found_part; 394 goto download_and_store; 395 } 396 397 to = cachefiles_inject_read_error(); 398 if (to == 0) 399 to = vfs_llseek(file, subreq->start, SEEK_HOLE); 400 if (to < 0 && to >= (loff_t)-MAX_ERRNO) { 401 trace_cachefiles_io_error(object, file_inode(file), to, 402 cachefiles_trace_seek_error); 403 why = cachefiles_trace_read_seek_error; 404 goto out; 405 } 406 407 if (to < subreq->start + subreq->len) { 408 if (subreq->start + subreq->len >= i_size) 409 to = round_up(to, cache->bsize); 410 else 411 to = round_down(to, cache->bsize); 412 subreq->len = to - subreq->start; 413 } 414 415 why = cachefiles_trace_read_have_data; 416 ret = NETFS_READ_FROM_CACHE; 417 goto out; 418 419 download_and_store: 420 __set_bit(NETFS_SREQ_WRITE_TO_CACHE, &subreq->flags); 421 out: 422 cachefiles_end_secure(cache, saved_cred); 423 out_no_object: 424 trace_cachefiles_prep_read(subreq, ret, why, ino); 425 return ret; 426 } 427 428 /* 429 * Prepare for a write to occur. 430 */ 431 static int __cachefiles_prepare_write(struct netfs_cache_resources *cres, 432 loff_t *_start, size_t *_len, loff_t i_size, 433 bool no_space_allocated_yet) 434 { 435 struct cachefiles_object *object = cachefiles_cres_object(cres); 436 struct cachefiles_cache *cache = object->volume->cache; 437 struct file *file = cachefiles_cres_file(cres); 438 loff_t start = *_start, pos; 439 size_t len = *_len, down; 440 int ret; 441 442 /* Round to DIO size */ 443 down = start - round_down(start, PAGE_SIZE); 444 *_start = start - down; 445 *_len = round_up(down + len, PAGE_SIZE); 446 447 /* We need to work out whether there's sufficient disk space to perform 448 * the write - but we can skip that check if we have space already 449 * allocated. 450 */ 451 if (no_space_allocated_yet) 452 goto check_space; 453 454 pos = cachefiles_inject_read_error(); 455 if (pos == 0) 456 pos = vfs_llseek(file, *_start, SEEK_DATA); 457 if (pos < 0 && pos >= (loff_t)-MAX_ERRNO) { 458 if (pos == -ENXIO) 459 goto check_space; /* Unallocated tail */ 460 trace_cachefiles_io_error(object, file_inode(file), pos, 461 cachefiles_trace_seek_error); 462 return pos; 463 } 464 if ((u64)pos >= (u64)*_start + *_len) 465 goto check_space; /* Unallocated region */ 466 467 /* We have a block that's at least partially filled - if we're low on 468 * space, we need to see if it's fully allocated. If it's not, we may 469 * want to cull it. 470 */ 471 if (cachefiles_has_space(cache, 0, *_len / PAGE_SIZE, 472 cachefiles_has_space_check) == 0) 473 return 0; /* Enough space to simply overwrite the whole block */ 474 475 pos = cachefiles_inject_read_error(); 476 if (pos == 0) 477 pos = vfs_llseek(file, *_start, SEEK_HOLE); 478 if (pos < 0 && pos >= (loff_t)-MAX_ERRNO) { 479 trace_cachefiles_io_error(object, file_inode(file), pos, 480 cachefiles_trace_seek_error); 481 return pos; 482 } 483 if ((u64)pos >= (u64)*_start + *_len) 484 return 0; /* Fully allocated */ 485 486 /* Partially allocated, but insufficient space: cull. */ 487 fscache_count_no_write_space(); 488 ret = cachefiles_inject_remove_error(); 489 if (ret == 0) 490 ret = vfs_fallocate(file, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE, 491 *_start, *_len); 492 if (ret < 0) { 493 trace_cachefiles_io_error(object, file_inode(file), ret, 494 cachefiles_trace_fallocate_error); 495 cachefiles_io_error_obj(object, 496 "CacheFiles: fallocate failed (%d)\n", ret); 497 ret = -EIO; 498 } 499 500 return ret; 501 502 check_space: 503 return cachefiles_has_space(cache, 0, *_len / PAGE_SIZE, 504 cachefiles_has_space_for_write); 505 } 506 507 static int cachefiles_prepare_write(struct netfs_cache_resources *cres, 508 loff_t *_start, size_t *_len, loff_t i_size, 509 bool no_space_allocated_yet) 510 { 511 struct cachefiles_object *object = cachefiles_cres_object(cres); 512 struct cachefiles_cache *cache = object->volume->cache; 513 const struct cred *saved_cred; 514 int ret; 515 516 if (!cachefiles_cres_file(cres)) { 517 if (!fscache_wait_for_operation(cres, FSCACHE_WANT_WRITE)) 518 return -ENOBUFS; 519 if (!cachefiles_cres_file(cres)) 520 return -ENOBUFS; 521 } 522 523 cachefiles_begin_secure(cache, &saved_cred); 524 ret = __cachefiles_prepare_write(cres, _start, _len, i_size, 525 no_space_allocated_yet); 526 cachefiles_end_secure(cache, saved_cred); 527 return ret; 528 } 529 530 /* 531 * Clean up an operation. 532 */ 533 static void cachefiles_end_operation(struct netfs_cache_resources *cres) 534 { 535 struct file *file = cachefiles_cres_file(cres); 536 537 if (file) 538 fput(file); 539 fscache_end_cookie_access(fscache_cres_cookie(cres), fscache_access_io_end); 540 } 541 542 static const struct netfs_cache_ops cachefiles_netfs_cache_ops = { 543 .end_operation = cachefiles_end_operation, 544 .read = cachefiles_read, 545 .write = cachefiles_write, 546 .prepare_read = cachefiles_prepare_read, 547 .prepare_write = cachefiles_prepare_write, 548 }; 549 550 /* 551 * Open the cache file when beginning a cache operation. 552 */ 553 bool cachefiles_begin_operation(struct netfs_cache_resources *cres, 554 enum fscache_want_state want_state) 555 { 556 struct cachefiles_object *object = cachefiles_cres_object(cres); 557 558 if (!cachefiles_cres_file(cres)) { 559 cres->ops = &cachefiles_netfs_cache_ops; 560 if (object->file) { 561 spin_lock(&object->lock); 562 if (!cres->cache_priv2 && object->file) 563 cres->cache_priv2 = get_file(object->file); 564 spin_unlock(&object->lock); 565 } 566 } 567 568 if (!cachefiles_cres_file(cres) && want_state != FSCACHE_WANT_PARAMS) { 569 pr_err("failed to get cres->file\n"); 570 return false; 571 } 572 573 return true; 574 } 575