// SPDX-License-Identifier: LGPL-2.1
/*
 *
 *   vfs operations that deal with files
 *
 *   Copyright (C) International Business Machines Corp., 2002,2010
 *   Author(s): Steve French (sfrench@us.ibm.com)
 *              Jeremy Allison (jra@samba.org)
 *
 */
#include <linux/fs.h>
#include <linux/filelock.h>
#include <linux/backing-dev.h>
#include <linux/stat.h>
#include <linux/fcntl.h>
#include <linux/pagemap.h>
#include <linux/pagevec.h>
#include <linux/writeback.h>
#include <linux/task_io_accounting_ops.h>
#include <linux/delay.h>
#include <linux/mount.h>
#include <linux/slab.h>
#include <linux/swap.h>
#include <linux/mm.h>
#include <asm/div64.h>
#include "cifsfs.h"
#include "cifspdu.h"
#include "cifsglob.h"
#include "cifsproto.h"
#include "smb2proto.h"
#include "cifs_unicode.h"
#include "cifs_debug.h"
#include "cifs_fs_sb.h"
#include "fscache.h"
#include "smbdirect.h"
#include "fs_context.h"
#include "cifs_ioctl.h"
#include "cached_dir.h"

/*
 * Remove the dirty flags from a span of pages.
 */
static void cifs_undirty_folios(struct inode *inode, loff_t start, unsigned int len)
{
	struct address_space *mapping = inode->i_mapping;
	struct folio *folio;
	pgoff_t end;

	XA_STATE(xas, &mapping->i_pages, start / PAGE_SIZE);

	rcu_read_lock();

	end = (start + len - 1) / PAGE_SIZE;
	xas_for_each_marked(&xas, folio, end, PAGECACHE_TAG_DIRTY) {
		if (xas_retry(&xas, folio))
			continue;
		xas_pause(&xas);
		rcu_read_unlock();
		folio_lock(folio);
		folio_clear_dirty_for_io(folio);
		folio_unlock(folio);
		rcu_read_lock();
	}

	rcu_read_unlock();
}

/*
 * Completion of write to server.
 */
void cifs_pages_written_back(struct inode *inode, loff_t start, unsigned int len)
{
	struct address_space *mapping = inode->i_mapping;
	struct folio *folio;
	pgoff_t end;

	XA_STATE(xas, &mapping->i_pages, start / PAGE_SIZE);

	if (!len)
		return;

	rcu_read_lock();

	end = (start + len - 1) / PAGE_SIZE;
	xas_for_each(&xas, folio, end) {
		if (xas_retry(&xas, folio))
			continue;
		if (!folio_test_writeback(folio)) {
			WARN_ONCE(1, "bad %x @%llx page %lx %lx\n",
				  len, start, folio->index, end);
			continue;
		}

		folio_detach_private(folio);
		folio_end_writeback(folio);
	}

	rcu_read_unlock();
}

/*
 * Failure of write to server.
 */
void cifs_pages_write_failed(struct inode *inode, loff_t start, unsigned int len)
{
	struct address_space *mapping = inode->i_mapping;
	struct folio *folio;
	pgoff_t end;

	XA_STATE(xas, &mapping->i_pages, start / PAGE_SIZE);

	if (!len)
		return;

	rcu_read_lock();

	end = (start + len - 1) / PAGE_SIZE;
	xas_for_each(&xas, folio, end) {
		if (xas_retry(&xas, folio))
			continue;
		if (!folio_test_writeback(folio)) {
			WARN_ONCE(1, "bad %x @%llx page %lx %lx\n",
				  len, start, folio->index, end);
			continue;
		}

		folio_set_error(folio);
		folio_end_writeback(folio);
	}

	rcu_read_unlock();
}

/*
 * Redirty pages after a temporary failure.
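 * (the folios are marked dirty again so that a later writeback pass retries
 * them).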
 */
void cifs_pages_write_redirty(struct inode *inode, loff_t start, unsigned int len)
{
	struct address_space *mapping = inode->i_mapping;
	struct folio *folio;
	pgoff_t end;

	XA_STATE(xas, &mapping->i_pages, start / PAGE_SIZE);

	if (!len)
		return;

	rcu_read_lock();

	end = (start + len - 1) / PAGE_SIZE;
	xas_for_each(&xas, folio, end) {
		if (!folio_test_writeback(folio)) {
			WARN_ONCE(1, "bad %x @%llx page %lx %lx\n",
				  len, start, folio->index, end);
			continue;
		}

		filemap_dirty_folio(folio->mapping, folio);
		folio_end_writeback(folio);
	}

	rcu_read_unlock();
}

/*
 * Mark all open files on the tree connection as invalid, since they were
 * closed when the session to the server was lost.
 */
void
cifs_mark_open_files_invalid(struct cifs_tcon *tcon)
{
	struct cifsFileInfo *open_file = NULL;
	struct list_head *tmp;
	struct list_head *tmp1;

	/* only send once per connect */
	spin_lock(&tcon->tc_lock);
	if (tcon->need_reconnect)
		tcon->status = TID_NEED_RECON;

	if (tcon->status != TID_NEED_RECON) {
		spin_unlock(&tcon->tc_lock);
		return;
	}
	tcon->status = TID_IN_FILES_INVALIDATE;
	spin_unlock(&tcon->tc_lock);

	/* list all files open on tree connection and mark them invalid */
	spin_lock(&tcon->open_file_lock);
	list_for_each_safe(tmp, tmp1, &tcon->openFileList) {
		open_file = list_entry(tmp, struct cifsFileInfo, tlist);
		open_file->invalidHandle = true;
		open_file->oplock_break_cancelled = true;
	}
	spin_unlock(&tcon->open_file_lock);

	invalidate_all_cached_dirs(tcon);
	spin_lock(&tcon->tc_lock);
	if (tcon->status == TID_IN_FILES_INVALIDATE)
		tcon->status = TID_NEED_TCON;
	spin_unlock(&tcon->tc_lock);

	/*
	 * BB Add call to invalidate_inodes(sb) for all superblocks mounted
	 * to this tcon.
	 */
}

static inline int cifs_convert_flags(unsigned int flags, int rdwr_for_fscache)
{
	if ((flags & O_ACCMODE) == O_RDONLY)
		return GENERIC_READ;
	else if ((flags & O_ACCMODE) == O_WRONLY)
		return rdwr_for_fscache == 1 ? (GENERIC_READ | GENERIC_WRITE) : GENERIC_WRITE;
	else if ((flags & O_ACCMODE) == O_RDWR) {
		/* GENERIC_ALL is too much permission to request
		   can cause unnecessary access denied on create */
		/* return GENERIC_ALL; */
		return (GENERIC_READ | GENERIC_WRITE);
	}

	return (READ_CONTROL | FILE_WRITE_ATTRIBUTES | FILE_READ_ATTRIBUTES |
		FILE_WRITE_EA | FILE_APPEND_DATA | FILE_WRITE_DATA |
		FILE_READ_DATA);
}

#ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
static u32 cifs_posix_convert_flags(unsigned int flags)
{
	u32 posix_flags = 0;

	if ((flags & O_ACCMODE) == O_RDONLY)
		posix_flags = SMB_O_RDONLY;
	else if ((flags & O_ACCMODE) == O_WRONLY)
		posix_flags = SMB_O_WRONLY;
	else if ((flags & O_ACCMODE) == O_RDWR)
		posix_flags = SMB_O_RDWR;

	if (flags & O_CREAT) {
		posix_flags |= SMB_O_CREAT;
		if (flags & O_EXCL)
			posix_flags |= SMB_O_EXCL;
	} else if (flags & O_EXCL)
		cifs_dbg(FYI, "Application %s pid %d has incorrectly set O_EXCL flag but not O_CREAT on file open. Ignoring O_EXCL\n",
			 current->comm, current->tgid);

	if (flags & O_TRUNC)
		posix_flags |= SMB_O_TRUNC;
	/* be safe and imply O_SYNC for O_DSYNC */
	if (flags & O_DSYNC)
		posix_flags |= SMB_O_SYNC;
	if (flags & O_DIRECTORY)
		posix_flags |= SMB_O_DIRECTORY;
	if (flags & O_NOFOLLOW)
		posix_flags |= SMB_O_NOFOLLOW;
	if (flags & O_DIRECT)
		posix_flags |= SMB_O_DIRECT;

	return posix_flags;
}
#endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */

static inline int cifs_get_disposition(unsigned int flags)
{
	if ((flags & (O_CREAT | O_EXCL)) == (O_CREAT | O_EXCL))
		return FILE_CREATE;
	else if ((flags & (O_CREAT | O_TRUNC)) == (O_CREAT | O_TRUNC))
		return FILE_OVERWRITE_IF;
	else if ((flags & O_CREAT) == O_CREAT)
		return FILE_OPEN_IF;
	else if ((flags & O_TRUNC) == O_TRUNC)
		return FILE_OVERWRITE;
	else
		return FILE_OPEN;
}

#ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
int cifs_posix_open(const char *full_path, struct inode **pinode,
		    struct super_block *sb, int mode, unsigned int f_flags,
		    __u32 *poplock, __u16 *pnetfid, unsigned int xid)
{
	int rc;
	FILE_UNIX_BASIC_INFO *presp_data;
	__u32 posix_flags = 0;
	struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
	struct cifs_fattr fattr;
	struct tcon_link *tlink;
	struct cifs_tcon *tcon;

	cifs_dbg(FYI, "posix open %s\n", full_path);

	presp_data = kzalloc(sizeof(FILE_UNIX_BASIC_INFO), GFP_KERNEL);
	if (presp_data == NULL)
		return -ENOMEM;

	tlink = cifs_sb_tlink(cifs_sb);
	if (IS_ERR(tlink)) {
		rc = PTR_ERR(tlink);
		goto posix_open_ret;
	}

	tcon = tlink_tcon(tlink);
	mode &= ~current_umask();

	posix_flags = cifs_posix_convert_flags(f_flags);
	rc = CIFSPOSIXCreate(xid, tcon, posix_flags, mode, pnetfid, presp_data,
			     poplock, full_path, cifs_sb->local_nls,
			     cifs_remap(cifs_sb));
	cifs_put_tlink(tlink);

	if (rc)
		goto posix_open_ret;

	if (presp_data->Type == cpu_to_le32(-1))
		goto posix_open_ret; /* open ok, caller does qpathinfo */

	if (!pinode)
		goto posix_open_ret; /* caller does not need info */

	cifs_unix_basic_to_fattr(&fattr, presp_data, cifs_sb);

	/* get new inode and set it up */
	if (*pinode == NULL) {
		cifs_fill_uniqueid(sb, &fattr);
		*pinode = cifs_iget(sb, &fattr);
		if (!*pinode) {
			rc = -ENOMEM;
			goto posix_open_ret;
		}
	} else {
		cifs_revalidate_mapping(*pinode);
		rc = cifs_fattr_to_inode(*pinode, &fattr, false);
	}

posix_open_ret:
	kfree(presp_data);
	return rc;
}
#endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */

static int cifs_nt_open(const char *full_path, struct inode *inode, struct cifs_sb_info *cifs_sb,
			struct cifs_tcon *tcon, unsigned int f_flags, __u32 *oplock,
			struct cifs_fid *fid, unsigned int xid, struct cifs_open_info_data *buf)
{
	int rc;
	int desired_access;
	int disposition;
	int create_options = CREATE_NOT_DIR;
	struct TCP_Server_Info *server = tcon->ses->server;
	struct cifs_open_parms oparms;
	int rdwr_for_fscache = 0;

	if (!server->ops->open)
		return -ENOSYS;

	/* If we're caching, we need to be able to fill in around partial writes.
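	 * (a write-only open is then upgraded to read/write access on the wire
	 * when fscache is in use - see rdwr_for_fscache below).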
*/ 357 if (cifs_fscache_enabled(inode) && (f_flags & O_ACCMODE) == O_WRONLY) 358 rdwr_for_fscache = 1; 359 360 desired_access = cifs_convert_flags(f_flags, rdwr_for_fscache); 361 362 /********************************************************************* 363 * open flag mapping table: 364 * 365 * POSIX Flag CIFS Disposition 366 * ---------- ---------------- 367 * O_CREAT FILE_OPEN_IF 368 * O_CREAT | O_EXCL FILE_CREATE 369 * O_CREAT | O_TRUNC FILE_OVERWRITE_IF 370 * O_TRUNC FILE_OVERWRITE 371 * none of the above FILE_OPEN 372 * 373 * Note that there is not a direct match between disposition 374 * FILE_SUPERSEDE (ie create whether or not file exists although 375 * O_CREAT | O_TRUNC is similar but truncates the existing 376 * file rather than creating a new file as FILE_SUPERSEDE does 377 * (which uses the attributes / metadata passed in on open call) 378 *? 379 *? O_SYNC is a reasonable match to CIFS writethrough flag 380 *? and the read write flags match reasonably. O_LARGEFILE 381 *? is irrelevant because largefile support is always used 382 *? by this client. Flags O_APPEND, O_DIRECT, O_DIRECTORY, 383 * O_FASYNC, O_NOFOLLOW, O_NONBLOCK need further investigation 384 *********************************************************************/ 385 386 disposition = cifs_get_disposition(f_flags); 387 388 /* BB pass O_SYNC flag through on file attributes .. BB */ 389 390 /* O_SYNC also has bit for O_DSYNC so following check picks up either */ 391 if (f_flags & O_SYNC) 392 create_options |= CREATE_WRITE_THROUGH; 393 394 if (f_flags & O_DIRECT) 395 create_options |= CREATE_NO_BUFFER; 396 397 retry_open: 398 oparms = (struct cifs_open_parms) { 399 .tcon = tcon, 400 .cifs_sb = cifs_sb, 401 .desired_access = desired_access, 402 .create_options = cifs_create_options(cifs_sb, create_options), 403 .disposition = disposition, 404 .path = full_path, 405 .fid = fid, 406 }; 407 408 rc = server->ops->open(xid, &oparms, oplock, buf); 409 if (rc) { 410 if (rc == -EACCES && rdwr_for_fscache == 1) { 411 desired_access = cifs_convert_flags(f_flags, 0); 412 rdwr_for_fscache = 2; 413 goto retry_open; 414 } 415 return rc; 416 } 417 if (rdwr_for_fscache == 2) 418 cifs_invalidate_cache(inode, FSCACHE_INVAL_DIO_WRITE); 419 420 /* TODO: Add support for calling posix query info but with passing in fid */ 421 if (tcon->unix_ext) 422 rc = cifs_get_inode_info_unix(&inode, full_path, inode->i_sb, 423 xid); 424 else 425 rc = cifs_get_inode_info(&inode, full_path, buf, inode->i_sb, 426 xid, fid); 427 428 if (rc) { 429 server->ops->close(xid, tcon, fid); 430 if (rc == -ESTALE) 431 rc = -EOPENSTALE; 432 } 433 434 return rc; 435 } 436 437 static bool 438 cifs_has_mand_locks(struct cifsInodeInfo *cinode) 439 { 440 struct cifs_fid_locks *cur; 441 bool has_locks = false; 442 443 down_read(&cinode->lock_sem); 444 list_for_each_entry(cur, &cinode->llist, llist) { 445 if (!list_empty(&cur->locks)) { 446 has_locks = true; 447 break; 448 } 449 } 450 up_read(&cinode->lock_sem); 451 return has_locks; 452 } 453 454 void 455 cifs_down_write(struct rw_semaphore *sem) 456 { 457 while (!down_write_trylock(sem)) 458 msleep(10); 459 } 460 461 static void cifsFileInfo_put_work(struct work_struct *work); 462 void serverclose_work(struct work_struct *work); 463 464 struct cifsFileInfo *cifs_new_fileinfo(struct cifs_fid *fid, struct file *file, 465 struct tcon_link *tlink, __u32 oplock, 466 const char *symlink_target) 467 { 468 struct dentry *dentry = file_dentry(file); 469 struct inode *inode = d_inode(dentry); 470 struct cifsInodeInfo *cinode = 
CIFS_I(inode); 471 struct cifsFileInfo *cfile; 472 struct cifs_fid_locks *fdlocks; 473 struct cifs_tcon *tcon = tlink_tcon(tlink); 474 struct TCP_Server_Info *server = tcon->ses->server; 475 476 cfile = kzalloc(sizeof(struct cifsFileInfo), GFP_KERNEL); 477 if (cfile == NULL) 478 return cfile; 479 480 fdlocks = kzalloc(sizeof(struct cifs_fid_locks), GFP_KERNEL); 481 if (!fdlocks) { 482 kfree(cfile); 483 return NULL; 484 } 485 486 if (symlink_target) { 487 cfile->symlink_target = kstrdup(symlink_target, GFP_KERNEL); 488 if (!cfile->symlink_target) { 489 kfree(fdlocks); 490 kfree(cfile); 491 return NULL; 492 } 493 } 494 495 INIT_LIST_HEAD(&fdlocks->locks); 496 fdlocks->cfile = cfile; 497 cfile->llist = fdlocks; 498 499 cfile->count = 1; 500 cfile->pid = current->tgid; 501 cfile->uid = current_fsuid(); 502 cfile->dentry = dget(dentry); 503 cfile->f_flags = file->f_flags; 504 cfile->invalidHandle = false; 505 cfile->deferred_close_scheduled = false; 506 cfile->tlink = cifs_get_tlink(tlink); 507 INIT_WORK(&cfile->oplock_break, cifs_oplock_break); 508 INIT_WORK(&cfile->put, cifsFileInfo_put_work); 509 INIT_WORK(&cfile->serverclose, serverclose_work); 510 INIT_DELAYED_WORK(&cfile->deferred, smb2_deferred_work_close); 511 mutex_init(&cfile->fh_mutex); 512 spin_lock_init(&cfile->file_info_lock); 513 514 cifs_sb_active(inode->i_sb); 515 516 /* 517 * If the server returned a read oplock and we have mandatory brlocks, 518 * set oplock level to None. 519 */ 520 if (server->ops->is_read_op(oplock) && cifs_has_mand_locks(cinode)) { 521 cifs_dbg(FYI, "Reset oplock val from read to None due to mand locks\n"); 522 oplock = 0; 523 } 524 525 cifs_down_write(&cinode->lock_sem); 526 list_add(&fdlocks->llist, &cinode->llist); 527 up_write(&cinode->lock_sem); 528 529 spin_lock(&tcon->open_file_lock); 530 if (fid->pending_open->oplock != CIFS_OPLOCK_NO_CHANGE && oplock) 531 oplock = fid->pending_open->oplock; 532 list_del(&fid->pending_open->olist); 533 534 fid->purge_cache = false; 535 server->ops->set_fid(cfile, fid, oplock); 536 537 list_add(&cfile->tlist, &tcon->openFileList); 538 atomic_inc(&tcon->num_local_opens); 539 540 /* if readable file instance put first in list*/ 541 spin_lock(&cinode->open_file_lock); 542 if (file->f_mode & FMODE_READ) 543 list_add(&cfile->flist, &cinode->openFileList); 544 else 545 list_add_tail(&cfile->flist, &cinode->openFileList); 546 spin_unlock(&cinode->open_file_lock); 547 spin_unlock(&tcon->open_file_lock); 548 549 if (fid->purge_cache) 550 cifs_zap_mapping(inode); 551 552 file->private_data = cfile; 553 return cfile; 554 } 555 556 struct cifsFileInfo * 557 cifsFileInfo_get(struct cifsFileInfo *cifs_file) 558 { 559 spin_lock(&cifs_file->file_info_lock); 560 cifsFileInfo_get_locked(cifs_file); 561 spin_unlock(&cifs_file->file_info_lock); 562 return cifs_file; 563 } 564 565 static void cifsFileInfo_put_final(struct cifsFileInfo *cifs_file) 566 { 567 struct inode *inode = d_inode(cifs_file->dentry); 568 struct cifsInodeInfo *cifsi = CIFS_I(inode); 569 struct cifsLockInfo *li, *tmp; 570 struct super_block *sb = inode->i_sb; 571 572 /* 573 * Delete any outstanding lock records. We'll lose them when the file 574 * is closed anyway. 
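	 * (the server releases byte-range locks when the handle is closed, so
	 * only the local bookkeeping needs cleaning up here).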
575 */ 576 cifs_down_write(&cifsi->lock_sem); 577 list_for_each_entry_safe(li, tmp, &cifs_file->llist->locks, llist) { 578 list_del(&li->llist); 579 cifs_del_lock_waiters(li); 580 kfree(li); 581 } 582 list_del(&cifs_file->llist->llist); 583 kfree(cifs_file->llist); 584 up_write(&cifsi->lock_sem); 585 586 cifs_put_tlink(cifs_file->tlink); 587 dput(cifs_file->dentry); 588 cifs_sb_deactive(sb); 589 kfree(cifs_file->symlink_target); 590 kfree(cifs_file); 591 } 592 593 static void cifsFileInfo_put_work(struct work_struct *work) 594 { 595 struct cifsFileInfo *cifs_file = container_of(work, 596 struct cifsFileInfo, put); 597 598 cifsFileInfo_put_final(cifs_file); 599 } 600 601 void serverclose_work(struct work_struct *work) 602 { 603 struct cifsFileInfo *cifs_file = container_of(work, 604 struct cifsFileInfo, serverclose); 605 606 struct cifs_tcon *tcon = tlink_tcon(cifs_file->tlink); 607 608 struct TCP_Server_Info *server = tcon->ses->server; 609 int rc = 0; 610 int retries = 0; 611 int MAX_RETRIES = 4; 612 613 do { 614 if (server->ops->close_getattr) 615 rc = server->ops->close_getattr(0, tcon, cifs_file); 616 else if (server->ops->close) 617 rc = server->ops->close(0, tcon, &cifs_file->fid); 618 619 if (rc == -EBUSY || rc == -EAGAIN) { 620 retries++; 621 msleep(250); 622 } 623 } while ((rc == -EBUSY || rc == -EAGAIN) && (retries < MAX_RETRIES) 624 ); 625 626 if (retries == MAX_RETRIES) 627 pr_warn("Serverclose failed %d times, giving up\n", MAX_RETRIES); 628 629 if (cifs_file->offload) 630 queue_work(fileinfo_put_wq, &cifs_file->put); 631 else 632 cifsFileInfo_put_final(cifs_file); 633 } 634 635 /** 636 * cifsFileInfo_put - release a reference of file priv data 637 * 638 * Always potentially wait for oplock handler. See _cifsFileInfo_put(). 639 * 640 * @cifs_file: cifs/smb3 specific info (eg refcounts) for an open file 641 */ 642 void cifsFileInfo_put(struct cifsFileInfo *cifs_file) 643 { 644 _cifsFileInfo_put(cifs_file, true, true); 645 } 646 647 /** 648 * _cifsFileInfo_put - release a reference of file priv data 649 * 650 * This may involve closing the filehandle @cifs_file out on the 651 * server. Must be called without holding tcon->open_file_lock, 652 * cinode->open_file_lock and cifs_file->file_info_lock. 653 * 654 * If @wait_for_oplock_handler is true and we are releasing the last 655 * reference, wait for any running oplock break handler of the file 656 * and cancel any pending one. 
 *
 * @cifs_file:	cifs/smb3 specific info (eg refcounts) for an open file
 * @wait_oplock_handler: must be false if called from oplock_break_handler
 * @offload:	not offloaded on close and oplock breaks
 *
 */
void _cifsFileInfo_put(struct cifsFileInfo *cifs_file,
		       bool wait_oplock_handler, bool offload)
{
	struct inode *inode = d_inode(cifs_file->dentry);
	struct cifs_tcon *tcon = tlink_tcon(cifs_file->tlink);
	struct TCP_Server_Info *server = tcon->ses->server;
	struct cifsInodeInfo *cifsi = CIFS_I(inode);
	struct super_block *sb = inode->i_sb;
	struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
	struct cifs_fid fid = {};
	struct cifs_pending_open open;
	bool oplock_break_cancelled;
	bool serverclose_offloaded = false;

	spin_lock(&tcon->open_file_lock);
	spin_lock(&cifsi->open_file_lock);
	spin_lock(&cifs_file->file_info_lock);

	cifs_file->offload = offload;
	if (--cifs_file->count > 0) {
		spin_unlock(&cifs_file->file_info_lock);
		spin_unlock(&cifsi->open_file_lock);
		spin_unlock(&tcon->open_file_lock);
		return;
	}
	spin_unlock(&cifs_file->file_info_lock);

	if (server->ops->get_lease_key)
		server->ops->get_lease_key(inode, &fid);

	/* store open in pending opens to make sure we don't miss lease break */
	cifs_add_pending_open_locked(&fid, cifs_file->tlink, &open);

	/* remove it from the lists */
	list_del(&cifs_file->flist);
	list_del(&cifs_file->tlist);
	atomic_dec(&tcon->num_local_opens);

	if (list_empty(&cifsi->openFileList)) {
		cifs_dbg(FYI, "closing last open instance for inode %p\n",
			 d_inode(cifs_file->dentry));
		/*
		 * In strict cache mode we need to invalidate the mapping on the
		 * last close because it may cause an error when we open this
		 * file again and get at least level II oplock.
		 */
		if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_STRICT_IO)
			set_bit(CIFS_INO_INVALID_MAPPING, &cifsi->flags);
		cifs_set_oplock_level(cifsi, 0);
	}

	spin_unlock(&cifsi->open_file_lock);
	spin_unlock(&tcon->open_file_lock);

	oplock_break_cancelled = wait_oplock_handler ?
		cancel_work_sync(&cifs_file->oplock_break) : false;

	if (!tcon->need_reconnect && !cifs_file->invalidHandle) {
		struct TCP_Server_Info *server = tcon->ses->server;
		unsigned int xid;
		int rc = 0;

		xid = get_xid();
		if (server->ops->close_getattr)
			rc = server->ops->close_getattr(xid, tcon, cifs_file);
		else if (server->ops->close)
			rc = server->ops->close(xid, tcon, &cifs_file->fid);
		_free_xid(xid);

		if (rc == -EBUSY || rc == -EAGAIN) {
			// Server close failed, hence offloading it as an async op
			queue_work(serverclose_wq, &cifs_file->serverclose);
			serverclose_offloaded = true;
		}
	}

	if (oplock_break_cancelled)
		cifs_done_oplock_break(cifsi);

	cifs_del_pending_open(&open);

	// if serverclose has been offloaded to wq (on failure), it will
	// handle offloading put as well. If serverclose not offloaded,
	// we need to handle offloading put here.
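	// (serverclose_work() finishes by either queueing cifs_file->put or
	// calling cifsFileInfo_put_final() itself)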
747 if (!serverclose_offloaded) { 748 if (offload) 749 queue_work(fileinfo_put_wq, &cifs_file->put); 750 else 751 cifsFileInfo_put_final(cifs_file); 752 } 753 } 754 755 int cifs_open(struct inode *inode, struct file *file) 756 757 { 758 int rc = -EACCES; 759 unsigned int xid; 760 __u32 oplock; 761 struct cifs_sb_info *cifs_sb; 762 struct TCP_Server_Info *server; 763 struct cifs_tcon *tcon; 764 struct tcon_link *tlink; 765 struct cifsFileInfo *cfile = NULL; 766 void *page; 767 const char *full_path; 768 bool posix_open_ok = false; 769 struct cifs_fid fid = {}; 770 struct cifs_pending_open open; 771 struct cifs_open_info_data data = {}; 772 773 xid = get_xid(); 774 775 cifs_sb = CIFS_SB(inode->i_sb); 776 if (unlikely(cifs_forced_shutdown(cifs_sb))) { 777 free_xid(xid); 778 return -EIO; 779 } 780 781 tlink = cifs_sb_tlink(cifs_sb); 782 if (IS_ERR(tlink)) { 783 free_xid(xid); 784 return PTR_ERR(tlink); 785 } 786 tcon = tlink_tcon(tlink); 787 server = tcon->ses->server; 788 789 page = alloc_dentry_path(); 790 full_path = build_path_from_dentry(file_dentry(file), page); 791 if (IS_ERR(full_path)) { 792 rc = PTR_ERR(full_path); 793 goto out; 794 } 795 796 cifs_dbg(FYI, "inode = 0x%p file flags are 0x%x for %s\n", 797 inode, file->f_flags, full_path); 798 799 if (file->f_flags & O_DIRECT && 800 cifs_sb->mnt_cifs_flags & CIFS_MOUNT_STRICT_IO) { 801 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_BRL) 802 file->f_op = &cifs_file_direct_nobrl_ops; 803 else 804 file->f_op = &cifs_file_direct_ops; 805 } 806 807 /* Get the cached handle as SMB2 close is deferred */ 808 rc = cifs_get_readable_path(tcon, full_path, &cfile); 809 if (rc == 0) { 810 if (file->f_flags == cfile->f_flags) { 811 file->private_data = cfile; 812 spin_lock(&CIFS_I(inode)->deferred_lock); 813 cifs_del_deferred_close(cfile); 814 spin_unlock(&CIFS_I(inode)->deferred_lock); 815 goto use_cache; 816 } else { 817 _cifsFileInfo_put(cfile, true, false); 818 } 819 } 820 821 if (server->oplocks) 822 oplock = REQ_OPLOCK; 823 else 824 oplock = 0; 825 826 #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY 827 if (!tcon->broken_posix_open && tcon->unix_ext && 828 cap_unix(tcon->ses) && (CIFS_UNIX_POSIX_PATH_OPS_CAP & 829 le64_to_cpu(tcon->fsUnixInfo.Capability))) { 830 /* can not refresh inode info since size could be stale */ 831 rc = cifs_posix_open(full_path, &inode, inode->i_sb, 832 cifs_sb->ctx->file_mode /* ignored */, 833 file->f_flags, &oplock, &fid.netfid, xid); 834 if (rc == 0) { 835 cifs_dbg(FYI, "posix open succeeded\n"); 836 posix_open_ok = true; 837 } else if ((rc == -EINVAL) || (rc == -EOPNOTSUPP)) { 838 if (tcon->ses->serverNOS) 839 cifs_dbg(VFS, "server %s of type %s returned unexpected error on SMB posix open, disabling posix open support. Check if server update available.\n", 840 tcon->ses->ip_addr, 841 tcon->ses->serverNOS); 842 tcon->broken_posix_open = true; 843 } else if ((rc != -EIO) && (rc != -EREMOTE) && 844 (rc != -EOPNOTSUPP)) /* path not found or net err */ 845 goto out; 846 /* 847 * Else fallthrough to retry open the old way on network i/o 848 * or DFS errors. 
849 */ 850 } 851 #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */ 852 853 if (server->ops->get_lease_key) 854 server->ops->get_lease_key(inode, &fid); 855 856 cifs_add_pending_open(&fid, tlink, &open); 857 858 if (!posix_open_ok) { 859 if (server->ops->get_lease_key) 860 server->ops->get_lease_key(inode, &fid); 861 862 rc = cifs_nt_open(full_path, inode, cifs_sb, tcon, file->f_flags, &oplock, &fid, 863 xid, &data); 864 if (rc) { 865 cifs_del_pending_open(&open); 866 goto out; 867 } 868 } 869 870 cfile = cifs_new_fileinfo(&fid, file, tlink, oplock, data.symlink_target); 871 if (cfile == NULL) { 872 if (server->ops->close) 873 server->ops->close(xid, tcon, &fid); 874 cifs_del_pending_open(&open); 875 rc = -ENOMEM; 876 goto out; 877 } 878 879 #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY 880 if ((oplock & CIFS_CREATE_ACTION) && !posix_open_ok && tcon->unix_ext) { 881 /* 882 * Time to set mode which we can not set earlier due to 883 * problems creating new read-only files. 884 */ 885 struct cifs_unix_set_info_args args = { 886 .mode = inode->i_mode, 887 .uid = INVALID_UID, /* no change */ 888 .gid = INVALID_GID, /* no change */ 889 .ctime = NO_CHANGE_64, 890 .atime = NO_CHANGE_64, 891 .mtime = NO_CHANGE_64, 892 .device = 0, 893 }; 894 CIFSSMBUnixSetFileInfo(xid, tcon, &args, fid.netfid, 895 cfile->pid); 896 } 897 #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */ 898 899 use_cache: 900 fscache_use_cookie(cifs_inode_cookie(file_inode(file)), 901 file->f_mode & FMODE_WRITE); 902 if (!(file->f_flags & O_DIRECT)) 903 goto out; 904 if ((file->f_flags & (O_ACCMODE | O_APPEND)) == O_RDONLY) 905 goto out; 906 cifs_invalidate_cache(file_inode(file), FSCACHE_INVAL_DIO_WRITE); 907 908 out: 909 free_dentry_path(page); 910 free_xid(xid); 911 cifs_put_tlink(tlink); 912 cifs_free_open_info(&data); 913 return rc; 914 } 915 916 #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY 917 static int cifs_push_posix_locks(struct cifsFileInfo *cfile); 918 #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */ 919 920 /* 921 * Try to reacquire byte range locks that were released when session 922 * to server was lost. 
923 */ 924 static int 925 cifs_relock_file(struct cifsFileInfo *cfile) 926 { 927 struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry)); 928 struct cifs_tcon *tcon = tlink_tcon(cfile->tlink); 929 int rc = 0; 930 #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY 931 struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb); 932 #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */ 933 934 down_read_nested(&cinode->lock_sem, SINGLE_DEPTH_NESTING); 935 if (cinode->can_cache_brlcks) { 936 /* can cache locks - no need to relock */ 937 up_read(&cinode->lock_sem); 938 return rc; 939 } 940 941 #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY 942 if (cap_unix(tcon->ses) && 943 (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) && 944 ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0)) 945 rc = cifs_push_posix_locks(cfile); 946 else 947 #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */ 948 rc = tcon->ses->server->ops->push_mand_locks(cfile); 949 950 up_read(&cinode->lock_sem); 951 return rc; 952 } 953 954 static int 955 cifs_reopen_file(struct cifsFileInfo *cfile, bool can_flush) 956 { 957 int rc = -EACCES; 958 unsigned int xid; 959 __u32 oplock; 960 struct cifs_sb_info *cifs_sb; 961 struct cifs_tcon *tcon; 962 struct TCP_Server_Info *server; 963 struct cifsInodeInfo *cinode; 964 struct inode *inode; 965 void *page; 966 const char *full_path; 967 int desired_access; 968 int disposition = FILE_OPEN; 969 int create_options = CREATE_NOT_DIR; 970 struct cifs_open_parms oparms; 971 int rdwr_for_fscache = 0; 972 973 xid = get_xid(); 974 mutex_lock(&cfile->fh_mutex); 975 if (!cfile->invalidHandle) { 976 mutex_unlock(&cfile->fh_mutex); 977 free_xid(xid); 978 return 0; 979 } 980 981 inode = d_inode(cfile->dentry); 982 cifs_sb = CIFS_SB(inode->i_sb); 983 tcon = tlink_tcon(cfile->tlink); 984 server = tcon->ses->server; 985 986 /* 987 * Can not grab rename sem here because various ops, including those 988 * that already have the rename sem can end up causing writepage to get 989 * called and if the server was down that means we end up here, and we 990 * can never tell if the caller already has the rename_sem. 991 */ 992 page = alloc_dentry_path(); 993 full_path = build_path_from_dentry(cfile->dentry, page); 994 if (IS_ERR(full_path)) { 995 mutex_unlock(&cfile->fh_mutex); 996 free_dentry_path(page); 997 free_xid(xid); 998 return PTR_ERR(full_path); 999 } 1000 1001 cifs_dbg(FYI, "inode = 0x%p file flags 0x%x for %s\n", 1002 inode, cfile->f_flags, full_path); 1003 1004 if (tcon->ses->server->oplocks) 1005 oplock = REQ_OPLOCK; 1006 else 1007 oplock = 0; 1008 1009 #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY 1010 if (tcon->unix_ext && cap_unix(tcon->ses) && 1011 (CIFS_UNIX_POSIX_PATH_OPS_CAP & 1012 le64_to_cpu(tcon->fsUnixInfo.Capability))) { 1013 /* 1014 * O_CREAT, O_EXCL and O_TRUNC already had their effect on the 1015 * original open. Must mask them off for a reopen. 1016 */ 1017 unsigned int oflags = cfile->f_flags & 1018 ~(O_CREAT | O_EXCL | O_TRUNC); 1019 1020 rc = cifs_posix_open(full_path, NULL, inode->i_sb, 1021 cifs_sb->ctx->file_mode /* ignored */, 1022 oflags, &oplock, &cfile->fid.netfid, xid); 1023 if (rc == 0) { 1024 cifs_dbg(FYI, "posix reopen succeeded\n"); 1025 oparms.reconnect = true; 1026 goto reopen_success; 1027 } 1028 /* 1029 * fallthrough to retry open the old way on errors, especially 1030 * in the reconnect path it is important to retry hard 1031 */ 1032 } 1033 #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */ 1034 1035 /* If we're caching, we need to be able to fill in around partial writes. 
*/ 1036 if (cifs_fscache_enabled(inode) && (cfile->f_flags & O_ACCMODE) == O_WRONLY) 1037 rdwr_for_fscache = 1; 1038 1039 desired_access = cifs_convert_flags(cfile->f_flags, rdwr_for_fscache); 1040 1041 /* O_SYNC also has bit for O_DSYNC so following check picks up either */ 1042 if (cfile->f_flags & O_SYNC) 1043 create_options |= CREATE_WRITE_THROUGH; 1044 1045 if (cfile->f_flags & O_DIRECT) 1046 create_options |= CREATE_NO_BUFFER; 1047 1048 if (server->ops->get_lease_key) 1049 server->ops->get_lease_key(inode, &cfile->fid); 1050 1051 retry_open: 1052 oparms = (struct cifs_open_parms) { 1053 .tcon = tcon, 1054 .cifs_sb = cifs_sb, 1055 .desired_access = desired_access, 1056 .create_options = cifs_create_options(cifs_sb, create_options), 1057 .disposition = disposition, 1058 .path = full_path, 1059 .fid = &cfile->fid, 1060 .reconnect = true, 1061 }; 1062 1063 /* 1064 * Can not refresh inode by passing in file_info buf to be returned by 1065 * ops->open and then calling get_inode_info with returned buf since 1066 * file might have write behind data that needs to be flushed and server 1067 * version of file size can be stale. If we knew for sure that inode was 1068 * not dirty locally we could do this. 1069 */ 1070 rc = server->ops->open(xid, &oparms, &oplock, NULL); 1071 if (rc == -ENOENT && oparms.reconnect == false) { 1072 /* durable handle timeout is expired - open the file again */ 1073 rc = server->ops->open(xid, &oparms, &oplock, NULL); 1074 /* indicate that we need to relock the file */ 1075 oparms.reconnect = true; 1076 } 1077 if (rc == -EACCES && rdwr_for_fscache == 1) { 1078 desired_access = cifs_convert_flags(cfile->f_flags, 0); 1079 rdwr_for_fscache = 2; 1080 goto retry_open; 1081 } 1082 1083 if (rc) { 1084 mutex_unlock(&cfile->fh_mutex); 1085 cifs_dbg(FYI, "cifs_reopen returned 0x%x\n", rc); 1086 cifs_dbg(FYI, "oplock: %d\n", oplock); 1087 goto reopen_error_exit; 1088 } 1089 1090 if (rdwr_for_fscache == 2) 1091 cifs_invalidate_cache(inode, FSCACHE_INVAL_DIO_WRITE); 1092 1093 #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY 1094 reopen_success: 1095 #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */ 1096 cfile->invalidHandle = false; 1097 mutex_unlock(&cfile->fh_mutex); 1098 cinode = CIFS_I(inode); 1099 1100 if (can_flush) { 1101 rc = filemap_write_and_wait(inode->i_mapping); 1102 if (!is_interrupt_error(rc)) 1103 mapping_set_error(inode->i_mapping, rc); 1104 1105 if (tcon->posix_extensions) { 1106 rc = smb311_posix_get_inode_info(&inode, full_path, 1107 NULL, inode->i_sb, xid); 1108 } else if (tcon->unix_ext) { 1109 rc = cifs_get_inode_info_unix(&inode, full_path, 1110 inode->i_sb, xid); 1111 } else { 1112 rc = cifs_get_inode_info(&inode, full_path, NULL, 1113 inode->i_sb, xid, NULL); 1114 } 1115 } 1116 /* 1117 * Else we are writing out data to server already and could deadlock if 1118 * we tried to flush data, and since we do not know if we have data that 1119 * would invalidate the current end of file on the server we can not go 1120 * to the server to get the new inode info. 1121 */ 1122 1123 /* 1124 * If the server returned a read oplock and we have mandatory brlocks, 1125 * set oplock level to None. 
1126 */ 1127 if (server->ops->is_read_op(oplock) && cifs_has_mand_locks(cinode)) { 1128 cifs_dbg(FYI, "Reset oplock val from read to None due to mand locks\n"); 1129 oplock = 0; 1130 } 1131 1132 server->ops->set_fid(cfile, &cfile->fid, oplock); 1133 if (oparms.reconnect) 1134 cifs_relock_file(cfile); 1135 1136 reopen_error_exit: 1137 free_dentry_path(page); 1138 free_xid(xid); 1139 return rc; 1140 } 1141 1142 void smb2_deferred_work_close(struct work_struct *work) 1143 { 1144 struct cifsFileInfo *cfile = container_of(work, 1145 struct cifsFileInfo, deferred.work); 1146 1147 spin_lock(&CIFS_I(d_inode(cfile->dentry))->deferred_lock); 1148 cifs_del_deferred_close(cfile); 1149 cfile->deferred_close_scheduled = false; 1150 spin_unlock(&CIFS_I(d_inode(cfile->dentry))->deferred_lock); 1151 _cifsFileInfo_put(cfile, true, false); 1152 } 1153 1154 static bool 1155 smb2_can_defer_close(struct inode *inode, struct cifs_deferred_close *dclose) 1156 { 1157 struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb); 1158 struct cifsInodeInfo *cinode = CIFS_I(inode); 1159 1160 return (cifs_sb->ctx->closetimeo && cinode->lease_granted && dclose && 1161 (cinode->oplock == CIFS_CACHE_RHW_FLG || 1162 cinode->oplock == CIFS_CACHE_RH_FLG) && 1163 !test_bit(CIFS_INO_CLOSE_ON_LOCK, &cinode->flags)); 1164 1165 } 1166 1167 int cifs_close(struct inode *inode, struct file *file) 1168 { 1169 struct cifsFileInfo *cfile; 1170 struct cifsInodeInfo *cinode = CIFS_I(inode); 1171 struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb); 1172 struct cifs_deferred_close *dclose; 1173 1174 cifs_fscache_unuse_inode_cookie(inode, file->f_mode & FMODE_WRITE); 1175 1176 if (file->private_data != NULL) { 1177 cfile = file->private_data; 1178 file->private_data = NULL; 1179 dclose = kmalloc(sizeof(struct cifs_deferred_close), GFP_KERNEL); 1180 if ((cfile->status_file_deleted == false) && 1181 (smb2_can_defer_close(inode, dclose))) { 1182 if (test_and_clear_bit(CIFS_INO_MODIFIED_ATTR, &cinode->flags)) { 1183 inode_set_mtime_to_ts(inode, 1184 inode_set_ctime_current(inode)); 1185 } 1186 spin_lock(&cinode->deferred_lock); 1187 cifs_add_deferred_close(cfile, dclose); 1188 if (cfile->deferred_close_scheduled && 1189 delayed_work_pending(&cfile->deferred)) { 1190 /* 1191 * If there is no pending work, mod_delayed_work queues new work. 1192 * So, Increase the ref count to avoid use-after-free. 
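				 * (mod_delayed_work() returns false in exactly that case,
				 * which is why the extra reference is only taken then).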
1193 */ 1194 if (!mod_delayed_work(deferredclose_wq, 1195 &cfile->deferred, cifs_sb->ctx->closetimeo)) 1196 cifsFileInfo_get(cfile); 1197 } else { 1198 /* Deferred close for files */ 1199 queue_delayed_work(deferredclose_wq, 1200 &cfile->deferred, cifs_sb->ctx->closetimeo); 1201 cfile->deferred_close_scheduled = true; 1202 spin_unlock(&cinode->deferred_lock); 1203 return 0; 1204 } 1205 spin_unlock(&cinode->deferred_lock); 1206 _cifsFileInfo_put(cfile, true, false); 1207 } else { 1208 _cifsFileInfo_put(cfile, true, false); 1209 kfree(dclose); 1210 } 1211 } 1212 1213 /* return code from the ->release op is always ignored */ 1214 return 0; 1215 } 1216 1217 void 1218 cifs_reopen_persistent_handles(struct cifs_tcon *tcon) 1219 { 1220 struct cifsFileInfo *open_file, *tmp; 1221 struct list_head tmp_list; 1222 1223 if (!tcon->use_persistent || !tcon->need_reopen_files) 1224 return; 1225 1226 tcon->need_reopen_files = false; 1227 1228 cifs_dbg(FYI, "Reopen persistent handles\n"); 1229 INIT_LIST_HEAD(&tmp_list); 1230 1231 /* list all files open on tree connection, reopen resilient handles */ 1232 spin_lock(&tcon->open_file_lock); 1233 list_for_each_entry(open_file, &tcon->openFileList, tlist) { 1234 if (!open_file->invalidHandle) 1235 continue; 1236 cifsFileInfo_get(open_file); 1237 list_add_tail(&open_file->rlist, &tmp_list); 1238 } 1239 spin_unlock(&tcon->open_file_lock); 1240 1241 list_for_each_entry_safe(open_file, tmp, &tmp_list, rlist) { 1242 if (cifs_reopen_file(open_file, false /* do not flush */)) 1243 tcon->need_reopen_files = true; 1244 list_del_init(&open_file->rlist); 1245 cifsFileInfo_put(open_file); 1246 } 1247 } 1248 1249 int cifs_closedir(struct inode *inode, struct file *file) 1250 { 1251 int rc = 0; 1252 unsigned int xid; 1253 struct cifsFileInfo *cfile = file->private_data; 1254 struct cifs_tcon *tcon; 1255 struct TCP_Server_Info *server; 1256 char *buf; 1257 1258 cifs_dbg(FYI, "Closedir inode = 0x%p\n", inode); 1259 1260 if (cfile == NULL) 1261 return rc; 1262 1263 xid = get_xid(); 1264 tcon = tlink_tcon(cfile->tlink); 1265 server = tcon->ses->server; 1266 1267 cifs_dbg(FYI, "Freeing private data in close dir\n"); 1268 spin_lock(&cfile->file_info_lock); 1269 if (server->ops->dir_needs_close(cfile)) { 1270 cfile->invalidHandle = true; 1271 spin_unlock(&cfile->file_info_lock); 1272 if (server->ops->close_dir) 1273 rc = server->ops->close_dir(xid, tcon, &cfile->fid); 1274 else 1275 rc = -ENOSYS; 1276 cifs_dbg(FYI, "Closing uncompleted readdir with rc %d\n", rc); 1277 /* not much we can do if it fails anyway, ignore rc */ 1278 rc = 0; 1279 } else 1280 spin_unlock(&cfile->file_info_lock); 1281 1282 buf = cfile->srch_inf.ntwrk_buf_start; 1283 if (buf) { 1284 cifs_dbg(FYI, "closedir free smb buf in srch struct\n"); 1285 cfile->srch_inf.ntwrk_buf_start = NULL; 1286 if (cfile->srch_inf.smallBuf) 1287 cifs_small_buf_release(buf); 1288 else 1289 cifs_buf_release(buf); 1290 } 1291 1292 cifs_put_tlink(cfile->tlink); 1293 kfree(file->private_data); 1294 file->private_data = NULL; 1295 /* BB can we lock the filestruct while this is going on? 
*/ 1296 free_xid(xid); 1297 return rc; 1298 } 1299 1300 static struct cifsLockInfo * 1301 cifs_lock_init(__u64 offset, __u64 length, __u8 type, __u16 flags) 1302 { 1303 struct cifsLockInfo *lock = 1304 kmalloc(sizeof(struct cifsLockInfo), GFP_KERNEL); 1305 if (!lock) 1306 return lock; 1307 lock->offset = offset; 1308 lock->length = length; 1309 lock->type = type; 1310 lock->pid = current->tgid; 1311 lock->flags = flags; 1312 INIT_LIST_HEAD(&lock->blist); 1313 init_waitqueue_head(&lock->block_q); 1314 return lock; 1315 } 1316 1317 void 1318 cifs_del_lock_waiters(struct cifsLockInfo *lock) 1319 { 1320 struct cifsLockInfo *li, *tmp; 1321 list_for_each_entry_safe(li, tmp, &lock->blist, blist) { 1322 list_del_init(&li->blist); 1323 wake_up(&li->block_q); 1324 } 1325 } 1326 1327 #define CIFS_LOCK_OP 0 1328 #define CIFS_READ_OP 1 1329 #define CIFS_WRITE_OP 2 1330 1331 /* @rw_check : 0 - no op, 1 - read, 2 - write */ 1332 static bool 1333 cifs_find_fid_lock_conflict(struct cifs_fid_locks *fdlocks, __u64 offset, 1334 __u64 length, __u8 type, __u16 flags, 1335 struct cifsFileInfo *cfile, 1336 struct cifsLockInfo **conf_lock, int rw_check) 1337 { 1338 struct cifsLockInfo *li; 1339 struct cifsFileInfo *cur_cfile = fdlocks->cfile; 1340 struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server; 1341 1342 list_for_each_entry(li, &fdlocks->locks, llist) { 1343 if (offset + length <= li->offset || 1344 offset >= li->offset + li->length) 1345 continue; 1346 if (rw_check != CIFS_LOCK_OP && current->tgid == li->pid && 1347 server->ops->compare_fids(cfile, cur_cfile)) { 1348 /* shared lock prevents write op through the same fid */ 1349 if (!(li->type & server->vals->shared_lock_type) || 1350 rw_check != CIFS_WRITE_OP) 1351 continue; 1352 } 1353 if ((type & server->vals->shared_lock_type) && 1354 ((server->ops->compare_fids(cfile, cur_cfile) && 1355 current->tgid == li->pid) || type == li->type)) 1356 continue; 1357 if (rw_check == CIFS_LOCK_OP && 1358 (flags & FL_OFDLCK) && (li->flags & FL_OFDLCK) && 1359 server->ops->compare_fids(cfile, cur_cfile)) 1360 continue; 1361 if (conf_lock) 1362 *conf_lock = li; 1363 return true; 1364 } 1365 return false; 1366 } 1367 1368 bool 1369 cifs_find_lock_conflict(struct cifsFileInfo *cfile, __u64 offset, __u64 length, 1370 __u8 type, __u16 flags, 1371 struct cifsLockInfo **conf_lock, int rw_check) 1372 { 1373 bool rc = false; 1374 struct cifs_fid_locks *cur; 1375 struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry)); 1376 1377 list_for_each_entry(cur, &cinode->llist, llist) { 1378 rc = cifs_find_fid_lock_conflict(cur, offset, length, type, 1379 flags, cfile, conf_lock, 1380 rw_check); 1381 if (rc) 1382 break; 1383 } 1384 1385 return rc; 1386 } 1387 1388 /* 1389 * Check if there is another lock that prevents us to set the lock (mandatory 1390 * style). If such a lock exists, update the flock structure with its 1391 * properties. Otherwise, set the flock type to F_UNLCK if we can cache brlocks 1392 * or leave it the same if we can't. Returns 0 if we don't need to request to 1393 * the server or 1 otherwise. 
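 * (e.g. an F_GETLK that finds a cached conflicting lock is answered entirely
 * locally and returns 0).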
1394 */ 1395 static int 1396 cifs_lock_test(struct cifsFileInfo *cfile, __u64 offset, __u64 length, 1397 __u8 type, struct file_lock *flock) 1398 { 1399 int rc = 0; 1400 struct cifsLockInfo *conf_lock; 1401 struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry)); 1402 struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server; 1403 bool exist; 1404 1405 down_read(&cinode->lock_sem); 1406 1407 exist = cifs_find_lock_conflict(cfile, offset, length, type, 1408 flock->fl_flags, &conf_lock, 1409 CIFS_LOCK_OP); 1410 if (exist) { 1411 flock->fl_start = conf_lock->offset; 1412 flock->fl_end = conf_lock->offset + conf_lock->length - 1; 1413 flock->fl_pid = conf_lock->pid; 1414 if (conf_lock->type & server->vals->shared_lock_type) 1415 flock->fl_type = F_RDLCK; 1416 else 1417 flock->fl_type = F_WRLCK; 1418 } else if (!cinode->can_cache_brlcks) 1419 rc = 1; 1420 else 1421 flock->fl_type = F_UNLCK; 1422 1423 up_read(&cinode->lock_sem); 1424 return rc; 1425 } 1426 1427 static void 1428 cifs_lock_add(struct cifsFileInfo *cfile, struct cifsLockInfo *lock) 1429 { 1430 struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry)); 1431 cifs_down_write(&cinode->lock_sem); 1432 list_add_tail(&lock->llist, &cfile->llist->locks); 1433 up_write(&cinode->lock_sem); 1434 } 1435 1436 /* 1437 * Set the byte-range lock (mandatory style). Returns: 1438 * 1) 0, if we set the lock and don't need to request to the server; 1439 * 2) 1, if no locks prevent us but we need to request to the server; 1440 * 3) -EACCES, if there is a lock that prevents us and wait is false. 1441 */ 1442 static int 1443 cifs_lock_add_if(struct cifsFileInfo *cfile, struct cifsLockInfo *lock, 1444 bool wait) 1445 { 1446 struct cifsLockInfo *conf_lock; 1447 struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry)); 1448 bool exist; 1449 int rc = 0; 1450 1451 try_again: 1452 exist = false; 1453 cifs_down_write(&cinode->lock_sem); 1454 1455 exist = cifs_find_lock_conflict(cfile, lock->offset, lock->length, 1456 lock->type, lock->flags, &conf_lock, 1457 CIFS_LOCK_OP); 1458 if (!exist && cinode->can_cache_brlcks) { 1459 list_add_tail(&lock->llist, &cfile->llist->locks); 1460 up_write(&cinode->lock_sem); 1461 return rc; 1462 } 1463 1464 if (!exist) 1465 rc = 1; 1466 else if (!wait) 1467 rc = -EACCES; 1468 else { 1469 list_add_tail(&lock->blist, &conf_lock->blist); 1470 up_write(&cinode->lock_sem); 1471 rc = wait_event_interruptible(lock->block_q, 1472 (lock->blist.prev == &lock->blist) && 1473 (lock->blist.next == &lock->blist)); 1474 if (!rc) 1475 goto try_again; 1476 cifs_down_write(&cinode->lock_sem); 1477 list_del_init(&lock->blist); 1478 } 1479 1480 up_write(&cinode->lock_sem); 1481 return rc; 1482 } 1483 1484 #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY 1485 /* 1486 * Check if there is another lock that prevents us to set the lock (posix 1487 * style). If such a lock exists, update the flock structure with its 1488 * properties. Otherwise, set the flock type to F_UNLCK if we can cache brlocks 1489 * or leave it the same if we can't. Returns 0 if we don't need to request to 1490 * the server or 1 otherwise. 
1491 */ 1492 static int 1493 cifs_posix_lock_test(struct file *file, struct file_lock *flock) 1494 { 1495 int rc = 0; 1496 struct cifsInodeInfo *cinode = CIFS_I(file_inode(file)); 1497 unsigned char saved_type = flock->fl_type; 1498 1499 if ((flock->fl_flags & FL_POSIX) == 0) 1500 return 1; 1501 1502 down_read(&cinode->lock_sem); 1503 posix_test_lock(file, flock); 1504 1505 if (flock->fl_type == F_UNLCK && !cinode->can_cache_brlcks) { 1506 flock->fl_type = saved_type; 1507 rc = 1; 1508 } 1509 1510 up_read(&cinode->lock_sem); 1511 return rc; 1512 } 1513 1514 /* 1515 * Set the byte-range lock (posix style). Returns: 1516 * 1) <0, if the error occurs while setting the lock; 1517 * 2) 0, if we set the lock and don't need to request to the server; 1518 * 3) FILE_LOCK_DEFERRED, if we will wait for some other file_lock; 1519 * 4) FILE_LOCK_DEFERRED + 1, if we need to request to the server. 1520 */ 1521 static int 1522 cifs_posix_lock_set(struct file *file, struct file_lock *flock) 1523 { 1524 struct cifsInodeInfo *cinode = CIFS_I(file_inode(file)); 1525 int rc = FILE_LOCK_DEFERRED + 1; 1526 1527 if ((flock->fl_flags & FL_POSIX) == 0) 1528 return rc; 1529 1530 cifs_down_write(&cinode->lock_sem); 1531 if (!cinode->can_cache_brlcks) { 1532 up_write(&cinode->lock_sem); 1533 return rc; 1534 } 1535 1536 rc = posix_lock_file(file, flock, NULL); 1537 up_write(&cinode->lock_sem); 1538 return rc; 1539 } 1540 1541 int 1542 cifs_push_mandatory_locks(struct cifsFileInfo *cfile) 1543 { 1544 unsigned int xid; 1545 int rc = 0, stored_rc; 1546 struct cifsLockInfo *li, *tmp; 1547 struct cifs_tcon *tcon; 1548 unsigned int num, max_num, max_buf; 1549 LOCKING_ANDX_RANGE *buf, *cur; 1550 static const int types[] = { 1551 LOCKING_ANDX_LARGE_FILES, 1552 LOCKING_ANDX_SHARED_LOCK | LOCKING_ANDX_LARGE_FILES 1553 }; 1554 int i; 1555 1556 xid = get_xid(); 1557 tcon = tlink_tcon(cfile->tlink); 1558 1559 /* 1560 * Accessing maxBuf is racy with cifs_reconnect - need to store value 1561 * and check it before using. 
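	 * (cifs_reconnect() can change ses->server->maxBuf underneath us, so
	 * snapshot it into max_buf once and validate only that copy).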
1562 */ 1563 max_buf = tcon->ses->server->maxBuf; 1564 if (max_buf < (sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE))) { 1565 free_xid(xid); 1566 return -EINVAL; 1567 } 1568 1569 BUILD_BUG_ON(sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE) > 1570 PAGE_SIZE); 1571 max_buf = min_t(unsigned int, max_buf - sizeof(struct smb_hdr), 1572 PAGE_SIZE); 1573 max_num = (max_buf - sizeof(struct smb_hdr)) / 1574 sizeof(LOCKING_ANDX_RANGE); 1575 buf = kcalloc(max_num, sizeof(LOCKING_ANDX_RANGE), GFP_KERNEL); 1576 if (!buf) { 1577 free_xid(xid); 1578 return -ENOMEM; 1579 } 1580 1581 for (i = 0; i < 2; i++) { 1582 cur = buf; 1583 num = 0; 1584 list_for_each_entry_safe(li, tmp, &cfile->llist->locks, llist) { 1585 if (li->type != types[i]) 1586 continue; 1587 cur->Pid = cpu_to_le16(li->pid); 1588 cur->LengthLow = cpu_to_le32((u32)li->length); 1589 cur->LengthHigh = cpu_to_le32((u32)(li->length>>32)); 1590 cur->OffsetLow = cpu_to_le32((u32)li->offset); 1591 cur->OffsetHigh = cpu_to_le32((u32)(li->offset>>32)); 1592 if (++num == max_num) { 1593 stored_rc = cifs_lockv(xid, tcon, 1594 cfile->fid.netfid, 1595 (__u8)li->type, 0, num, 1596 buf); 1597 if (stored_rc) 1598 rc = stored_rc; 1599 cur = buf; 1600 num = 0; 1601 } else 1602 cur++; 1603 } 1604 1605 if (num) { 1606 stored_rc = cifs_lockv(xid, tcon, cfile->fid.netfid, 1607 (__u8)types[i], 0, num, buf); 1608 if (stored_rc) 1609 rc = stored_rc; 1610 } 1611 } 1612 1613 kfree(buf); 1614 free_xid(xid); 1615 return rc; 1616 } 1617 1618 static __u32 1619 hash_lockowner(fl_owner_t owner) 1620 { 1621 return cifs_lock_secret ^ hash32_ptr((const void *)owner); 1622 } 1623 #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */ 1624 1625 struct lock_to_push { 1626 struct list_head llist; 1627 __u64 offset; 1628 __u64 length; 1629 __u32 pid; 1630 __u16 netfid; 1631 __u8 type; 1632 }; 1633 1634 #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY 1635 static int 1636 cifs_push_posix_locks(struct cifsFileInfo *cfile) 1637 { 1638 struct inode *inode = d_inode(cfile->dentry); 1639 struct cifs_tcon *tcon = tlink_tcon(cfile->tlink); 1640 struct file_lock *flock; 1641 struct file_lock_context *flctx = locks_inode_context(inode); 1642 unsigned int count = 0, i; 1643 int rc = 0, xid, type; 1644 struct list_head locks_to_send, *el; 1645 struct lock_to_push *lck, *tmp; 1646 __u64 length; 1647 1648 xid = get_xid(); 1649 1650 if (!flctx) 1651 goto out; 1652 1653 spin_lock(&flctx->flc_lock); 1654 list_for_each(el, &flctx->flc_posix) { 1655 count++; 1656 } 1657 spin_unlock(&flctx->flc_lock); 1658 1659 INIT_LIST_HEAD(&locks_to_send); 1660 1661 /* 1662 * Allocating count locks is enough because no FL_POSIX locks can be 1663 * added to the list while we are holding cinode->lock_sem that 1664 * protects locking operations of this inode. 1665 */ 1666 for (i = 0; i < count; i++) { 1667 lck = kmalloc(sizeof(struct lock_to_push), GFP_KERNEL); 1668 if (!lck) { 1669 rc = -ENOMEM; 1670 goto err_out; 1671 } 1672 list_add_tail(&lck->llist, &locks_to_send); 1673 } 1674 1675 el = locks_to_send.next; 1676 spin_lock(&flctx->flc_lock); 1677 list_for_each_entry(flock, &flctx->flc_posix, fl_list) { 1678 if (el == &locks_to_send) { 1679 /* 1680 * The list ended. We don't have enough allocated 1681 * structures - something is really wrong. 
1682 */ 1683 cifs_dbg(VFS, "Can't push all brlocks!\n"); 1684 break; 1685 } 1686 length = cifs_flock_len(flock); 1687 if (flock->fl_type == F_RDLCK || flock->fl_type == F_SHLCK) 1688 type = CIFS_RDLCK; 1689 else 1690 type = CIFS_WRLCK; 1691 lck = list_entry(el, struct lock_to_push, llist); 1692 lck->pid = hash_lockowner(flock->fl_owner); 1693 lck->netfid = cfile->fid.netfid; 1694 lck->length = length; 1695 lck->type = type; 1696 lck->offset = flock->fl_start; 1697 } 1698 spin_unlock(&flctx->flc_lock); 1699 1700 list_for_each_entry_safe(lck, tmp, &locks_to_send, llist) { 1701 int stored_rc; 1702 1703 stored_rc = CIFSSMBPosixLock(xid, tcon, lck->netfid, lck->pid, 1704 lck->offset, lck->length, NULL, 1705 lck->type, 0); 1706 if (stored_rc) 1707 rc = stored_rc; 1708 list_del(&lck->llist); 1709 kfree(lck); 1710 } 1711 1712 out: 1713 free_xid(xid); 1714 return rc; 1715 err_out: 1716 list_for_each_entry_safe(lck, tmp, &locks_to_send, llist) { 1717 list_del(&lck->llist); 1718 kfree(lck); 1719 } 1720 goto out; 1721 } 1722 #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */ 1723 1724 static int 1725 cifs_push_locks(struct cifsFileInfo *cfile) 1726 { 1727 struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry)); 1728 struct cifs_tcon *tcon = tlink_tcon(cfile->tlink); 1729 int rc = 0; 1730 #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY 1731 struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb); 1732 #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */ 1733 1734 /* we are going to update can_cache_brlcks here - need a write access */ 1735 cifs_down_write(&cinode->lock_sem); 1736 if (!cinode->can_cache_brlcks) { 1737 up_write(&cinode->lock_sem); 1738 return rc; 1739 } 1740 1741 #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY 1742 if (cap_unix(tcon->ses) && 1743 (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) && 1744 ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0)) 1745 rc = cifs_push_posix_locks(cfile); 1746 else 1747 #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */ 1748 rc = tcon->ses->server->ops->push_mand_locks(cfile); 1749 1750 cinode->can_cache_brlcks = false; 1751 up_write(&cinode->lock_sem); 1752 return rc; 1753 } 1754 1755 static void 1756 cifs_read_flock(struct file_lock *flock, __u32 *type, int *lock, int *unlock, 1757 bool *wait_flag, struct TCP_Server_Info *server) 1758 { 1759 if (flock->fl_flags & FL_POSIX) 1760 cifs_dbg(FYI, "Posix\n"); 1761 if (flock->fl_flags & FL_FLOCK) 1762 cifs_dbg(FYI, "Flock\n"); 1763 if (flock->fl_flags & FL_SLEEP) { 1764 cifs_dbg(FYI, "Blocking lock\n"); 1765 *wait_flag = true; 1766 } 1767 if (flock->fl_flags & FL_ACCESS) 1768 cifs_dbg(FYI, "Process suspended by mandatory locking - not implemented yet\n"); 1769 if (flock->fl_flags & FL_LEASE) 1770 cifs_dbg(FYI, "Lease on file - not implemented yet\n"); 1771 if (flock->fl_flags & 1772 (~(FL_POSIX | FL_FLOCK | FL_SLEEP | 1773 FL_ACCESS | FL_LEASE | FL_CLOSE | FL_OFDLCK))) 1774 cifs_dbg(FYI, "Unknown lock flags 0x%x\n", flock->fl_flags); 1775 1776 *type = server->vals->large_lock_type; 1777 if (flock->fl_type == F_WRLCK) { 1778 cifs_dbg(FYI, "F_WRLCK\n"); 1779 *type |= server->vals->exclusive_lock_type; 1780 *lock = 1; 1781 } else if (flock->fl_type == F_UNLCK) { 1782 cifs_dbg(FYI, "F_UNLCK\n"); 1783 *type |= server->vals->unlock_lock_type; 1784 *unlock = 1; 1785 /* Check if unlock includes more than one lock range */ 1786 } else if (flock->fl_type == F_RDLCK) { 1787 cifs_dbg(FYI, "F_RDLCK\n"); 1788 *type |= server->vals->shared_lock_type; 1789 *lock = 1; 1790 } else if (flock->fl_type == F_EXLCK) { 1791 
cifs_dbg(FYI, "F_EXLCK\n"); 1792 *type |= server->vals->exclusive_lock_type; 1793 *lock = 1; 1794 } else if (flock->fl_type == F_SHLCK) { 1795 cifs_dbg(FYI, "F_SHLCK\n"); 1796 *type |= server->vals->shared_lock_type; 1797 *lock = 1; 1798 } else 1799 cifs_dbg(FYI, "Unknown type of lock\n"); 1800 } 1801 1802 static int 1803 cifs_getlk(struct file *file, struct file_lock *flock, __u32 type, 1804 bool wait_flag, bool posix_lck, unsigned int xid) 1805 { 1806 int rc = 0; 1807 __u64 length = cifs_flock_len(flock); 1808 struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data; 1809 struct cifs_tcon *tcon = tlink_tcon(cfile->tlink); 1810 struct TCP_Server_Info *server = tcon->ses->server; 1811 #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY 1812 __u16 netfid = cfile->fid.netfid; 1813 1814 if (posix_lck) { 1815 int posix_lock_type; 1816 1817 rc = cifs_posix_lock_test(file, flock); 1818 if (!rc) 1819 return rc; 1820 1821 if (type & server->vals->shared_lock_type) 1822 posix_lock_type = CIFS_RDLCK; 1823 else 1824 posix_lock_type = CIFS_WRLCK; 1825 rc = CIFSSMBPosixLock(xid, tcon, netfid, 1826 hash_lockowner(flock->fl_owner), 1827 flock->fl_start, length, flock, 1828 posix_lock_type, wait_flag); 1829 return rc; 1830 } 1831 #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */ 1832 1833 rc = cifs_lock_test(cfile, flock->fl_start, length, type, flock); 1834 if (!rc) 1835 return rc; 1836 1837 /* BB we could chain these into one lock request BB */ 1838 rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length, type, 1839 1, 0, false); 1840 if (rc == 0) { 1841 rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length, 1842 type, 0, 1, false); 1843 flock->fl_type = F_UNLCK; 1844 if (rc != 0) 1845 cifs_dbg(VFS, "Error unlocking previously locked range %d during test of lock\n", 1846 rc); 1847 return 0; 1848 } 1849 1850 if (type & server->vals->shared_lock_type) { 1851 flock->fl_type = F_WRLCK; 1852 return 0; 1853 } 1854 1855 type &= ~server->vals->exclusive_lock_type; 1856 1857 rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length, 1858 type | server->vals->shared_lock_type, 1859 1, 0, false); 1860 if (rc == 0) { 1861 rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length, 1862 type | server->vals->shared_lock_type, 0, 1, false); 1863 flock->fl_type = F_RDLCK; 1864 if (rc != 0) 1865 cifs_dbg(VFS, "Error unlocking previously locked range %d during test of lock\n", 1866 rc); 1867 } else 1868 flock->fl_type = F_WRLCK; 1869 1870 return 0; 1871 } 1872 1873 void 1874 cifs_move_llist(struct list_head *source, struct list_head *dest) 1875 { 1876 struct list_head *li, *tmp; 1877 list_for_each_safe(li, tmp, source) 1878 list_move(li, dest); 1879 } 1880 1881 void 1882 cifs_free_llist(struct list_head *llist) 1883 { 1884 struct cifsLockInfo *li, *tmp; 1885 list_for_each_entry_safe(li, tmp, llist, llist) { 1886 cifs_del_lock_waiters(li); 1887 list_del(&li->llist); 1888 kfree(li); 1889 } 1890 } 1891 1892 #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY 1893 int 1894 cifs_unlock_range(struct cifsFileInfo *cfile, struct file_lock *flock, 1895 unsigned int xid) 1896 { 1897 int rc = 0, stored_rc; 1898 static const int types[] = { 1899 LOCKING_ANDX_LARGE_FILES, 1900 LOCKING_ANDX_SHARED_LOCK | LOCKING_ANDX_LARGE_FILES 1901 }; 1902 unsigned int i; 1903 unsigned int max_num, num, max_buf; 1904 LOCKING_ANDX_RANGE *buf, *cur; 1905 struct cifs_tcon *tcon = tlink_tcon(cfile->tlink); 1906 struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry)); 1907 struct cifsLockInfo *li, *tmp; 1908 __u64 length = 
cifs_flock_len(flock); 1909 struct list_head tmp_llist; 1910 1911 INIT_LIST_HEAD(&tmp_llist); 1912 1913 /* 1914 * Accessing maxBuf is racy with cifs_reconnect - need to store value 1915 * and check it before using. 1916 */ 1917 max_buf = tcon->ses->server->maxBuf; 1918 if (max_buf < (sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE))) 1919 return -EINVAL; 1920 1921 BUILD_BUG_ON(sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE) > 1922 PAGE_SIZE); 1923 max_buf = min_t(unsigned int, max_buf - sizeof(struct smb_hdr), 1924 PAGE_SIZE); 1925 max_num = (max_buf - sizeof(struct smb_hdr)) / 1926 sizeof(LOCKING_ANDX_RANGE); 1927 buf = kcalloc(max_num, sizeof(LOCKING_ANDX_RANGE), GFP_KERNEL); 1928 if (!buf) 1929 return -ENOMEM; 1930 1931 cifs_down_write(&cinode->lock_sem); 1932 for (i = 0; i < 2; i++) { 1933 cur = buf; 1934 num = 0; 1935 list_for_each_entry_safe(li, tmp, &cfile->llist->locks, llist) { 1936 if (flock->fl_start > li->offset || 1937 (flock->fl_start + length) < 1938 (li->offset + li->length)) 1939 continue; 1940 if (current->tgid != li->pid) 1941 continue; 1942 if (types[i] != li->type) 1943 continue; 1944 if (cinode->can_cache_brlcks) { 1945 /* 1946 * We can cache brlock requests - simply remove 1947 * a lock from the file's list. 1948 */ 1949 list_del(&li->llist); 1950 cifs_del_lock_waiters(li); 1951 kfree(li); 1952 continue; 1953 } 1954 cur->Pid = cpu_to_le16(li->pid); 1955 cur->LengthLow = cpu_to_le32((u32)li->length); 1956 cur->LengthHigh = cpu_to_le32((u32)(li->length>>32)); 1957 cur->OffsetLow = cpu_to_le32((u32)li->offset); 1958 cur->OffsetHigh = cpu_to_le32((u32)(li->offset>>32)); 1959 /* 1960 * We need to save a lock here to let us add it again to 1961 * the file's list if the unlock range request fails on 1962 * the server. 1963 */ 1964 list_move(&li->llist, &tmp_llist); 1965 if (++num == max_num) { 1966 stored_rc = cifs_lockv(xid, tcon, 1967 cfile->fid.netfid, 1968 li->type, num, 0, buf); 1969 if (stored_rc) { 1970 /* 1971 * We failed on the unlock range 1972 * request - add all locks from the tmp 1973 * list to the head of the file's list. 1974 */ 1975 cifs_move_llist(&tmp_llist, 1976 &cfile->llist->locks); 1977 rc = stored_rc; 1978 } else 1979 /* 1980 * The unlock range request succeed - 1981 * free the tmp list. 
1982 */ 1983 cifs_free_llist(&tmp_llist); 1984 cur = buf; 1985 num = 0; 1986 } else 1987 cur++; 1988 }
1989 if (num) { 1990 stored_rc = cifs_lockv(xid, tcon, cfile->fid.netfid, 1991 types[i], num, 0, buf); 1992 if (stored_rc) { 1993 cifs_move_llist(&tmp_llist, 1994 &cfile->llist->locks); 1995 rc = stored_rc; 1996 } else 1997 cifs_free_llist(&tmp_llist); 1998 } 1999 } 2000
2001 up_write(&cinode->lock_sem); 2002 kfree(buf); 2003 return rc; 2004 } 2005 #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */ 2006
2007 static int 2008 cifs_setlk(struct file *file, struct file_lock *flock, __u32 type, 2009 bool wait_flag, bool posix_lck, int lock, int unlock, 2010 unsigned int xid) 2011 {
2012 int rc = 0; 2013 __u64 length = cifs_flock_len(flock); 2014 struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data; 2015 struct cifs_tcon *tcon = tlink_tcon(cfile->tlink); 2016 struct TCP_Server_Info *server = tcon->ses->server; 2017 struct inode *inode = d_inode(cfile->dentry); 2018
2019 #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY 2020 if (posix_lck) { 2021 int posix_lock_type; 2022
2023 rc = cifs_posix_lock_set(file, flock); 2024 if (rc <= FILE_LOCK_DEFERRED) 2025 return rc; 2026
2027 if (type & server->vals->shared_lock_type) 2028 posix_lock_type = CIFS_RDLCK; 2029 else 2030 posix_lock_type = CIFS_WRLCK; 2031
2032 if (unlock == 1) 2033 posix_lock_type = CIFS_UNLCK; 2034
2035 rc = CIFSSMBPosixLock(xid, tcon, cfile->fid.netfid, 2036 hash_lockowner(flock->fl_owner), 2037 flock->fl_start, length, 2038 NULL, posix_lock_type, wait_flag); 2039 goto out; 2040 } 2041 #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
2042 if (lock) { 2043 struct cifsLockInfo *lock; 2044
2045 lock = cifs_lock_init(flock->fl_start, length, type, 2046 flock->fl_flags); 2047 if (!lock) 2048 return -ENOMEM; 2049
2050 rc = cifs_lock_add_if(cfile, lock, wait_flag); 2051 if (rc < 0) { 2052 kfree(lock); 2053 return rc; 2054 } 2055 if (!rc) 2056 goto out; 2057
2058 /* 2059 * Windows 7 server can delay breaking lease from read to None 2060 * if we set a byte-range lock on a file - break it explicitly 2061 * before sending the lock to the server to be sure the next 2062 * read won't conflict with non-overlapping locks due to 2063 * page-granularity reads. 2064 */
2065 if (!CIFS_CACHE_WRITE(CIFS_I(inode)) && 2066 CIFS_CACHE_READ(CIFS_I(inode))) { 2067 cifs_zap_mapping(inode); 2068 cifs_dbg(FYI, "Set no oplock for inode=%p due to mand locks\n", 2069 inode); 2070 CIFS_I(inode)->oplock = 0; 2071 } 2072
2073 rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length, 2074 type, 1, 0, wait_flag); 2075 if (rc) { 2076 kfree(lock); 2077 return rc; 2078 } 2079
2080 cifs_lock_add(cfile, lock); 2081 } else if (unlock) 2082 rc = server->ops->mand_unlock_range(cfile, flock, xid); 2083
2084 out: 2085 if ((flock->fl_flags & FL_POSIX) || (flock->fl_flags & FL_FLOCK)) { 2086 /* 2087 * If this is a request to remove all locks because we 2088 * are closing the file, it doesn't matter if the 2089 * unlocking failed as both cifs.ko and the SMB server 2090 * remove the lock on file close 2091 */ 2092 if (rc) { 2093 cifs_dbg(VFS, "%s failed rc=%d\n", __func__, rc); 2094 if (!(flock->fl_flags & FL_CLOSE)) 2095 return rc; 2096 } 2097 rc = locks_lock_file_wait(file, flock); 2098 } 2099 return rc; 2100 } 2101
2102 int cifs_flock(struct file *file, int cmd, struct file_lock *fl) 2103 { 2104 int rc, xid; 2105 int lock = 0, unlock = 0; 2106 bool wait_flag = false; 2107 bool posix_lck = false; 2108 struct cifs_sb_info *cifs_sb; 2109 struct cifs_tcon *tcon; 2110 struct cifsFileInfo *cfile; 2111 __u32 type; 2112
2113 xid = get_xid(); 2114
2115 if (!(fl->fl_flags & FL_FLOCK)) { 2116 rc = -ENOLCK; 2117 free_xid(xid); 2118 return rc; 2119 } 2120
2121 cfile = (struct cifsFileInfo *)file->private_data; 2122 tcon = tlink_tcon(cfile->tlink); 2123
2124 cifs_read_flock(fl, &type, &lock, &unlock, &wait_flag, 2125 tcon->ses->server); 2126 cifs_sb = CIFS_FILE_SB(file); 2127
2128 if (cap_unix(tcon->ses) && 2129 (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) && 2130 ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0)) 2131 posix_lck = true; 2132
2133 if (!lock && !unlock) { 2134 /* 2135 * if no lock or unlock then nothing to do since we do not 2136 * know what it is 2137 */ 2138 rc = -EOPNOTSUPP; 2139 free_xid(xid); 2140 return rc; 2141 } 2142
2143 rc = cifs_setlk(file, fl, type, wait_flag, posix_lck, lock, unlock, 2144 xid); 2145 free_xid(xid); 2146 return rc; 2147 2148 2149 } 2150
2151 int cifs_lock(struct file *file, int cmd, struct file_lock *flock) 2152 { 2153 int rc, xid; 2154 int lock = 0, unlock = 0; 2155 bool wait_flag = false; 2156 bool posix_lck = false; 2157 struct cifs_sb_info *cifs_sb; 2158 struct cifs_tcon *tcon; 2159 struct cifsFileInfo *cfile; 2160 __u32 type; 2161
2162 rc = -EACCES; 2163 xid = get_xid(); 2164
2165 cifs_dbg(FYI, "%s: %pD2 cmd=0x%x flags=0x%x type=0x%x r=%lld:%lld\n", __func__, file, cmd, 2166 flock->fl_flags, flock->fl_type, (long long)flock->fl_start, 2167 (long long)flock->fl_end); 2168
2169 cfile = (struct cifsFileInfo *)file->private_data; 2170 tcon = tlink_tcon(cfile->tlink); 2171
2172 cifs_read_flock(flock, &type, &lock, &unlock, &wait_flag, 2173 tcon->ses->server); 2174 cifs_sb = CIFS_FILE_SB(file); 2175 set_bit(CIFS_INO_CLOSE_ON_LOCK, &CIFS_I(d_inode(cfile->dentry))->flags); 2176
2177 if (cap_unix(tcon->ses) && 2178 (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) && 2179 ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0)) 2180 posix_lck = true; 2181 /* 2182 * BB add code here to normalize offset and length to account for 2183 * negative length which we cannot accept over the wire.
2184 */ 2185 if (IS_GETLK(cmd)) { 2186 rc = cifs_getlk(file, flock, type, wait_flag, posix_lck, xid); 2187 free_xid(xid); 2188 return rc; 2189 } 2190 2191 if (!lock && !unlock) { 2192 /* 2193 * if no lock or unlock then nothing to do since we do not 2194 * know what it is 2195 */ 2196 free_xid(xid); 2197 return -EOPNOTSUPP; 2198 } 2199 2200 rc = cifs_setlk(file, flock, type, wait_flag, posix_lck, lock, unlock, 2201 xid); 2202 free_xid(xid); 2203 return rc; 2204 } 2205 2206 /* 2207 * update the file size (if needed) after a write. Should be called with 2208 * the inode->i_lock held 2209 */ 2210 void 2211 cifs_update_eof(struct cifsInodeInfo *cifsi, loff_t offset, 2212 unsigned int bytes_written) 2213 { 2214 loff_t end_of_write = offset + bytes_written; 2215 2216 if (end_of_write > cifsi->server_eof) 2217 cifsi->server_eof = end_of_write; 2218 } 2219 2220 static ssize_t 2221 cifs_write(struct cifsFileInfo *open_file, __u32 pid, const char *write_data, 2222 size_t write_size, loff_t *offset) 2223 { 2224 int rc = 0; 2225 unsigned int bytes_written = 0; 2226 unsigned int total_written; 2227 struct cifs_tcon *tcon; 2228 struct TCP_Server_Info *server; 2229 unsigned int xid; 2230 struct dentry *dentry = open_file->dentry; 2231 struct cifsInodeInfo *cifsi = CIFS_I(d_inode(dentry)); 2232 struct cifs_io_parms io_parms = {0}; 2233 2234 cifs_dbg(FYI, "write %zd bytes to offset %lld of %pd\n", 2235 write_size, *offset, dentry); 2236 2237 tcon = tlink_tcon(open_file->tlink); 2238 server = tcon->ses->server; 2239 2240 if (!server->ops->sync_write) 2241 return -ENOSYS; 2242 2243 xid = get_xid(); 2244 2245 for (total_written = 0; write_size > total_written; 2246 total_written += bytes_written) { 2247 rc = -EAGAIN; 2248 while (rc == -EAGAIN) { 2249 struct kvec iov[2]; 2250 unsigned int len; 2251 2252 if (open_file->invalidHandle) { 2253 /* we could deadlock if we called 2254 filemap_fdatawait from here so tell 2255 reopen_file not to flush data to 2256 server now */ 2257 rc = cifs_reopen_file(open_file, false); 2258 if (rc != 0) 2259 break; 2260 } 2261 2262 len = min(server->ops->wp_retry_size(d_inode(dentry)), 2263 (unsigned int)write_size - total_written); 2264 /* iov[0] is reserved for smb header */ 2265 iov[1].iov_base = (char *)write_data + total_written; 2266 iov[1].iov_len = len; 2267 io_parms.pid = pid; 2268 io_parms.tcon = tcon; 2269 io_parms.offset = *offset; 2270 io_parms.length = len; 2271 rc = server->ops->sync_write(xid, &open_file->fid, 2272 &io_parms, &bytes_written, iov, 1); 2273 } 2274 if (rc || (bytes_written == 0)) { 2275 if (total_written) 2276 break; 2277 else { 2278 free_xid(xid); 2279 return rc; 2280 } 2281 } else { 2282 spin_lock(&d_inode(dentry)->i_lock); 2283 cifs_update_eof(cifsi, *offset, bytes_written); 2284 spin_unlock(&d_inode(dentry)->i_lock); 2285 *offset += bytes_written; 2286 } 2287 } 2288 2289 cifs_stats_bytes_written(tcon, total_written); 2290 2291 if (total_written > 0) { 2292 spin_lock(&d_inode(dentry)->i_lock); 2293 if (*offset > d_inode(dentry)->i_size) { 2294 i_size_write(d_inode(dentry), *offset); 2295 d_inode(dentry)->i_blocks = (512 - 1 + *offset) >> 9; 2296 } 2297 spin_unlock(&d_inode(dentry)->i_lock); 2298 } 2299 mark_inode_dirty_sync(d_inode(dentry)); 2300 free_xid(xid); 2301 return total_written; 2302 } 2303 2304 struct cifsFileInfo *find_readable_file(struct cifsInodeInfo *cifs_inode, 2305 bool fsuid_only) 2306 { 2307 struct cifsFileInfo *open_file = NULL; 2308 struct cifs_sb_info *cifs_sb = CIFS_SB(cifs_inode->netfs.inode.i_sb); 2309 2310 /* only filter by 
fsuid on multiuser mounts */ 2311 if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER)) 2312 fsuid_only = false; 2313
2314 spin_lock(&cifs_inode->open_file_lock); 2315 /* we could simply get the first_list_entry since write-only entries 2316 are always at the end of the list but since the first entry might 2317 have a close pending, we go through the whole list */ 2318 list_for_each_entry(open_file, &cifs_inode->openFileList, flist) { 2319 if (fsuid_only && !uid_eq(open_file->uid, current_fsuid())) 2320 continue; 2321 if (OPEN_FMODE(open_file->f_flags) & FMODE_READ) { 2322 if (!open_file->invalidHandle) { 2323 /* found a good file */ 2324 /* lock it so it will not be closed on us */ 2325 cifsFileInfo_get(open_file); 2326 spin_unlock(&cifs_inode->open_file_lock); 2327 return open_file; 2328 } /* else might as well continue, and look for 2329 another, or simply have the caller reopen it 2330 again rather than trying to fix this handle */ 2331 } else /* write only file */ 2332 break; /* write only files are last so must be done */ 2333 } 2334 spin_unlock(&cifs_inode->open_file_lock); 2335 return NULL; 2336 } 2337
2338 /* Return -EBADF if no handle is found and general rc otherwise */ 2339 int 2340 cifs_get_writable_file(struct cifsInodeInfo *cifs_inode, int flags, 2341 struct cifsFileInfo **ret_file) 2342 { 2343 struct cifsFileInfo *open_file, *inv_file = NULL; 2344 struct cifs_sb_info *cifs_sb; 2345 bool any_available = false; 2346 int rc = -EBADF; 2347 unsigned int refind = 0; 2348 bool fsuid_only = flags & FIND_WR_FSUID_ONLY; 2349 bool with_delete = flags & FIND_WR_WITH_DELETE; 2350 *ret_file = NULL; 2351
2352 /* 2353 * Having a null inode here (because mapping->host was set to zero by 2354 * the VFS or MM) should not happen but we had reports of an oops (due 2355 * to it being zero) during stress testcases, so we need to check for it 2356 */ 2357
2358 if (cifs_inode == NULL) { 2359 cifs_dbg(VFS, "Null inode passed to cifs_get_writable_file\n"); 2360 dump_stack(); 2361 return rc; 2362 } 2363
2364 cifs_sb = CIFS_SB(cifs_inode->netfs.inode.i_sb); 2365
2366 /* only filter by fsuid on multiuser mounts */ 2367 if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER)) 2368 fsuid_only = false; 2369
2370 spin_lock(&cifs_inode->open_file_lock); 2371 refind_writable: 2372 if (refind > MAX_REOPEN_ATT) { 2373 spin_unlock(&cifs_inode->open_file_lock); 2374 return rc; 2375 } 2376 list_for_each_entry(open_file, &cifs_inode->openFileList, flist) { 2377 if (!any_available && open_file->pid != current->tgid) 2378 continue; 2379 if (fsuid_only && !uid_eq(open_file->uid, current_fsuid())) 2380 continue; 2381 if (with_delete && !(open_file->fid.access & DELETE)) 2382 continue; 2383 if (OPEN_FMODE(open_file->f_flags) & FMODE_WRITE) { 2384 if (!open_file->invalidHandle) { 2385 /* found a good writable file */ 2386 cifsFileInfo_get(open_file); 2387 spin_unlock(&cifs_inode->open_file_lock); 2388 *ret_file = open_file; 2389 return 0; 2390 } else { 2391 if (!inv_file) 2392 inv_file = open_file; 2393 } 2394 } 2395 } 2396 /* couldn't find usable FH with same pid, try any available */ 2397 if (!any_available) { 2398 any_available = true; 2399 goto refind_writable; 2400 } 2401
2402 if (inv_file) { 2403 any_available = false; 2404 cifsFileInfo_get(inv_file); 2405 } 2406
2407 spin_unlock(&cifs_inode->open_file_lock); 2408
2409 if (inv_file) { 2410 rc = cifs_reopen_file(inv_file, false); 2411 if (!rc) { 2412 *ret_file = inv_file; 2413 return 0; 2414 } 2415
2416 spin_lock(&cifs_inode->open_file_lock); 2417
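/* Reopen of the stale handle failed: push it to the tail of the list so the
 * rescan prefers other handles, drop our extra reference and retry the
 * search (bounded by MAX_REOPEN_ATT). */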
list_move_tail(&inv_file->flist, &cifs_inode->openFileList); 2418 spin_unlock(&cifs_inode->open_file_lock); 2419 cifsFileInfo_put(inv_file); 2420 ++refind; 2421 inv_file = NULL; 2422 spin_lock(&cifs_inode->open_file_lock); 2423 goto refind_writable; 2424 } 2425 2426 return rc; 2427 } 2428 2429 struct cifsFileInfo * 2430 find_writable_file(struct cifsInodeInfo *cifs_inode, int flags) 2431 { 2432 struct cifsFileInfo *cfile; 2433 int rc; 2434 2435 rc = cifs_get_writable_file(cifs_inode, flags, &cfile); 2436 if (rc) 2437 cifs_dbg(FYI, "Couldn't find writable handle rc=%d\n", rc); 2438 2439 return cfile; 2440 } 2441 2442 int 2443 cifs_get_writable_path(struct cifs_tcon *tcon, const char *name, 2444 int flags, 2445 struct cifsFileInfo **ret_file) 2446 { 2447 struct cifsFileInfo *cfile; 2448 void *page = alloc_dentry_path(); 2449 2450 *ret_file = NULL; 2451 2452 spin_lock(&tcon->open_file_lock); 2453 list_for_each_entry(cfile, &tcon->openFileList, tlist) { 2454 struct cifsInodeInfo *cinode; 2455 const char *full_path = build_path_from_dentry(cfile->dentry, page); 2456 if (IS_ERR(full_path)) { 2457 spin_unlock(&tcon->open_file_lock); 2458 free_dentry_path(page); 2459 return PTR_ERR(full_path); 2460 } 2461 if (strcmp(full_path, name)) 2462 continue; 2463 2464 cinode = CIFS_I(d_inode(cfile->dentry)); 2465 spin_unlock(&tcon->open_file_lock); 2466 free_dentry_path(page); 2467 return cifs_get_writable_file(cinode, flags, ret_file); 2468 } 2469 2470 spin_unlock(&tcon->open_file_lock); 2471 free_dentry_path(page); 2472 return -ENOENT; 2473 } 2474 2475 int 2476 cifs_get_readable_path(struct cifs_tcon *tcon, const char *name, 2477 struct cifsFileInfo **ret_file) 2478 { 2479 struct cifsFileInfo *cfile; 2480 void *page = alloc_dentry_path(); 2481 2482 *ret_file = NULL; 2483 2484 spin_lock(&tcon->open_file_lock); 2485 list_for_each_entry(cfile, &tcon->openFileList, tlist) { 2486 struct cifsInodeInfo *cinode; 2487 const char *full_path = build_path_from_dentry(cfile->dentry, page); 2488 if (IS_ERR(full_path)) { 2489 spin_unlock(&tcon->open_file_lock); 2490 free_dentry_path(page); 2491 return PTR_ERR(full_path); 2492 } 2493 if (strcmp(full_path, name)) 2494 continue; 2495 2496 cinode = CIFS_I(d_inode(cfile->dentry)); 2497 spin_unlock(&tcon->open_file_lock); 2498 free_dentry_path(page); 2499 *ret_file = find_readable_file(cinode, 0); 2500 return *ret_file ? 0 : -ENOENT; 2501 } 2502 2503 spin_unlock(&tcon->open_file_lock); 2504 free_dentry_path(page); 2505 return -ENOENT; 2506 } 2507 2508 void 2509 cifs_writedata_release(struct kref *refcount) 2510 { 2511 struct cifs_writedata *wdata = container_of(refcount, 2512 struct cifs_writedata, refcount); 2513 #ifdef CONFIG_CIFS_SMB_DIRECT 2514 if (wdata->mr) { 2515 smbd_deregister_mr(wdata->mr); 2516 wdata->mr = NULL; 2517 } 2518 #endif 2519 2520 if (wdata->cfile) 2521 cifsFileInfo_put(wdata->cfile); 2522 2523 kfree(wdata); 2524 } 2525 2526 /* 2527 * Write failed with a retryable error. Resend the write request. It's also 2528 * possible that the page was redirtied so re-clean the page. 
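 * A rough sketch of the resend splitting done below: each resent chunk is
 * min(round_down(wsize, PAGE_SIZE), rest_len) bytes, where wsize is the
 * server's wp_retry_size(), so chunks stay page-aligned except possibly
 * the final one.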
2529 */ 2530 static void 2531 cifs_writev_requeue(struct cifs_writedata *wdata) 2532 { 2533 int rc = 0; 2534 struct inode *inode = d_inode(wdata->cfile->dentry); 2535 struct TCP_Server_Info *server; 2536 unsigned int rest_len = wdata->bytes; 2537 loff_t fpos = wdata->offset; 2538 2539 server = tlink_tcon(wdata->cfile->tlink)->ses->server; 2540 do { 2541 struct cifs_writedata *wdata2; 2542 unsigned int wsize, cur_len; 2543 2544 wsize = server->ops->wp_retry_size(inode); 2545 if (wsize < rest_len) { 2546 if (wsize < PAGE_SIZE) { 2547 rc = -EOPNOTSUPP; 2548 break; 2549 } 2550 cur_len = min(round_down(wsize, PAGE_SIZE), rest_len); 2551 } else { 2552 cur_len = rest_len; 2553 } 2554 2555 wdata2 = cifs_writedata_alloc(cifs_writev_complete); 2556 if (!wdata2) { 2557 rc = -ENOMEM; 2558 break; 2559 } 2560 2561 wdata2->sync_mode = wdata->sync_mode; 2562 wdata2->offset = fpos; 2563 wdata2->bytes = cur_len; 2564 wdata2->iter = wdata->iter; 2565 2566 iov_iter_advance(&wdata2->iter, fpos - wdata->offset); 2567 iov_iter_truncate(&wdata2->iter, wdata2->bytes); 2568 2569 if (iov_iter_is_xarray(&wdata2->iter)) 2570 /* Check for pages having been redirtied and clean 2571 * them. We can do this by walking the xarray. If 2572 * it's not an xarray, then it's a DIO and we shouldn't 2573 * be mucking around with the page bits. 2574 */ 2575 cifs_undirty_folios(inode, fpos, cur_len); 2576 2577 rc = cifs_get_writable_file(CIFS_I(inode), FIND_WR_ANY, 2578 &wdata2->cfile); 2579 if (!wdata2->cfile) { 2580 cifs_dbg(VFS, "No writable handle to retry writepages rc=%d\n", 2581 rc); 2582 if (!is_retryable_error(rc)) 2583 rc = -EBADF; 2584 } else { 2585 wdata2->pid = wdata2->cfile->pid; 2586 rc = server->ops->async_writev(wdata2, 2587 cifs_writedata_release); 2588 } 2589 2590 kref_put(&wdata2->refcount, cifs_writedata_release); 2591 if (rc) { 2592 if (is_retryable_error(rc)) 2593 continue; 2594 fpos += cur_len; 2595 rest_len -= cur_len; 2596 break; 2597 } 2598 2599 fpos += cur_len; 2600 rest_len -= cur_len; 2601 } while (rest_len > 0); 2602 2603 /* Clean up remaining pages from the original wdata */ 2604 if (iov_iter_is_xarray(&wdata->iter)) 2605 cifs_pages_write_failed(inode, fpos, rest_len); 2606 2607 if (rc != 0 && !is_retryable_error(rc)) 2608 mapping_set_error(inode->i_mapping, rc); 2609 kref_put(&wdata->refcount, cifs_writedata_release); 2610 } 2611 2612 void 2613 cifs_writev_complete(struct work_struct *work) 2614 { 2615 struct cifs_writedata *wdata = container_of(work, 2616 struct cifs_writedata, work); 2617 struct inode *inode = d_inode(wdata->cfile->dentry); 2618 2619 if (wdata->result == 0) { 2620 spin_lock(&inode->i_lock); 2621 cifs_update_eof(CIFS_I(inode), wdata->offset, wdata->bytes); 2622 spin_unlock(&inode->i_lock); 2623 cifs_stats_bytes_written(tlink_tcon(wdata->cfile->tlink), 2624 wdata->bytes); 2625 } else if (wdata->sync_mode == WB_SYNC_ALL && wdata->result == -EAGAIN) 2626 return cifs_writev_requeue(wdata); 2627 2628 if (wdata->result == -EAGAIN) 2629 cifs_pages_write_redirty(inode, wdata->offset, wdata->bytes); 2630 else if (wdata->result < 0) 2631 cifs_pages_write_failed(inode, wdata->offset, wdata->bytes); 2632 else 2633 cifs_pages_written_back(inode, wdata->offset, wdata->bytes); 2634 2635 if (wdata->result != -EAGAIN) 2636 mapping_set_error(inode->i_mapping, wdata->result); 2637 kref_put(&wdata->refcount, cifs_writedata_release); 2638 } 2639 2640 struct cifs_writedata *cifs_writedata_alloc(work_func_t complete) 2641 { 2642 struct cifs_writedata *wdata; 2643 2644 wdata = kzalloc(sizeof(*wdata), 
GFP_NOFS); 2645 if (wdata != NULL) { 2646 kref_init(&wdata->refcount); 2647 INIT_LIST_HEAD(&wdata->list); 2648 init_completion(&wdata->done); 2649 INIT_WORK(&wdata->work, complete); 2650 } 2651 return wdata; 2652 } 2653 2654 static int cifs_partialpagewrite(struct page *page, unsigned from, unsigned to) 2655 { 2656 struct address_space *mapping = page->mapping; 2657 loff_t offset = (loff_t)page->index << PAGE_SHIFT; 2658 char *write_data; 2659 int rc = -EFAULT; 2660 int bytes_written = 0; 2661 struct inode *inode; 2662 struct cifsFileInfo *open_file; 2663 2664 if (!mapping || !mapping->host) 2665 return -EFAULT; 2666 2667 inode = page->mapping->host; 2668 2669 offset += (loff_t)from; 2670 write_data = kmap(page); 2671 write_data += from; 2672 2673 if ((to > PAGE_SIZE) || (from > to)) { 2674 kunmap(page); 2675 return -EIO; 2676 } 2677 2678 /* racing with truncate? */ 2679 if (offset > mapping->host->i_size) { 2680 kunmap(page); 2681 return 0; /* don't care */ 2682 } 2683 2684 /* check to make sure that we are not extending the file */ 2685 if (mapping->host->i_size - offset < (loff_t)to) 2686 to = (unsigned)(mapping->host->i_size - offset); 2687 2688 rc = cifs_get_writable_file(CIFS_I(mapping->host), FIND_WR_ANY, 2689 &open_file); 2690 if (!rc) { 2691 bytes_written = cifs_write(open_file, open_file->pid, 2692 write_data, to - from, &offset); 2693 cifsFileInfo_put(open_file); 2694 /* Does mm or vfs already set times? */ 2695 simple_inode_init_ts(inode); 2696 if ((bytes_written > 0) && (offset)) 2697 rc = 0; 2698 else if (bytes_written < 0) 2699 rc = bytes_written; 2700 else 2701 rc = -EFAULT; 2702 } else { 2703 cifs_dbg(FYI, "No writable handle for write page rc=%d\n", rc); 2704 if (!is_retryable_error(rc)) 2705 rc = -EIO; 2706 } 2707 2708 kunmap(page); 2709 return rc; 2710 } 2711 2712 /* 2713 * Extend the region to be written back to include subsequent contiguously 2714 * dirty pages if possible, but don't sleep while doing so. 2715 */ 2716 static void cifs_extend_writeback(struct address_space *mapping, 2717 struct xa_state *xas, 2718 long *_count, 2719 loff_t start, 2720 int max_pages, 2721 loff_t max_len, 2722 size_t *_len) 2723 { 2724 struct folio_batch batch; 2725 struct folio *folio; 2726 unsigned int nr_pages; 2727 pgoff_t index = (start + *_len) / PAGE_SIZE; 2728 size_t len; 2729 bool stop = true; 2730 unsigned int i; 2731 2732 folio_batch_init(&batch); 2733 2734 do { 2735 /* Firstly, we gather up a batch of contiguous dirty pages 2736 * under the RCU read lock - but we can't clear the dirty flags 2737 * there if any of those pages are mapped. 2738 */ 2739 rcu_read_lock(); 2740 2741 xas_for_each(xas, folio, ULONG_MAX) { 2742 stop = true; 2743 if (xas_retry(xas, folio)) 2744 continue; 2745 if (xa_is_value(folio)) 2746 break; 2747 if (folio->index != index) { 2748 xas_reset(xas); 2749 break; 2750 } 2751 2752 if (!folio_try_get(folio)) { 2753 xas_reset(xas); 2754 continue; 2755 } 2756 nr_pages = folio_nr_pages(folio); 2757 if (nr_pages > max_pages) { 2758 xas_reset(xas); 2759 break; 2760 } 2761 2762 /* Has the page moved or been split? 
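 * (We only hold a speculative reference taken under RCU here; xas_reload()
 * confirms the folio is still the entry at this index, otherwise we drop
 * the reference and stop extending.)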
*/ 2763 if (unlikely(folio != xas_reload(xas))) { 2764 folio_put(folio); 2765 xas_reset(xas); 2766 break; 2767 } 2768 2769 if (!folio_trylock(folio)) { 2770 folio_put(folio); 2771 xas_reset(xas); 2772 break; 2773 } 2774 if (!folio_test_dirty(folio) || 2775 folio_test_writeback(folio)) { 2776 folio_unlock(folio); 2777 folio_put(folio); 2778 xas_reset(xas); 2779 break; 2780 } 2781 2782 max_pages -= nr_pages; 2783 len = folio_size(folio); 2784 stop = false; 2785 2786 index += nr_pages; 2787 *_count -= nr_pages; 2788 *_len += len; 2789 if (max_pages <= 0 || *_len >= max_len || *_count <= 0) 2790 stop = true; 2791 2792 if (!folio_batch_add(&batch, folio)) 2793 break; 2794 if (stop) 2795 break; 2796 } 2797 2798 xas_pause(xas); 2799 rcu_read_unlock(); 2800 2801 /* Now, if we obtained any pages, we can shift them to being 2802 * writable and mark them for caching. 2803 */ 2804 if (!folio_batch_count(&batch)) 2805 break; 2806 2807 for (i = 0; i < folio_batch_count(&batch); i++) { 2808 folio = batch.folios[i]; 2809 /* The folio should be locked, dirty and not undergoing 2810 * writeback from the loop above. 2811 */ 2812 if (!folio_clear_dirty_for_io(folio)) 2813 WARN_ON(1); 2814 folio_start_writeback(folio); 2815 folio_unlock(folio); 2816 } 2817 2818 folio_batch_release(&batch); 2819 cond_resched(); 2820 } while (!stop); 2821 } 2822 2823 /* 2824 * Write back the locked page and any subsequent non-locked dirty pages. 2825 */ 2826 static ssize_t cifs_write_back_from_locked_folio(struct address_space *mapping, 2827 struct writeback_control *wbc, 2828 struct xa_state *xas, 2829 struct folio *folio, 2830 unsigned long long start, 2831 unsigned long long end) 2832 { 2833 struct inode *inode = mapping->host; 2834 struct TCP_Server_Info *server; 2835 struct cifs_writedata *wdata; 2836 struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb); 2837 struct cifs_credits credits_on_stack; 2838 struct cifs_credits *credits = &credits_on_stack; 2839 struct cifsFileInfo *cfile = NULL; 2840 unsigned long long i_size = i_size_read(inode), max_len; 2841 unsigned int xid, wsize; 2842 size_t len = folio_size(folio); 2843 long count = wbc->nr_to_write; 2844 int rc; 2845 2846 /* The folio should be locked, dirty and not undergoing writeback. */ 2847 if (!folio_clear_dirty_for_io(folio)) 2848 WARN_ON_ONCE(1); 2849 folio_start_writeback(folio); 2850 2851 count -= folio_nr_pages(folio); 2852 2853 xid = get_xid(); 2854 server = cifs_pick_channel(cifs_sb_master_tcon(cifs_sb)->ses); 2855 2856 rc = cifs_get_writable_file(CIFS_I(inode), FIND_WR_ANY, &cfile); 2857 if (rc) { 2858 cifs_dbg(VFS, "No writable handle in writepages rc=%d\n", rc); 2859 goto err_xid; 2860 } 2861 2862 rc = server->ops->wait_mtu_credits(server, cifs_sb->ctx->wsize, 2863 &wsize, credits); 2864 if (rc != 0) 2865 goto err_close; 2866 2867 wdata = cifs_writedata_alloc(cifs_writev_complete); 2868 if (!wdata) { 2869 rc = -ENOMEM; 2870 goto err_uncredit; 2871 } 2872 2873 wdata->sync_mode = wbc->sync_mode; 2874 wdata->offset = folio_pos(folio); 2875 wdata->pid = cfile->pid; 2876 wdata->credits = credits_on_stack; 2877 wdata->cfile = cfile; 2878 wdata->server = server; 2879 cfile = NULL; 2880 2881 /* Find all consecutive lockable dirty pages that have contiguous 2882 * written regions, stopping when we find a page that is not 2883 * immediately lockable, is not dirty or is missing, or we reach the 2884 * end of the range. 2885 */ 2886 if (start < i_size) { 2887 /* Trim the write to the EOF; the extra data is ignored. 
Also 2888 * put an upper limit on the size of a single storedata op. 2889 */ 2890 max_len = wsize; 2891 max_len = min_t(unsigned long long, max_len, end - start + 1); 2892 max_len = min_t(unsigned long long, max_len, i_size - start); 2893 2894 if (len < max_len) { 2895 int max_pages = INT_MAX; 2896 2897 #ifdef CONFIG_CIFS_SMB_DIRECT 2898 if (server->smbd_conn) 2899 max_pages = server->smbd_conn->max_frmr_depth; 2900 #endif 2901 max_pages -= folio_nr_pages(folio); 2902 2903 if (max_pages > 0) 2904 cifs_extend_writeback(mapping, xas, &count, start, 2905 max_pages, max_len, &len); 2906 } 2907 } 2908 len = min_t(unsigned long long, len, i_size - start); 2909 2910 /* We now have a contiguous set of dirty pages, each with writeback 2911 * set; the first page is still locked at this point, but all the rest 2912 * have been unlocked. 2913 */ 2914 folio_unlock(folio); 2915 wdata->bytes = len; 2916 2917 if (start < i_size) { 2918 iov_iter_xarray(&wdata->iter, ITER_SOURCE, &mapping->i_pages, 2919 start, len); 2920 2921 rc = adjust_credits(wdata->server, &wdata->credits, wdata->bytes); 2922 if (rc) 2923 goto err_wdata; 2924 2925 if (wdata->cfile->invalidHandle) 2926 rc = -EAGAIN; 2927 else 2928 rc = wdata->server->ops->async_writev(wdata, 2929 cifs_writedata_release); 2930 if (rc >= 0) { 2931 kref_put(&wdata->refcount, cifs_writedata_release); 2932 goto err_close; 2933 } 2934 } else { 2935 /* The dirty region was entirely beyond the EOF. */ 2936 cifs_pages_written_back(inode, start, len); 2937 rc = 0; 2938 } 2939 2940 err_wdata: 2941 kref_put(&wdata->refcount, cifs_writedata_release); 2942 err_uncredit: 2943 add_credits_and_wake_if(server, credits, 0); 2944 err_close: 2945 if (cfile) 2946 cifsFileInfo_put(cfile); 2947 err_xid: 2948 free_xid(xid); 2949 if (rc == 0) { 2950 wbc->nr_to_write = count; 2951 rc = len; 2952 } else if (is_retryable_error(rc)) { 2953 cifs_pages_write_redirty(inode, start, len); 2954 } else { 2955 cifs_pages_write_failed(inode, start, len); 2956 mapping_set_error(mapping, rc); 2957 } 2958 /* Indication to update ctime and mtime as close is deferred */ 2959 set_bit(CIFS_INO_MODIFIED_ATTR, &CIFS_I(inode)->flags); 2960 return rc; 2961 } 2962 2963 /* 2964 * write a region of pages back to the server 2965 */ 2966 static ssize_t cifs_writepages_begin(struct address_space *mapping, 2967 struct writeback_control *wbc, 2968 struct xa_state *xas, 2969 unsigned long long *_start, 2970 unsigned long long end) 2971 { 2972 struct folio *folio; 2973 unsigned long long start = *_start; 2974 ssize_t ret; 2975 int skips = 0; 2976 2977 search_again: 2978 /* Find the first dirty page. 
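 * (The loop below walks the PAGECACHE_TAG_DIRTY mark under the RCU read
 * lock, takes a speculative reference with folio_try_get() and re-checks
 * the slot with xas_reload() before pausing the walk.)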
*/ 2979 rcu_read_lock(); 2980 2981 for (;;) { 2982 folio = xas_find_marked(xas, end / PAGE_SIZE, PAGECACHE_TAG_DIRTY); 2983 if (xas_retry(xas, folio) || xa_is_value(folio)) 2984 continue; 2985 if (!folio) 2986 break; 2987 2988 if (!folio_try_get(folio)) { 2989 xas_reset(xas); 2990 continue; 2991 } 2992 2993 if (unlikely(folio != xas_reload(xas))) { 2994 folio_put(folio); 2995 xas_reset(xas); 2996 continue; 2997 } 2998 2999 xas_pause(xas); 3000 break; 3001 } 3002 rcu_read_unlock(); 3003 if (!folio) 3004 return 0; 3005 3006 start = folio_pos(folio); /* May regress with THPs */ 3007 3008 /* At this point we hold neither the i_pages lock nor the page lock: 3009 * the page may be truncated or invalidated (changing page->mapping to 3010 * NULL), or even swizzled back from swapper_space to tmpfs file 3011 * mapping 3012 */ 3013 lock_again: 3014 if (wbc->sync_mode != WB_SYNC_NONE) { 3015 ret = folio_lock_killable(folio); 3016 if (ret < 0) 3017 return ret; 3018 } else { 3019 if (!folio_trylock(folio)) 3020 goto search_again; 3021 } 3022 3023 if (folio->mapping != mapping || 3024 !folio_test_dirty(folio)) { 3025 start += folio_size(folio); 3026 folio_unlock(folio); 3027 goto search_again; 3028 } 3029 3030 if (folio_test_writeback(folio) || 3031 folio_test_fscache(folio)) { 3032 folio_unlock(folio); 3033 if (wbc->sync_mode != WB_SYNC_NONE) { 3034 folio_wait_writeback(folio); 3035 #ifdef CONFIG_CIFS_FSCACHE 3036 folio_wait_fscache(folio); 3037 #endif 3038 goto lock_again; 3039 } 3040 3041 start += folio_size(folio); 3042 if (wbc->sync_mode == WB_SYNC_NONE) { 3043 if (skips >= 5 || need_resched()) { 3044 ret = 0; 3045 goto out; 3046 } 3047 skips++; 3048 } 3049 goto search_again; 3050 } 3051 3052 ret = cifs_write_back_from_locked_folio(mapping, wbc, xas, folio, start, end); 3053 out: 3054 if (ret > 0) 3055 *_start = start + ret; 3056 return ret; 3057 } 3058 3059 /* 3060 * Write a region of pages back to the server 3061 */ 3062 static int cifs_writepages_region(struct address_space *mapping, 3063 struct writeback_control *wbc, 3064 unsigned long long *_start, 3065 unsigned long long end) 3066 { 3067 ssize_t ret; 3068 3069 XA_STATE(xas, &mapping->i_pages, *_start / PAGE_SIZE); 3070 3071 do { 3072 ret = cifs_writepages_begin(mapping, wbc, &xas, _start, end); 3073 if (ret > 0 && wbc->nr_to_write > 0) 3074 cond_resched(); 3075 } while (ret > 0 && wbc->nr_to_write > 0); 3076 3077 return ret > 0 ? 0 : ret; 3078 } 3079 3080 /* 3081 * Write some of the pending data back to the server 3082 */ 3083 static int cifs_writepages(struct address_space *mapping, 3084 struct writeback_control *wbc) 3085 { 3086 loff_t start, end; 3087 int ret; 3088 3089 /* We have to be careful as we can end up racing with setattr() 3090 * truncating the pagecache since the caller doesn't take a lock here 3091 * to prevent it. 
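 * For cyclic writeback we sweep from writeback_index to the end of the
 * file first and then, if nr_to_write still has quota left, wrap around
 * and sweep [0, writeback_index) before updating the index.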
3092 */ 3093 3094 if (wbc->range_cyclic && mapping->writeback_index) { 3095 start = mapping->writeback_index * PAGE_SIZE; 3096 ret = cifs_writepages_region(mapping, wbc, &start, LLONG_MAX); 3097 if (ret < 0) 3098 goto out; 3099 3100 if (wbc->nr_to_write <= 0) { 3101 mapping->writeback_index = start / PAGE_SIZE; 3102 goto out; 3103 } 3104 3105 start = 0; 3106 end = mapping->writeback_index * PAGE_SIZE; 3107 mapping->writeback_index = 0; 3108 ret = cifs_writepages_region(mapping, wbc, &start, end); 3109 if (ret == 0) 3110 mapping->writeback_index = start / PAGE_SIZE; 3111 } else if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX) { 3112 start = 0; 3113 ret = cifs_writepages_region(mapping, wbc, &start, LLONG_MAX); 3114 if (wbc->nr_to_write > 0 && ret == 0) 3115 mapping->writeback_index = start / PAGE_SIZE; 3116 } else { 3117 start = wbc->range_start; 3118 ret = cifs_writepages_region(mapping, wbc, &start, wbc->range_end); 3119 } 3120 3121 out: 3122 return ret; 3123 } 3124 3125 static int 3126 cifs_writepage_locked(struct page *page, struct writeback_control *wbc) 3127 { 3128 int rc; 3129 unsigned int xid; 3130 3131 xid = get_xid(); 3132 /* BB add check for wbc flags */ 3133 get_page(page); 3134 if (!PageUptodate(page)) 3135 cifs_dbg(FYI, "ppw - page not up to date\n"); 3136 3137 /* 3138 * Set the "writeback" flag, and clear "dirty" in the radix tree. 3139 * 3140 * A writepage() implementation always needs to do either this, 3141 * or re-dirty the page with "redirty_page_for_writepage()" in 3142 * the case of a failure. 3143 * 3144 * Just unlocking the page will cause the radix tree tag-bits 3145 * to fail to update with the state of the page correctly. 3146 */ 3147 set_page_writeback(page); 3148 retry_write: 3149 rc = cifs_partialpagewrite(page, 0, PAGE_SIZE); 3150 if (is_retryable_error(rc)) { 3151 if (wbc->sync_mode == WB_SYNC_ALL && rc == -EAGAIN) 3152 goto retry_write; 3153 redirty_page_for_writepage(wbc, page); 3154 } else if (rc != 0) { 3155 SetPageError(page); 3156 mapping_set_error(page->mapping, rc); 3157 } else { 3158 SetPageUptodate(page); 3159 } 3160 end_page_writeback(page); 3161 put_page(page); 3162 free_xid(xid); 3163 return rc; 3164 } 3165 3166 static int cifs_write_end(struct file *file, struct address_space *mapping, 3167 loff_t pos, unsigned len, unsigned copied, 3168 struct page *page, void *fsdata) 3169 { 3170 int rc; 3171 struct inode *inode = mapping->host; 3172 struct cifsFileInfo *cfile = file->private_data; 3173 struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb); 3174 struct folio *folio = page_folio(page); 3175 __u32 pid; 3176 3177 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD) 3178 pid = cfile->pid; 3179 else 3180 pid = current->tgid; 3181 3182 cifs_dbg(FYI, "write_end for page %p from pos %lld with %d bytes\n", 3183 page, pos, copied); 3184 3185 if (folio_test_checked(folio)) { 3186 if (copied == len) 3187 folio_mark_uptodate(folio); 3188 folio_clear_checked(folio); 3189 } else if (!folio_test_uptodate(folio) && copied == PAGE_SIZE) 3190 folio_mark_uptodate(folio); 3191 3192 if (!folio_test_uptodate(folio)) { 3193 char *page_data; 3194 unsigned offset = pos & (PAGE_SIZE - 1); 3195 unsigned int xid; 3196 3197 xid = get_xid(); 3198 /* this is probably better than directly calling 3199 partialpage_write since in this function the file handle is 3200 known which we might as well leverage */ 3201 /* BB check if anything else missing out of ppw 3202 such as updating last write time */ 3203 page_data = kmap(page); 3204 rc = cifs_write(cfile, pid, 
page_data + offset, copied, &pos); 3205 /* if (rc < 0) should we set writebehind rc? */ 3206 kunmap(page); 3207 3208 free_xid(xid); 3209 } else { 3210 rc = copied; 3211 pos += copied; 3212 set_page_dirty(page); 3213 } 3214 3215 if (rc > 0) { 3216 spin_lock(&inode->i_lock); 3217 if (pos > inode->i_size) { 3218 loff_t additional_blocks = (512 - 1 + copied) >> 9; 3219 3220 i_size_write(inode, pos); 3221 /* 3222 * Estimate new allocation size based on the amount written. 3223 * This will be updated from server on close (and on queryinfo) 3224 */ 3225 inode->i_blocks = min_t(blkcnt_t, (512 - 1 + pos) >> 9, 3226 inode->i_blocks + additional_blocks); 3227 } 3228 spin_unlock(&inode->i_lock); 3229 } 3230 3231 unlock_page(page); 3232 put_page(page); 3233 /* Indication to update ctime and mtime as close is deferred */ 3234 set_bit(CIFS_INO_MODIFIED_ATTR, &CIFS_I(inode)->flags); 3235 3236 return rc; 3237 } 3238 3239 int cifs_strict_fsync(struct file *file, loff_t start, loff_t end, 3240 int datasync) 3241 { 3242 unsigned int xid; 3243 int rc = 0; 3244 struct cifs_tcon *tcon; 3245 struct TCP_Server_Info *server; 3246 struct cifsFileInfo *smbfile = file->private_data; 3247 struct inode *inode = file_inode(file); 3248 struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb); 3249 3250 rc = file_write_and_wait_range(file, start, end); 3251 if (rc) { 3252 trace_cifs_fsync_err(inode->i_ino, rc); 3253 return rc; 3254 } 3255 3256 xid = get_xid(); 3257 3258 cifs_dbg(FYI, "Sync file - name: %pD datasync: 0x%x\n", 3259 file, datasync); 3260 3261 if (!CIFS_CACHE_READ(CIFS_I(inode))) { 3262 rc = cifs_zap_mapping(inode); 3263 if (rc) { 3264 cifs_dbg(FYI, "rc: %d during invalidate phase\n", rc); 3265 rc = 0; /* don't care about it in fsync */ 3266 } 3267 } 3268 3269 tcon = tlink_tcon(smbfile->tlink); 3270 if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC)) { 3271 server = tcon->ses->server; 3272 if (server->ops->flush == NULL) { 3273 rc = -ENOSYS; 3274 goto strict_fsync_exit; 3275 } 3276 3277 if ((OPEN_FMODE(smbfile->f_flags) & FMODE_WRITE) == 0) { 3278 smbfile = find_writable_file(CIFS_I(inode), FIND_WR_ANY); 3279 if (smbfile) { 3280 rc = server->ops->flush(xid, tcon, &smbfile->fid); 3281 cifsFileInfo_put(smbfile); 3282 } else 3283 cifs_dbg(FYI, "ignore fsync for file not open for write\n"); 3284 } else 3285 rc = server->ops->flush(xid, tcon, &smbfile->fid); 3286 } 3287 3288 strict_fsync_exit: 3289 free_xid(xid); 3290 return rc; 3291 } 3292 3293 int cifs_fsync(struct file *file, loff_t start, loff_t end, int datasync) 3294 { 3295 unsigned int xid; 3296 int rc = 0; 3297 struct cifs_tcon *tcon; 3298 struct TCP_Server_Info *server; 3299 struct cifsFileInfo *smbfile = file->private_data; 3300 struct inode *inode = file_inode(file); 3301 struct cifs_sb_info *cifs_sb = CIFS_FILE_SB(file); 3302 3303 rc = file_write_and_wait_range(file, start, end); 3304 if (rc) { 3305 trace_cifs_fsync_err(file_inode(file)->i_ino, rc); 3306 return rc; 3307 } 3308 3309 xid = get_xid(); 3310 3311 cifs_dbg(FYI, "Sync file - name: %pD datasync: 0x%x\n", 3312 file, datasync); 3313 3314 tcon = tlink_tcon(smbfile->tlink); 3315 if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC)) { 3316 server = tcon->ses->server; 3317 if (server->ops->flush == NULL) { 3318 rc = -ENOSYS; 3319 goto fsync_exit; 3320 } 3321 3322 if ((OPEN_FMODE(smbfile->f_flags) & FMODE_WRITE) == 0) { 3323 smbfile = find_writable_file(CIFS_I(inode), FIND_WR_ANY); 3324 if (smbfile) { 3325 rc = server->ops->flush(xid, tcon, &smbfile->fid); 3326 cifsFileInfo_put(smbfile); 3327 } else 3328 
cifs_dbg(FYI, "ignore fsync for file not open for write\n"); 3329 } else 3330 rc = server->ops->flush(xid, tcon, &smbfile->fid); 3331 } 3332 3333 fsync_exit: 3334 free_xid(xid); 3335 return rc; 3336 } 3337 3338 /* 3339 * As file closes, flush all cached write data for this inode checking 3340 * for write behind errors. 3341 */ 3342 int cifs_flush(struct file *file, fl_owner_t id) 3343 { 3344 struct inode *inode = file_inode(file); 3345 int rc = 0; 3346 3347 if (file->f_mode & FMODE_WRITE) 3348 rc = filemap_write_and_wait(inode->i_mapping); 3349 3350 cifs_dbg(FYI, "Flush inode %p file %p rc %d\n", inode, file, rc); 3351 if (rc) { 3352 /* get more nuanced writeback errors */ 3353 rc = filemap_check_wb_err(file->f_mapping, 0); 3354 trace_cifs_flush_err(inode->i_ino, rc); 3355 } 3356 return rc; 3357 } 3358 3359 static void 3360 cifs_uncached_writedata_release(struct kref *refcount) 3361 { 3362 struct cifs_writedata *wdata = container_of(refcount, 3363 struct cifs_writedata, refcount); 3364 3365 kref_put(&wdata->ctx->refcount, cifs_aio_ctx_release); 3366 cifs_writedata_release(refcount); 3367 } 3368 3369 static void collect_uncached_write_data(struct cifs_aio_ctx *ctx); 3370 3371 static void 3372 cifs_uncached_writev_complete(struct work_struct *work) 3373 { 3374 struct cifs_writedata *wdata = container_of(work, 3375 struct cifs_writedata, work); 3376 struct inode *inode = d_inode(wdata->cfile->dentry); 3377 struct cifsInodeInfo *cifsi = CIFS_I(inode); 3378 3379 spin_lock(&inode->i_lock); 3380 cifs_update_eof(cifsi, wdata->offset, wdata->bytes); 3381 if (cifsi->server_eof > inode->i_size) 3382 i_size_write(inode, cifsi->server_eof); 3383 spin_unlock(&inode->i_lock); 3384 3385 complete(&wdata->done); 3386 collect_uncached_write_data(wdata->ctx); 3387 /* the below call can possibly free the last ref to aio ctx */ 3388 kref_put(&wdata->refcount, cifs_uncached_writedata_release); 3389 } 3390 3391 static int 3392 cifs_resend_wdata(struct cifs_writedata *wdata, struct list_head *wdata_list, 3393 struct cifs_aio_ctx *ctx) 3394 { 3395 unsigned int wsize; 3396 struct cifs_credits credits; 3397 int rc; 3398 struct TCP_Server_Info *server = wdata->server; 3399 3400 do { 3401 if (wdata->cfile->invalidHandle) { 3402 rc = cifs_reopen_file(wdata->cfile, false); 3403 if (rc == -EAGAIN) 3404 continue; 3405 else if (rc) 3406 break; 3407 } 3408 3409 3410 /* 3411 * Wait for credits to resend this wdata. 
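 * (wait_mtu_credits() may grant fewer credits than wdata->bytes requires;
 * in that case the credits are handed back and we retry after a short
 * sleep.)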
3412 * Note: we are attempting to resend the whole wdata not in 3413 * segments 3414 */ 3415 do { 3416 rc = server->ops->wait_mtu_credits(server, wdata->bytes, 3417 &wsize, &credits); 3418 if (rc) 3419 goto fail; 3420 3421 if (wsize < wdata->bytes) { 3422 add_credits_and_wake_if(server, &credits, 0); 3423 msleep(1000); 3424 } 3425 } while (wsize < wdata->bytes); 3426 wdata->credits = credits; 3427 3428 rc = adjust_credits(server, &wdata->credits, wdata->bytes); 3429 3430 if (!rc) { 3431 if (wdata->cfile->invalidHandle) 3432 rc = -EAGAIN; 3433 else { 3434 wdata->replay = true; 3435 #ifdef CONFIG_CIFS_SMB_DIRECT 3436 if (wdata->mr) { 3437 wdata->mr->need_invalidate = true; 3438 smbd_deregister_mr(wdata->mr); 3439 wdata->mr = NULL; 3440 } 3441 #endif 3442 rc = server->ops->async_writev(wdata, 3443 cifs_uncached_writedata_release); 3444 } 3445 } 3446 3447 /* If the write was successfully sent, we are done */ 3448 if (!rc) { 3449 list_add_tail(&wdata->list, wdata_list); 3450 return 0; 3451 } 3452 3453 /* Roll back credits and retry if needed */ 3454 add_credits_and_wake_if(server, &wdata->credits, 0); 3455 } while (rc == -EAGAIN); 3456 3457 fail: 3458 kref_put(&wdata->refcount, cifs_uncached_writedata_release); 3459 return rc; 3460 } 3461 3462 /* 3463 * Select span of a bvec iterator we're going to use. Limit it by both maximum 3464 * size and maximum number of segments. 3465 */ 3466 static size_t cifs_limit_bvec_subset(const struct iov_iter *iter, size_t max_size, 3467 size_t max_segs, unsigned int *_nsegs) 3468 { 3469 const struct bio_vec *bvecs = iter->bvec; 3470 unsigned int nbv = iter->nr_segs, ix = 0, nsegs = 0; 3471 size_t len, span = 0, n = iter->count; 3472 size_t skip = iter->iov_offset; 3473 3474 if (WARN_ON(!iov_iter_is_bvec(iter)) || n == 0) 3475 return 0; 3476 3477 while (n && ix < nbv && skip) { 3478 len = bvecs[ix].bv_len; 3479 if (skip < len) 3480 break; 3481 skip -= len; 3482 n -= len; 3483 ix++; 3484 } 3485 3486 while (n && ix < nbv) { 3487 len = min3(n, bvecs[ix].bv_len - skip, max_size); 3488 span += len; 3489 max_size -= len; 3490 nsegs++; 3491 ix++; 3492 if (max_size == 0 || nsegs >= max_segs) 3493 break; 3494 skip = 0; 3495 n -= len; 3496 } 3497 3498 *_nsegs = nsegs; 3499 return span; 3500 } 3501 3502 static int 3503 cifs_write_from_iter(loff_t fpos, size_t len, struct iov_iter *from, 3504 struct cifsFileInfo *open_file, 3505 struct cifs_sb_info *cifs_sb, struct list_head *wdata_list, 3506 struct cifs_aio_ctx *ctx) 3507 { 3508 int rc = 0; 3509 size_t cur_len, max_len; 3510 struct cifs_writedata *wdata; 3511 pid_t pid; 3512 struct TCP_Server_Info *server; 3513 unsigned int xid, max_segs = INT_MAX; 3514 3515 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD) 3516 pid = open_file->pid; 3517 else 3518 pid = current->tgid; 3519 3520 server = cifs_pick_channel(tlink_tcon(open_file->tlink)->ses); 3521 xid = get_xid(); 3522 3523 #ifdef CONFIG_CIFS_SMB_DIRECT 3524 if (server->smbd_conn) 3525 max_segs = server->smbd_conn->max_frmr_depth; 3526 #endif 3527 3528 do { 3529 struct cifs_credits credits_on_stack; 3530 struct cifs_credits *credits = &credits_on_stack; 3531 unsigned int wsize, nsegs = 0; 3532 3533 if (signal_pending(current)) { 3534 rc = -EINTR; 3535 break; 3536 } 3537 3538 if (open_file->invalidHandle) { 3539 rc = cifs_reopen_file(open_file, false); 3540 if (rc == -EAGAIN) 3541 continue; 3542 else if (rc) 3543 break; 3544 } 3545 3546 rc = server->ops->wait_mtu_credits(server, cifs_sb->ctx->wsize, 3547 &wsize, credits); 3548 if (rc) 3549 break; 3550 3551 max_len = 
min_t(const size_t, len, wsize); 3552 if (!max_len) { 3553 rc = -EAGAIN; 3554 add_credits_and_wake_if(server, credits, 0); 3555 break; 3556 } 3557 3558 cur_len = cifs_limit_bvec_subset(from, max_len, max_segs, &nsegs); 3559 cifs_dbg(FYI, "write_from_iter len=%zx/%zx nsegs=%u/%lu/%u\n", 3560 cur_len, max_len, nsegs, from->nr_segs, max_segs); 3561 if (cur_len == 0) { 3562 rc = -EIO; 3563 add_credits_and_wake_if(server, credits, 0); 3564 break; 3565 } 3566 3567 wdata = cifs_writedata_alloc(cifs_uncached_writev_complete); 3568 if (!wdata) { 3569 rc = -ENOMEM; 3570 add_credits_and_wake_if(server, credits, 0); 3571 break; 3572 } 3573 3574 wdata->sync_mode = WB_SYNC_ALL; 3575 wdata->offset = (__u64)fpos; 3576 wdata->cfile = cifsFileInfo_get(open_file); 3577 wdata->server = server; 3578 wdata->pid = pid; 3579 wdata->bytes = cur_len; 3580 wdata->credits = credits_on_stack; 3581 wdata->iter = *from; 3582 wdata->ctx = ctx; 3583 kref_get(&ctx->refcount); 3584 3585 iov_iter_truncate(&wdata->iter, cur_len); 3586 3587 rc = adjust_credits(server, &wdata->credits, wdata->bytes); 3588 3589 if (!rc) { 3590 if (wdata->cfile->invalidHandle) 3591 rc = -EAGAIN; 3592 else 3593 rc = server->ops->async_writev(wdata, 3594 cifs_uncached_writedata_release); 3595 } 3596 3597 if (rc) { 3598 add_credits_and_wake_if(server, &wdata->credits, 0); 3599 kref_put(&wdata->refcount, 3600 cifs_uncached_writedata_release); 3601 if (rc == -EAGAIN) 3602 continue; 3603 break; 3604 } 3605 3606 list_add_tail(&wdata->list, wdata_list); 3607 iov_iter_advance(from, cur_len); 3608 fpos += cur_len; 3609 len -= cur_len; 3610 } while (len > 0); 3611 3612 free_xid(xid); 3613 return rc; 3614 } 3615 3616 static void collect_uncached_write_data(struct cifs_aio_ctx *ctx) 3617 { 3618 struct cifs_writedata *wdata, *tmp; 3619 struct cifs_tcon *tcon; 3620 struct cifs_sb_info *cifs_sb; 3621 struct dentry *dentry = ctx->cfile->dentry; 3622 ssize_t rc; 3623 3624 tcon = tlink_tcon(ctx->cfile->tlink); 3625 cifs_sb = CIFS_SB(dentry->d_sb); 3626 3627 mutex_lock(&ctx->aio_mutex); 3628 3629 if (list_empty(&ctx->list)) { 3630 mutex_unlock(&ctx->aio_mutex); 3631 return; 3632 } 3633 3634 rc = ctx->rc; 3635 /* 3636 * Wait for and collect replies for any successful sends in order of 3637 * increasing offset. Once an error is hit, then return without waiting 3638 * for any more replies. 
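 * (A wdata that completed with -EAGAIN is resent, via cifs_resend_wdata()
 * for direct I/O or cifs_write_from_iter() otherwise, and the loop is
 * restarted so the replacement entries are collected as well.)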
3639 */ 3640 restart_loop: 3641 list_for_each_entry_safe(wdata, tmp, &ctx->list, list) { 3642 if (!rc) { 3643 if (!try_wait_for_completion(&wdata->done)) { 3644 mutex_unlock(&ctx->aio_mutex); 3645 return; 3646 } 3647 3648 if (wdata->result) 3649 rc = wdata->result; 3650 else 3651 ctx->total_len += wdata->bytes; 3652 3653 /* resend call if it's a retryable error */ 3654 if (rc == -EAGAIN) { 3655 struct list_head tmp_list; 3656 struct iov_iter tmp_from = ctx->iter; 3657 3658 INIT_LIST_HEAD(&tmp_list); 3659 list_del_init(&wdata->list); 3660 3661 if (ctx->direct_io) 3662 rc = cifs_resend_wdata( 3663 wdata, &tmp_list, ctx); 3664 else { 3665 iov_iter_advance(&tmp_from, 3666 wdata->offset - ctx->pos); 3667 3668 rc = cifs_write_from_iter(wdata->offset, 3669 wdata->bytes, &tmp_from, 3670 ctx->cfile, cifs_sb, &tmp_list, 3671 ctx); 3672 3673 kref_put(&wdata->refcount, 3674 cifs_uncached_writedata_release); 3675 } 3676 3677 list_splice(&tmp_list, &ctx->list); 3678 goto restart_loop; 3679 } 3680 } 3681 list_del_init(&wdata->list); 3682 kref_put(&wdata->refcount, cifs_uncached_writedata_release); 3683 } 3684 3685 cifs_stats_bytes_written(tcon, ctx->total_len); 3686 set_bit(CIFS_INO_INVALID_MAPPING, &CIFS_I(dentry->d_inode)->flags); 3687 3688 ctx->rc = (rc == 0) ? ctx->total_len : rc; 3689 3690 mutex_unlock(&ctx->aio_mutex); 3691 3692 if (ctx->iocb && ctx->iocb->ki_complete) 3693 ctx->iocb->ki_complete(ctx->iocb, ctx->rc); 3694 else 3695 complete(&ctx->done); 3696 } 3697 3698 static ssize_t __cifs_writev( 3699 struct kiocb *iocb, struct iov_iter *from, bool direct) 3700 { 3701 struct file *file = iocb->ki_filp; 3702 ssize_t total_written = 0; 3703 struct cifsFileInfo *cfile; 3704 struct cifs_tcon *tcon; 3705 struct cifs_sb_info *cifs_sb; 3706 struct cifs_aio_ctx *ctx; 3707 int rc; 3708 3709 rc = generic_write_checks(iocb, from); 3710 if (rc <= 0) 3711 return rc; 3712 3713 cifs_sb = CIFS_FILE_SB(file); 3714 cfile = file->private_data; 3715 tcon = tlink_tcon(cfile->tlink); 3716 3717 if (!tcon->ses->server->ops->async_writev) 3718 return -ENOSYS; 3719 3720 ctx = cifs_aio_ctx_alloc(); 3721 if (!ctx) 3722 return -ENOMEM; 3723 3724 ctx->cfile = cifsFileInfo_get(cfile); 3725 3726 if (!is_sync_kiocb(iocb)) 3727 ctx->iocb = iocb; 3728 3729 ctx->pos = iocb->ki_pos; 3730 ctx->direct_io = direct; 3731 ctx->nr_pinned_pages = 0; 3732 3733 if (user_backed_iter(from)) { 3734 /* 3735 * Extract IOVEC/UBUF-type iterators to a BVEC-type iterator as 3736 * they contain references to the calling process's virtual 3737 * memory layout which won't be available in an async worker 3738 * thread. This also takes a pin on every folio involved. 3739 */ 3740 rc = netfs_extract_user_iter(from, iov_iter_count(from), 3741 &ctx->iter, 0); 3742 if (rc < 0) { 3743 kref_put(&ctx->refcount, cifs_aio_ctx_release); 3744 return rc; 3745 } 3746 3747 ctx->nr_pinned_pages = rc; 3748 ctx->bv = (void *)ctx->iter.bvec; 3749 ctx->bv_need_unpin = iov_iter_extract_will_pin(from); 3750 } else if ((iov_iter_is_bvec(from) || iov_iter_is_kvec(from)) && 3751 !is_sync_kiocb(iocb)) { 3752 /* 3753 * If the op is asynchronous, we need to copy the list attached 3754 * to a BVEC/KVEC-type iterator, but we assume that the storage 3755 * will be pinned by the caller; in any case, we may or may not 3756 * be able to pin the pages, so we don't try. 
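 * (dup_iter() below copies the iterator's segment array so that it stays
 * valid after the caller's stack frame is gone.)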
3757 */ 3758 ctx->bv = (void *)dup_iter(&ctx->iter, from, GFP_KERNEL); 3759 if (!ctx->bv) { 3760 kref_put(&ctx->refcount, cifs_aio_ctx_release); 3761 return -ENOMEM; 3762 } 3763 } else { 3764 /* 3765 * Otherwise, we just pass the iterator down as-is and rely on 3766 * the caller to make sure the pages referred to by the 3767 * iterator don't evaporate. 3768 */ 3769 ctx->iter = *from; 3770 } 3771 3772 ctx->len = iov_iter_count(&ctx->iter); 3773 3774 /* grab a lock here due to read response handlers can access ctx */ 3775 mutex_lock(&ctx->aio_mutex); 3776 3777 rc = cifs_write_from_iter(iocb->ki_pos, ctx->len, &ctx->iter, 3778 cfile, cifs_sb, &ctx->list, ctx); 3779 3780 /* 3781 * If at least one write was successfully sent, then discard any rc 3782 * value from the later writes. If the other write succeeds, then 3783 * we'll end up returning whatever was written. If it fails, then 3784 * we'll get a new rc value from that. 3785 */ 3786 if (!list_empty(&ctx->list)) 3787 rc = 0; 3788 3789 mutex_unlock(&ctx->aio_mutex); 3790 3791 if (rc) { 3792 kref_put(&ctx->refcount, cifs_aio_ctx_release); 3793 return rc; 3794 } 3795 3796 if (!is_sync_kiocb(iocb)) { 3797 kref_put(&ctx->refcount, cifs_aio_ctx_release); 3798 return -EIOCBQUEUED; 3799 } 3800 3801 rc = wait_for_completion_killable(&ctx->done); 3802 if (rc) { 3803 mutex_lock(&ctx->aio_mutex); 3804 ctx->rc = rc = -EINTR; 3805 total_written = ctx->total_len; 3806 mutex_unlock(&ctx->aio_mutex); 3807 } else { 3808 rc = ctx->rc; 3809 total_written = ctx->total_len; 3810 } 3811 3812 kref_put(&ctx->refcount, cifs_aio_ctx_release); 3813 3814 if (unlikely(!total_written)) 3815 return rc; 3816 3817 iocb->ki_pos += total_written; 3818 return total_written; 3819 } 3820 3821 ssize_t cifs_direct_writev(struct kiocb *iocb, struct iov_iter *from) 3822 { 3823 struct file *file = iocb->ki_filp; 3824 3825 cifs_revalidate_mapping(file->f_inode); 3826 return __cifs_writev(iocb, from, true); 3827 } 3828 3829 ssize_t cifs_user_writev(struct kiocb *iocb, struct iov_iter *from) 3830 { 3831 return __cifs_writev(iocb, from, false); 3832 } 3833 3834 static ssize_t 3835 cifs_writev(struct kiocb *iocb, struct iov_iter *from) 3836 { 3837 struct file *file = iocb->ki_filp; 3838 struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data; 3839 struct inode *inode = file->f_mapping->host; 3840 struct cifsInodeInfo *cinode = CIFS_I(inode); 3841 struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server; 3842 ssize_t rc; 3843 3844 inode_lock(inode); 3845 /* 3846 * We need to hold the sem to be sure nobody modifies lock list 3847 * with a brlock that prevents writing. 
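 * (lock_sem is held shared across the check and the write below; if
 * cifs_find_lock_conflict() reports a conflicting cached brlock the write
 * fails with -EACCES.)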
3848 */ 3849 down_read(&cinode->lock_sem); 3850
3851 rc = generic_write_checks(iocb, from); 3852 if (rc <= 0) 3853 goto out; 3854
3855 if (!cifs_find_lock_conflict(cfile, iocb->ki_pos, iov_iter_count(from), 3856 server->vals->exclusive_lock_type, 0, 3857 NULL, CIFS_WRITE_OP)) 3858 rc = __generic_file_write_iter(iocb, from); 3859 else 3860 rc = -EACCES; 3861 out: 3862 up_read(&cinode->lock_sem); 3863 inode_unlock(inode); 3864
3865 if (rc > 0) 3866 rc = generic_write_sync(iocb, rc); 3867 return rc; 3868 } 3869
3870 ssize_t 3871 cifs_strict_writev(struct kiocb *iocb, struct iov_iter *from) 3872 { 3873 struct inode *inode = file_inode(iocb->ki_filp); 3874 struct cifsInodeInfo *cinode = CIFS_I(inode); 3875 struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb); 3876 struct cifsFileInfo *cfile = (struct cifsFileInfo *) 3877 iocb->ki_filp->private_data; 3878 struct cifs_tcon *tcon = tlink_tcon(cfile->tlink); 3879 ssize_t written; 3880
3881 written = cifs_get_writer(cinode); 3882 if (written) 3883 return written; 3884
3885 if (CIFS_CACHE_WRITE(cinode)) { 3886 if (cap_unix(tcon->ses) && 3887 (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) 3888 && ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0)) { 3889 written = generic_file_write_iter(iocb, from); 3890 goto out; 3891 } 3892 written = cifs_writev(iocb, from); 3893 goto out; 3894 } 3895 /* 3896 * For non-oplocked files in strict cache mode we need to write the data 3897 * to the server exactly from pos to pos+len-1 rather than flush all 3898 * affected pages, because flushing whole pages may hit mandatory locks on 3899 * those pages even though there is no conflict on the region pos to pos+len-1. 3900 */ 3901 written = cifs_user_writev(iocb, from); 3902 if (CIFS_CACHE_READ(cinode)) { 3903 /* 3904 * We have read level caching and we have just sent a write 3905 * request to the server thus making data in the cache stale. 3906 * Zap the cache and set oplock/lease level to NONE to avoid 3907 * reading stale data from the cache. All subsequent read 3908 * operations will read new data from the server.
3909 */ 3910 cifs_zap_mapping(inode); 3911 cifs_dbg(FYI, "Set Oplock/Lease to NONE for inode=%p after write\n", 3912 inode); 3913 cinode->oplock = 0; 3914 } 3915 out: 3916 cifs_put_writer(cinode); 3917 return written; 3918 } 3919 3920 static struct cifs_readdata *cifs_readdata_alloc(work_func_t complete) 3921 { 3922 struct cifs_readdata *rdata; 3923 3924 rdata = kzalloc(sizeof(*rdata), GFP_KERNEL); 3925 if (rdata) { 3926 kref_init(&rdata->refcount); 3927 INIT_LIST_HEAD(&rdata->list); 3928 init_completion(&rdata->done); 3929 INIT_WORK(&rdata->work, complete); 3930 } 3931 3932 return rdata; 3933 } 3934 3935 void 3936 cifs_readdata_release(struct kref *refcount) 3937 { 3938 struct cifs_readdata *rdata = container_of(refcount, 3939 struct cifs_readdata, refcount); 3940 3941 if (rdata->ctx) 3942 kref_put(&rdata->ctx->refcount, cifs_aio_ctx_release); 3943 #ifdef CONFIG_CIFS_SMB_DIRECT 3944 if (rdata->mr) { 3945 smbd_deregister_mr(rdata->mr); 3946 rdata->mr = NULL; 3947 } 3948 #endif 3949 if (rdata->cfile) 3950 cifsFileInfo_put(rdata->cfile); 3951 3952 kfree(rdata); 3953 } 3954 3955 static void collect_uncached_read_data(struct cifs_aio_ctx *ctx); 3956 3957 static void 3958 cifs_uncached_readv_complete(struct work_struct *work) 3959 { 3960 struct cifs_readdata *rdata = container_of(work, 3961 struct cifs_readdata, work); 3962 3963 complete(&rdata->done); 3964 collect_uncached_read_data(rdata->ctx); 3965 /* the below call can possibly free the last ref to aio ctx */ 3966 kref_put(&rdata->refcount, cifs_readdata_release); 3967 } 3968 3969 static int cifs_resend_rdata(struct cifs_readdata *rdata, 3970 struct list_head *rdata_list, 3971 struct cifs_aio_ctx *ctx) 3972 { 3973 unsigned int rsize; 3974 struct cifs_credits credits; 3975 int rc; 3976 struct TCP_Server_Info *server; 3977 3978 /* XXX: should we pick a new channel here? */ 3979 server = rdata->server; 3980 3981 do { 3982 if (rdata->cfile->invalidHandle) { 3983 rc = cifs_reopen_file(rdata->cfile, true); 3984 if (rc == -EAGAIN) 3985 continue; 3986 else if (rc) 3987 break; 3988 } 3989 3990 /* 3991 * Wait for credits to resend this rdata. 
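 * (wait_mtu_credits() may grant fewer bytes than asked for and reports
 * the actual grant through @rsize, so the loop below hands the credits
 * back and retries until the grant covers rdata->bytes -- e.g. a 1 MiB
 * rdata cannot be resent on a 64 KiB grant, because its iterator and,
 * for smbdirect, its memory registration already describe the full
 * range.)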
3992 * Note: we are attempting to resend the whole rdata not in 3993 * segments 3994 */ 3995 do { 3996 rc = server->ops->wait_mtu_credits(server, rdata->bytes, 3997 &rsize, &credits); 3998 3999 if (rc) 4000 goto fail; 4001 4002 if (rsize < rdata->bytes) { 4003 add_credits_and_wake_if(server, &credits, 0); 4004 msleep(1000); 4005 } 4006 } while (rsize < rdata->bytes); 4007 rdata->credits = credits; 4008 4009 rc = adjust_credits(server, &rdata->credits, rdata->bytes); 4010 if (!rc) { 4011 if (rdata->cfile->invalidHandle) 4012 rc = -EAGAIN; 4013 else { 4014 #ifdef CONFIG_CIFS_SMB_DIRECT 4015 if (rdata->mr) { 4016 rdata->mr->need_invalidate = true; 4017 smbd_deregister_mr(rdata->mr); 4018 rdata->mr = NULL; 4019 } 4020 #endif 4021 rc = server->ops->async_readv(rdata); 4022 } 4023 } 4024 4025 /* If the read was successfully sent, we are done */ 4026 if (!rc) { 4027 /* Add to aio pending list */ 4028 list_add_tail(&rdata->list, rdata_list); 4029 return 0; 4030 } 4031 4032 /* Roll back credits and retry if needed */ 4033 add_credits_and_wake_if(server, &rdata->credits, 0); 4034 } while (rc == -EAGAIN); 4035 4036 fail: 4037 kref_put(&rdata->refcount, cifs_readdata_release); 4038 return rc; 4039 } 4040 4041 static int 4042 cifs_send_async_read(loff_t fpos, size_t len, struct cifsFileInfo *open_file, 4043 struct cifs_sb_info *cifs_sb, struct list_head *rdata_list, 4044 struct cifs_aio_ctx *ctx) 4045 { 4046 struct cifs_readdata *rdata; 4047 unsigned int rsize, nsegs, max_segs = INT_MAX; 4048 struct cifs_credits credits_on_stack; 4049 struct cifs_credits *credits = &credits_on_stack; 4050 size_t cur_len, max_len; 4051 int rc; 4052 pid_t pid; 4053 struct TCP_Server_Info *server; 4054 4055 server = cifs_pick_channel(tlink_tcon(open_file->tlink)->ses); 4056 4057 #ifdef CONFIG_CIFS_SMB_DIRECT 4058 if (server->smbd_conn) 4059 max_segs = server->smbd_conn->max_frmr_depth; 4060 #endif 4061 4062 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD) 4063 pid = open_file->pid; 4064 else 4065 pid = current->tgid; 4066 4067 do { 4068 if (open_file->invalidHandle) { 4069 rc = cifs_reopen_file(open_file, true); 4070 if (rc == -EAGAIN) 4071 continue; 4072 else if (rc) 4073 break; 4074 } 4075 4076 if (cifs_sb->ctx->rsize == 0) 4077 cifs_sb->ctx->rsize = 4078 server->ops->negotiate_rsize(tlink_tcon(open_file->tlink), 4079 cifs_sb->ctx); 4080 4081 rc = server->ops->wait_mtu_credits(server, cifs_sb->ctx->rsize, 4082 &rsize, credits); 4083 if (rc) 4084 break; 4085 4086 max_len = min_t(size_t, len, rsize); 4087 4088 cur_len = cifs_limit_bvec_subset(&ctx->iter, max_len, 4089 max_segs, &nsegs); 4090 cifs_dbg(FYI, "read-to-iter len=%zx/%zx nsegs=%u/%lu/%u\n", 4091 cur_len, max_len, nsegs, ctx->iter.nr_segs, max_segs); 4092 if (cur_len == 0) { 4093 rc = -EIO; 4094 add_credits_and_wake_if(server, credits, 0); 4095 break; 4096 } 4097 4098 rdata = cifs_readdata_alloc(cifs_uncached_readv_complete); 4099 if (!rdata) { 4100 add_credits_and_wake_if(server, credits, 0); 4101 rc = -ENOMEM; 4102 break; 4103 } 4104 4105 rdata->server = server; 4106 rdata->cfile = cifsFileInfo_get(open_file); 4107 rdata->offset = fpos; 4108 rdata->bytes = cur_len; 4109 rdata->pid = pid; 4110 rdata->credits = credits_on_stack; 4111 rdata->ctx = ctx; 4112 kref_get(&ctx->refcount); 4113 4114 rdata->iter = ctx->iter; 4115 iov_iter_truncate(&rdata->iter, cur_len); 4116 4117 rc = adjust_credits(server, &rdata->credits, rdata->bytes); 4118 4119 if (!rc) { 4120 if (rdata->cfile->invalidHandle) 4121 rc = -EAGAIN; 4122 else 4123 rc = 
server->ops->async_readv(rdata); 4124 } 4125 4126 if (rc) { 4127 add_credits_and_wake_if(server, &rdata->credits, 0); 4128 kref_put(&rdata->refcount, cifs_readdata_release); 4129 if (rc == -EAGAIN) 4130 continue; 4131 break; 4132 } 4133 4134 list_add_tail(&rdata->list, rdata_list); 4135 iov_iter_advance(&ctx->iter, cur_len); 4136 fpos += cur_len; 4137 len -= cur_len; 4138 } while (len > 0); 4139 4140 return rc; 4141 } 4142 4143 static void 4144 collect_uncached_read_data(struct cifs_aio_ctx *ctx) 4145 { 4146 struct cifs_readdata *rdata, *tmp; 4147 struct cifs_sb_info *cifs_sb; 4148 int rc; 4149 4150 cifs_sb = CIFS_SB(ctx->cfile->dentry->d_sb); 4151 4152 mutex_lock(&ctx->aio_mutex); 4153 4154 if (list_empty(&ctx->list)) { 4155 mutex_unlock(&ctx->aio_mutex); 4156 return; 4157 } 4158 4159 rc = ctx->rc; 4160 /* the loop below should proceed in the order of increasing offsets */ 4161 again: 4162 list_for_each_entry_safe(rdata, tmp, &ctx->list, list) { 4163 if (!rc) { 4164 if (!try_wait_for_completion(&rdata->done)) { 4165 mutex_unlock(&ctx->aio_mutex); 4166 return; 4167 } 4168 4169 if (rdata->result == -EAGAIN) { 4170 /* resend call if it's a retryable error */ 4171 struct list_head tmp_list; 4172 unsigned int got_bytes = rdata->got_bytes; 4173 4174 list_del_init(&rdata->list); 4175 INIT_LIST_HEAD(&tmp_list); 4176 4177 if (ctx->direct_io) { 4178 /* 4179 * Re-use rdata as this is a 4180 * direct I/O 4181 */ 4182 rc = cifs_resend_rdata( 4183 rdata, 4184 &tmp_list, ctx); 4185 } else { 4186 rc = cifs_send_async_read( 4187 rdata->offset + got_bytes, 4188 rdata->bytes - got_bytes, 4189 rdata->cfile, cifs_sb, 4190 &tmp_list, ctx); 4191 4192 kref_put(&rdata->refcount, 4193 cifs_readdata_release); 4194 } 4195 4196 list_splice(&tmp_list, &ctx->list); 4197 4198 goto again; 4199 } else if (rdata->result) 4200 rc = rdata->result; 4201 4202 /* if there was a short read -- discard anything left */ 4203 if (rdata->got_bytes && rdata->got_bytes < rdata->bytes) 4204 rc = -ENODATA; 4205 4206 ctx->total_len += rdata->got_bytes; 4207 } 4208 list_del_init(&rdata->list); 4209 kref_put(&rdata->refcount, cifs_readdata_release); 4210 } 4211 4212 /* mask nodata case */ 4213 if (rc == -ENODATA) 4214 rc = 0; 4215 4216 ctx->rc = (rc == 0) ? 
(ssize_t)ctx->total_len : rc; 4217 4218 mutex_unlock(&ctx->aio_mutex); 4219 4220 if (ctx->iocb && ctx->iocb->ki_complete) 4221 ctx->iocb->ki_complete(ctx->iocb, ctx->rc); 4222 else 4223 complete(&ctx->done); 4224 } 4225 4226 static ssize_t __cifs_readv( 4227 struct kiocb *iocb, struct iov_iter *to, bool direct) 4228 { 4229 size_t len; 4230 struct file *file = iocb->ki_filp; 4231 struct cifs_sb_info *cifs_sb; 4232 struct cifsFileInfo *cfile; 4233 struct cifs_tcon *tcon; 4234 ssize_t rc, total_read = 0; 4235 loff_t offset = iocb->ki_pos; 4236 struct cifs_aio_ctx *ctx; 4237 4238 len = iov_iter_count(to); 4239 if (!len) 4240 return 0; 4241 4242 cifs_sb = CIFS_FILE_SB(file); 4243 cfile = file->private_data; 4244 tcon = tlink_tcon(cfile->tlink); 4245 4246 if (!tcon->ses->server->ops->async_readv) 4247 return -ENOSYS; 4248 4249 if ((file->f_flags & O_ACCMODE) == O_WRONLY) 4250 cifs_dbg(FYI, "attempting read on write only file instance\n"); 4251 4252 ctx = cifs_aio_ctx_alloc(); 4253 if (!ctx) 4254 return -ENOMEM; 4255 4256 ctx->pos = offset; 4257 ctx->direct_io = direct; 4258 ctx->len = len; 4259 ctx->cfile = cifsFileInfo_get(cfile); 4260 ctx->nr_pinned_pages = 0; 4261 4262 if (!is_sync_kiocb(iocb)) 4263 ctx->iocb = iocb; 4264 4265 if (user_backed_iter(to)) { 4266 /* 4267 * Extract IOVEC/UBUF-type iterators to a BVEC-type iterator as 4268 * they contain references to the calling process's virtual 4269 * memory layout which won't be available in an async worker 4270 * thread. This also takes a pin on every folio involved. 4271 */ 4272 rc = netfs_extract_user_iter(to, iov_iter_count(to), 4273 &ctx->iter, 0); 4274 if (rc < 0) { 4275 kref_put(&ctx->refcount, cifs_aio_ctx_release); 4276 return rc; 4277 } 4278 4279 ctx->nr_pinned_pages = rc; 4280 ctx->bv = (void *)ctx->iter.bvec; 4281 ctx->bv_need_unpin = iov_iter_extract_will_pin(to); 4282 ctx->should_dirty = true; 4283 } else if ((iov_iter_is_bvec(to) || iov_iter_is_kvec(to)) && 4284 !is_sync_kiocb(iocb)) { 4285 /* 4286 * If the op is asynchronous, we need to copy the list attached 4287 * to a BVEC/KVEC-type iterator, but we assume that the storage 4288 * will be retained by the caller; in any case, we may or may 4289 * not be able to pin the pages, so we don't try. 4290 */ 4291 ctx->bv = (void *)dup_iter(&ctx->iter, to, GFP_KERNEL); 4292 if (!ctx->bv) { 4293 kref_put(&ctx->refcount, cifs_aio_ctx_release); 4294 return -ENOMEM; 4295 } 4296 } else { 4297 /* 4298 * Otherwise, we just pass the iterator down as-is and rely on 4299 * the caller to make sure the pages referred to by the 4300 * iterator don't evaporate. 
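 *
 * In practice that pass-through case is the synchronous BVEC/KVEC one:
 * the synchronous path below waits on ctx->done before returning, so the
 * caller's buffers stay live while the reads are in flight. For
 * reference, the asynchronous copy made above follows the usual
 * dup_iter() pattern -- a rough sketch:
 *
 *	ctx->bv = (void *)dup_iter(&ctx->iter, to, GFP_KERNEL);
 *	if (!ctx->bv)
 *		return -ENOMEM;		// nothing has been queued yet
 *	...
 *	// ctx->bv is freed again when the ctx is torn down
 *
 * dup_iter() duplicates only the segment array, not the pages it points
 * at.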
4301 */ 4302 ctx->iter = *to; 4303 } 4304 4305 if (direct) { 4306 rc = filemap_write_and_wait_range(file->f_inode->i_mapping, 4307 offset, offset + len - 1); 4308 if (rc) { 4309 kref_put(&ctx->refcount, cifs_aio_ctx_release); 4310 return -EAGAIN; 4311 } 4312 } 4313 4314 /* grab a lock here due to read response handlers can access ctx */ 4315 mutex_lock(&ctx->aio_mutex); 4316 4317 rc = cifs_send_async_read(offset, len, cfile, cifs_sb, &ctx->list, ctx); 4318 4319 /* if at least one read request send succeeded, then reset rc */ 4320 if (!list_empty(&ctx->list)) 4321 rc = 0; 4322 4323 mutex_unlock(&ctx->aio_mutex); 4324 4325 if (rc) { 4326 kref_put(&ctx->refcount, cifs_aio_ctx_release); 4327 return rc; 4328 } 4329 4330 if (!is_sync_kiocb(iocb)) { 4331 kref_put(&ctx->refcount, cifs_aio_ctx_release); 4332 return -EIOCBQUEUED; 4333 } 4334 4335 rc = wait_for_completion_killable(&ctx->done); 4336 if (rc) { 4337 mutex_lock(&ctx->aio_mutex); 4338 ctx->rc = rc = -EINTR; 4339 total_read = ctx->total_len; 4340 mutex_unlock(&ctx->aio_mutex); 4341 } else { 4342 rc = ctx->rc; 4343 total_read = ctx->total_len; 4344 } 4345 4346 kref_put(&ctx->refcount, cifs_aio_ctx_release); 4347 4348 if (total_read) { 4349 iocb->ki_pos += total_read; 4350 return total_read; 4351 } 4352 return rc; 4353 } 4354 4355 ssize_t cifs_direct_readv(struct kiocb *iocb, struct iov_iter *to) 4356 { 4357 return __cifs_readv(iocb, to, true); 4358 } 4359 4360 ssize_t cifs_user_readv(struct kiocb *iocb, struct iov_iter *to) 4361 { 4362 return __cifs_readv(iocb, to, false); 4363 } 4364 4365 ssize_t 4366 cifs_strict_readv(struct kiocb *iocb, struct iov_iter *to) 4367 { 4368 struct inode *inode = file_inode(iocb->ki_filp); 4369 struct cifsInodeInfo *cinode = CIFS_I(inode); 4370 struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb); 4371 struct cifsFileInfo *cfile = (struct cifsFileInfo *) 4372 iocb->ki_filp->private_data; 4373 struct cifs_tcon *tcon = tlink_tcon(cfile->tlink); 4374 int rc = -EACCES; 4375 4376 /* 4377 * In strict cache mode we need to read from the server all the time 4378 * if we don't have level II oplock because the server can delay mtime 4379 * change - so we can't make a decision about inode invalidating. 4380 * And we can also fail with pagereading if there are mandatory locks 4381 * on pages affected by this read but not on the region from pos to 4382 * pos+len-1. 4383 */ 4384 if (!CIFS_CACHE_READ(cinode)) 4385 return cifs_user_readv(iocb, to); 4386 4387 if (cap_unix(tcon->ses) && 4388 (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) && 4389 ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0)) 4390 return generic_file_read_iter(iocb, to); 4391 4392 /* 4393 * We need to hold the sem to be sure nobody modifies lock list 4394 * with a brlock that prevents reading. 
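 * (Roughly speaking, reads check against the server's shared_lock_type
 * below, so they only collide with exclusive byte-range locks, whereas
 * the write path in cifs_writev() above checks with exclusive_lock_type
 * and therefore collides with any existing lock on the range.)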
4395 */ 4396 down_read(&cinode->lock_sem); 4397 if (!cifs_find_lock_conflict(cfile, iocb->ki_pos, iov_iter_count(to), 4398 tcon->ses->server->vals->shared_lock_type, 4399 0, NULL, CIFS_READ_OP)) 4400 rc = generic_file_read_iter(iocb, to); 4401 up_read(&cinode->lock_sem); 4402 return rc; 4403 } 4404 4405 static ssize_t 4406 cifs_read(struct file *file, char *read_data, size_t read_size, loff_t *offset) 4407 { 4408 int rc = -EACCES; 4409 unsigned int bytes_read = 0; 4410 unsigned int total_read; 4411 unsigned int current_read_size; 4412 unsigned int rsize; 4413 struct cifs_sb_info *cifs_sb; 4414 struct cifs_tcon *tcon; 4415 struct TCP_Server_Info *server; 4416 unsigned int xid; 4417 char *cur_offset; 4418 struct cifsFileInfo *open_file; 4419 struct cifs_io_parms io_parms = {0}; 4420 int buf_type = CIFS_NO_BUFFER; 4421 __u32 pid; 4422 4423 xid = get_xid(); 4424 cifs_sb = CIFS_FILE_SB(file); 4425 4426 /* FIXME: set up handlers for larger reads and/or convert to async */ 4427 rsize = min_t(unsigned int, cifs_sb->ctx->rsize, CIFSMaxBufSize); 4428 4429 if (file->private_data == NULL) { 4430 rc = -EBADF; 4431 free_xid(xid); 4432 return rc; 4433 } 4434 open_file = file->private_data; 4435 tcon = tlink_tcon(open_file->tlink); 4436 server = cifs_pick_channel(tcon->ses); 4437 4438 if (!server->ops->sync_read) { 4439 free_xid(xid); 4440 return -ENOSYS; 4441 } 4442 4443 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD) 4444 pid = open_file->pid; 4445 else 4446 pid = current->tgid; 4447 4448 if ((file->f_flags & O_ACCMODE) == O_WRONLY) 4449 cifs_dbg(FYI, "attempting read on write only file instance\n"); 4450 4451 for (total_read = 0, cur_offset = read_data; read_size > total_read; 4452 total_read += bytes_read, cur_offset += bytes_read) { 4453 do { 4454 current_read_size = min_t(uint, read_size - total_read, 4455 rsize); 4456 /* 4457 * For windows me and 9x we do not want to request more 4458 * than it negotiated since it will refuse the read 4459 * then. 4460 */ 4461 if (!(tcon->ses->capabilities & 4462 tcon->ses->server->vals->cap_large_files)) { 4463 current_read_size = min_t(uint, 4464 current_read_size, CIFSMaxBufSize); 4465 } 4466 if (open_file->invalidHandle) { 4467 rc = cifs_reopen_file(open_file, true); 4468 if (rc != 0) 4469 break; 4470 } 4471 io_parms.pid = pid; 4472 io_parms.tcon = tcon; 4473 io_parms.offset = *offset; 4474 io_parms.length = current_read_size; 4475 io_parms.server = server; 4476 rc = server->ops->sync_read(xid, &open_file->fid, &io_parms, 4477 &bytes_read, &cur_offset, 4478 &buf_type); 4479 } while (rc == -EAGAIN); 4480 4481 if (rc || (bytes_read == 0)) { 4482 if (total_read) { 4483 break; 4484 } else { 4485 free_xid(xid); 4486 return rc; 4487 } 4488 } else { 4489 cifs_stats_bytes_read(tcon, total_read); 4490 *offset += bytes_read; 4491 } 4492 } 4493 free_xid(xid); 4494 return total_read; 4495 } 4496 4497 /* 4498 * If the page is mmap'ed into a process' page tables, then we need to make 4499 * sure that it doesn't change while being written back. 4500 */ 4501 static vm_fault_t cifs_page_mkwrite(struct vm_fault *vmf) 4502 { 4503 struct folio *folio = page_folio(vmf->page); 4504 4505 /* Wait for the folio to be written to the cache before we allow it to 4506 * be modified. We then assume the entire folio will need writing back. 
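 * (The ->page_mkwrite contract assumed here, roughly: return
 * VM_FAULT_LOCKED with the folio still locked so the fault path can
 * dirty it and make the PTE writable, or VM_FAULT_RETRY when the lock
 * could not be taken; the handler below does exactly that.)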
4507 */ 4508 #ifdef CONFIG_CIFS_FSCACHE 4509 if (folio_test_fscache(folio) && 4510 folio_wait_fscache_killable(folio) < 0) 4511 return VM_FAULT_RETRY; 4512 #endif 4513 4514 folio_wait_writeback(folio); 4515 4516 if (folio_lock_killable(folio) < 0) 4517 return VM_FAULT_RETRY; 4518 return VM_FAULT_LOCKED; 4519 } 4520 4521 static const struct vm_operations_struct cifs_file_vm_ops = { 4522 .fault = filemap_fault, 4523 .map_pages = filemap_map_pages, 4524 .page_mkwrite = cifs_page_mkwrite, 4525 }; 4526 4527 int cifs_file_strict_mmap(struct file *file, struct vm_area_struct *vma) 4528 { 4529 int xid, rc = 0; 4530 struct inode *inode = file_inode(file); 4531 4532 xid = get_xid(); 4533 4534 if (!CIFS_CACHE_READ(CIFS_I(inode))) 4535 rc = cifs_zap_mapping(inode); 4536 if (!rc) 4537 rc = generic_file_mmap(file, vma); 4538 if (!rc) 4539 vma->vm_ops = &cifs_file_vm_ops; 4540 4541 free_xid(xid); 4542 return rc; 4543 } 4544 4545 int cifs_file_mmap(struct file *file, struct vm_area_struct *vma) 4546 { 4547 int rc, xid; 4548 4549 xid = get_xid(); 4550 4551 rc = cifs_revalidate_file(file); 4552 if (rc) 4553 cifs_dbg(FYI, "Validation prior to mmap failed, error=%d\n", 4554 rc); 4555 if (!rc) 4556 rc = generic_file_mmap(file, vma); 4557 if (!rc) 4558 vma->vm_ops = &cifs_file_vm_ops; 4559 4560 free_xid(xid); 4561 return rc; 4562 } 4563 4564 /* 4565 * Unlock a bunch of folios in the pagecache. 4566 */ 4567 static void cifs_unlock_folios(struct address_space *mapping, pgoff_t first, pgoff_t last) 4568 { 4569 struct folio *folio; 4570 XA_STATE(xas, &mapping->i_pages, first); 4571 4572 rcu_read_lock(); 4573 xas_for_each(&xas, folio, last) { 4574 folio_unlock(folio); 4575 } 4576 rcu_read_unlock(); 4577 } 4578 4579 static void cifs_readahead_complete(struct work_struct *work) 4580 { 4581 struct cifs_readdata *rdata = container_of(work, 4582 struct cifs_readdata, work); 4583 struct folio *folio; 4584 pgoff_t last; 4585 bool good = rdata->result == 0 || (rdata->result == -EAGAIN && rdata->got_bytes); 4586 4587 XA_STATE(xas, &rdata->mapping->i_pages, rdata->offset / PAGE_SIZE); 4588 4589 if (good) 4590 cifs_readahead_to_fscache(rdata->mapping->host, 4591 rdata->offset, rdata->bytes); 4592 4593 if (iov_iter_count(&rdata->iter) > 0) 4594 iov_iter_zero(iov_iter_count(&rdata->iter), &rdata->iter); 4595 4596 last = (rdata->offset + rdata->bytes - 1) / PAGE_SIZE; 4597 4598 rcu_read_lock(); 4599 xas_for_each(&xas, folio, last) { 4600 if (good) { 4601 flush_dcache_folio(folio); 4602 folio_mark_uptodate(folio); 4603 } 4604 folio_unlock(folio); 4605 } 4606 rcu_read_unlock(); 4607 4608 kref_put(&rdata->refcount, cifs_readdata_release); 4609 } 4610 4611 static void cifs_readahead(struct readahead_control *ractl) 4612 { 4613 struct cifsFileInfo *open_file = ractl->file->private_data; 4614 struct cifs_sb_info *cifs_sb = CIFS_FILE_SB(ractl->file); 4615 struct TCP_Server_Info *server; 4616 unsigned int xid, nr_pages, cache_nr_pages = 0; 4617 unsigned int ra_pages; 4618 pgoff_t next_cached = ULONG_MAX, ra_index; 4619 bool caching = fscache_cookie_enabled(cifs_inode_cookie(ractl->mapping->host)) && 4620 cifs_inode_cookie(ractl->mapping->host)->cache_priv; 4621 bool check_cache = caching; 4622 pid_t pid; 4623 int rc = 0; 4624 4625 /* Note that readahead_count() lags behind our dequeuing of pages from 4626 * the ractl, so we have to keep track for ourselves.
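 *
 * A sketch of the bookkeeping this implies (readahead_folio() dequeues a
 * folio from the ractl, so the snapshot taken below has to be adjusted
 * by hand):
 *
 *	ra_pages = readahead_count(ractl);	// snapshot once
 *	ra_index = readahead_index(ractl);
 *	...
 *	folio = readahead_folio(ractl);		// consumes one folio
 *	ra_pages -= folio_nr_pages(folio);
 *	ra_index += folio_nr_pages(folio);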
4627 */ 4628 ra_pages = readahead_count(ractl); 4629 ra_index = readahead_index(ractl); 4630 4631 xid = get_xid(); 4632 4633 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD) 4634 pid = open_file->pid; 4635 else 4636 pid = current->tgid; 4637 4638 server = cifs_pick_channel(tlink_tcon(open_file->tlink)->ses); 4639 4640 cifs_dbg(FYI, "%s: file=%p mapping=%p num_pages=%u\n", 4641 __func__, ractl->file, ractl->mapping, ra_pages); 4642 4643 /* 4644 * Chop the readahead request up into rsize-sized read requests. 4645 */ 4646 while ((nr_pages = ra_pages)) { 4647 unsigned int i, rsize; 4648 struct cifs_readdata *rdata; 4649 struct cifs_credits credits_on_stack; 4650 struct cifs_credits *credits = &credits_on_stack; 4651 struct folio *folio; 4652 pgoff_t fsize; 4653 4654 /* 4655 * Find out if we have anything cached in the range of 4656 * interest, and if so, where the next chunk of cached data is. 4657 */ 4658 if (caching) { 4659 if (check_cache) { 4660 rc = cifs_fscache_query_occupancy( 4661 ractl->mapping->host, ra_index, nr_pages, 4662 &next_cached, &cache_nr_pages); 4663 if (rc < 0) 4664 caching = false; 4665 check_cache = false; 4666 } 4667 4668 if (ra_index == next_cached) { 4669 /* 4670 * TODO: Send a whole batch of pages to be read 4671 * by the cache. 4672 */ 4673 folio = readahead_folio(ractl); 4674 fsize = folio_nr_pages(folio); 4675 ra_pages -= fsize; 4676 ra_index += fsize; 4677 if (cifs_readpage_from_fscache(ractl->mapping->host, 4678 &folio->page) < 0) { 4679 /* 4680 * TODO: Deal with cache read failure 4681 * here, but for the moment, delegate 4682 * that to readpage. 4683 */ 4684 caching = false; 4685 } 4686 folio_unlock(folio); 4687 next_cached += fsize; 4688 cache_nr_pages -= fsize; 4689 if (cache_nr_pages == 0) 4690 check_cache = true; 4691 continue; 4692 } 4693 } 4694 4695 if (open_file->invalidHandle) { 4696 rc = cifs_reopen_file(open_file, true); 4697 if (rc) { 4698 if (rc == -EAGAIN) 4699 continue; 4700 break; 4701 } 4702 } 4703 4704 if (cifs_sb->ctx->rsize == 0) 4705 cifs_sb->ctx->rsize = 4706 server->ops->negotiate_rsize(tlink_tcon(open_file->tlink), 4707 cifs_sb->ctx); 4708 4709 rc = server->ops->wait_mtu_credits(server, cifs_sb->ctx->rsize, 4710 &rsize, credits); 4711 if (rc) 4712 break; 4713 nr_pages = min_t(size_t, rsize / PAGE_SIZE, ra_pages); 4714 if (next_cached != ULONG_MAX) 4715 nr_pages = min_t(size_t, nr_pages, next_cached - ra_index); 4716 4717 /* 4718 * Give up immediately if rsize is too small to read an entire 4719 * page. The VFS will fall back to readpage. We should never 4720 * reach this point however since we set ra_pages to 0 when the 4721 * rsize is smaller than a cache page. 
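 * (Worked example: with a 16 KiB rsize and 4 KiB pages the loop issues
 * at most 16384 / 4096 = 4 pages per read request; an rsize below 4 KiB
 * would make nr_pages zero, which is the condition guarded against
 * here.)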
4722 */ 4723 if (unlikely(!nr_pages)) { 4724 add_credits_and_wake_if(server, credits, 0); 4725 break; 4726 } 4727 4728 rdata = cifs_readdata_alloc(cifs_readahead_complete); 4729 if (!rdata) { 4730 /* best to give up if we're out of mem */ 4731 add_credits_and_wake_if(server, credits, 0); 4732 break; 4733 } 4734 4735 rdata->offset = ra_index * PAGE_SIZE; 4736 rdata->bytes = nr_pages * PAGE_SIZE; 4737 rdata->cfile = cifsFileInfo_get(open_file); 4738 rdata->server = server; 4739 rdata->mapping = ractl->mapping; 4740 rdata->pid = pid; 4741 rdata->credits = credits_on_stack; 4742 4743 for (i = 0; i < nr_pages; i++) { 4744 if (!readahead_folio(ractl)) 4745 WARN_ON(1); 4746 } 4747 ra_pages -= nr_pages; 4748 ra_index += nr_pages; 4749 4750 iov_iter_xarray(&rdata->iter, ITER_DEST, &rdata->mapping->i_pages, 4751 rdata->offset, rdata->bytes); 4752 4753 rc = adjust_credits(server, &rdata->credits, rdata->bytes); 4754 if (!rc) { 4755 if (rdata->cfile->invalidHandle) 4756 rc = -EAGAIN; 4757 else 4758 rc = server->ops->async_readv(rdata); 4759 } 4760 4761 if (rc) { 4762 add_credits_and_wake_if(server, &rdata->credits, 0); 4763 cifs_unlock_folios(rdata->mapping, 4764 rdata->offset / PAGE_SIZE, 4765 (rdata->offset + rdata->bytes - 1) / PAGE_SIZE); 4766 /* Fallback to the readpage in error/reconnect cases */ 4767 kref_put(&rdata->refcount, cifs_readdata_release); 4768 break; 4769 } 4770 4771 kref_put(&rdata->refcount, cifs_readdata_release); 4772 } 4773 4774 free_xid(xid); 4775 } 4776 4777 /* 4778 * cifs_readpage_worker must be called with the page pinned 4779 */ 4780 static int cifs_readpage_worker(struct file *file, struct page *page, 4781 loff_t *poffset) 4782 { 4783 struct inode *inode = file_inode(file); 4784 struct timespec64 atime, mtime; 4785 char *read_data; 4786 int rc; 4787 4788 /* Is the page cached? 
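 * cifs_readpage_from_fscache() returns 0 only when the page was filled
 * from the local cache, in which case the network read below is skipped
 * entirely; any other result falls through to cifs_read() against the
 * server.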
*/ 4789 rc = cifs_readpage_from_fscache(inode, page); 4790 if (rc == 0) 4791 goto read_complete; 4792 4793 read_data = kmap(page); 4794 /* for reads over a certain size could initiate async read ahead */ 4795 4796 rc = cifs_read(file, read_data, PAGE_SIZE, poffset); 4797 4798 if (rc < 0) 4799 goto io_error; 4800 else 4801 cifs_dbg(FYI, "Bytes read %d\n", rc); 4802 4803 /* we do not want atime to be less than mtime, it broke some apps */ 4804 atime = inode_set_atime_to_ts(inode, current_time(inode)); 4805 mtime = inode_get_mtime(inode); 4806 if (timespec64_compare(&atime, &mtime) < 0) 4807 inode_set_atime_to_ts(inode, inode_get_mtime(inode)); 4808 4809 if (PAGE_SIZE > rc) 4810 memset(read_data + rc, 0, PAGE_SIZE - rc); 4811 4812 flush_dcache_page(page); 4813 SetPageUptodate(page); 4814 rc = 0; 4815 4816 io_error: 4817 kunmap(page); 4818 4819 read_complete: 4820 unlock_page(page); 4821 return rc; 4822 } 4823 4824 static int cifs_read_folio(struct file *file, struct folio *folio) 4825 { 4826 struct page *page = &folio->page; 4827 loff_t offset = page_file_offset(page); 4828 int rc = -EACCES; 4829 unsigned int xid; 4830 4831 xid = get_xid(); 4832 4833 if (file->private_data == NULL) { 4834 rc = -EBADF; 4835 free_xid(xid); 4836 return rc; 4837 } 4838 4839 cifs_dbg(FYI, "read_folio %p at offset %d 0x%x\n", 4840 page, (int)offset, (int)offset); 4841 4842 rc = cifs_readpage_worker(file, page, &offset); 4843 4844 free_xid(xid); 4845 return rc; 4846 } 4847 4848 static int is_inode_writable(struct cifsInodeInfo *cifs_inode) 4849 { 4850 struct cifsFileInfo *open_file; 4851 4852 spin_lock(&cifs_inode->open_file_lock); 4853 list_for_each_entry(open_file, &cifs_inode->openFileList, flist) { 4854 if (OPEN_FMODE(open_file->f_flags) & FMODE_WRITE) { 4855 spin_unlock(&cifs_inode->open_file_lock); 4856 return 1; 4857 } 4858 } 4859 spin_unlock(&cifs_inode->open_file_lock); 4860 return 0; 4861 } 4862 4863 /* We do not want to update the file size from server for inodes 4864 open for write - to avoid races with writepage extending 4865 the file - in the future we could consider allowing 4866 refreshing the inode only on increases in the file size 4867 but this is tricky to do without racing with writebehind 4868 page caching in the current Linux kernel design */ 4869 bool is_size_safe_to_change(struct cifsInodeInfo *cifsInode, __u64 end_of_file, 4870 bool from_readdir) 4871 { 4872 if (!cifsInode) 4873 return true; 4874 4875 if (is_inode_writable(cifsInode) || 4876 ((cifsInode->oplock & CIFS_CACHE_RW_FLG) != 0 && from_readdir)) { 4877 /* This inode is open for write at least once */ 4878 struct cifs_sb_info *cifs_sb; 4879 4880 cifs_sb = CIFS_SB(cifsInode->netfs.inode.i_sb); 4881 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DIRECT_IO) { 4882 /* since no page cache to corrupt on directio 4883 we can change size safely */ 4884 return true; 4885 } 4886 4887 if (i_size_read(&cifsInode->netfs.inode) < end_of_file) 4888 return true; 4889 4890 return false; 4891 } else 4892 return true; 4893 } 4894 4895 static int cifs_write_begin(struct file *file, struct address_space *mapping, 4896 loff_t pos, unsigned len, 4897 struct page **pagep, void **fsdata) 4898 { 4899 int oncethru = 0; 4900 pgoff_t index = pos >> PAGE_SHIFT; 4901 loff_t offset = pos & (PAGE_SIZE - 1); 4902 loff_t page_start = pos & PAGE_MASK; 4903 loff_t i_size; 4904 struct page *page; 4905 int rc = 0; 4906 4907 cifs_dbg(FYI, "write_begin from %lld len %d\n", (long long)pos, len); 4908 4909 start: 4910 page = grab_cache_page_write_begin(mapping, index); 4911 if 
(!page) { 4912 rc = -ENOMEM; 4913 goto out; 4914 } 4915 4916 if (PageUptodate(page)) 4917 goto out; 4918 4919 /* 4920 * If we write a full page it will be up to date, no need to read from 4921 * the server. If the write is short, we'll end up doing a sync write 4922 * instead. 4923 */ 4924 if (len == PAGE_SIZE) 4925 goto out; 4926 4927 /* 4928 * optimize away the read when we have an oplock, and we're not 4929 * expecting to use any of the data we'd be reading in. That 4930 * is, when the page lies beyond the EOF, or straddles the EOF 4931 * and the write will cover all of the existing data. 4932 */ 4933 if (CIFS_CACHE_READ(CIFS_I(mapping->host))) { 4934 i_size = i_size_read(mapping->host); 4935 if (page_start >= i_size || 4936 (offset == 0 && (pos + len) >= i_size)) { 4937 zero_user_segments(page, 0, offset, 4938 offset + len, 4939 PAGE_SIZE); 4940 /* 4941 * PageChecked means that the parts of the page 4942 * to which we're not writing are considered up 4943 * to date. Once the data is copied to the 4944 * page, it can be set uptodate. 4945 */ 4946 SetPageChecked(page); 4947 goto out; 4948 } 4949 } 4950 4951 if ((file->f_flags & O_ACCMODE) != O_WRONLY && !oncethru) { 4952 /* 4953 * might as well read a page, it is fast enough. If we get 4954 * an error, we don't need to return it. cifs_write_end will 4955 * do a sync write instead since PG_uptodate isn't set. 4956 */ 4957 cifs_readpage_worker(file, page, &page_start); 4958 put_page(page); 4959 oncethru = 1; 4960 goto start; 4961 } else { 4962 /* we could try using another file handle if there is one - 4963 but how would we lock it to prevent close of that handle 4964 racing with this read? In any case 4965 this will be written out by write_end so is fine */ 4966 } 4967 out: 4968 *pagep = page; 4969 return rc; 4970 } 4971 4972 static bool cifs_release_folio(struct folio *folio, gfp_t gfp) 4973 { 4974 if (folio_test_private(folio)) 4975 return 0; 4976 if (folio_test_fscache(folio)) { 4977 if (current_is_kswapd() || !(gfp & __GFP_FS)) 4978 return false; 4979 folio_wait_fscache(folio); 4980 } 4981 fscache_note_page_release(cifs_inode_cookie(folio->mapping->host)); 4982 return true; 4983 } 4984 4985 static void cifs_invalidate_folio(struct folio *folio, size_t offset, 4986 size_t length) 4987 { 4988 folio_wait_fscache(folio); 4989 } 4990 4991 static int cifs_launder_folio(struct folio *folio) 4992 { 4993 int rc = 0; 4994 loff_t range_start = folio_pos(folio); 4995 loff_t range_end = range_start + folio_size(folio); 4996 struct writeback_control wbc = { 4997 .sync_mode = WB_SYNC_ALL, 4998 .nr_to_write = 0, 4999 .range_start = range_start, 5000 .range_end = range_end, 5001 }; 5002 5003 cifs_dbg(FYI, "Launder page: %lu\n", folio->index); 5004 5005 if (folio_clear_dirty_for_io(folio)) 5006 rc = cifs_writepage_locked(&folio->page, &wbc); 5007 5008 folio_wait_fscache(folio); 5009 return rc; 5010 } 5011 5012 void cifs_oplock_break(struct work_struct *work) 5013 { 5014 struct cifsFileInfo *cfile = container_of(work, struct cifsFileInfo, 5015 oplock_break); 5016 struct inode *inode = d_inode(cfile->dentry); 5017 struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb); 5018 struct cifsInodeInfo *cinode = CIFS_I(inode); 5019 struct cifs_tcon *tcon; 5020 struct TCP_Server_Info *server; 5021 struct tcon_link *tlink; 5022 int rc = 0; 5023 bool purge_cache = false, oplock_break_cancelled; 5024 __u64 persistent_fid, volatile_fid; 5025 __u16 net_fid; 5026 5027 wait_on_bit(&cinode->flags, CIFS_INODE_PENDING_WRITERS, 5028 TASK_UNINTERRUPTIBLE); 5029 5030 tlink = 
cifs_sb_tlink(cifs_sb); 5031 if (IS_ERR(tlink)) 5032 goto out; 5033 tcon = tlink_tcon(tlink); 5034 server = tcon->ses->server; 5035 5036 server->ops->downgrade_oplock(server, cinode, cfile->oplock_level, 5037 cfile->oplock_epoch, &purge_cache); 5038 5039 if (!CIFS_CACHE_WRITE(cinode) && CIFS_CACHE_READ(cinode) && 5040 cifs_has_mand_locks(cinode)) { 5041 cifs_dbg(FYI, "Reset oplock to None for inode=%p due to mand locks\n", 5042 inode); 5043 cinode->oplock = 0; 5044 } 5045 5046 if (inode && S_ISREG(inode->i_mode)) { 5047 if (CIFS_CACHE_READ(cinode)) 5048 break_lease(inode, O_RDONLY); 5049 else 5050 break_lease(inode, O_WRONLY); 5051 rc = filemap_fdatawrite(inode->i_mapping); 5052 if (!CIFS_CACHE_READ(cinode) || purge_cache) { 5053 rc = filemap_fdatawait(inode->i_mapping); 5054 mapping_set_error(inode->i_mapping, rc); 5055 cifs_zap_mapping(inode); 5056 } 5057 cifs_dbg(FYI, "Oplock flush inode %p rc %d\n", inode, rc); 5058 if (CIFS_CACHE_WRITE(cinode)) 5059 goto oplock_break_ack; 5060 } 5061 5062 rc = cifs_push_locks(cfile); 5063 if (rc) 5064 cifs_dbg(VFS, "Push locks rc = %d\n", rc); 5065 5066 oplock_break_ack: 5067 /* 5068 * When an oplock break is received and there are no active file 5069 * handles, only cached ones, schedule the deferred close immediately 5070 * so that a new open will not reuse the cached handle. 5071 */ 5072 5073 if (!CIFS_CACHE_HANDLE(cinode) && !list_empty(&cinode->deferred_closes)) 5074 cifs_close_deferred_file(cinode); 5075 5076 persistent_fid = cfile->fid.persistent_fid; 5077 volatile_fid = cfile->fid.volatile_fid; 5078 net_fid = cfile->fid.netfid; 5079 oplock_break_cancelled = cfile->oplock_break_cancelled; 5080 5081 _cifsFileInfo_put(cfile, false /* do not wait for ourselves */, false); 5082 /* 5083 * MS-SMB2 3.2.5.19.1 and 3.2.5.19.2 (and MS-CIFS 3.2.5.42) do not require 5084 * an acknowledgment to be sent when the file has already been closed. 5085 */ 5086 spin_lock(&cinode->open_file_lock); 5087 /* check list empty since can race with kill_sb calling tree disconnect */ 5088 if (!oplock_break_cancelled && !list_empty(&cinode->openFileList)) { 5089 spin_unlock(&cinode->open_file_lock); 5090 rc = server->ops->oplock_response(tcon, persistent_fid, 5091 volatile_fid, net_fid, cinode); 5092 cifs_dbg(FYI, "Oplock release rc = %d\n", rc); 5093 } else 5094 spin_unlock(&cinode->open_file_lock); 5095 5096 cifs_put_tlink(tlink); 5097 out: 5098 cifs_done_oplock_break(cinode); 5099 } 5100 5101 /* 5102 * The presence of cifs_direct_io() in the address space ops vector 5103 * allows open() with O_DIRECT, which would have failed otherwise. 5104 * 5105 * In the non-cached mode (mount with cache=none), we shunt off direct read and write requests 5106 * so this method should never be called. 5107 * 5108 * Direct IO is not yet supported in the cached mode.
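 *
 * (For reference, the VFS-side gate this satisfies is in do_dentry_open();
 * paraphrased here, and liable to differ between kernel versions:
 *
 *	if (f->f_flags & O_DIRECT) {
 *		if (!f->f_mapping->a_ops || !f->f_mapping->a_ops->direct_IO)
 *			return -EINVAL;
 *	}
 *
 * so merely having the method present lets O_DIRECT opens through, even
 * though the method itself always fails if it is ever reached.)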
5109 */ 5110 static ssize_t 5111 cifs_direct_io(struct kiocb *iocb, struct iov_iter *iter) 5112 { 5113 /* 5114 * FIXME 5115 * Eventually need to support direct IO for non forcedirectio mounts 5116 */ 5117 return -EINVAL; 5118 } 5119 5120 static int cifs_swap_activate(struct swap_info_struct *sis, 5121 struct file *swap_file, sector_t *span) 5122 { 5123 struct cifsFileInfo *cfile = swap_file->private_data; 5124 struct inode *inode = swap_file->f_mapping->host; 5125 unsigned long blocks; 5126 long long isize; 5127 5128 cifs_dbg(FYI, "swap activate\n"); 5129 5130 if (!swap_file->f_mapping->a_ops->swap_rw) 5131 /* Cannot support swap */ 5132 return -EINVAL; 5133 5134 spin_lock(&inode->i_lock); 5135 blocks = inode->i_blocks; 5136 isize = inode->i_size; 5137 spin_unlock(&inode->i_lock); 5138 if (blocks*512 < isize) { 5139 pr_warn("swap activate: swapfile has holes\n"); 5140 return -EINVAL; 5141 } 5142 *span = sis->pages; 5143 5144 pr_warn_once("Swap support over SMB3 is experimental\n"); 5145 5146 /* 5147 * TODO: consider adding ACL (or documenting how) to prevent other 5148 * users (on this or other systems) from reading it 5149 */ 5150 5151 5152 /* TODO: add sk_set_memalloc(inet) or similar */ 5153 5154 if (cfile) 5155 cfile->swapfile = true; 5156 /* 5157 * TODO: Since file already open, we can't open with DENY_ALL here 5158 * but we could add call to grab a byte range lock to prevent others 5159 * from reading or writing the file 5160 */ 5161 5162 sis->flags |= SWP_FS_OPS; 5163 return add_swap_extent(sis, 0, sis->max, 0); 5164 } 5165 5166 static void cifs_swap_deactivate(struct file *file) 5167 { 5168 struct cifsFileInfo *cfile = file->private_data; 5169 5170 cifs_dbg(FYI, "swap deactivate\n"); 5171 5172 /* TODO: undo sk_set_memalloc(inet) will eventually be needed */ 5173 5174 if (cfile) 5175 cfile->swapfile = false; 5176 5177 /* do we need to unpin (or unlock) the file */ 5178 } 5179 5180 /* 5181 * Mark a page as having been made dirty and thus needing writeback. We also 5182 * need to pin the cache object to write back to. 5183 */ 5184 #ifdef CONFIG_CIFS_FSCACHE 5185 static bool cifs_dirty_folio(struct address_space *mapping, struct folio *folio) 5186 { 5187 return fscache_dirty_folio(mapping, folio, 5188 cifs_inode_cookie(mapping->host)); 5189 } 5190 #else 5191 #define cifs_dirty_folio filemap_dirty_folio 5192 #endif 5193 5194 const struct address_space_operations cifs_addr_ops = { 5195 .read_folio = cifs_read_folio, 5196 .readahead = cifs_readahead, 5197 .writepages = cifs_writepages, 5198 .write_begin = cifs_write_begin, 5199 .write_end = cifs_write_end, 5200 .dirty_folio = cifs_dirty_folio, 5201 .release_folio = cifs_release_folio, 5202 .direct_IO = cifs_direct_io, 5203 .invalidate_folio = cifs_invalidate_folio, 5204 .launder_folio = cifs_launder_folio, 5205 .migrate_folio = filemap_migrate_folio, 5206 /* 5207 * TODO: investigate and if useful we could add an is_dirty_writeback 5208 * helper if needed 5209 */ 5210 .swap_activate = cifs_swap_activate, 5211 .swap_deactivate = cifs_swap_deactivate, 5212 }; 5213 5214 /* 5215 * cifs_readahead requires the server to support a buffer large enough to 5216 * contain the header plus one complete page of data. Otherwise, we need 5217 * to leave cifs_readahead out of the address space operations. 
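 *
 * (The choice between cifs_addr_ops and cifs_addr_ops_smallbuf is made
 * when the inode's mapping is set up -- see cifs_set_ops() in inode.c --
 * based on whether the server's negotiated buffer can hold
 * MAX_CIFS_HDR_SIZE plus PAGE_SIZE of data; the exact field consulted
 * has varied between kernel versions.)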
5218 */ 5219 const struct address_space_operations cifs_addr_ops_smallbuf = { 5220 .read_folio = cifs_read_folio, 5221 .writepages = cifs_writepages, 5222 .write_begin = cifs_write_begin, 5223 .write_end = cifs_write_end, 5224 .dirty_folio = cifs_dirty_folio, 5225 .release_folio = cifs_release_folio, 5226 .invalidate_folio = cifs_invalidate_folio, 5227 .launder_folio = cifs_launder_folio, 5228 .migrate_folio = filemap_migrate_folio, 5229 }; 5230