// SPDX-License-Identifier: LGPL-2.1
/*
 *
 *   vfs operations that deal with files
 *
 *   Copyright (C) International Business Machines Corp., 2002,2010
 *   Author(s): Steve French (sfrench@us.ibm.com)
 *              Jeremy Allison (jra@samba.org)
 *
 */
#include <linux/fs.h>
#include <linux/filelock.h>
#include <linux/backing-dev.h>
#include <linux/stat.h>
#include <linux/fcntl.h>
#include <linux/pagemap.h>
#include <linux/pagevec.h>
#include <linux/writeback.h>
#include <linux/task_io_accounting_ops.h>
#include <linux/delay.h>
#include <linux/mount.h>
#include <linux/slab.h>
#include <linux/swap.h>
#include <linux/mm.h>
#include <asm/div64.h>
#include "cifsfs.h"
#include "cifspdu.h"
#include "cifsglob.h"
#include "cifsproto.h"
#include "smb2proto.h"
#include "cifs_unicode.h"
#include "cifs_debug.h"
#include "cifs_fs_sb.h"
#include "fscache.h"
#include "smbdirect.h"
#include "fs_context.h"
#include "cifs_ioctl.h"
#include "cached_dir.h"

/*
 * Remove the dirty flags from a span of pages.
 */
static void cifs_undirty_folios(struct inode *inode, loff_t start, unsigned int len)
{
	struct address_space *mapping = inode->i_mapping;
	struct folio *folio;
	pgoff_t end;

	XA_STATE(xas, &mapping->i_pages, start / PAGE_SIZE);

	rcu_read_lock();

	end = (start + len - 1) / PAGE_SIZE;
	xas_for_each_marked(&xas, folio, end, PAGECACHE_TAG_DIRTY) {
		if (xas_retry(&xas, folio))
			continue;
		xas_pause(&xas);
		rcu_read_unlock();
		folio_lock(folio);
		folio_clear_dirty_for_io(folio);
		folio_unlock(folio);
		rcu_read_lock();
	}

	rcu_read_unlock();
}

/*
 * Completion of write to server.
 */
void cifs_pages_written_back(struct inode *inode, loff_t start, unsigned int len)
{
	struct address_space *mapping = inode->i_mapping;
	struct folio *folio;
	pgoff_t end;

	XA_STATE(xas, &mapping->i_pages, start / PAGE_SIZE);

	if (!len)
		return;

	rcu_read_lock();

	end = (start + len - 1) / PAGE_SIZE;
	xas_for_each(&xas, folio, end) {
		if (xas_retry(&xas, folio))
			continue;
		if (!folio_test_writeback(folio)) {
			WARN_ONCE(1, "bad %x @%llx page %lx %lx\n",
				  len, start, folio->index, end);
			continue;
		}

		folio_detach_private(folio);
		folio_end_writeback(folio);
	}

	rcu_read_unlock();
}

/*
 * Failure of write to server.
 */
void cifs_pages_write_failed(struct inode *inode, loff_t start, unsigned int len)
{
	struct address_space *mapping = inode->i_mapping;
	struct folio *folio;
	pgoff_t end;

	XA_STATE(xas, &mapping->i_pages, start / PAGE_SIZE);

	if (!len)
		return;

	rcu_read_lock();

	end = (start + len - 1) / PAGE_SIZE;
	xas_for_each(&xas, folio, end) {
		if (xas_retry(&xas, folio))
			continue;
		if (!folio_test_writeback(folio)) {
			WARN_ONCE(1, "bad %x @%llx page %lx %lx\n",
				  len, start, folio->index, end);
			continue;
		}

		folio_set_error(folio);
		folio_end_writeback(folio);
	}

	rcu_read_unlock();
}
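
/*
 * Note that, like the two helpers above, the helper below walks the
 * pagecache xarray under the RCU read lock for the byte range
 * [start, start + len); here each folio is redirtied with
 * filemap_dirty_folio() before writeback is ended, so a later writeback
 * pass will retry the I/O.
 */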

/*
 * Redirty pages after a temporary failure.
 */
void cifs_pages_write_redirty(struct inode *inode, loff_t start, unsigned int len)
{
	struct address_space *mapping = inode->i_mapping;
	struct folio *folio;
	pgoff_t end;

	XA_STATE(xas, &mapping->i_pages, start / PAGE_SIZE);

	if (!len)
		return;

	rcu_read_lock();

	end = (start + len - 1) / PAGE_SIZE;
	xas_for_each(&xas, folio, end) {
		if (!folio_test_writeback(folio)) {
			WARN_ONCE(1, "bad %x @%llx page %lx %lx\n",
				  len, start, folio->index, end);
			continue;
		}

		filemap_dirty_folio(folio->mapping, folio);
		folio_end_writeback(folio);
	}

	rcu_read_unlock();
}

/*
 * Mark all open files on the tree connection as invalid, since they were
 * closed when the session to the server was lost.
 */
void
cifs_mark_open_files_invalid(struct cifs_tcon *tcon)
{
	struct cifsFileInfo *open_file = NULL;
	struct list_head *tmp;
	struct list_head *tmp1;

	/* only send once per connect */
	spin_lock(&tcon->tc_lock);
	if (tcon->need_reconnect)
		tcon->status = TID_NEED_RECON;

	if (tcon->status != TID_NEED_RECON) {
		spin_unlock(&tcon->tc_lock);
		return;
	}
	tcon->status = TID_IN_FILES_INVALIDATE;
	spin_unlock(&tcon->tc_lock);

	/* list all files open on tree connection and mark them invalid */
	spin_lock(&tcon->open_file_lock);
	list_for_each_safe(tmp, tmp1, &tcon->openFileList) {
		open_file = list_entry(tmp, struct cifsFileInfo, tlist);
		open_file->invalidHandle = true;
		open_file->oplock_break_cancelled = true;
	}
	spin_unlock(&tcon->open_file_lock);

	invalidate_all_cached_dirs(tcon);
	spin_lock(&tcon->tc_lock);
	if (tcon->status == TID_IN_FILES_INVALIDATE)
		tcon->status = TID_NEED_TCON;
	spin_unlock(&tcon->tc_lock);

	/*
	 * BB Add call to invalidate_inodes(sb) for all superblocks mounted
	 * to this tcon.
	 */
}
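
/*
 * Map the O_ACCMODE bits of the open flags onto NT desired access.  When
 * rdwr_for_fscache is 1, an O_WRONLY open is upgraded to read+write so
 * that a cached (fscache-backed) mount can read data back in to fill
 * around partial writes; callers fall back to plain write access
 * (rdwr_for_fscache == 0) if the server denies the upgraded open.
 */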

static inline int cifs_convert_flags(unsigned int flags, int rdwr_for_fscache)
{
	if ((flags & O_ACCMODE) == O_RDONLY)
		return GENERIC_READ;
	else if ((flags & O_ACCMODE) == O_WRONLY)
		return rdwr_for_fscache == 1 ? (GENERIC_READ | GENERIC_WRITE) : GENERIC_WRITE;
	else if ((flags & O_ACCMODE) == O_RDWR) {
		/*
		 * GENERIC_ALL is too much permission to request; it can cause
		 * unnecessary access-denied errors on create.
		 */
		/* return GENERIC_ALL; */
		return (GENERIC_READ | GENERIC_WRITE);
	}

	return (READ_CONTROL | FILE_WRITE_ATTRIBUTES | FILE_READ_ATTRIBUTES |
		FILE_WRITE_EA | FILE_APPEND_DATA | FILE_WRITE_DATA |
		FILE_READ_DATA);
}

#ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
static u32 cifs_posix_convert_flags(unsigned int flags)
{
	u32 posix_flags = 0;

	if ((flags & O_ACCMODE) == O_RDONLY)
		posix_flags = SMB_O_RDONLY;
	else if ((flags & O_ACCMODE) == O_WRONLY)
		posix_flags = SMB_O_WRONLY;
	else if ((flags & O_ACCMODE) == O_RDWR)
		posix_flags = SMB_O_RDWR;

	if (flags & O_CREAT) {
		posix_flags |= SMB_O_CREAT;
		if (flags & O_EXCL)
			posix_flags |= SMB_O_EXCL;
	} else if (flags & O_EXCL)
		cifs_dbg(FYI, "Application %s pid %d has incorrectly set O_EXCL flag but not O_CREAT on file open. Ignoring O_EXCL\n",
			 current->comm, current->tgid);

	if (flags & O_TRUNC)
		posix_flags |= SMB_O_TRUNC;
	/* be safe and imply O_SYNC for O_DSYNC */
	if (flags & O_DSYNC)
		posix_flags |= SMB_O_SYNC;
	if (flags & O_DIRECTORY)
		posix_flags |= SMB_O_DIRECTORY;
	if (flags & O_NOFOLLOW)
		posix_flags |= SMB_O_NOFOLLOW;
	if (flags & O_DIRECT)
		posix_flags |= SMB_O_DIRECT;

	return posix_flags;
}
#endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */

static inline int cifs_get_disposition(unsigned int flags)
{
	if ((flags & (O_CREAT | O_EXCL)) == (O_CREAT | O_EXCL))
		return FILE_CREATE;
	else if ((flags & (O_CREAT | O_TRUNC)) == (O_CREAT | O_TRUNC))
		return FILE_OVERWRITE_IF;
	else if ((flags & O_CREAT) == O_CREAT)
		return FILE_OPEN_IF;
	else if ((flags & O_TRUNC) == O_TRUNC)
		return FILE_OVERWRITE;
	else
		return FILE_OPEN;
}

#ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
int cifs_posix_open(const char *full_path, struct inode **pinode,
		    struct super_block *sb, int mode, unsigned int f_flags,
		    __u32 *poplock, __u16 *pnetfid, unsigned int xid)
{
	int rc;
	FILE_UNIX_BASIC_INFO *presp_data;
	__u32 posix_flags = 0;
	struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
	struct cifs_fattr fattr;
	struct tcon_link *tlink;
	struct cifs_tcon *tcon;

	cifs_dbg(FYI, "posix open %s\n", full_path);

	presp_data = kzalloc(sizeof(FILE_UNIX_BASIC_INFO), GFP_KERNEL);
	if (presp_data == NULL)
		return -ENOMEM;

	tlink = cifs_sb_tlink(cifs_sb);
	if (IS_ERR(tlink)) {
		rc = PTR_ERR(tlink);
		goto posix_open_ret;
	}

	tcon = tlink_tcon(tlink);
	mode &= ~current_umask();

	posix_flags = cifs_posix_convert_flags(f_flags);
	rc = CIFSPOSIXCreate(xid, tcon, posix_flags, mode, pnetfid, presp_data,
			     poplock, full_path, cifs_sb->local_nls,
			     cifs_remap(cifs_sb));
	cifs_put_tlink(tlink);

	if (rc)
		goto posix_open_ret;

	if (presp_data->Type == cpu_to_le32(-1))
		goto posix_open_ret; /* open ok, caller does qpathinfo */

	if (!pinode)
		goto posix_open_ret; /* caller does not need info */

	cifs_unix_basic_to_fattr(&fattr, presp_data, cifs_sb);

	/* get new inode and set it up */
	if (*pinode == NULL) {
		cifs_fill_uniqueid(sb, &fattr);
		*pinode = cifs_iget(sb, &fattr);
		if (!*pinode) {
			rc = -ENOMEM;
			goto posix_open_ret;
		}
	} else {
		cifs_revalidate_mapping(*pinode);
		rc = cifs_fattr_to_inode(*pinode, &fattr, false);
	}

posix_open_ret:
	kfree(presp_data);
	return rc;
}
#endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
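
/*
 * Open the file with an NT-style create request and then refresh the
 * inode from the server's metadata.  If the post-open inode query fails,
 * the handle is closed again and -ESTALE is mapped to -EOPENSTALE so the
 * VFS can retry the open.
 */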

static int cifs_nt_open(const char *full_path, struct inode *inode, struct cifs_sb_info *cifs_sb,
			struct cifs_tcon *tcon, unsigned int f_flags, __u32 *oplock,
			struct cifs_fid *fid, unsigned int xid, struct cifs_open_info_data *buf)
{
	int rc;
	int desired_access;
	int disposition;
	int create_options = CREATE_NOT_DIR;
	struct TCP_Server_Info *server = tcon->ses->server;
	struct cifs_open_parms oparms;
	int rdwr_for_fscache = 0;

	if (!server->ops->open)
		return -ENOSYS;

	/* If we're caching, we need to be able to fill in around partial writes. */
	if (cifs_fscache_enabled(inode) && (f_flags & O_ACCMODE) == O_WRONLY)
		rdwr_for_fscache = 1;

	desired_access = cifs_convert_flags(f_flags, rdwr_for_fscache);

	/*********************************************************************
	 *  open flag mapping table:
	 *
	 *	POSIX Flag            CIFS Disposition
	 *	----------            ----------------
	 *	O_CREAT               FILE_OPEN_IF
	 *	O_CREAT | O_EXCL      FILE_CREATE
	 *	O_CREAT | O_TRUNC     FILE_OVERWRITE_IF
	 *	O_TRUNC               FILE_OVERWRITE
	 *	none of the above     FILE_OPEN
	 *
	 *	Note that there is no direct match for disposition
	 *	FILE_SUPERSEDE (ie create whether or not the file exists).
	 *	O_CREAT | O_TRUNC is similar but truncates the existing
	 *	file rather than creating a new file as FILE_SUPERSEDE does
	 *	(which uses the attributes / metadata passed in on the open
	 *	call).
	 *
	 *	O_SYNC is a reasonable match to the CIFS writethrough flag
	 *	and the read/write flags match reasonably.  O_LARGEFILE
	 *	is irrelevant because largefile support is always used
	 *	by this client.  Flags O_APPEND, O_DIRECT, O_DIRECTORY,
	 *	O_FASYNC, O_NOFOLLOW, O_NONBLOCK need further investigation.
	 *********************************************************************/

	disposition = cifs_get_disposition(f_flags);

	/* BB pass O_SYNC flag through on file attributes .. BB */

	/* O_SYNC also has bit for O_DSYNC so following check picks up either */
	if (f_flags & O_SYNC)
		create_options |= CREATE_WRITE_THROUGH;

	if (f_flags & O_DIRECT)
		create_options |= CREATE_NO_BUFFER;

retry_open:
	oparms = (struct cifs_open_parms) {
		.tcon = tcon,
		.cifs_sb = cifs_sb,
		.desired_access = desired_access,
		.create_options = cifs_create_options(cifs_sb, create_options),
		.disposition = disposition,
		.path = full_path,
		.fid = fid,
	};

	rc = server->ops->open(xid, &oparms, oplock, buf);
	if (rc) {
		if (rc == -EACCES && rdwr_for_fscache == 1) {
			desired_access = cifs_convert_flags(f_flags, 0);
			rdwr_for_fscache = 2;
			goto retry_open;
		}
		return rc;
	}
	if (rdwr_for_fscache == 2)
		cifs_invalidate_cache(inode, FSCACHE_INVAL_DIO_WRITE);

	/* TODO: Add support for calling posix query info but with passing in fid */
	if (tcon->unix_ext)
		rc = cifs_get_inode_info_unix(&inode, full_path, inode->i_sb,
					      xid);
	else
		rc = cifs_get_inode_info(&inode, full_path, buf, inode->i_sb,
					 xid, fid);

	if (rc) {
		server->ops->close(xid, tcon, fid);
		if (rc == -ESTALE)
			rc = -EOPENSTALE;
	}

	return rc;
}

static bool
cifs_has_mand_locks(struct cifsInodeInfo *cinode)
{
	struct cifs_fid_locks *cur;
	bool has_locks = false;

	down_read(&cinode->lock_sem);
	list_for_each_entry(cur, &cinode->llist, llist) {
		if (!list_empty(&cur->locks)) {
			has_locks = true;
			break;
		}
	}
	up_read(&cinode->lock_sem);
	return has_locks;
}

void
cifs_down_write(struct rw_semaphore *sem)
{
	while (!down_write_trylock(sem))
		msleep(10);
}

static void cifsFileInfo_put_work(struct work_struct *work);
void serverclose_work(struct work_struct *work);
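
/*
 * Build the cifsFileInfo for a freshly opened handle: allocate the
 * per-fid lock list, initialise the oplock-break / deferred-close /
 * offloaded-put work items, and link the instance onto both the tcon's
 * and the inode's lists of open files.
 */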

struct cifsFileInfo *cifs_new_fileinfo(struct cifs_fid *fid, struct file *file,
				       struct tcon_link *tlink, __u32 oplock,
				       const char *symlink_target)
{
	struct dentry *dentry = file_dentry(file);
	struct inode *inode = d_inode(dentry);
	struct cifsInodeInfo *cinode = CIFS_I(inode);
	struct cifsFileInfo *cfile;
	struct cifs_fid_locks *fdlocks;
	struct cifs_tcon *tcon = tlink_tcon(tlink);
	struct TCP_Server_Info *server = tcon->ses->server;

	cfile = kzalloc(sizeof(struct cifsFileInfo), GFP_KERNEL);
	if (cfile == NULL)
		return cfile;

	fdlocks = kzalloc(sizeof(struct cifs_fid_locks), GFP_KERNEL);
	if (!fdlocks) {
		kfree(cfile);
		return NULL;
	}

	if (symlink_target) {
		cfile->symlink_target = kstrdup(symlink_target, GFP_KERNEL);
		if (!cfile->symlink_target) {
			kfree(fdlocks);
			kfree(cfile);
			return NULL;
		}
	}

	INIT_LIST_HEAD(&fdlocks->locks);
	fdlocks->cfile = cfile;
	cfile->llist = fdlocks;

	cfile->count = 1;
	cfile->pid = current->tgid;
	cfile->uid = current_fsuid();
	cfile->dentry = dget(dentry);
	cfile->f_flags = file->f_flags;
	cfile->status_file_deleted = false;
	cfile->invalidHandle = false;
	cfile->deferred_close_scheduled = false;
	cfile->tlink = cifs_get_tlink(tlink);
	INIT_WORK(&cfile->oplock_break, cifs_oplock_break);
	INIT_WORK(&cfile->put, cifsFileInfo_put_work);
	INIT_WORK(&cfile->serverclose, serverclose_work);
	INIT_DELAYED_WORK(&cfile->deferred, smb2_deferred_work_close);
	mutex_init(&cfile->fh_mutex);
	spin_lock_init(&cfile->file_info_lock);

	cifs_sb_active(inode->i_sb);

	/*
	 * If the server returned a read oplock and we have mandatory brlocks,
	 * set oplock level to None.
	 */
	if (server->ops->is_read_op(oplock) && cifs_has_mand_locks(cinode)) {
		cifs_dbg(FYI, "Reset oplock val from read to None due to mand locks\n");
		oplock = 0;
	}

	cifs_down_write(&cinode->lock_sem);
	list_add(&fdlocks->llist, &cinode->llist);
	up_write(&cinode->lock_sem);

	spin_lock(&tcon->open_file_lock);
	if (fid->pending_open->oplock != CIFS_OPLOCK_NO_CHANGE && oplock)
		oplock = fid->pending_open->oplock;
	list_del(&fid->pending_open->olist);

	fid->purge_cache = false;
	server->ops->set_fid(cfile, fid, oplock);

	list_add(&cfile->tlist, &tcon->openFileList);
	atomic_inc(&tcon->num_local_opens);

	/* if this is a readable file instance, put it first in the list */
	spin_lock(&cinode->open_file_lock);
	if (file->f_mode & FMODE_READ)
		list_add(&cfile->flist, &cinode->openFileList);
	else
		list_add_tail(&cfile->flist, &cinode->openFileList);
	spin_unlock(&cinode->open_file_lock);
	spin_unlock(&tcon->open_file_lock);

	if (fid->purge_cache)
		cifs_zap_mapping(inode);

	file->private_data = cfile;
	return cfile;
}
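
/*
 * Take an additional reference on an open file instance.  Every
 * cifsFileInfo_get() must be balanced by a cifsFileInfo_put(); the
 * server handle is only closed once the final reference is dropped.
 */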

struct cifsFileInfo *
cifsFileInfo_get(struct cifsFileInfo *cifs_file)
{
	spin_lock(&cifs_file->file_info_lock);
	cifsFileInfo_get_locked(cifs_file);
	spin_unlock(&cifs_file->file_info_lock);
	return cifs_file;
}

static void cifsFileInfo_put_final(struct cifsFileInfo *cifs_file)
{
	struct inode *inode = d_inode(cifs_file->dentry);
	struct cifsInodeInfo *cifsi = CIFS_I(inode);
	struct cifsLockInfo *li, *tmp;
	struct super_block *sb = inode->i_sb;

	/*
	 * Delete any outstanding lock records. We'll lose them when the file
	 * is closed anyway.
	 */
	cifs_down_write(&cifsi->lock_sem);
	list_for_each_entry_safe(li, tmp, &cifs_file->llist->locks, llist) {
		list_del(&li->llist);
		cifs_del_lock_waiters(li);
		kfree(li);
	}
	list_del(&cifs_file->llist->llist);
	kfree(cifs_file->llist);
	up_write(&cifsi->lock_sem);

	cifs_put_tlink(cifs_file->tlink);
	dput(cifs_file->dentry);
	cifs_sb_deactive(sb);
	kfree(cifs_file->symlink_target);
	kfree(cifs_file);
}

static void cifsFileInfo_put_work(struct work_struct *work)
{
	struct cifsFileInfo *cifs_file = container_of(work,
			struct cifsFileInfo, put);

	cifsFileInfo_put_final(cifs_file);
}
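
/*
 * Work item for a server-side close that previously failed with -EBUSY
 * or -EAGAIN: retry the close a bounded number of times, then hand the
 * final put of the file info to the fileinfo_put workqueue (or do it
 * inline, depending on whether the put was offloaded).
 */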

void serverclose_work(struct work_struct *work)
{
	struct cifsFileInfo *cifs_file = container_of(work,
			struct cifsFileInfo, serverclose);

	struct cifs_tcon *tcon = tlink_tcon(cifs_file->tlink);

	struct TCP_Server_Info *server = tcon->ses->server;
	int rc = 0;
	int retries = 0;
	int MAX_RETRIES = 4;

	do {
		if (server->ops->close_getattr)
			rc = server->ops->close_getattr(0, tcon, cifs_file);
		else if (server->ops->close)
			rc = server->ops->close(0, tcon, &cifs_file->fid);

		if (rc == -EBUSY || rc == -EAGAIN) {
			retries++;
			msleep(250);
		}
	} while ((rc == -EBUSY || rc == -EAGAIN) && (retries < MAX_RETRIES));

	if (retries == MAX_RETRIES)
		pr_warn("Serverclose failed %d times, giving up\n", MAX_RETRIES);

	if (cifs_file->offload)
		queue_work(fileinfo_put_wq, &cifs_file->put);
	else
		cifsFileInfo_put_final(cifs_file);
}

/**
 * cifsFileInfo_put - release a reference of file priv data
 *
 * Always potentially wait for oplock handler. See _cifsFileInfo_put().
 *
 * @cifs_file: cifs/smb3 specific info (e.g. refcounts) for an open file
 */
void cifsFileInfo_put(struct cifsFileInfo *cifs_file)
{
	_cifsFileInfo_put(cifs_file, true, true);
}

/**
 * _cifsFileInfo_put - release a reference of file priv data
 *
 * This may involve closing the filehandle @cifs_file out on the
 * server. Must be called without holding tcon->open_file_lock,
 * cinode->open_file_lock and cifs_file->file_info_lock.
 *
 * If @wait_oplock_handler is true and we are releasing the last
 * reference, wait for any running oplock break handler of the file
 * and cancel any pending one.
 *
 * @cifs_file: cifs/smb3 specific info (e.g. refcounts) for an open file
 * @wait_oplock_handler: must be false if called from oplock_break_handler
 * @offload: not offloaded on close and oplock breaks
 *
 */
void _cifsFileInfo_put(struct cifsFileInfo *cifs_file,
		       bool wait_oplock_handler, bool offload)
{
	struct inode *inode = d_inode(cifs_file->dentry);
	struct cifs_tcon *tcon = tlink_tcon(cifs_file->tlink);
	struct TCP_Server_Info *server = tcon->ses->server;
	struct cifsInodeInfo *cifsi = CIFS_I(inode);
	struct super_block *sb = inode->i_sb;
	struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
	struct cifs_fid fid = {};
	struct cifs_pending_open open;
	bool oplock_break_cancelled;
	bool serverclose_offloaded = false;

	spin_lock(&tcon->open_file_lock);
	spin_lock(&cifsi->open_file_lock);
	spin_lock(&cifs_file->file_info_lock);

	cifs_file->offload = offload;
	if (--cifs_file->count > 0) {
		spin_unlock(&cifs_file->file_info_lock);
		spin_unlock(&cifsi->open_file_lock);
		spin_unlock(&tcon->open_file_lock);
		return;
	}
	spin_unlock(&cifs_file->file_info_lock);

	if (server->ops->get_lease_key)
		server->ops->get_lease_key(inode, &fid);

	/* store open in pending opens to make sure we don't miss lease break */
	cifs_add_pending_open_locked(&fid, cifs_file->tlink, &open);

	/* remove it from the lists */
	list_del(&cifs_file->flist);
	list_del(&cifs_file->tlist);
	atomic_dec(&tcon->num_local_opens);

	if (list_empty(&cifsi->openFileList)) {
		cifs_dbg(FYI, "closing last open instance for inode %p\n",
			 d_inode(cifs_file->dentry));
		/*
		 * In strict cache mode we need invalidate mapping on the last
		 * close because it may cause an error when we open this file
		 * again and get at least level II oplock.
		 */
		if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_STRICT_IO)
			set_bit(CIFS_INO_INVALID_MAPPING, &cifsi->flags);
		cifs_set_oplock_level(cifsi, 0);
	}

	spin_unlock(&cifsi->open_file_lock);
	spin_unlock(&tcon->open_file_lock);

	oplock_break_cancelled = wait_oplock_handler ?
		cancel_work_sync(&cifs_file->oplock_break) : false;

	if (!tcon->need_reconnect && !cifs_file->invalidHandle) {
		struct TCP_Server_Info *server = tcon->ses->server;
		unsigned int xid;
		int rc = 0;

		xid = get_xid();
		if (server->ops->close_getattr)
			rc = server->ops->close_getattr(xid, tcon, cifs_file);
		else if (server->ops->close)
			rc = server->ops->close(xid, tcon, &cifs_file->fid);
		_free_xid(xid);

		if (rc == -EBUSY || rc == -EAGAIN) {
			// Server close failed, hence offloading it as an async op
			queue_work(serverclose_wq, &cifs_file->serverclose);
			serverclose_offloaded = true;
		}
	}

	if (oplock_break_cancelled)
		cifs_done_oplock_break(cifsi);

	cifs_del_pending_open(&open);

	// if serverclose has been offloaded to wq (on failure), it will
	// handle offloading put as well. If serverclose not offloaded,
	// we need to handle offloading put here.
	if (!serverclose_offloaded) {
		if (offload)
			queue_work(fileinfo_put_wq, &cifs_file->put);
		else
			cifsFileInfo_put_final(cifs_file);
	}
}
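
/*
 * ->open() for regular files.  A cached handle awaiting deferred close is
 * reused when the open flags match; otherwise we try the SMB1 POSIX open
 * where the server supports it, and finally fall back to a regular NT
 * create.  The resulting handle is wrapped in a cifsFileInfo and the
 * fscache cookie is picked up at the end.
 */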

int cifs_open(struct inode *inode, struct file *file)
{
	int rc = -EACCES;
	unsigned int xid;
	__u32 oplock;
	struct cifs_sb_info *cifs_sb;
	struct TCP_Server_Info *server;
	struct cifs_tcon *tcon;
	struct tcon_link *tlink;
	struct cifsFileInfo *cfile = NULL;
	void *page;
	const char *full_path;
	bool posix_open_ok = false;
	struct cifs_fid fid = {};
	struct cifs_pending_open open;
	struct cifs_open_info_data data = {};

	xid = get_xid();

	cifs_sb = CIFS_SB(inode->i_sb);
	if (unlikely(cifs_forced_shutdown(cifs_sb))) {
		free_xid(xid);
		return -EIO;
	}

	tlink = cifs_sb_tlink(cifs_sb);
	if (IS_ERR(tlink)) {
		free_xid(xid);
		return PTR_ERR(tlink);
	}
	tcon = tlink_tcon(tlink);
	server = tcon->ses->server;

	page = alloc_dentry_path();
	full_path = build_path_from_dentry(file_dentry(file), page);
	if (IS_ERR(full_path)) {
		rc = PTR_ERR(full_path);
		goto out;
	}

	cifs_dbg(FYI, "inode = 0x%p file flags are 0x%x for %s\n",
		 inode, file->f_flags, full_path);

	if (file->f_flags & O_DIRECT &&
	    cifs_sb->mnt_cifs_flags & CIFS_MOUNT_STRICT_IO) {
		if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_BRL)
			file->f_op = &cifs_file_direct_nobrl_ops;
		else
			file->f_op = &cifs_file_direct_ops;
	}

	/* Get the cached handle as SMB2 close is deferred */
	rc = cifs_get_readable_path(tcon, full_path, &cfile);
	if (rc == 0) {
		if (file->f_flags == cfile->f_flags) {
			file->private_data = cfile;
			spin_lock(&CIFS_I(inode)->deferred_lock);
			cifs_del_deferred_close(cfile);
			spin_unlock(&CIFS_I(inode)->deferred_lock);
			goto use_cache;
		} else {
			_cifsFileInfo_put(cfile, true, false);
		}
	}

	if (server->oplocks)
		oplock = REQ_OPLOCK;
	else
		oplock = 0;

#ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
	if (!tcon->broken_posix_open && tcon->unix_ext &&
	    cap_unix(tcon->ses) && (CIFS_UNIX_POSIX_PATH_OPS_CAP &
				    le64_to_cpu(tcon->fsUnixInfo.Capability))) {
		/* can not refresh inode info since size could be stale */
		rc = cifs_posix_open(full_path, &inode, inode->i_sb,
				     cifs_sb->ctx->file_mode /* ignored */,
				     file->f_flags, &oplock, &fid.netfid, xid);
		if (rc == 0) {
			cifs_dbg(FYI, "posix open succeeded\n");
			posix_open_ok = true;
		} else if ((rc == -EINVAL) || (rc == -EOPNOTSUPP)) {
			if (tcon->ses->serverNOS)
				cifs_dbg(VFS, "server %s of type %s returned unexpected error on SMB posix open, disabling posix open support. Check if server update available.\n",
					 tcon->ses->ip_addr,
					 tcon->ses->serverNOS);
			tcon->broken_posix_open = true;
		} else if ((rc != -EIO) && (rc != -EREMOTE) &&
			   (rc != -EOPNOTSUPP)) /* path not found or net err */
			goto out;
		/*
		 * Else fallthrough to retry open the old way on network i/o
		 * or DFS errors.
		 */
	}
#endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */

	if (server->ops->get_lease_key)
		server->ops->get_lease_key(inode, &fid);

	cifs_add_pending_open(&fid, tlink, &open);

	if (!posix_open_ok) {
		if (server->ops->get_lease_key)
			server->ops->get_lease_key(inode, &fid);

		rc = cifs_nt_open(full_path, inode, cifs_sb, tcon, file->f_flags, &oplock, &fid,
				  xid, &data);
		if (rc) {
			cifs_del_pending_open(&open);
			goto out;
		}
	}

	cfile = cifs_new_fileinfo(&fid, file, tlink, oplock, data.symlink_target);
	if (cfile == NULL) {
		if (server->ops->close)
			server->ops->close(xid, tcon, &fid);
		cifs_del_pending_open(&open);
		rc = -ENOMEM;
		goto out;
	}

#ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
	if ((oplock & CIFS_CREATE_ACTION) && !posix_open_ok && tcon->unix_ext) {
		/*
		 * Time to set mode which we can not set earlier due to
		 * problems creating new read-only files.
		 */
		struct cifs_unix_set_info_args args = {
			.mode	= inode->i_mode,
			.uid	= INVALID_UID, /* no change */
			.gid	= INVALID_GID, /* no change */
			.ctime	= NO_CHANGE_64,
			.atime	= NO_CHANGE_64,
			.mtime	= NO_CHANGE_64,
			.device	= 0,
		};
		CIFSSMBUnixSetFileInfo(xid, tcon, &args, fid.netfid,
				       cfile->pid);
	}
#endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */

use_cache:
	fscache_use_cookie(cifs_inode_cookie(file_inode(file)),
			   file->f_mode & FMODE_WRITE);
	if (!(file->f_flags & O_DIRECT))
		goto out;
	if ((file->f_flags & (O_ACCMODE | O_APPEND)) == O_RDONLY)
		goto out;
	cifs_invalidate_cache(file_inode(file), FSCACHE_INVAL_DIO_WRITE);

out:
	free_dentry_path(page);
	free_xid(xid);
	cifs_put_tlink(tlink);
	cifs_free_open_info(&data);
	return rc;
}

#ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
static int cifs_push_posix_locks(struct cifsFileInfo *cfile);
#endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */

/*
 * Try to reacquire byte range locks that were released when the session
 * to the server was lost.
 */
static int
cifs_relock_file(struct cifsFileInfo *cfile)
{
	struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
	int rc = 0;
#ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
	struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
#endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */

	down_read_nested(&cinode->lock_sem, SINGLE_DEPTH_NESTING);
	if (cinode->can_cache_brlcks) {
		/* can cache locks - no need to relock */
		up_read(&cinode->lock_sem);
		return rc;
	}

#ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
	if (cap_unix(tcon->ses) &&
	    (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
	    ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
		rc = cifs_push_posix_locks(cfile);
	else
#endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
		rc = tcon->ses->server->ops->push_mand_locks(cfile);

	up_read(&cinode->lock_sem);
	return rc;
}
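
/*
 * Reopen an open handle whose server handle became invalid (e.g. after a
 * reconnect).  When can_flush is true, dirty data is written back and the
 * inode refreshed before the handle is reused; byte-range locks are then
 * reacquired via cifs_relock_file() if this was a reconnect-style reopen.
 */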

static int
cifs_reopen_file(struct cifsFileInfo *cfile, bool can_flush)
{
	int rc = -EACCES;
	unsigned int xid;
	__u32 oplock;
	struct cifs_sb_info *cifs_sb;
	struct cifs_tcon *tcon;
	struct TCP_Server_Info *server;
	struct cifsInodeInfo *cinode;
	struct inode *inode;
	void *page;
	const char *full_path;
	int desired_access;
	int disposition = FILE_OPEN;
	int create_options = CREATE_NOT_DIR;
	struct cifs_open_parms oparms;
	int rdwr_for_fscache = 0;

	xid = get_xid();
	mutex_lock(&cfile->fh_mutex);
	if (!cfile->invalidHandle) {
		mutex_unlock(&cfile->fh_mutex);
		free_xid(xid);
		return 0;
	}

	inode = d_inode(cfile->dentry);
	cifs_sb = CIFS_SB(inode->i_sb);
	tcon = tlink_tcon(cfile->tlink);
	server = tcon->ses->server;

	/*
	 * Can not grab rename sem here because various ops, including those
	 * that already have the rename sem can end up causing writepage to get
	 * called and if the server was down that means we end up here, and we
	 * can never tell if the caller already has the rename_sem.
	 */
	page = alloc_dentry_path();
	full_path = build_path_from_dentry(cfile->dentry, page);
	if (IS_ERR(full_path)) {
		mutex_unlock(&cfile->fh_mutex);
		free_dentry_path(page);
		free_xid(xid);
		return PTR_ERR(full_path);
	}

	cifs_dbg(FYI, "inode = 0x%p file flags 0x%x for %s\n",
		 inode, cfile->f_flags, full_path);

	if (tcon->ses->server->oplocks)
		oplock = REQ_OPLOCK;
	else
		oplock = 0;

#ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
	if (tcon->unix_ext && cap_unix(tcon->ses) &&
	    (CIFS_UNIX_POSIX_PATH_OPS_CAP &
	     le64_to_cpu(tcon->fsUnixInfo.Capability))) {
		/*
		 * O_CREAT, O_EXCL and O_TRUNC already had their effect on the
		 * original open. Must mask them off for a reopen.
		 */
		unsigned int oflags = cfile->f_flags &
			~(O_CREAT | O_EXCL | O_TRUNC);

		rc = cifs_posix_open(full_path, NULL, inode->i_sb,
				     cifs_sb->ctx->file_mode /* ignored */,
				     oflags, &oplock, &cfile->fid.netfid, xid);
		if (rc == 0) {
			cifs_dbg(FYI, "posix reopen succeeded\n");
			oparms.reconnect = true;
			goto reopen_success;
		}
		/*
		 * fallthrough to retry open the old way on errors, especially
		 * in the reconnect path it is important to retry hard
		 */
	}
#endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */

	/* If we're caching, we need to be able to fill in around partial writes. */
	if (cifs_fscache_enabled(inode) && (cfile->f_flags & O_ACCMODE) == O_WRONLY)
		rdwr_for_fscache = 1;

	desired_access = cifs_convert_flags(cfile->f_flags, rdwr_for_fscache);

	/* O_SYNC also has bit for O_DSYNC so following check picks up either */
	if (cfile->f_flags & O_SYNC)
		create_options |= CREATE_WRITE_THROUGH;

	if (cfile->f_flags & O_DIRECT)
		create_options |= CREATE_NO_BUFFER;

	if (server->ops->get_lease_key)
		server->ops->get_lease_key(inode, &cfile->fid);

retry_open:
	oparms = (struct cifs_open_parms) {
		.tcon = tcon,
		.cifs_sb = cifs_sb,
		.desired_access = desired_access,
		.create_options = cifs_create_options(cifs_sb, create_options),
		.disposition = disposition,
		.path = full_path,
		.fid = &cfile->fid,
		.reconnect = true,
	};

	/*
	 * Can not refresh inode by passing in file_info buf to be returned by
	 * ops->open and then calling get_inode_info with returned buf since
	 * file might have write behind data that needs to be flushed and server
	 * version of file size can be stale. If we knew for sure that inode was
	 * not dirty locally we could do this.
	 */
	rc = server->ops->open(xid, &oparms, &oplock, NULL);
	if (rc == -ENOENT && oparms.reconnect == false) {
		/* durable handle timeout is expired - open the file again */
		rc = server->ops->open(xid, &oparms, &oplock, NULL);
		/* indicate that we need to relock the file */
		oparms.reconnect = true;
	}
	if (rc == -EACCES && rdwr_for_fscache == 1) {
		desired_access = cifs_convert_flags(cfile->f_flags, 0);
		rdwr_for_fscache = 2;
		goto retry_open;
	}

	if (rc) {
		mutex_unlock(&cfile->fh_mutex);
		cifs_dbg(FYI, "cifs_reopen returned 0x%x\n", rc);
		cifs_dbg(FYI, "oplock: %d\n", oplock);
		goto reopen_error_exit;
	}

	if (rdwr_for_fscache == 2)
		cifs_invalidate_cache(inode, FSCACHE_INVAL_DIO_WRITE);

#ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
reopen_success:
#endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
	cfile->invalidHandle = false;
	mutex_unlock(&cfile->fh_mutex);
	cinode = CIFS_I(inode);

	if (can_flush) {
		rc = filemap_write_and_wait(inode->i_mapping);
		if (!is_interrupt_error(rc))
			mapping_set_error(inode->i_mapping, rc);

		if (tcon->posix_extensions) {
			rc = smb311_posix_get_inode_info(&inode, full_path,
							 NULL, inode->i_sb, xid);
		} else if (tcon->unix_ext) {
			rc = cifs_get_inode_info_unix(&inode, full_path,
						      inode->i_sb, xid);
		} else {
			rc = cifs_get_inode_info(&inode, full_path, NULL,
						 inode->i_sb, xid, NULL);
		}
	}
	/*
	 * Else we are writing out data to server already and could deadlock if
	 * we tried to flush data, and since we do not know if we have data that
	 * would invalidate the current end of file on the server we can not go
	 * to the server to get the new inode info.
	 */

	/*
	 * If the server returned a read oplock and we have mandatory brlocks,
	 * set oplock level to None.
	 */
	if (server->ops->is_read_op(oplock) && cifs_has_mand_locks(cinode)) {
		cifs_dbg(FYI, "Reset oplock val from read to None due to mand locks\n");
		oplock = 0;
	}

	server->ops->set_fid(cfile, &cfile->fid, oplock);
	if (oparms.reconnect)
		cifs_relock_file(cfile);

reopen_error_exit:
	free_dentry_path(page);
	free_xid(xid);
	return rc;
}

void smb2_deferred_work_close(struct work_struct *work)
{
	struct cifsFileInfo *cfile = container_of(work,
			struct cifsFileInfo, deferred.work);

	spin_lock(&CIFS_I(d_inode(cfile->dentry))->deferred_lock);
	cifs_del_deferred_close(cfile);
	cfile->deferred_close_scheduled = false;
	spin_unlock(&CIFS_I(d_inode(cfile->dentry))->deferred_lock);
	_cifsFileInfo_put(cfile, true, false);
}

static bool
smb2_can_defer_close(struct inode *inode, struct cifs_deferred_close *dclose)
{
	struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
	struct cifsInodeInfo *cinode = CIFS_I(inode);

	return (cifs_sb->ctx->closetimeo && cinode->lease_granted && dclose &&
			(cinode->oplock == CIFS_CACHE_RHW_FLG ||
			 cinode->oplock == CIFS_CACHE_RH_FLG) &&
			!test_bit(CIFS_INO_CLOSE_ON_LOCK, &cinode->flags));
}

int cifs_close(struct inode *inode, struct file *file)
{
	struct cifsFileInfo *cfile;
	struct cifsInodeInfo *cinode = CIFS_I(inode);
	struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
	struct cifs_deferred_close *dclose;

	cifs_fscache_unuse_inode_cookie(inode, file->f_mode & FMODE_WRITE);

	if (file->private_data != NULL) {
		cfile = file->private_data;
		file->private_data = NULL;
		dclose = kmalloc(sizeof(struct cifs_deferred_close), GFP_KERNEL);
		if ((cfile->status_file_deleted == false) &&
		    (smb2_can_defer_close(inode, dclose))) {
			if (test_and_clear_bit(CIFS_INO_MODIFIED_ATTR, &cinode->flags)) {
				inode_set_mtime_to_ts(inode,
						      inode_set_ctime_current(inode));
			}
			spin_lock(&cinode->deferred_lock);
			cifs_add_deferred_close(cfile, dclose);
			if (cfile->deferred_close_scheduled &&
			    delayed_work_pending(&cfile->deferred)) {
				/*
				 * If there was no pending work, mod_delayed_work
				 * queues new work, so take a reference to avoid a
				 * use-after-free.
				 */
				if (!mod_delayed_work(deferredclose_wq,
						      &cfile->deferred, cifs_sb->ctx->closetimeo))
					cifsFileInfo_get(cfile);
			} else {
				/* Deferred close for files */
				queue_delayed_work(deferredclose_wq,
						   &cfile->deferred, cifs_sb->ctx->closetimeo);
				cfile->deferred_close_scheduled = true;
				spin_unlock(&cinode->deferred_lock);
				return 0;
			}
			spin_unlock(&cinode->deferred_lock);
			_cifsFileInfo_put(cfile, true, false);
		} else {
			_cifsFileInfo_put(cfile, true, false);
			kfree(dclose);
		}
	}

	/* return code from the ->release op is always ignored */
	return 0;
}

void
cifs_reopen_persistent_handles(struct cifs_tcon *tcon)
{
	struct cifsFileInfo *open_file, *tmp;
	struct list_head tmp_list;

	if (!tcon->use_persistent || !tcon->need_reopen_files)
		return;

	tcon->need_reopen_files = false;

	cifs_dbg(FYI, "Reopen persistent handles\n");
	INIT_LIST_HEAD(&tmp_list);

	/* list all files open on tree connection, reopen resilient handles */
	spin_lock(&tcon->open_file_lock);
	list_for_each_entry(open_file, &tcon->openFileList, tlist) {
		if (!open_file->invalidHandle)
			continue;
		cifsFileInfo_get(open_file);
		list_add_tail(&open_file->rlist, &tmp_list);
	}
	spin_unlock(&tcon->open_file_lock);

	list_for_each_entry_safe(open_file, tmp, &tmp_list, rlist) {
		if (cifs_reopen_file(open_file, false /* do not flush */))
			tcon->need_reopen_files = true;
		list_del_init(&open_file->rlist);
		cifsFileInfo_put(open_file);
	}
}

int cifs_closedir(struct inode *inode, struct file *file)
{
	int rc = 0;
	unsigned int xid;
	struct cifsFileInfo *cfile = file->private_data;
	struct cifs_tcon *tcon;
	struct TCP_Server_Info *server;
	char *buf;

	cifs_dbg(FYI, "Closedir inode = 0x%p\n", inode);

	if (cfile == NULL)
		return rc;

	xid = get_xid();
	tcon = tlink_tcon(cfile->tlink);
	server = tcon->ses->server;

	cifs_dbg(FYI, "Freeing private data in close dir\n");
	spin_lock(&cfile->file_info_lock);
	if (server->ops->dir_needs_close(cfile)) {
		cfile->invalidHandle = true;
		spin_unlock(&cfile->file_info_lock);
		if (server->ops->close_dir)
			rc = server->ops->close_dir(xid, tcon, &cfile->fid);
		else
			rc = -ENOSYS;
		cifs_dbg(FYI, "Closing uncompleted readdir with rc %d\n", rc);
		/* not much we can do if it fails anyway, ignore rc */
		rc = 0;
	} else
		spin_unlock(&cfile->file_info_lock);

	buf = cfile->srch_inf.ntwrk_buf_start;
	if (buf) {
		cifs_dbg(FYI, "closedir free smb buf in srch struct\n");
		cfile->srch_inf.ntwrk_buf_start = NULL;
		if (cfile->srch_inf.smallBuf)
			cifs_small_buf_release(buf);
		else
			cifs_buf_release(buf);
	}

	cifs_put_tlink(cfile->tlink);
	kfree(file->private_data);
	file->private_data = NULL;
	/* BB can we lock the filestruct while this is going on? */
	free_xid(xid);
	return rc;
}
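
/*
 * Byte-range lock bookkeeping: locks granted by the server (or cached
 * locally while brlocks can be cached) are tracked per open handle in
 * cifs_fid_locks.  The helpers below allocate lock entries, wake waiters
 * blocked on a conflicting range, and check a proposed range against the
 * locks of every open handle on the inode.
 */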

static struct cifsLockInfo *
cifs_lock_init(__u64 offset, __u64 length, __u8 type, __u16 flags)
{
	struct cifsLockInfo *lock =
		kmalloc(sizeof(struct cifsLockInfo), GFP_KERNEL);
	if (!lock)
		return lock;
	lock->offset = offset;
	lock->length = length;
	lock->type = type;
	lock->pid = current->tgid;
	lock->flags = flags;
	INIT_LIST_HEAD(&lock->blist);
	init_waitqueue_head(&lock->block_q);
	return lock;
}

void
cifs_del_lock_waiters(struct cifsLockInfo *lock)
{
	struct cifsLockInfo *li, *tmp;
	list_for_each_entry_safe(li, tmp, &lock->blist, blist) {
		list_del_init(&li->blist);
		wake_up(&li->block_q);
	}
}

#define CIFS_LOCK_OP	0
#define CIFS_READ_OP	1
#define CIFS_WRITE_OP	2

/* @rw_check : 0 - no op, 1 - read, 2 - write */
static bool
cifs_find_fid_lock_conflict(struct cifs_fid_locks *fdlocks, __u64 offset,
			    __u64 length, __u8 type, __u16 flags,
			    struct cifsFileInfo *cfile,
			    struct cifsLockInfo **conf_lock, int rw_check)
{
	struct cifsLockInfo *li;
	struct cifsFileInfo *cur_cfile = fdlocks->cfile;
	struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;

	list_for_each_entry(li, &fdlocks->locks, llist) {
		if (offset + length <= li->offset ||
		    offset >= li->offset + li->length)
			continue;
		if (rw_check != CIFS_LOCK_OP && current->tgid == li->pid &&
		    server->ops->compare_fids(cfile, cur_cfile)) {
			/* shared lock prevents write op through the same fid */
			if (!(li->type & server->vals->shared_lock_type) ||
			    rw_check != CIFS_WRITE_OP)
				continue;
		}
		if ((type & server->vals->shared_lock_type) &&
		    ((server->ops->compare_fids(cfile, cur_cfile) &&
		      current->tgid == li->pid) || type == li->type))
			continue;
		if (rw_check == CIFS_LOCK_OP &&
		    (flags & FL_OFDLCK) && (li->flags & FL_OFDLCK) &&
		    server->ops->compare_fids(cfile, cur_cfile))
			continue;
		if (conf_lock)
			*conf_lock = li;
		return true;
	}
	return false;
}

bool
cifs_find_lock_conflict(struct cifsFileInfo *cfile, __u64 offset, __u64 length,
			__u8 type, __u16 flags,
			struct cifsLockInfo **conf_lock, int rw_check)
{
	bool rc = false;
	struct cifs_fid_locks *cur;
	struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));

	list_for_each_entry(cur, &cinode->llist, llist) {
		rc = cifs_find_fid_lock_conflict(cur, offset, length, type,
						 flags, cfile, conf_lock,
						 rw_check);
		if (rc)
			break;
	}

	return rc;
}

/*
 * Check if there is another lock that prevents us from setting the lock
 * (mandatory style). If such a lock exists, update the flock structure
 * with its properties. Otherwise, set the flock type to F_UNLCK if we can
 * cache brlocks, or leave it the same if we can't. Returns 0 if we don't
 * need to request to the server or 1 otherwise.
 */
static int
cifs_lock_test(struct cifsFileInfo *cfile, __u64 offset, __u64 length,
	       __u8 type, struct file_lock *flock)
{
	int rc = 0;
	struct cifsLockInfo *conf_lock;
	struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
	struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;
	bool exist;

	down_read(&cinode->lock_sem);

	exist = cifs_find_lock_conflict(cfile, offset, length, type,
					flock->fl_flags, &conf_lock,
					CIFS_LOCK_OP);
	if (exist) {
		flock->fl_start = conf_lock->offset;
		flock->fl_end = conf_lock->offset + conf_lock->length - 1;
		flock->fl_pid = conf_lock->pid;
		if (conf_lock->type & server->vals->shared_lock_type)
			flock->fl_type = F_RDLCK;
		else
			flock->fl_type = F_WRLCK;
	} else if (!cinode->can_cache_brlcks)
		rc = 1;
	else
		flock->fl_type = F_UNLCK;

	up_read(&cinode->lock_sem);
	return rc;
}

static void
cifs_lock_add(struct cifsFileInfo *cfile, struct cifsLockInfo *lock)
{
	struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
	cifs_down_write(&cinode->lock_sem);
	list_add_tail(&lock->llist, &cfile->llist->locks);
	up_write(&cinode->lock_sem);
}

/*
 * Set the byte-range lock (mandatory style). Returns:
 * 1) 0, if we set the lock and don't need to request to the server;
 * 2) 1, if no locks prevent us but we need to request to the server;
 * 3) -EACCES, if there is a lock that prevents us and wait is false.
 */
static int
cifs_lock_add_if(struct cifsFileInfo *cfile, struct cifsLockInfo *lock,
		 bool wait)
{
	struct cifsLockInfo *conf_lock;
	struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
	bool exist;
	int rc = 0;

try_again:
	exist = false;
	cifs_down_write(&cinode->lock_sem);

	exist = cifs_find_lock_conflict(cfile, lock->offset, lock->length,
					lock->type, lock->flags, &conf_lock,
					CIFS_LOCK_OP);
	if (!exist && cinode->can_cache_brlcks) {
		list_add_tail(&lock->llist, &cfile->llist->locks);
		up_write(&cinode->lock_sem);
		return rc;
	}

	if (!exist)
		rc = 1;
	else if (!wait)
		rc = -EACCES;
	else {
		list_add_tail(&lock->blist, &conf_lock->blist);
		up_write(&cinode->lock_sem);
		rc = wait_event_interruptible(lock->block_q,
					(lock->blist.prev == &lock->blist) &&
					(lock->blist.next == &lock->blist));
		if (!rc)
			goto try_again;
		cifs_down_write(&cinode->lock_sem);
		list_del_init(&lock->blist);
	}

	up_write(&cinode->lock_sem);
	return rc;
}

#ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
/*
 * Check if there is another lock that prevents us from setting the lock
 * (posix style). If such a lock exists, update the flock structure with
 * its properties. Otherwise, set the flock type to F_UNLCK if we can
 * cache brlocks, or leave it the same if we can't. Returns 0 if we don't
 * need to request to the server or 1 otherwise.
 */
static int
cifs_posix_lock_test(struct file *file, struct file_lock *flock)
{
	int rc = 0;
	struct cifsInodeInfo *cinode = CIFS_I(file_inode(file));
	unsigned char saved_type = flock->fl_type;

	if ((flock->fl_flags & FL_POSIX) == 0)
		return 1;

	down_read(&cinode->lock_sem);
	posix_test_lock(file, flock);

	if (flock->fl_type == F_UNLCK && !cinode->can_cache_brlcks) {
		flock->fl_type = saved_type;
		rc = 1;
	}

	up_read(&cinode->lock_sem);
	return rc;
}

/*
 * Set the byte-range lock (posix style). Returns:
 * 1) <0, if the error occurs while setting the lock;
 * 2) 0, if we set the lock and don't need to request to the server;
 * 3) FILE_LOCK_DEFERRED, if we will wait for some other file_lock;
 * 4) FILE_LOCK_DEFERRED + 1, if we need to request to the server.
 */
static int
cifs_posix_lock_set(struct file *file, struct file_lock *flock)
{
	struct cifsInodeInfo *cinode = CIFS_I(file_inode(file));
	int rc = FILE_LOCK_DEFERRED + 1;

	if ((flock->fl_flags & FL_POSIX) == 0)
		return rc;

	cifs_down_write(&cinode->lock_sem);
	if (!cinode->can_cache_brlcks) {
		up_write(&cinode->lock_sem);
		return rc;
	}

	rc = posix_lock_file(file, flock, NULL);
	up_write(&cinode->lock_sem);
	return rc;
}
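
/*
 * Push all cached mandatory byte-range locks for an open handle to the
 * server, packing as many LOCKING_ANDX ranges into each request as the
 * negotiated buffer size allows; one pass sends the exclusive locks, a
 * second pass the shared ones.
 */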

int
cifs_push_mandatory_locks(struct cifsFileInfo *cfile)
{
	unsigned int xid;
	int rc = 0, stored_rc;
	struct cifsLockInfo *li, *tmp;
	struct cifs_tcon *tcon;
	unsigned int num, max_num, max_buf;
	LOCKING_ANDX_RANGE *buf, *cur;
	static const int types[] = {
		LOCKING_ANDX_LARGE_FILES,
		LOCKING_ANDX_SHARED_LOCK | LOCKING_ANDX_LARGE_FILES
	};
	int i;

	xid = get_xid();
	tcon = tlink_tcon(cfile->tlink);

	/*
	 * Accessing maxBuf is racy with cifs_reconnect - need to store value
	 * and check it before using.
	 */
	max_buf = tcon->ses->server->maxBuf;
	if (max_buf < (sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE))) {
		free_xid(xid);
		return -EINVAL;
	}

	BUILD_BUG_ON(sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE) >
		     PAGE_SIZE);
	max_buf = min_t(unsigned int, max_buf - sizeof(struct smb_hdr),
			PAGE_SIZE);
	max_num = (max_buf - sizeof(struct smb_hdr)) /
						sizeof(LOCKING_ANDX_RANGE);
	buf = kcalloc(max_num, sizeof(LOCKING_ANDX_RANGE), GFP_KERNEL);
	if (!buf) {
		free_xid(xid);
		return -ENOMEM;
	}

	for (i = 0; i < 2; i++) {
		cur = buf;
		num = 0;
		list_for_each_entry_safe(li, tmp, &cfile->llist->locks, llist) {
			if (li->type != types[i])
				continue;
			cur->Pid = cpu_to_le16(li->pid);
			cur->LengthLow = cpu_to_le32((u32)li->length);
			cur->LengthHigh = cpu_to_le32((u32)(li->length>>32));
			cur->OffsetLow = cpu_to_le32((u32)li->offset);
			cur->OffsetHigh = cpu_to_le32((u32)(li->offset>>32));
			if (++num == max_num) {
				stored_rc = cifs_lockv(xid, tcon,
						       cfile->fid.netfid,
						       (__u8)li->type, 0, num,
						       buf);
				if (stored_rc)
					rc = stored_rc;
				cur = buf;
				num = 0;
			} else
				cur++;
		}

		if (num) {
			stored_rc = cifs_lockv(xid, tcon, cfile->fid.netfid,
					       (__u8)types[i], 0, num, buf);
			if (stored_rc)
				rc = stored_rc;
		}
	}

	kfree(buf);
	free_xid(xid);
	return rc;
}

static __u32
hash_lockowner(fl_owner_t owner)
{
	return cifs_lock_secret ^ hash32_ptr((const void *)owner);
}
#endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */

struct lock_to_push {
	struct list_head llist;
	__u64 offset;
	__u64 length;
	__u32 pid;
	__u16 netfid;
	__u8 type;
};

#ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
static int
cifs_push_posix_locks(struct cifsFileInfo *cfile)
{
	struct inode *inode = d_inode(cfile->dentry);
	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
	struct file_lock *flock;
	struct file_lock_context *flctx = locks_inode_context(inode);
	unsigned int count = 0, i;
	int rc = 0, xid, type;
	struct list_head locks_to_send, *el;
	struct lock_to_push *lck, *tmp;
	__u64 length;

	xid = get_xid();

	if (!flctx)
		goto out;

	spin_lock(&flctx->flc_lock);
	list_for_each(el, &flctx->flc_posix) {
		count++;
	}
	spin_unlock(&flctx->flc_lock);

	INIT_LIST_HEAD(&locks_to_send);

	/*
	 * Allocating count locks is enough because no FL_POSIX locks can be
	 * added to the list while we are holding cinode->lock_sem that
	 * protects locking operations of this inode.
	 */
	for (i = 0; i < count; i++) {
		lck = kmalloc(sizeof(struct lock_to_push), GFP_KERNEL);
		if (!lck) {
			rc = -ENOMEM;
			goto err_out;
		}
		list_add_tail(&lck->llist, &locks_to_send);
	}

	el = locks_to_send.next;
	spin_lock(&flctx->flc_lock);
	list_for_each_entry(flock, &flctx->flc_posix, fl_list) {
		if (el == &locks_to_send) {
			/*
			 * The list ended. We don't have enough allocated
			 * structures - something is really wrong.
			 */
			cifs_dbg(VFS, "Can't push all brlocks!\n");
			break;
		}
		length = cifs_flock_len(flock);
		if (flock->fl_type == F_RDLCK || flock->fl_type == F_SHLCK)
			type = CIFS_RDLCK;
		else
			type = CIFS_WRLCK;
		lck = list_entry(el, struct lock_to_push, llist);
		lck->pid = hash_lockowner(flock->fl_owner);
		lck->netfid = cfile->fid.netfid;
		lck->length = length;
		lck->type = type;
		lck->offset = flock->fl_start;
	}
	spin_unlock(&flctx->flc_lock);

	list_for_each_entry_safe(lck, tmp, &locks_to_send, llist) {
		int stored_rc;

		stored_rc = CIFSSMBPosixLock(xid, tcon, lck->netfid, lck->pid,
					     lck->offset, lck->length, NULL,
					     lck->type, 0);
		if (stored_rc)
			rc = stored_rc;
		list_del(&lck->llist);
		kfree(lck);
	}

out:
	free_xid(xid);
	return rc;
err_out:
	list_for_each_entry_safe(lck, tmp, &locks_to_send, llist) {
		list_del(&lck->llist);
		kfree(lck);
	}
	goto out;
}
#endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */

static int
cifs_push_locks(struct cifsFileInfo *cfile)
{
	struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
	int rc = 0;
#ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
	struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
#endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */

	/* we are going to update can_cache_brlcks here - need a write access */
	cifs_down_write(&cinode->lock_sem);
	if (!cinode->can_cache_brlcks) {
		up_write(&cinode->lock_sem);
		return rc;
	}

#ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
	if (cap_unix(tcon->ses) &&
	    (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
	    ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
		rc = cifs_push_posix_locks(cfile);
	else
#endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
		rc = tcon->ses->server->ops->push_mand_locks(cfile);

	cinode->can_cache_brlcks = false;
	up_write(&cinode->lock_sem);
	return rc;
}

static void
cifs_read_flock(struct file_lock *flock, __u32 *type, int *lock, int *unlock,
		bool *wait_flag, struct TCP_Server_Info *server)
{
	if (flock->fl_flags & FL_POSIX)
		cifs_dbg(FYI, "Posix\n");
	if (flock->fl_flags & FL_FLOCK)
		cifs_dbg(FYI, "Flock\n");
	if (flock->fl_flags & FL_SLEEP) {
		cifs_dbg(FYI, "Blocking lock\n");
		*wait_flag = true;
	}
	if (flock->fl_flags & FL_ACCESS)
		cifs_dbg(FYI, "Process suspended by mandatory locking - not implemented yet\n");
	if (flock->fl_flags & FL_LEASE)
		cifs_dbg(FYI, "Lease on file - not implemented yet\n");
	if (flock->fl_flags &
	    (~(FL_POSIX | FL_FLOCK | FL_SLEEP |
	       FL_ACCESS | FL_LEASE | FL_CLOSE | FL_OFDLCK)))
		cifs_dbg(FYI, "Unknown lock flags 0x%x\n", flock->fl_flags);

	*type = server->vals->large_lock_type;
	if (flock->fl_type == F_WRLCK) {
		cifs_dbg(FYI, "F_WRLCK\n");
		*type |= server->vals->exclusive_lock_type;
		*lock = 1;
	} else if (flock->fl_type == F_UNLCK) {
		cifs_dbg(FYI, "F_UNLCK\n");
		*type |= server->vals->unlock_lock_type;
		*unlock = 1;
		/* Check if unlock includes more than one lock range */
	} else if (flock->fl_type == F_RDLCK) {
		cifs_dbg(FYI, "F_RDLCK\n");
		*type |= server->vals->shared_lock_type;
		*lock = 1;
	} else if (flock->fl_type == F_EXLCK) {
		cifs_dbg(FYI, "F_EXLCK\n");
		*type |= server->vals->exclusive_lock_type;
		*lock = 1;
	} else if (flock->fl_type == F_SHLCK) {
		cifs_dbg(FYI, "F_SHLCK\n");
		*type |= server->vals->shared_lock_type;
		*lock = 1;
	} else
		cifs_dbg(FYI, "Unknown type of lock\n");
}
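
/*
 * F_GETLK handling: check the locally cached locks first, then probe the
 * server by taking and immediately releasing a lock over the range; if
 * the server refuses, the range is reported back as locked.
 */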
cifs_dbg(FYI, "F_EXLCK\n"); 1793 *type |= server->vals->exclusive_lock_type; 1794 *lock = 1; 1795 } else if (flock->fl_type == F_SHLCK) { 1796 cifs_dbg(FYI, "F_SHLCK\n"); 1797 *type |= server->vals->shared_lock_type; 1798 *lock = 1; 1799 } else 1800 cifs_dbg(FYI, "Unknown type of lock\n"); 1801 } 1802 1803 static int 1804 cifs_getlk(struct file *file, struct file_lock *flock, __u32 type, 1805 bool wait_flag, bool posix_lck, unsigned int xid) 1806 { 1807 int rc = 0; 1808 __u64 length = cifs_flock_len(flock); 1809 struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data; 1810 struct cifs_tcon *tcon = tlink_tcon(cfile->tlink); 1811 struct TCP_Server_Info *server = tcon->ses->server; 1812 #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY 1813 __u16 netfid = cfile->fid.netfid; 1814 1815 if (posix_lck) { 1816 int posix_lock_type; 1817 1818 rc = cifs_posix_lock_test(file, flock); 1819 if (!rc) 1820 return rc; 1821 1822 if (type & server->vals->shared_lock_type) 1823 posix_lock_type = CIFS_RDLCK; 1824 else 1825 posix_lock_type = CIFS_WRLCK; 1826 rc = CIFSSMBPosixLock(xid, tcon, netfid, 1827 hash_lockowner(flock->fl_owner), 1828 flock->fl_start, length, flock, 1829 posix_lock_type, wait_flag); 1830 return rc; 1831 } 1832 #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */ 1833 1834 rc = cifs_lock_test(cfile, flock->fl_start, length, type, flock); 1835 if (!rc) 1836 return rc; 1837 1838 /* BB we could chain these into one lock request BB */ 1839 rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length, type, 1840 1, 0, false); 1841 if (rc == 0) { 1842 rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length, 1843 type, 0, 1, false); 1844 flock->fl_type = F_UNLCK; 1845 if (rc != 0) 1846 cifs_dbg(VFS, "Error unlocking previously locked range %d during test of lock\n", 1847 rc); 1848 return 0; 1849 } 1850 1851 if (type & server->vals->shared_lock_type) { 1852 flock->fl_type = F_WRLCK; 1853 return 0; 1854 } 1855 1856 type &= ~server->vals->exclusive_lock_type; 1857 1858 rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length, 1859 type | server->vals->shared_lock_type, 1860 1, 0, false); 1861 if (rc == 0) { 1862 rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length, 1863 type | server->vals->shared_lock_type, 0, 1, false); 1864 flock->fl_type = F_RDLCK; 1865 if (rc != 0) 1866 cifs_dbg(VFS, "Error unlocking previously locked range %d during test of lock\n", 1867 rc); 1868 } else 1869 flock->fl_type = F_WRLCK; 1870 1871 return 0; 1872 } 1873 1874 void 1875 cifs_move_llist(struct list_head *source, struct list_head *dest) 1876 { 1877 struct list_head *li, *tmp; 1878 list_for_each_safe(li, tmp, source) 1879 list_move(li, dest); 1880 } 1881 1882 void 1883 cifs_free_llist(struct list_head *llist) 1884 { 1885 struct cifsLockInfo *li, *tmp; 1886 list_for_each_entry_safe(li, tmp, llist, llist) { 1887 cifs_del_lock_waiters(li); 1888 list_del(&li->llist); 1889 kfree(li); 1890 } 1891 } 1892 1893 #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY 1894 int 1895 cifs_unlock_range(struct cifsFileInfo *cfile, struct file_lock *flock, 1896 unsigned int xid) 1897 { 1898 int rc = 0, stored_rc; 1899 static const int types[] = { 1900 LOCKING_ANDX_LARGE_FILES, 1901 LOCKING_ANDX_SHARED_LOCK | LOCKING_ANDX_LARGE_FILES 1902 }; 1903 unsigned int i; 1904 unsigned int max_num, num, max_buf; 1905 LOCKING_ANDX_RANGE *buf, *cur; 1906 struct cifs_tcon *tcon = tlink_tcon(cfile->tlink); 1907 struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry)); 1908 struct cifsLockInfo *li, *tmp; 1909 __u64 length = 
cifs_flock_len(flock); 1910 struct list_head tmp_llist; 1911 1912 INIT_LIST_HEAD(&tmp_llist); 1913 1914 /* 1915 * Accessing maxBuf is racy with cifs_reconnect - need to store value 1916 * and check it before using. 1917 */ 1918 max_buf = tcon->ses->server->maxBuf; 1919 if (max_buf < (sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE))) 1920 return -EINVAL; 1921 1922 BUILD_BUG_ON(sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE) > 1923 PAGE_SIZE); 1924 max_buf = min_t(unsigned int, max_buf - sizeof(struct smb_hdr), 1925 PAGE_SIZE); 1926 max_num = (max_buf - sizeof(struct smb_hdr)) / 1927 sizeof(LOCKING_ANDX_RANGE); 1928 buf = kcalloc(max_num, sizeof(LOCKING_ANDX_RANGE), GFP_KERNEL); 1929 if (!buf) 1930 return -ENOMEM; 1931 1932 cifs_down_write(&cinode->lock_sem); 1933 for (i = 0; i < 2; i++) { 1934 cur = buf; 1935 num = 0; 1936 list_for_each_entry_safe(li, tmp, &cfile->llist->locks, llist) { 1937 if (flock->fl_start > li->offset || 1938 (flock->fl_start + length) < 1939 (li->offset + li->length)) 1940 continue; 1941 if (current->tgid != li->pid) 1942 continue; 1943 if (types[i] != li->type) 1944 continue; 1945 if (cinode->can_cache_brlcks) { 1946 /* 1947 * We can cache brlock requests - simply remove 1948 * a lock from the file's list. 1949 */ 1950 list_del(&li->llist); 1951 cifs_del_lock_waiters(li); 1952 kfree(li); 1953 continue; 1954 } 1955 cur->Pid = cpu_to_le16(li->pid); 1956 cur->LengthLow = cpu_to_le32((u32)li->length); 1957 cur->LengthHigh = cpu_to_le32((u32)(li->length>>32)); 1958 cur->OffsetLow = cpu_to_le32((u32)li->offset); 1959 cur->OffsetHigh = cpu_to_le32((u32)(li->offset>>32)); 1960 /* 1961 * We need to save a lock here to let us add it again to 1962 * the file's list if the unlock range request fails on 1963 * the server. 1964 */ 1965 list_move(&li->llist, &tmp_llist); 1966 if (++num == max_num) { 1967 stored_rc = cifs_lockv(xid, tcon, 1968 cfile->fid.netfid, 1969 li->type, num, 0, buf); 1970 if (stored_rc) { 1971 /* 1972 * We failed on the unlock range 1973 * request - add all locks from the tmp 1974 * list to the head of the file's list. 1975 */ 1976 cifs_move_llist(&tmp_llist, 1977 &cfile->llist->locks); 1978 rc = stored_rc; 1979 } else 1980 /* 1981 * The unlock range request succeeded - 1982 * free the tmp list.
1983 */ 1984 cifs_free_llist(&tmp_llist); 1985 cur = buf; 1986 num = 0; 1987 } else 1988 cur++; 1989 } 1990 if (num) { 1991 stored_rc = cifs_lockv(xid, tcon, cfile->fid.netfid, 1992 types[i], num, 0, buf); 1993 if (stored_rc) { 1994 cifs_move_llist(&tmp_llist, 1995 &cfile->llist->locks); 1996 rc = stored_rc; 1997 } else 1998 cifs_free_llist(&tmp_llist); 1999 } 2000 } 2001 2002 up_write(&cinode->lock_sem); 2003 kfree(buf); 2004 return rc; 2005 } 2006 #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */ 2007 2008 static int 2009 cifs_setlk(struct file *file, struct file_lock *flock, __u32 type, 2010 bool wait_flag, bool posix_lck, int lock, int unlock, 2011 unsigned int xid) 2012 { 2013 int rc = 0; 2014 __u64 length = cifs_flock_len(flock); 2015 struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data; 2016 struct cifs_tcon *tcon = tlink_tcon(cfile->tlink); 2017 struct TCP_Server_Info *server = tcon->ses->server; 2018 struct inode *inode = d_inode(cfile->dentry); 2019 2020 #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY 2021 if (posix_lck) { 2022 int posix_lock_type; 2023 2024 rc = cifs_posix_lock_set(file, flock); 2025 if (rc <= FILE_LOCK_DEFERRED) 2026 return rc; 2027 2028 if (type & server->vals->shared_lock_type) 2029 posix_lock_type = CIFS_RDLCK; 2030 else 2031 posix_lock_type = CIFS_WRLCK; 2032 2033 if (unlock == 1) 2034 posix_lock_type = CIFS_UNLCK; 2035 2036 rc = CIFSSMBPosixLock(xid, tcon, cfile->fid.netfid, 2037 hash_lockowner(flock->fl_owner), 2038 flock->fl_start, length, 2039 NULL, posix_lock_type, wait_flag); 2040 goto out; 2041 } 2042 #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */ 2043 if (lock) { 2044 struct cifsLockInfo *lock; 2045 2046 lock = cifs_lock_init(flock->fl_start, length, type, 2047 flock->fl_flags); 2048 if (!lock) 2049 return -ENOMEM; 2050 2051 rc = cifs_lock_add_if(cfile, lock, wait_flag); 2052 if (rc < 0) { 2053 kfree(lock); 2054 return rc; 2055 } 2056 if (!rc) 2057 goto out; 2058 2059 /* 2060 * Windows 7 server can delay breaking lease from read to None 2061 * if we set a byte-range lock on a file - break it explicitly 2062 * before sending the lock to the server to be sure the next 2063 * read won't conflict with non-overlapping locks due to 2064 * page-granular reads.
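* We do that below by zapping the mapping and dropping the oplock to None when the inode has read-only caching.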
2065 */ 2066 if (!CIFS_CACHE_WRITE(CIFS_I(inode)) && 2067 CIFS_CACHE_READ(CIFS_I(inode))) { 2068 cifs_zap_mapping(inode); 2069 cifs_dbg(FYI, "Set no oplock for inode=%p due to mand locks\n", 2070 inode); 2071 CIFS_I(inode)->oplock = 0; 2072 } 2073 2074 rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length, 2075 type, 1, 0, wait_flag); 2076 if (rc) { 2077 kfree(lock); 2078 return rc; 2079 } 2080 2081 cifs_lock_add(cfile, lock); 2082 } else if (unlock) 2083 rc = server->ops->mand_unlock_range(cfile, flock, xid); 2084 2085 out: 2086 if ((flock->fl_flags & FL_POSIX) || (flock->fl_flags & FL_FLOCK)) { 2087 /* 2088 * If this is a request to remove all locks because we 2089 * are closing the file, it doesn't matter if the 2090 * unlocking failed as both cifs.ko and the SMB server 2091 * remove the lock on file close 2092 */ 2093 if (rc) { 2094 cifs_dbg(VFS, "%s failed rc=%d\n", __func__, rc); 2095 if (!(flock->fl_flags & FL_CLOSE)) 2096 return rc; 2097 } 2098 rc = locks_lock_file_wait(file, flock); 2099 } 2100 return rc; 2101 } 2102 2103 int cifs_flock(struct file *file, int cmd, struct file_lock *fl) 2104 { 2105 int rc, xid; 2106 int lock = 0, unlock = 0; 2107 bool wait_flag = false; 2108 bool posix_lck = false; 2109 struct cifs_sb_info *cifs_sb; 2110 struct cifs_tcon *tcon; 2111 struct cifsFileInfo *cfile; 2112 __u32 type; 2113 2114 xid = get_xid(); 2115 2116 if (!(fl->fl_flags & FL_FLOCK)) { 2117 rc = -ENOLCK; 2118 free_xid(xid); 2119 return rc; 2120 } 2121 2122 cfile = (struct cifsFileInfo *)file->private_data; 2123 tcon = tlink_tcon(cfile->tlink); 2124 2125 cifs_read_flock(fl, &type, &lock, &unlock, &wait_flag, 2126 tcon->ses->server); 2127 cifs_sb = CIFS_FILE_SB(file); 2128 2129 if (cap_unix(tcon->ses) && 2130 (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) && 2131 ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0)) 2132 posix_lck = true; 2133 2134 if (!lock && !unlock) { 2135 /* 2136 * if no lock or unlock then nothing to do since we do not 2137 * know what it is 2138 */ 2139 rc = -EOPNOTSUPP; 2140 free_xid(xid); 2141 return rc; 2142 } 2143 2144 rc = cifs_setlk(file, fl, type, wait_flag, posix_lck, lock, unlock, 2145 xid); 2146 free_xid(xid); 2147 return rc; 2148 2149 2150 } 2151 2152 int cifs_lock(struct file *file, int cmd, struct file_lock *flock) 2153 { 2154 int rc, xid; 2155 int lock = 0, unlock = 0; 2156 bool wait_flag = false; 2157 bool posix_lck = false; 2158 struct cifs_sb_info *cifs_sb; 2159 struct cifs_tcon *tcon; 2160 struct cifsFileInfo *cfile; 2161 __u32 type; 2162 2163 rc = -EACCES; 2164 xid = get_xid(); 2165 2166 cifs_dbg(FYI, "%s: %pD2 cmd=0x%x flags=0x%x type=0x%x r=%lld:%lld\n", __func__, file, cmd, 2167 flock->fl_flags, flock->fl_type, (long long)flock->fl_start, 2168 (long long)flock->fl_end); 2169 2170 cfile = (struct cifsFileInfo *)file->private_data; 2171 tcon = tlink_tcon(cfile->tlink); 2172 2173 cifs_read_flock(flock, &type, &lock, &unlock, &wait_flag, 2174 tcon->ses->server); 2175 cifs_sb = CIFS_FILE_SB(file); 2176 set_bit(CIFS_INO_CLOSE_ON_LOCK, &CIFS_I(d_inode(cfile->dentry))->flags); 2177 2178 if (cap_unix(tcon->ses) && 2179 (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) && 2180 ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0)) 2181 posix_lck = true; 2182 /* 2183 * BB add code here to normalize offset and length to account for 2184 * negative length which we cannot accept over the wire.
2185 */ 2186 if (IS_GETLK(cmd)) { 2187 rc = cifs_getlk(file, flock, type, wait_flag, posix_lck, xid); 2188 free_xid(xid); 2189 return rc; 2190 } 2191 2192 if (!lock && !unlock) { 2193 /* 2194 * if no lock or unlock then nothing to do since we do not 2195 * know what it is 2196 */ 2197 free_xid(xid); 2198 return -EOPNOTSUPP; 2199 } 2200 2201 rc = cifs_setlk(file, flock, type, wait_flag, posix_lck, lock, unlock, 2202 xid); 2203 free_xid(xid); 2204 return rc; 2205 } 2206 2207 /* 2208 * update the file size (if needed) after a write. Should be called with 2209 * the inode->i_lock held 2210 */ 2211 void 2212 cifs_update_eof(struct cifsInodeInfo *cifsi, loff_t offset, 2213 unsigned int bytes_written) 2214 { 2215 loff_t end_of_write = offset + bytes_written; 2216 2217 if (end_of_write > cifsi->server_eof) 2218 cifsi->server_eof = end_of_write; 2219 } 2220 2221 static ssize_t 2222 cifs_write(struct cifsFileInfo *open_file, __u32 pid, const char *write_data, 2223 size_t write_size, loff_t *offset) 2224 { 2225 int rc = 0; 2226 unsigned int bytes_written = 0; 2227 unsigned int total_written; 2228 struct cifs_tcon *tcon; 2229 struct TCP_Server_Info *server; 2230 unsigned int xid; 2231 struct dentry *dentry = open_file->dentry; 2232 struct cifsInodeInfo *cifsi = CIFS_I(d_inode(dentry)); 2233 struct cifs_io_parms io_parms = {0}; 2234 2235 cifs_dbg(FYI, "write %zd bytes to offset %lld of %pd\n", 2236 write_size, *offset, dentry); 2237 2238 tcon = tlink_tcon(open_file->tlink); 2239 server = tcon->ses->server; 2240 2241 if (!server->ops->sync_write) 2242 return -ENOSYS; 2243 2244 xid = get_xid(); 2245 2246 for (total_written = 0; write_size > total_written; 2247 total_written += bytes_written) { 2248 rc = -EAGAIN; 2249 while (rc == -EAGAIN) { 2250 struct kvec iov[2]; 2251 unsigned int len; 2252 2253 if (open_file->invalidHandle) { 2254 /* we could deadlock if we called 2255 filemap_fdatawait from here so tell 2256 reopen_file not to flush data to 2257 server now */ 2258 rc = cifs_reopen_file(open_file, false); 2259 if (rc != 0) 2260 break; 2261 } 2262 2263 len = min(server->ops->wp_retry_size(d_inode(dentry)), 2264 (unsigned int)write_size - total_written); 2265 /* iov[0] is reserved for smb header */ 2266 iov[1].iov_base = (char *)write_data + total_written; 2267 iov[1].iov_len = len; 2268 io_parms.pid = pid; 2269 io_parms.tcon = tcon; 2270 io_parms.offset = *offset; 2271 io_parms.length = len; 2272 rc = server->ops->sync_write(xid, &open_file->fid, 2273 &io_parms, &bytes_written, iov, 1); 2274 } 2275 if (rc || (bytes_written == 0)) { 2276 if (total_written) 2277 break; 2278 else { 2279 free_xid(xid); 2280 return rc; 2281 } 2282 } else { 2283 spin_lock(&d_inode(dentry)->i_lock); 2284 cifs_update_eof(cifsi, *offset, bytes_written); 2285 spin_unlock(&d_inode(dentry)->i_lock); 2286 *offset += bytes_written; 2287 } 2288 } 2289 2290 cifs_stats_bytes_written(tcon, total_written); 2291 2292 if (total_written > 0) { 2293 spin_lock(&d_inode(dentry)->i_lock); 2294 if (*offset > d_inode(dentry)->i_size) { 2295 i_size_write(d_inode(dentry), *offset); 2296 d_inode(dentry)->i_blocks = (512 - 1 + *offset) >> 9; 2297 } 2298 spin_unlock(&d_inode(dentry)->i_lock); 2299 } 2300 mark_inode_dirty_sync(d_inode(dentry)); 2301 free_xid(xid); 2302 return total_written; 2303 } 2304 2305 struct cifsFileInfo *find_readable_file(struct cifsInodeInfo *cifs_inode, 2306 bool fsuid_only) 2307 { 2308 struct cifsFileInfo *open_file = NULL; 2309 struct cifs_sb_info *cifs_sb = CIFS_SB(cifs_inode->netfs.inode.i_sb); 2310 2311 /* only filter by 
fsuid on multiuser mounts */ 2312 if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER)) 2313 fsuid_only = false; 2314 2315 spin_lock(&cifs_inode->open_file_lock); 2316 /* we could simply get the first_list_entry since write-only entries 2317 are always at the end of the list but since the first entry might 2318 have a close pending, we go through the whole list */ 2319 list_for_each_entry(open_file, &cifs_inode->openFileList, flist) { 2320 if (fsuid_only && !uid_eq(open_file->uid, current_fsuid())) 2321 continue; 2322 if (OPEN_FMODE(open_file->f_flags) & FMODE_READ) { 2323 if (!open_file->invalidHandle) { 2324 /* found a good file */ 2325 /* lock it so it will not be closed on us */ 2326 cifsFileInfo_get(open_file); 2327 spin_unlock(&cifs_inode->open_file_lock); 2328 return open_file; 2329 } /* else might as well continue, and look for 2330 another, or simply have the caller reopen it 2331 again rather than trying to fix this handle */ 2332 } else /* write only file */ 2333 break; /* write only files are last so must be done */ 2334 } 2335 spin_unlock(&cifs_inode->open_file_lock); 2336 return NULL; 2337 } 2338 2339 /* Return -EBADF if no handle is found and general rc otherwise */ 2340 int 2341 cifs_get_writable_file(struct cifsInodeInfo *cifs_inode, int flags, 2342 struct cifsFileInfo **ret_file) 2343 { 2344 struct cifsFileInfo *open_file, *inv_file = NULL; 2345 struct cifs_sb_info *cifs_sb; 2346 bool any_available = false; 2347 int rc = -EBADF; 2348 unsigned int refind = 0; 2349 bool fsuid_only = flags & FIND_WR_FSUID_ONLY; 2350 bool with_delete = flags & FIND_WR_WITH_DELETE; 2351 *ret_file = NULL; 2352 2353 /* 2354 * Having a null inode here (because mapping->host was set to zero by 2355 * the VFS or MM) should not happen but we had reports of an oops (due 2356 * to it being zero) during stress testcases so we need to check for it 2357 */ 2358 2359 if (cifs_inode == NULL) { 2360 cifs_dbg(VFS, "Null inode passed to cifs_get_writable_file\n"); 2361 dump_stack(); 2362 return rc; 2363 } 2364 2365 cifs_sb = CIFS_SB(cifs_inode->netfs.inode.i_sb); 2366 2367 /* only filter by fsuid on multiuser mounts */ 2368 if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER)) 2369 fsuid_only = false; 2370 2371 spin_lock(&cifs_inode->open_file_lock); 2372 refind_writable: 2373 if (refind > MAX_REOPEN_ATT) { 2374 spin_unlock(&cifs_inode->open_file_lock); 2375 return rc; 2376 } 2377 list_for_each_entry(open_file, &cifs_inode->openFileList, flist) { 2378 if (!any_available && open_file->pid != current->tgid) 2379 continue; 2380 if (fsuid_only && !uid_eq(open_file->uid, current_fsuid())) 2381 continue; 2382 if (with_delete && !(open_file->fid.access & DELETE)) 2383 continue; 2384 if (OPEN_FMODE(open_file->f_flags) & FMODE_WRITE) { 2385 if (!open_file->invalidHandle) { 2386 /* found a good writable file */ 2387 cifsFileInfo_get(open_file); 2388 spin_unlock(&cifs_inode->open_file_lock); 2389 *ret_file = open_file; 2390 return 0; 2391 } else { 2392 if (!inv_file) 2393 inv_file = open_file; 2394 } 2395 } 2396 } 2397 /* couldn't find usable FH with same pid, try any available */ 2398 if (!any_available) { 2399 any_available = true; 2400 goto refind_writable; 2401 } 2402 2403 if (inv_file) { 2404 any_available = false; 2405 cifsFileInfo_get(inv_file); 2406 } 2407 2408 spin_unlock(&cifs_inode->open_file_lock); 2409 2410 if (inv_file) { 2411 rc = cifs_reopen_file(inv_file, false); 2412 if (!rc) { 2413 *ret_file = inv_file; 2414 return 0; 2415 } 2416 2417 spin_lock(&cifs_inode->open_file_lock); 2418
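/* reopen failed - demote the stale handle to the tail of the list so the rescan below tries other handles first */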
list_move_tail(&inv_file->flist, &cifs_inode->openFileList); 2419 spin_unlock(&cifs_inode->open_file_lock); 2420 cifsFileInfo_put(inv_file); 2421 ++refind; 2422 inv_file = NULL; 2423 spin_lock(&cifs_inode->open_file_lock); 2424 goto refind_writable; 2425 } 2426 2427 return rc; 2428 } 2429 2430 struct cifsFileInfo * 2431 find_writable_file(struct cifsInodeInfo *cifs_inode, int flags) 2432 { 2433 struct cifsFileInfo *cfile; 2434 int rc; 2435 2436 rc = cifs_get_writable_file(cifs_inode, flags, &cfile); 2437 if (rc) 2438 cifs_dbg(FYI, "Couldn't find writable handle rc=%d\n", rc); 2439 2440 return cfile; 2441 } 2442 2443 int 2444 cifs_get_writable_path(struct cifs_tcon *tcon, const char *name, 2445 int flags, 2446 struct cifsFileInfo **ret_file) 2447 { 2448 struct cifsFileInfo *cfile; 2449 void *page = alloc_dentry_path(); 2450 2451 *ret_file = NULL; 2452 2453 spin_lock(&tcon->open_file_lock); 2454 list_for_each_entry(cfile, &tcon->openFileList, tlist) { 2455 struct cifsInodeInfo *cinode; 2456 const char *full_path = build_path_from_dentry(cfile->dentry, page); 2457 if (IS_ERR(full_path)) { 2458 spin_unlock(&tcon->open_file_lock); 2459 free_dentry_path(page); 2460 return PTR_ERR(full_path); 2461 } 2462 if (strcmp(full_path, name)) 2463 continue; 2464 2465 cinode = CIFS_I(d_inode(cfile->dentry)); 2466 spin_unlock(&tcon->open_file_lock); 2467 free_dentry_path(page); 2468 return cifs_get_writable_file(cinode, flags, ret_file); 2469 } 2470 2471 spin_unlock(&tcon->open_file_lock); 2472 free_dentry_path(page); 2473 return -ENOENT; 2474 } 2475 2476 int 2477 cifs_get_readable_path(struct cifs_tcon *tcon, const char *name, 2478 struct cifsFileInfo **ret_file) 2479 { 2480 struct cifsFileInfo *cfile; 2481 void *page = alloc_dentry_path(); 2482 2483 *ret_file = NULL; 2484 2485 spin_lock(&tcon->open_file_lock); 2486 list_for_each_entry(cfile, &tcon->openFileList, tlist) { 2487 struct cifsInodeInfo *cinode; 2488 const char *full_path = build_path_from_dentry(cfile->dentry, page); 2489 if (IS_ERR(full_path)) { 2490 spin_unlock(&tcon->open_file_lock); 2491 free_dentry_path(page); 2492 return PTR_ERR(full_path); 2493 } 2494 if (strcmp(full_path, name)) 2495 continue; 2496 2497 cinode = CIFS_I(d_inode(cfile->dentry)); 2498 spin_unlock(&tcon->open_file_lock); 2499 free_dentry_path(page); 2500 *ret_file = find_readable_file(cinode, 0); 2501 return *ret_file ? 0 : -ENOENT; 2502 } 2503 2504 spin_unlock(&tcon->open_file_lock); 2505 free_dentry_path(page); 2506 return -ENOENT; 2507 } 2508 2509 void 2510 cifs_writedata_release(struct kref *refcount) 2511 { 2512 struct cifs_writedata *wdata = container_of(refcount, 2513 struct cifs_writedata, refcount); 2514 #ifdef CONFIG_CIFS_SMB_DIRECT 2515 if (wdata->mr) { 2516 smbd_deregister_mr(wdata->mr); 2517 wdata->mr = NULL; 2518 } 2519 #endif 2520 2521 if (wdata->cfile) 2522 cifsFileInfo_put(wdata->cfile); 2523 2524 kfree(wdata); 2525 } 2526 2527 /* 2528 * Write failed with a retryable error. Resend the write request. It's also 2529 * possible that the page was redirtied so re-clean the page. 
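* The resend below is chunked so that each request fits within the server's current wp_retry_size() limit.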
2530 */ 2531 static void 2532 cifs_writev_requeue(struct cifs_writedata *wdata) 2533 { 2534 int rc = 0; 2535 struct inode *inode = d_inode(wdata->cfile->dentry); 2536 struct TCP_Server_Info *server; 2537 unsigned int rest_len = wdata->bytes; 2538 loff_t fpos = wdata->offset; 2539 2540 server = tlink_tcon(wdata->cfile->tlink)->ses->server; 2541 do { 2542 struct cifs_writedata *wdata2; 2543 unsigned int wsize, cur_len; 2544 2545 wsize = server->ops->wp_retry_size(inode); 2546 if (wsize < rest_len) { 2547 if (wsize < PAGE_SIZE) { 2548 rc = -EOPNOTSUPP; 2549 break; 2550 } 2551 cur_len = min(round_down(wsize, PAGE_SIZE), rest_len); 2552 } else { 2553 cur_len = rest_len; 2554 } 2555 2556 wdata2 = cifs_writedata_alloc(cifs_writev_complete); 2557 if (!wdata2) { 2558 rc = -ENOMEM; 2559 break; 2560 } 2561 2562 wdata2->sync_mode = wdata->sync_mode; 2563 wdata2->offset = fpos; 2564 wdata2->bytes = cur_len; 2565 wdata2->iter = wdata->iter; 2566 2567 iov_iter_advance(&wdata2->iter, fpos - wdata->offset); 2568 iov_iter_truncate(&wdata2->iter, wdata2->bytes); 2569 2570 if (iov_iter_is_xarray(&wdata2->iter)) 2571 /* Check for pages having been redirtied and clean 2572 * them. We can do this by walking the xarray. If 2573 * it's not an xarray, then it's a DIO and we shouldn't 2574 * be mucking around with the page bits. 2575 */ 2576 cifs_undirty_folios(inode, fpos, cur_len); 2577 2578 rc = cifs_get_writable_file(CIFS_I(inode), FIND_WR_ANY, 2579 &wdata2->cfile); 2580 if (!wdata2->cfile) { 2581 cifs_dbg(VFS, "No writable handle to retry writepages rc=%d\n", 2582 rc); 2583 if (!is_retryable_error(rc)) 2584 rc = -EBADF; 2585 } else { 2586 wdata2->pid = wdata2->cfile->pid; 2587 rc = server->ops->async_writev(wdata2, 2588 cifs_writedata_release); 2589 } 2590 2591 kref_put(&wdata2->refcount, cifs_writedata_release); 2592 if (rc) { 2593 if (is_retryable_error(rc)) 2594 continue; 2595 fpos += cur_len; 2596 rest_len -= cur_len; 2597 break; 2598 } 2599 2600 fpos += cur_len; 2601 rest_len -= cur_len; 2602 } while (rest_len > 0); 2603 2604 /* Clean up remaining pages from the original wdata */ 2605 if (iov_iter_is_xarray(&wdata->iter)) 2606 cifs_pages_write_failed(inode, fpos, rest_len); 2607 2608 if (rc != 0 && !is_retryable_error(rc)) 2609 mapping_set_error(inode->i_mapping, rc); 2610 kref_put(&wdata->refcount, cifs_writedata_release); 2611 } 2612 2613 void 2614 cifs_writev_complete(struct work_struct *work) 2615 { 2616 struct cifs_writedata *wdata = container_of(work, 2617 struct cifs_writedata, work); 2618 struct inode *inode = d_inode(wdata->cfile->dentry); 2619 2620 if (wdata->result == 0) { 2621 spin_lock(&inode->i_lock); 2622 cifs_update_eof(CIFS_I(inode), wdata->offset, wdata->bytes); 2623 spin_unlock(&inode->i_lock); 2624 cifs_stats_bytes_written(tlink_tcon(wdata->cfile->tlink), 2625 wdata->bytes); 2626 } else if (wdata->sync_mode == WB_SYNC_ALL && wdata->result == -EAGAIN) 2627 return cifs_writev_requeue(wdata); 2628 2629 if (wdata->result == -EAGAIN) 2630 cifs_pages_write_redirty(inode, wdata->offset, wdata->bytes); 2631 else if (wdata->result < 0) 2632 cifs_pages_write_failed(inode, wdata->offset, wdata->bytes); 2633 else 2634 cifs_pages_written_back(inode, wdata->offset, wdata->bytes); 2635 2636 if (wdata->result != -EAGAIN) 2637 mapping_set_error(inode->i_mapping, wdata->result); 2638 kref_put(&wdata->refcount, cifs_writedata_release); 2639 } 2640 2641 struct cifs_writedata *cifs_writedata_alloc(work_func_t complete) 2642 { 2643 struct cifs_writedata *wdata; 2644 2645 wdata = kzalloc(sizeof(*wdata), 
GFP_NOFS); 2646 if (wdata != NULL) { 2647 kref_init(&wdata->refcount); 2648 INIT_LIST_HEAD(&wdata->list); 2649 init_completion(&wdata->done); 2650 INIT_WORK(&wdata->work, complete); 2651 } 2652 return wdata; 2653 } 2654 2655 static int cifs_partialpagewrite(struct page *page, unsigned from, unsigned to) 2656 { 2657 struct address_space *mapping = page->mapping; 2658 loff_t offset = (loff_t)page->index << PAGE_SHIFT; 2659 char *write_data; 2660 int rc = -EFAULT; 2661 int bytes_written = 0; 2662 struct inode *inode; 2663 struct cifsFileInfo *open_file; 2664 2665 if (!mapping || !mapping->host) 2666 return -EFAULT; 2667 2668 inode = page->mapping->host; 2669 2670 offset += (loff_t)from; 2671 write_data = kmap(page); 2672 write_data += from; 2673 2674 if ((to > PAGE_SIZE) || (from > to)) { 2675 kunmap(page); 2676 return -EIO; 2677 } 2678 2679 /* racing with truncate? */ 2680 if (offset > mapping->host->i_size) { 2681 kunmap(page); 2682 return 0; /* don't care */ 2683 } 2684 2685 /* check to make sure that we are not extending the file */ 2686 if (mapping->host->i_size - offset < (loff_t)to) 2687 to = (unsigned)(mapping->host->i_size - offset); 2688 2689 rc = cifs_get_writable_file(CIFS_I(mapping->host), FIND_WR_ANY, 2690 &open_file); 2691 if (!rc) { 2692 bytes_written = cifs_write(open_file, open_file->pid, 2693 write_data, to - from, &offset); 2694 cifsFileInfo_put(open_file); 2695 /* Does mm or vfs already set times? */ 2696 simple_inode_init_ts(inode); 2697 if ((bytes_written > 0) && (offset)) 2698 rc = 0; 2699 else if (bytes_written < 0) 2700 rc = bytes_written; 2701 else 2702 rc = -EFAULT; 2703 } else { 2704 cifs_dbg(FYI, "No writable handle for write page rc=%d\n", rc); 2705 if (!is_retryable_error(rc)) 2706 rc = -EIO; 2707 } 2708 2709 kunmap(page); 2710 return rc; 2711 } 2712 2713 /* 2714 * Extend the region to be written back to include subsequent contiguously 2715 * dirty pages if possible, but don't sleep while doing so. 2716 */ 2717 static void cifs_extend_writeback(struct address_space *mapping, 2718 struct xa_state *xas, 2719 long *_count, 2720 loff_t start, 2721 int max_pages, 2722 loff_t max_len, 2723 size_t *_len) 2724 { 2725 struct folio_batch batch; 2726 struct folio *folio; 2727 unsigned int nr_pages; 2728 pgoff_t index = (start + *_len) / PAGE_SIZE; 2729 size_t len; 2730 bool stop = true; 2731 unsigned int i; 2732 2733 folio_batch_init(&batch); 2734 2735 do { 2736 /* Firstly, we gather up a batch of contiguous dirty pages 2737 * under the RCU read lock - but we can't clear the dirty flags 2738 * there if any of those pages are mapped. 2739 */ 2740 rcu_read_lock(); 2741 2742 xas_for_each(xas, folio, ULONG_MAX) { 2743 stop = true; 2744 if (xas_retry(xas, folio)) 2745 continue; 2746 if (xa_is_value(folio)) 2747 break; 2748 if (folio->index != index) { 2749 xas_reset(xas); 2750 break; 2751 } 2752 2753 if (!folio_try_get_rcu(folio)) { 2754 xas_reset(xas); 2755 continue; 2756 } 2757 nr_pages = folio_nr_pages(folio); 2758 if (nr_pages > max_pages) { 2759 xas_reset(xas); 2760 break; 2761 } 2762 2763 /* Has the page moved or been split? 
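* If so, drop the reference we just took and restart the scan from the current position.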
*/ 2764 if (unlikely(folio != xas_reload(xas))) { 2765 folio_put(folio); 2766 xas_reset(xas); 2767 break; 2768 } 2769 2770 if (!folio_trylock(folio)) { 2771 folio_put(folio); 2772 xas_reset(xas); 2773 break; 2774 } 2775 if (!folio_test_dirty(folio) || 2776 folio_test_writeback(folio)) { 2777 folio_unlock(folio); 2778 folio_put(folio); 2779 xas_reset(xas); 2780 break; 2781 } 2782 2783 max_pages -= nr_pages; 2784 len = folio_size(folio); 2785 stop = false; 2786 2787 index += nr_pages; 2788 *_count -= nr_pages; 2789 *_len += len; 2790 if (max_pages <= 0 || *_len >= max_len || *_count <= 0) 2791 stop = true; 2792 2793 if (!folio_batch_add(&batch, folio)) 2794 break; 2795 if (stop) 2796 break; 2797 } 2798 2799 xas_pause(xas); 2800 rcu_read_unlock(); 2801 2802 /* Now, if we obtained any pages, we can shift them to being 2803 * writable and mark them for caching. 2804 */ 2805 if (!folio_batch_count(&batch)) 2806 break; 2807 2808 for (i = 0; i < folio_batch_count(&batch); i++) { 2809 folio = batch.folios[i]; 2810 /* The folio should be locked, dirty and not undergoing 2811 * writeback from the loop above. 2812 */ 2813 if (!folio_clear_dirty_for_io(folio)) 2814 WARN_ON(1); 2815 folio_start_writeback(folio); 2816 folio_unlock(folio); 2817 } 2818 2819 folio_batch_release(&batch); 2820 cond_resched(); 2821 } while (!stop); 2822 } 2823 2824 /* 2825 * Write back the locked page and any subsequent non-locked dirty pages. 2826 */ 2827 static ssize_t cifs_write_back_from_locked_folio(struct address_space *mapping, 2828 struct writeback_control *wbc, 2829 struct xa_state *xas, 2830 struct folio *folio, 2831 unsigned long long start, 2832 unsigned long long end) 2833 { 2834 struct inode *inode = mapping->host; 2835 struct TCP_Server_Info *server; 2836 struct cifs_writedata *wdata; 2837 struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb); 2838 struct cifs_credits credits_on_stack; 2839 struct cifs_credits *credits = &credits_on_stack; 2840 struct cifsFileInfo *cfile = NULL; 2841 unsigned long long i_size = i_size_read(inode), max_len; 2842 unsigned int xid, wsize; 2843 size_t len = folio_size(folio); 2844 long count = wbc->nr_to_write; 2845 int rc; 2846 2847 /* The folio should be locked, dirty and not undergoing writeback. */ 2848 if (!folio_clear_dirty_for_io(folio)) 2849 WARN_ON_ONCE(1); 2850 folio_start_writeback(folio); 2851 2852 count -= folio_nr_pages(folio); 2853 2854 xid = get_xid(); 2855 server = cifs_pick_channel(cifs_sb_master_tcon(cifs_sb)->ses); 2856 2857 rc = cifs_get_writable_file(CIFS_I(inode), FIND_WR_ANY, &cfile); 2858 if (rc) { 2859 cifs_dbg(VFS, "No writable handle in writepages rc=%d\n", rc); 2860 goto err_xid; 2861 } 2862 2863 rc = server->ops->wait_mtu_credits(server, cifs_sb->ctx->wsize, 2864 &wsize, credits); 2865 if (rc != 0) 2866 goto err_close; 2867 2868 wdata = cifs_writedata_alloc(cifs_writev_complete); 2869 if (!wdata) { 2870 rc = -ENOMEM; 2871 goto err_uncredit; 2872 } 2873 2874 wdata->sync_mode = wbc->sync_mode; 2875 wdata->offset = folio_pos(folio); 2876 wdata->pid = cfile->pid; 2877 wdata->credits = credits_on_stack; 2878 wdata->cfile = cfile; 2879 wdata->server = server; 2880 cfile = NULL; 2881 2882 /* Find all consecutive lockable dirty pages that have contiguous 2883 * written regions, stopping when we find a page that is not 2884 * immediately lockable, is not dirty or is missing, or we reach the 2885 * end of the range. 2886 */ 2887 if (start < i_size) { 2888 /* Trim the write to the EOF; the extra data is ignored. 
Also 2889 * put an upper limit on the size of a single storedata op. 2890 */ 2891 max_len = wsize; 2892 max_len = min_t(unsigned long long, max_len, end - start + 1); 2893 max_len = min_t(unsigned long long, max_len, i_size - start); 2894 2895 if (len < max_len) { 2896 int max_pages = INT_MAX; 2897 2898 #ifdef CONFIG_CIFS_SMB_DIRECT 2899 if (server->smbd_conn) 2900 max_pages = server->smbd_conn->max_frmr_depth; 2901 #endif 2902 max_pages -= folio_nr_pages(folio); 2903 2904 if (max_pages > 0) 2905 cifs_extend_writeback(mapping, xas, &count, start, 2906 max_pages, max_len, &len); 2907 } 2908 } 2909 len = min_t(unsigned long long, len, i_size - start); 2910 2911 /* We now have a contiguous set of dirty pages, each with writeback 2912 * set; the first page is still locked at this point, but all the rest 2913 * have been unlocked. 2914 */ 2915 folio_unlock(folio); 2916 wdata->bytes = len; 2917 2918 if (start < i_size) { 2919 iov_iter_xarray(&wdata->iter, ITER_SOURCE, &mapping->i_pages, 2920 start, len); 2921 2922 rc = adjust_credits(wdata->server, &wdata->credits, wdata->bytes); 2923 if (rc) 2924 goto err_wdata; 2925 2926 if (wdata->cfile->invalidHandle) 2927 rc = -EAGAIN; 2928 else 2929 rc = wdata->server->ops->async_writev(wdata, 2930 cifs_writedata_release); 2931 if (rc >= 0) { 2932 kref_put(&wdata->refcount, cifs_writedata_release); 2933 goto err_close; 2934 } 2935 } else { 2936 /* The dirty region was entirely beyond the EOF. */ 2937 cifs_pages_written_back(inode, start, len); 2938 rc = 0; 2939 } 2940 2941 err_wdata: 2942 kref_put(&wdata->refcount, cifs_writedata_release); 2943 err_uncredit: 2944 add_credits_and_wake_if(server, credits, 0); 2945 err_close: 2946 if (cfile) 2947 cifsFileInfo_put(cfile); 2948 err_xid: 2949 free_xid(xid); 2950 if (rc == 0) { 2951 wbc->nr_to_write = count; 2952 rc = len; 2953 } else if (is_retryable_error(rc)) { 2954 cifs_pages_write_redirty(inode, start, len); 2955 } else { 2956 cifs_pages_write_failed(inode, start, len); 2957 mapping_set_error(mapping, rc); 2958 } 2959 /* Indication to update ctime and mtime as close is deferred */ 2960 set_bit(CIFS_INO_MODIFIED_ATTR, &CIFS_I(inode)->flags); 2961 return rc; 2962 } 2963 2964 /* 2965 * write a region of pages back to the server 2966 */ 2967 static ssize_t cifs_writepages_begin(struct address_space *mapping, 2968 struct writeback_control *wbc, 2969 struct xa_state *xas, 2970 unsigned long long *_start, 2971 unsigned long long end) 2972 { 2973 struct folio *folio; 2974 unsigned long long start = *_start; 2975 ssize_t ret; 2976 int skips = 0; 2977 2978 search_again: 2979 /* Find the first dirty page. 
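* The search runs under the RCU read lock, and we take a reference on the folio before the lock is dropped.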
*/ 2980 rcu_read_lock(); 2981 2982 for (;;) { 2983 folio = xas_find_marked(xas, end / PAGE_SIZE, PAGECACHE_TAG_DIRTY); 2984 if (xas_retry(xas, folio) || xa_is_value(folio)) 2985 continue; 2986 if (!folio) 2987 break; 2988 2989 if (!folio_try_get_rcu(folio)) { 2990 xas_reset(xas); 2991 continue; 2992 } 2993 2994 if (unlikely(folio != xas_reload(xas))) { 2995 folio_put(folio); 2996 xas_reset(xas); 2997 continue; 2998 } 2999 3000 xas_pause(xas); 3001 break; 3002 } 3003 rcu_read_unlock(); 3004 if (!folio) 3005 return 0; 3006 3007 start = folio_pos(folio); /* May regress with THPs */ 3008 3009 /* At this point we hold neither the i_pages lock nor the page lock: 3010 * the page may be truncated or invalidated (changing page->mapping to 3011 * NULL), or even swizzled back from swapper_space to tmpfs file 3012 * mapping 3013 */ 3014 lock_again: 3015 if (wbc->sync_mode != WB_SYNC_NONE) { 3016 ret = folio_lock_killable(folio); 3017 if (ret < 0) 3018 return ret; 3019 } else { 3020 if (!folio_trylock(folio)) 3021 goto search_again; 3022 } 3023 3024 if (folio->mapping != mapping || 3025 !folio_test_dirty(folio)) { 3026 start += folio_size(folio); 3027 folio_unlock(folio); 3028 goto search_again; 3029 } 3030 3031 if (folio_test_writeback(folio) || 3032 folio_test_fscache(folio)) { 3033 folio_unlock(folio); 3034 if (wbc->sync_mode != WB_SYNC_NONE) { 3035 folio_wait_writeback(folio); 3036 #ifdef CONFIG_CIFS_FSCACHE 3037 folio_wait_fscache(folio); 3038 #endif 3039 goto lock_again; 3040 } 3041 3042 start += folio_size(folio); 3043 if (wbc->sync_mode == WB_SYNC_NONE) { 3044 if (skips >= 5 || need_resched()) { 3045 ret = 0; 3046 goto out; 3047 } 3048 skips++; 3049 } 3050 goto search_again; 3051 } 3052 3053 ret = cifs_write_back_from_locked_folio(mapping, wbc, xas, folio, start, end); 3054 out: 3055 if (ret > 0) 3056 *_start = start + ret; 3057 return ret; 3058 } 3059 3060 /* 3061 * Write a region of pages back to the server 3062 */ 3063 static int cifs_writepages_region(struct address_space *mapping, 3064 struct writeback_control *wbc, 3065 unsigned long long *_start, 3066 unsigned long long end) 3067 { 3068 ssize_t ret; 3069 3070 XA_STATE(xas, &mapping->i_pages, *_start / PAGE_SIZE); 3071 3072 do { 3073 ret = cifs_writepages_begin(mapping, wbc, &xas, _start, end); 3074 if (ret > 0 && wbc->nr_to_write > 0) 3075 cond_resched(); 3076 } while (ret > 0 && wbc->nr_to_write > 0); 3077 3078 return ret > 0 ? 0 : ret; 3079 } 3080 3081 /* 3082 * Write some of the pending data back to the server 3083 */ 3084 static int cifs_writepages(struct address_space *mapping, 3085 struct writeback_control *wbc) 3086 { 3087 loff_t start, end; 3088 int ret; 3089 3090 /* We have to be careful as we can end up racing with setattr() 3091 * truncating the pagecache since the caller doesn't take a lock here 3092 * to prevent it. 
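* For cyclic writeback we start at writeback_index, write through to the end of the file and then wrap around to cover the start of the mapping.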
3093 */ 3094 3095 if (wbc->range_cyclic && mapping->writeback_index) { 3096 start = mapping->writeback_index * PAGE_SIZE; 3097 ret = cifs_writepages_region(mapping, wbc, &start, LLONG_MAX); 3098 if (ret < 0) 3099 goto out; 3100 3101 if (wbc->nr_to_write <= 0) { 3102 mapping->writeback_index = start / PAGE_SIZE; 3103 goto out; 3104 } 3105 3106 start = 0; 3107 end = mapping->writeback_index * PAGE_SIZE; 3108 mapping->writeback_index = 0; 3109 ret = cifs_writepages_region(mapping, wbc, &start, end); 3110 if (ret == 0) 3111 mapping->writeback_index = start / PAGE_SIZE; 3112 } else if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX) { 3113 start = 0; 3114 ret = cifs_writepages_region(mapping, wbc, &start, LLONG_MAX); 3115 if (wbc->nr_to_write > 0 && ret == 0) 3116 mapping->writeback_index = start / PAGE_SIZE; 3117 } else { 3118 start = wbc->range_start; 3119 ret = cifs_writepages_region(mapping, wbc, &start, wbc->range_end); 3120 } 3121 3122 out: 3123 return ret; 3124 } 3125 3126 static int 3127 cifs_writepage_locked(struct page *page, struct writeback_control *wbc) 3128 { 3129 int rc; 3130 unsigned int xid; 3131 3132 xid = get_xid(); 3133 /* BB add check for wbc flags */ 3134 get_page(page); 3135 if (!PageUptodate(page)) 3136 cifs_dbg(FYI, "ppw - page not up to date\n"); 3137 3138 /* 3139 * Set the "writeback" flag, and clear "dirty" in the radix tree. 3140 * 3141 * A writepage() implementation always needs to do either this, 3142 * or re-dirty the page with "redirty_page_for_writepage()" in 3143 * the case of a failure. 3144 * 3145 * Just unlocking the page will cause the radix tree tag-bits 3146 * to fail to update with the state of the page correctly. 3147 */ 3148 set_page_writeback(page); 3149 retry_write: 3150 rc = cifs_partialpagewrite(page, 0, PAGE_SIZE); 3151 if (is_retryable_error(rc)) { 3152 if (wbc->sync_mode == WB_SYNC_ALL && rc == -EAGAIN) 3153 goto retry_write; 3154 redirty_page_for_writepage(wbc, page); 3155 } else if (rc != 0) { 3156 SetPageError(page); 3157 mapping_set_error(page->mapping, rc); 3158 } else { 3159 SetPageUptodate(page); 3160 } 3161 end_page_writeback(page); 3162 put_page(page); 3163 free_xid(xid); 3164 return rc; 3165 } 3166 3167 static int cifs_write_end(struct file *file, struct address_space *mapping, 3168 loff_t pos, unsigned len, unsigned copied, 3169 struct page *page, void *fsdata) 3170 { 3171 int rc; 3172 struct inode *inode = mapping->host; 3173 struct cifsFileInfo *cfile = file->private_data; 3174 struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb); 3175 struct folio *folio = page_folio(page); 3176 __u32 pid; 3177 3178 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD) 3179 pid = cfile->pid; 3180 else 3181 pid = current->tgid; 3182 3183 cifs_dbg(FYI, "write_end for page %p from pos %lld with %d bytes\n", 3184 page, pos, copied); 3185 3186 if (folio_test_checked(folio)) { 3187 if (copied == len) 3188 folio_mark_uptodate(folio); 3189 folio_clear_checked(folio); 3190 } else if (!folio_test_uptodate(folio) && copied == PAGE_SIZE) 3191 folio_mark_uptodate(folio); 3192 3193 if (!folio_test_uptodate(folio)) { 3194 char *page_data; 3195 unsigned offset = pos & (PAGE_SIZE - 1); 3196 unsigned int xid; 3197 3198 xid = get_xid(); 3199 /* this is probably better than directly calling 3200 partialpage_write since in this function the file handle is 3201 known which we might as well leverage */ 3202 /* BB check if anything else missing out of ppw 3203 such as updating last write time */ 3204 page_data = kmap(page); 3205 rc = cifs_write(cfile, pid, 
page_data + offset, copied, &pos); 3206 /* if (rc < 0) should we set writebehind rc? */ 3207 kunmap(page); 3208 3209 free_xid(xid); 3210 } else { 3211 rc = copied; 3212 pos += copied; 3213 set_page_dirty(page); 3214 } 3215 3216 if (rc > 0) { 3217 spin_lock(&inode->i_lock); 3218 if (pos > inode->i_size) { 3219 loff_t additional_blocks = (512 - 1 + copied) >> 9; 3220 3221 i_size_write(inode, pos); 3222 /* 3223 * Estimate new allocation size based on the amount written. 3224 * This will be updated from server on close (and on queryinfo) 3225 */ 3226 inode->i_blocks = min_t(blkcnt_t, (512 - 1 + pos) >> 9, 3227 inode->i_blocks + additional_blocks); 3228 } 3229 spin_unlock(&inode->i_lock); 3230 } 3231 3232 unlock_page(page); 3233 put_page(page); 3234 /* Indication to update ctime and mtime as close is deferred */ 3235 set_bit(CIFS_INO_MODIFIED_ATTR, &CIFS_I(inode)->flags); 3236 3237 return rc; 3238 } 3239 3240 int cifs_strict_fsync(struct file *file, loff_t start, loff_t end, 3241 int datasync) 3242 { 3243 unsigned int xid; 3244 int rc = 0; 3245 struct cifs_tcon *tcon; 3246 struct TCP_Server_Info *server; 3247 struct cifsFileInfo *smbfile = file->private_data; 3248 struct inode *inode = file_inode(file); 3249 struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb); 3250 3251 rc = file_write_and_wait_range(file, start, end); 3252 if (rc) { 3253 trace_cifs_fsync_err(inode->i_ino, rc); 3254 return rc; 3255 } 3256 3257 xid = get_xid(); 3258 3259 cifs_dbg(FYI, "Sync file - name: %pD datasync: 0x%x\n", 3260 file, datasync); 3261 3262 if (!CIFS_CACHE_READ(CIFS_I(inode))) { 3263 rc = cifs_zap_mapping(inode); 3264 if (rc) { 3265 cifs_dbg(FYI, "rc: %d during invalidate phase\n", rc); 3266 rc = 0; /* don't care about it in fsync */ 3267 } 3268 } 3269 3270 tcon = tlink_tcon(smbfile->tlink); 3271 if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC)) { 3272 server = tcon->ses->server; 3273 if (server->ops->flush == NULL) { 3274 rc = -ENOSYS; 3275 goto strict_fsync_exit; 3276 } 3277 3278 if ((OPEN_FMODE(smbfile->f_flags) & FMODE_WRITE) == 0) { 3279 smbfile = find_writable_file(CIFS_I(inode), FIND_WR_ANY); 3280 if (smbfile) { 3281 rc = server->ops->flush(xid, tcon, &smbfile->fid); 3282 cifsFileInfo_put(smbfile); 3283 } else 3284 cifs_dbg(FYI, "ignore fsync for file not open for write\n"); 3285 } else 3286 rc = server->ops->flush(xid, tcon, &smbfile->fid); 3287 } 3288 3289 strict_fsync_exit: 3290 free_xid(xid); 3291 return rc; 3292 } 3293 3294 int cifs_fsync(struct file *file, loff_t start, loff_t end, int datasync) 3295 { 3296 unsigned int xid; 3297 int rc = 0; 3298 struct cifs_tcon *tcon; 3299 struct TCP_Server_Info *server; 3300 struct cifsFileInfo *smbfile = file->private_data; 3301 struct inode *inode = file_inode(file); 3302 struct cifs_sb_info *cifs_sb = CIFS_FILE_SB(file); 3303 3304 rc = file_write_and_wait_range(file, start, end); 3305 if (rc) { 3306 trace_cifs_fsync_err(file_inode(file)->i_ino, rc); 3307 return rc; 3308 } 3309 3310 xid = get_xid(); 3311 3312 cifs_dbg(FYI, "Sync file - name: %pD datasync: 0x%x\n", 3313 file, datasync); 3314 3315 tcon = tlink_tcon(smbfile->tlink); 3316 if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC)) { 3317 server = tcon->ses->server; 3318 if (server->ops->flush == NULL) { 3319 rc = -ENOSYS; 3320 goto fsync_exit; 3321 } 3322 3323 if ((OPEN_FMODE(smbfile->f_flags) & FMODE_WRITE) == 0) { 3324 smbfile = find_writable_file(CIFS_I(inode), FIND_WR_ANY); 3325 if (smbfile) { 3326 rc = server->ops->flush(xid, tcon, &smbfile->fid); 3327 cifsFileInfo_put(smbfile); 3328 } else 3329 
cifs_dbg(FYI, "ignore fsync for file not open for write\n"); 3330 } else 3331 rc = server->ops->flush(xid, tcon, &smbfile->fid); 3332 } 3333 3334 fsync_exit: 3335 free_xid(xid); 3336 return rc; 3337 } 3338 3339 /* 3340 * As file closes, flush all cached write data for this inode checking 3341 * for write behind errors. 3342 */ 3343 int cifs_flush(struct file *file, fl_owner_t id) 3344 { 3345 struct inode *inode = file_inode(file); 3346 int rc = 0; 3347 3348 if (file->f_mode & FMODE_WRITE) 3349 rc = filemap_write_and_wait(inode->i_mapping); 3350 3351 cifs_dbg(FYI, "Flush inode %p file %p rc %d\n", inode, file, rc); 3352 if (rc) { 3353 /* get more nuanced writeback errors */ 3354 rc = filemap_check_wb_err(file->f_mapping, 0); 3355 trace_cifs_flush_err(inode->i_ino, rc); 3356 } 3357 return rc; 3358 } 3359 3360 static void 3361 cifs_uncached_writedata_release(struct kref *refcount) 3362 { 3363 struct cifs_writedata *wdata = container_of(refcount, 3364 struct cifs_writedata, refcount); 3365 3366 kref_put(&wdata->ctx->refcount, cifs_aio_ctx_release); 3367 cifs_writedata_release(refcount); 3368 } 3369 3370 static void collect_uncached_write_data(struct cifs_aio_ctx *ctx); 3371 3372 static void 3373 cifs_uncached_writev_complete(struct work_struct *work) 3374 { 3375 struct cifs_writedata *wdata = container_of(work, 3376 struct cifs_writedata, work); 3377 struct inode *inode = d_inode(wdata->cfile->dentry); 3378 struct cifsInodeInfo *cifsi = CIFS_I(inode); 3379 3380 spin_lock(&inode->i_lock); 3381 cifs_update_eof(cifsi, wdata->offset, wdata->bytes); 3382 if (cifsi->server_eof > inode->i_size) 3383 i_size_write(inode, cifsi->server_eof); 3384 spin_unlock(&inode->i_lock); 3385 3386 complete(&wdata->done); 3387 collect_uncached_write_data(wdata->ctx); 3388 /* the below call can possibly free the last ref to aio ctx */ 3389 kref_put(&wdata->refcount, cifs_uncached_writedata_release); 3390 } 3391 3392 static int 3393 cifs_resend_wdata(struct cifs_writedata *wdata, struct list_head *wdata_list, 3394 struct cifs_aio_ctx *ctx) 3395 { 3396 unsigned int wsize; 3397 struct cifs_credits credits; 3398 int rc; 3399 struct TCP_Server_Info *server = wdata->server; 3400 3401 do { 3402 if (wdata->cfile->invalidHandle) { 3403 rc = cifs_reopen_file(wdata->cfile, false); 3404 if (rc == -EAGAIN) 3405 continue; 3406 else if (rc) 3407 break; 3408 } 3409 3410 3411 /* 3412 * Wait for credits to resend this wdata. 
3413 * Note: we are attempting to resend the whole wdata not in 3414 * segments 3415 */ 3416 do { 3417 rc = server->ops->wait_mtu_credits(server, wdata->bytes, 3418 &wsize, &credits); 3419 if (rc) 3420 goto fail; 3421 3422 if (wsize < wdata->bytes) { 3423 add_credits_and_wake_if(server, &credits, 0); 3424 msleep(1000); 3425 } 3426 } while (wsize < wdata->bytes); 3427 wdata->credits = credits; 3428 3429 rc = adjust_credits(server, &wdata->credits, wdata->bytes); 3430 3431 if (!rc) { 3432 if (wdata->cfile->invalidHandle) 3433 rc = -EAGAIN; 3434 else { 3435 wdata->replay = true; 3436 #ifdef CONFIG_CIFS_SMB_DIRECT 3437 if (wdata->mr) { 3438 wdata->mr->need_invalidate = true; 3439 smbd_deregister_mr(wdata->mr); 3440 wdata->mr = NULL; 3441 } 3442 #endif 3443 rc = server->ops->async_writev(wdata, 3444 cifs_uncached_writedata_release); 3445 } 3446 } 3447 3448 /* If the write was successfully sent, we are done */ 3449 if (!rc) { 3450 list_add_tail(&wdata->list, wdata_list); 3451 return 0; 3452 } 3453 3454 /* Roll back credits and retry if needed */ 3455 add_credits_and_wake_if(server, &wdata->credits, 0); 3456 } while (rc == -EAGAIN); 3457 3458 fail: 3459 kref_put(&wdata->refcount, cifs_uncached_writedata_release); 3460 return rc; 3461 } 3462 3463 /* 3464 * Select span of a bvec iterator we're going to use. Limit it by both maximum 3465 * size and maximum number of segments. 3466 */ 3467 static size_t cifs_limit_bvec_subset(const struct iov_iter *iter, size_t max_size, 3468 size_t max_segs, unsigned int *_nsegs) 3469 { 3470 const struct bio_vec *bvecs = iter->bvec; 3471 unsigned int nbv = iter->nr_segs, ix = 0, nsegs = 0; 3472 size_t len, span = 0, n = iter->count; 3473 size_t skip = iter->iov_offset; 3474 3475 if (WARN_ON(!iov_iter_is_bvec(iter)) || n == 0) 3476 return 0; 3477 3478 while (n && ix < nbv && skip) { 3479 len = bvecs[ix].bv_len; 3480 if (skip < len) 3481 break; 3482 skip -= len; 3483 n -= len; 3484 ix++; 3485 } 3486 3487 while (n && ix < nbv) { 3488 len = min3(n, bvecs[ix].bv_len - skip, max_size); 3489 span += len; 3490 max_size -= len; 3491 nsegs++; 3492 ix++; 3493 if (max_size == 0 || nsegs >= max_segs) 3494 break; 3495 skip = 0; 3496 n -= len; 3497 } 3498 3499 *_nsegs = nsegs; 3500 return span; 3501 } 3502 3503 static int 3504 cifs_write_from_iter(loff_t fpos, size_t len, struct iov_iter *from, 3505 struct cifsFileInfo *open_file, 3506 struct cifs_sb_info *cifs_sb, struct list_head *wdata_list, 3507 struct cifs_aio_ctx *ctx) 3508 { 3509 int rc = 0; 3510 size_t cur_len, max_len; 3511 struct cifs_writedata *wdata; 3512 pid_t pid; 3513 struct TCP_Server_Info *server; 3514 unsigned int xid, max_segs = INT_MAX; 3515 3516 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD) 3517 pid = open_file->pid; 3518 else 3519 pid = current->tgid; 3520 3521 server = cifs_pick_channel(tlink_tcon(open_file->tlink)->ses); 3522 xid = get_xid(); 3523 3524 #ifdef CONFIG_CIFS_SMB_DIRECT 3525 if (server->smbd_conn) 3526 max_segs = server->smbd_conn->max_frmr_depth; 3527 #endif 3528 3529 do { 3530 struct cifs_credits credits_on_stack; 3531 struct cifs_credits *credits = &credits_on_stack; 3532 unsigned int wsize, nsegs = 0; 3533 3534 if (signal_pending(current)) { 3535 rc = -EINTR; 3536 break; 3537 } 3538 3539 if (open_file->invalidHandle) { 3540 rc = cifs_reopen_file(open_file, false); 3541 if (rc == -EAGAIN) 3542 continue; 3543 else if (rc) 3544 break; 3545 } 3546 3547 rc = server->ops->wait_mtu_credits(server, cifs_sb->ctx->wsize, 3548 &wsize, credits); 3549 if (rc) 3550 break; 3551 3552 max_len = 
min_t(const size_t, len, wsize); 3553 if (!max_len) { 3554 rc = -EAGAIN; 3555 add_credits_and_wake_if(server, credits, 0); 3556 break; 3557 } 3558 3559 cur_len = cifs_limit_bvec_subset(from, max_len, max_segs, &nsegs); 3560 cifs_dbg(FYI, "write_from_iter len=%zx/%zx nsegs=%u/%lu/%u\n", 3561 cur_len, max_len, nsegs, from->nr_segs, max_segs); 3562 if (cur_len == 0) { 3563 rc = -EIO; 3564 add_credits_and_wake_if(server, credits, 0); 3565 break; 3566 } 3567 3568 wdata = cifs_writedata_alloc(cifs_uncached_writev_complete); 3569 if (!wdata) { 3570 rc = -ENOMEM; 3571 add_credits_and_wake_if(server, credits, 0); 3572 break; 3573 } 3574 3575 wdata->sync_mode = WB_SYNC_ALL; 3576 wdata->offset = (__u64)fpos; 3577 wdata->cfile = cifsFileInfo_get(open_file); 3578 wdata->server = server; 3579 wdata->pid = pid; 3580 wdata->bytes = cur_len; 3581 wdata->credits = credits_on_stack; 3582 wdata->iter = *from; 3583 wdata->ctx = ctx; 3584 kref_get(&ctx->refcount); 3585 3586 iov_iter_truncate(&wdata->iter, cur_len); 3587 3588 rc = adjust_credits(server, &wdata->credits, wdata->bytes); 3589 3590 if (!rc) { 3591 if (wdata->cfile->invalidHandle) 3592 rc = -EAGAIN; 3593 else 3594 rc = server->ops->async_writev(wdata, 3595 cifs_uncached_writedata_release); 3596 } 3597 3598 if (rc) { 3599 add_credits_and_wake_if(server, &wdata->credits, 0); 3600 kref_put(&wdata->refcount, 3601 cifs_uncached_writedata_release); 3602 if (rc == -EAGAIN) 3603 continue; 3604 break; 3605 } 3606 3607 list_add_tail(&wdata->list, wdata_list); 3608 iov_iter_advance(from, cur_len); 3609 fpos += cur_len; 3610 len -= cur_len; 3611 } while (len > 0); 3612 3613 free_xid(xid); 3614 return rc; 3615 } 3616 3617 static void collect_uncached_write_data(struct cifs_aio_ctx *ctx) 3618 { 3619 struct cifs_writedata *wdata, *tmp; 3620 struct cifs_tcon *tcon; 3621 struct cifs_sb_info *cifs_sb; 3622 struct dentry *dentry = ctx->cfile->dentry; 3623 ssize_t rc; 3624 3625 tcon = tlink_tcon(ctx->cfile->tlink); 3626 cifs_sb = CIFS_SB(dentry->d_sb); 3627 3628 mutex_lock(&ctx->aio_mutex); 3629 3630 if (list_empty(&ctx->list)) { 3631 mutex_unlock(&ctx->aio_mutex); 3632 return; 3633 } 3634 3635 rc = ctx->rc; 3636 /* 3637 * Wait for and collect replies for any successful sends in order of 3638 * increasing offset. Once an error is hit, then return without waiting 3639 * for any more replies. 
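* Writes that failed with -EAGAIN are resent below: as a whole for direct I/O, otherwise re-sliced from the original iterator.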
3640 */ 3641 restart_loop: 3642 list_for_each_entry_safe(wdata, tmp, &ctx->list, list) { 3643 if (!rc) { 3644 if (!try_wait_for_completion(&wdata->done)) { 3645 mutex_unlock(&ctx->aio_mutex); 3646 return; 3647 } 3648 3649 if (wdata->result) 3650 rc = wdata->result; 3651 else 3652 ctx->total_len += wdata->bytes; 3653 3654 /* resend call if it's a retryable error */ 3655 if (rc == -EAGAIN) { 3656 struct list_head tmp_list; 3657 struct iov_iter tmp_from = ctx->iter; 3658 3659 INIT_LIST_HEAD(&tmp_list); 3660 list_del_init(&wdata->list); 3661 3662 if (ctx->direct_io) 3663 rc = cifs_resend_wdata( 3664 wdata, &tmp_list, ctx); 3665 else { 3666 iov_iter_advance(&tmp_from, 3667 wdata->offset - ctx->pos); 3668 3669 rc = cifs_write_from_iter(wdata->offset, 3670 wdata->bytes, &tmp_from, 3671 ctx->cfile, cifs_sb, &tmp_list, 3672 ctx); 3673 3674 kref_put(&wdata->refcount, 3675 cifs_uncached_writedata_release); 3676 } 3677 3678 list_splice(&tmp_list, &ctx->list); 3679 goto restart_loop; 3680 } 3681 } 3682 list_del_init(&wdata->list); 3683 kref_put(&wdata->refcount, cifs_uncached_writedata_release); 3684 } 3685 3686 cifs_stats_bytes_written(tcon, ctx->total_len); 3687 set_bit(CIFS_INO_INVALID_MAPPING, &CIFS_I(dentry->d_inode)->flags); 3688 3689 ctx->rc = (rc == 0) ? ctx->total_len : rc; 3690 3691 mutex_unlock(&ctx->aio_mutex); 3692 3693 if (ctx->iocb && ctx->iocb->ki_complete) 3694 ctx->iocb->ki_complete(ctx->iocb, ctx->rc); 3695 else 3696 complete(&ctx->done); 3697 } 3698 3699 static ssize_t __cifs_writev( 3700 struct kiocb *iocb, struct iov_iter *from, bool direct) 3701 { 3702 struct file *file = iocb->ki_filp; 3703 ssize_t total_written = 0; 3704 struct cifsFileInfo *cfile; 3705 struct cifs_tcon *tcon; 3706 struct cifs_sb_info *cifs_sb; 3707 struct cifs_aio_ctx *ctx; 3708 int rc; 3709 3710 rc = generic_write_checks(iocb, from); 3711 if (rc <= 0) 3712 return rc; 3713 3714 cifs_sb = CIFS_FILE_SB(file); 3715 cfile = file->private_data; 3716 tcon = tlink_tcon(cfile->tlink); 3717 3718 if (!tcon->ses->server->ops->async_writev) 3719 return -ENOSYS; 3720 3721 ctx = cifs_aio_ctx_alloc(); 3722 if (!ctx) 3723 return -ENOMEM; 3724 3725 ctx->cfile = cifsFileInfo_get(cfile); 3726 3727 if (!is_sync_kiocb(iocb)) 3728 ctx->iocb = iocb; 3729 3730 ctx->pos = iocb->ki_pos; 3731 ctx->direct_io = direct; 3732 ctx->nr_pinned_pages = 0; 3733 3734 if (user_backed_iter(from)) { 3735 /* 3736 * Extract IOVEC/UBUF-type iterators to a BVEC-type iterator as 3737 * they contain references to the calling process's virtual 3738 * memory layout which won't be available in an async worker 3739 * thread. This also takes a pin on every folio involved. 3740 */ 3741 rc = netfs_extract_user_iter(from, iov_iter_count(from), 3742 &ctx->iter, 0); 3743 if (rc < 0) { 3744 kref_put(&ctx->refcount, cifs_aio_ctx_release); 3745 return rc; 3746 } 3747 3748 ctx->nr_pinned_pages = rc; 3749 ctx->bv = (void *)ctx->iter.bvec; 3750 ctx->bv_need_unpin = iov_iter_extract_will_pin(from); 3751 } else if ((iov_iter_is_bvec(from) || iov_iter_is_kvec(from)) && 3752 !is_sync_kiocb(iocb)) { 3753 /* 3754 * If the op is asynchronous, we need to copy the list attached 3755 * to a BVEC/KVEC-type iterator, but we assume that the storage 3756 * will be pinned by the caller; in any case, we may or may not 3757 * be able to pin the pages, so we don't try. 
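* The duplicated segment array is kept in ctx->bv so it can be freed when the ctx is released.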
3758 */ 3759 ctx->bv = (void *)dup_iter(&ctx->iter, from, GFP_KERNEL); 3760 if (!ctx->bv) { 3761 kref_put(&ctx->refcount, cifs_aio_ctx_release); 3762 return -ENOMEM; 3763 } 3764 } else { 3765 /* 3766 * Otherwise, we just pass the iterator down as-is and rely on 3767 * the caller to make sure the pages referred to by the 3768 * iterator don't evaporate. 3769 */ 3770 ctx->iter = *from; 3771 } 3772 3773 ctx->len = iov_iter_count(&ctx->iter); 3774 3775 /* grab a lock here because write response handlers can access ctx */ 3776 mutex_lock(&ctx->aio_mutex); 3777 3778 rc = cifs_write_from_iter(iocb->ki_pos, ctx->len, &ctx->iter, 3779 cfile, cifs_sb, &ctx->list, ctx); 3780 3781 /* 3782 * If at least one write was successfully sent, then discard any rc 3783 * value from the later writes. If the other writes succeed, then 3784 * we'll end up returning whatever was written. If they fail, then 3785 * we'll get a new rc value from them. 3786 */ 3787 if (!list_empty(&ctx->list)) 3788 rc = 0; 3789 3790 mutex_unlock(&ctx->aio_mutex); 3791 3792 if (rc) { 3793 kref_put(&ctx->refcount, cifs_aio_ctx_release); 3794 return rc; 3795 } 3796 3797 if (!is_sync_kiocb(iocb)) { 3798 kref_put(&ctx->refcount, cifs_aio_ctx_release); 3799 return -EIOCBQUEUED; 3800 } 3801 3802 rc = wait_for_completion_killable(&ctx->done); 3803 if (rc) { 3804 mutex_lock(&ctx->aio_mutex); 3805 ctx->rc = rc = -EINTR; 3806 total_written = ctx->total_len; 3807 mutex_unlock(&ctx->aio_mutex); 3808 } else { 3809 rc = ctx->rc; 3810 total_written = ctx->total_len; 3811 } 3812 3813 kref_put(&ctx->refcount, cifs_aio_ctx_release); 3814 3815 if (unlikely(!total_written)) 3816 return rc; 3817 3818 iocb->ki_pos += total_written; 3819 return total_written; 3820 } 3821 3822 ssize_t cifs_direct_writev(struct kiocb *iocb, struct iov_iter *from) 3823 { 3824 struct file *file = iocb->ki_filp; 3825 3826 cifs_revalidate_mapping(file->f_inode); 3827 return __cifs_writev(iocb, from, true); 3828 } 3829 3830 ssize_t cifs_user_writev(struct kiocb *iocb, struct iov_iter *from) 3831 { 3832 return __cifs_writev(iocb, from, false); 3833 } 3834 3835 static ssize_t 3836 cifs_writev(struct kiocb *iocb, struct iov_iter *from) 3837 { 3838 struct file *file = iocb->ki_filp; 3839 struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data; 3840 struct inode *inode = file->f_mapping->host; 3841 struct cifsInodeInfo *cinode = CIFS_I(inode); 3842 struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server; 3843 ssize_t rc; 3844 3845 inode_lock(inode); 3846 /* 3847 * We need to hold the sem to be sure nobody modifies the lock list 3848 * with a brlock that prevents writing.
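* Taking lock_sem for read suffices here since anyone inserting a conflicting brlock must hold it for write.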
3849 */ 3850 down_read(&cinode->lock_sem); 3851 3852 rc = generic_write_checks(iocb, from); 3853 if (rc <= 0) 3854 goto out; 3855 3856 if (!cifs_find_lock_conflict(cfile, iocb->ki_pos, iov_iter_count(from), 3857 server->vals->exclusive_lock_type, 0, 3858 NULL, CIFS_WRITE_OP)) 3859 rc = __generic_file_write_iter(iocb, from); 3860 else 3861 rc = -EACCES; 3862 out: 3863 up_read(&cinode->lock_sem); 3864 inode_unlock(inode); 3865 3866 if (rc > 0) 3867 rc = generic_write_sync(iocb, rc); 3868 return rc; 3869 } 3870 3871 ssize_t 3872 cifs_strict_writev(struct kiocb *iocb, struct iov_iter *from) 3873 { 3874 struct inode *inode = file_inode(iocb->ki_filp); 3875 struct cifsInodeInfo *cinode = CIFS_I(inode); 3876 struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb); 3877 struct cifsFileInfo *cfile = (struct cifsFileInfo *) 3878 iocb->ki_filp->private_data; 3879 struct cifs_tcon *tcon = tlink_tcon(cfile->tlink); 3880 ssize_t written; 3881 3882 written = cifs_get_writer(cinode); 3883 if (written) 3884 return written; 3885 3886 if (CIFS_CACHE_WRITE(cinode)) { 3887 if (cap_unix(tcon->ses) && 3888 (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) 3889 && ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0)) { 3890 written = generic_file_write_iter(iocb, from); 3891 goto out; 3892 } 3893 written = cifs_writev(iocb, from); 3894 goto out; 3895 } 3896 /* 3897 * For non-oplocked files in strict cache mode we need to write the data 3898 * to the server exactly from pos to pos+len-1 rather than flush all 3899 * affected pages because it may cause an error with mandatory locks on 3900 * these pages but not on the region from pos to pos+len-1. 3901 */ 3902 written = cifs_user_writev(iocb, from); 3903 if (CIFS_CACHE_READ(cinode)) { 3904 /* 3905 * We have read level caching and we have just sent a write 3906 * request to the server thus making data in the cache stale. 3907 * Zap the cache and set oplock/lease level to NONE to avoid 3908 * reading stale data from the cache. All subsequent read 3909 * operations will read new data from the server.
3910 */ 3911 cifs_zap_mapping(inode); 3912 cifs_dbg(FYI, "Set Oplock/Lease to NONE for inode=%p after write\n", 3913 inode); 3914 cinode->oplock = 0; 3915 } 3916 out: 3917 cifs_put_writer(cinode); 3918 return written; 3919 } 3920 3921 static struct cifs_readdata *cifs_readdata_alloc(work_func_t complete) 3922 { 3923 struct cifs_readdata *rdata; 3924 3925 rdata = kzalloc(sizeof(*rdata), GFP_KERNEL); 3926 if (rdata) { 3927 kref_init(&rdata->refcount); 3928 INIT_LIST_HEAD(&rdata->list); 3929 init_completion(&rdata->done); 3930 INIT_WORK(&rdata->work, complete); 3931 } 3932 3933 return rdata; 3934 } 3935 3936 void 3937 cifs_readdata_release(struct kref *refcount) 3938 { 3939 struct cifs_readdata *rdata = container_of(refcount, 3940 struct cifs_readdata, refcount); 3941 3942 if (rdata->ctx) 3943 kref_put(&rdata->ctx->refcount, cifs_aio_ctx_release); 3944 #ifdef CONFIG_CIFS_SMB_DIRECT 3945 if (rdata->mr) { 3946 smbd_deregister_mr(rdata->mr); 3947 rdata->mr = NULL; 3948 } 3949 #endif 3950 if (rdata->cfile) 3951 cifsFileInfo_put(rdata->cfile); 3952 3953 kfree(rdata); 3954 } 3955 3956 static void collect_uncached_read_data(struct cifs_aio_ctx *ctx); 3957 3958 static void 3959 cifs_uncached_readv_complete(struct work_struct *work) 3960 { 3961 struct cifs_readdata *rdata = container_of(work, 3962 struct cifs_readdata, work); 3963 3964 complete(&rdata->done); 3965 collect_uncached_read_data(rdata->ctx); 3966 /* the below call can possibly free the last ref to aio ctx */ 3967 kref_put(&rdata->refcount, cifs_readdata_release); 3968 } 3969 3970 static int cifs_resend_rdata(struct cifs_readdata *rdata, 3971 struct list_head *rdata_list, 3972 struct cifs_aio_ctx *ctx) 3973 { 3974 unsigned int rsize; 3975 struct cifs_credits credits; 3976 int rc; 3977 struct TCP_Server_Info *server; 3978 3979 /* XXX: should we pick a new channel here? */ 3980 server = rdata->server; 3981 3982 do { 3983 if (rdata->cfile->invalidHandle) { 3984 rc = cifs_reopen_file(rdata->cfile, true); 3985 if (rc == -EAGAIN) 3986 continue; 3987 else if (rc) 3988 break; 3989 } 3990 3991 /* 3992 * Wait for credits to resend this rdata. 
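		 * The server may have shrunk the negotiated I/O size across
		 * a reconnect, so loop below until it grants enough credits
		 * to cover the request.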
3993 * Note: we are attempting to resend the whole rdata not in 3994 * segments 3995 */ 3996 do { 3997 rc = server->ops->wait_mtu_credits(server, rdata->bytes, 3998 &rsize, &credits); 3999 4000 if (rc) 4001 goto fail; 4002 4003 if (rsize < rdata->bytes) { 4004 add_credits_and_wake_if(server, &credits, 0); 4005 msleep(1000); 4006 } 4007 } while (rsize < rdata->bytes); 4008 rdata->credits = credits; 4009 4010 rc = adjust_credits(server, &rdata->credits, rdata->bytes); 4011 if (!rc) { 4012 if (rdata->cfile->invalidHandle) 4013 rc = -EAGAIN; 4014 else { 4015 #ifdef CONFIG_CIFS_SMB_DIRECT 4016 if (rdata->mr) { 4017 rdata->mr->need_invalidate = true; 4018 smbd_deregister_mr(rdata->mr); 4019 rdata->mr = NULL; 4020 } 4021 #endif 4022 rc = server->ops->async_readv(rdata); 4023 } 4024 } 4025 4026 /* If the read was successfully sent, we are done */ 4027 if (!rc) { 4028 /* Add to aio pending list */ 4029 list_add_tail(&rdata->list, rdata_list); 4030 return 0; 4031 } 4032 4033 /* Roll back credits and retry if needed */ 4034 add_credits_and_wake_if(server, &rdata->credits, 0); 4035 } while (rc == -EAGAIN); 4036 4037 fail: 4038 kref_put(&rdata->refcount, cifs_readdata_release); 4039 return rc; 4040 } 4041 4042 static int 4043 cifs_send_async_read(loff_t fpos, size_t len, struct cifsFileInfo *open_file, 4044 struct cifs_sb_info *cifs_sb, struct list_head *rdata_list, 4045 struct cifs_aio_ctx *ctx) 4046 { 4047 struct cifs_readdata *rdata; 4048 unsigned int rsize, nsegs, max_segs = INT_MAX; 4049 struct cifs_credits credits_on_stack; 4050 struct cifs_credits *credits = &credits_on_stack; 4051 size_t cur_len, max_len; 4052 int rc; 4053 pid_t pid; 4054 struct TCP_Server_Info *server; 4055 4056 server = cifs_pick_channel(tlink_tcon(open_file->tlink)->ses); 4057 4058 #ifdef CONFIG_CIFS_SMB_DIRECT 4059 if (server->smbd_conn) 4060 max_segs = server->smbd_conn->max_frmr_depth; 4061 #endif 4062 4063 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD) 4064 pid = open_file->pid; 4065 else 4066 pid = current->tgid; 4067 4068 do { 4069 if (open_file->invalidHandle) { 4070 rc = cifs_reopen_file(open_file, true); 4071 if (rc == -EAGAIN) 4072 continue; 4073 else if (rc) 4074 break; 4075 } 4076 4077 if (cifs_sb->ctx->rsize == 0) 4078 cifs_sb->ctx->rsize = 4079 server->ops->negotiate_rsize(tlink_tcon(open_file->tlink), 4080 cifs_sb->ctx); 4081 4082 rc = server->ops->wait_mtu_credits(server, cifs_sb->ctx->rsize, 4083 &rsize, credits); 4084 if (rc) 4085 break; 4086 4087 max_len = min_t(size_t, len, rsize); 4088 4089 cur_len = cifs_limit_bvec_subset(&ctx->iter, max_len, 4090 max_segs, &nsegs); 4091 cifs_dbg(FYI, "read-to-iter len=%zx/%zx nsegs=%u/%lu/%u\n", 4092 cur_len, max_len, nsegs, ctx->iter.nr_segs, max_segs); 4093 if (cur_len == 0) { 4094 rc = -EIO; 4095 add_credits_and_wake_if(server, credits, 0); 4096 break; 4097 } 4098 4099 rdata = cifs_readdata_alloc(cifs_uncached_readv_complete); 4100 if (!rdata) { 4101 add_credits_and_wake_if(server, credits, 0); 4102 rc = -ENOMEM; 4103 break; 4104 } 4105 4106 rdata->server = server; 4107 rdata->cfile = cifsFileInfo_get(open_file); 4108 rdata->offset = fpos; 4109 rdata->bytes = cur_len; 4110 rdata->pid = pid; 4111 rdata->credits = credits_on_stack; 4112 rdata->ctx = ctx; 4113 kref_get(&ctx->refcount); 4114 4115 rdata->iter = ctx->iter; 4116 iov_iter_truncate(&rdata->iter, cur_len); 4117 4118 rc = adjust_credits(server, &rdata->credits, rdata->bytes); 4119 4120 if (!rc) { 4121 if (rdata->cfile->invalidHandle) 4122 rc = -EAGAIN; 4123 else 4124 rc = 
server->ops->async_readv(rdata); 4125 } 4126 4127 if (rc) { 4128 add_credits_and_wake_if(server, &rdata->credits, 0); 4129 kref_put(&rdata->refcount, cifs_readdata_release); 4130 if (rc == -EAGAIN) 4131 continue; 4132 break; 4133 } 4134 4135 list_add_tail(&rdata->list, rdata_list); 4136 iov_iter_advance(&ctx->iter, cur_len); 4137 fpos += cur_len; 4138 len -= cur_len; 4139 } while (len > 0); 4140 4141 return rc; 4142 } 4143 4144 static void 4145 collect_uncached_read_data(struct cifs_aio_ctx *ctx) 4146 { 4147 struct cifs_readdata *rdata, *tmp; 4148 struct cifs_sb_info *cifs_sb; 4149 int rc; 4150 4151 cifs_sb = CIFS_SB(ctx->cfile->dentry->d_sb); 4152 4153 mutex_lock(&ctx->aio_mutex); 4154 4155 if (list_empty(&ctx->list)) { 4156 mutex_unlock(&ctx->aio_mutex); 4157 return; 4158 } 4159 4160 rc = ctx->rc; 4161 /* the loop below should proceed in the order of increasing offsets */ 4162 again: 4163 list_for_each_entry_safe(rdata, tmp, &ctx->list, list) { 4164 if (!rc) { 4165 if (!try_wait_for_completion(&rdata->done)) { 4166 mutex_unlock(&ctx->aio_mutex); 4167 return; 4168 } 4169 4170 if (rdata->result == -EAGAIN) { 4171 /* resend call if it's a retryable error */ 4172 struct list_head tmp_list; 4173 unsigned int got_bytes = rdata->got_bytes; 4174 4175 list_del_init(&rdata->list); 4176 INIT_LIST_HEAD(&tmp_list); 4177 4178 if (ctx->direct_io) { 4179 /* 4180 * Re-use rdata as this is a 4181 * direct I/O 4182 */ 4183 rc = cifs_resend_rdata( 4184 rdata, 4185 &tmp_list, ctx); 4186 } else { 4187 rc = cifs_send_async_read( 4188 rdata->offset + got_bytes, 4189 rdata->bytes - got_bytes, 4190 rdata->cfile, cifs_sb, 4191 &tmp_list, ctx); 4192 4193 kref_put(&rdata->refcount, 4194 cifs_readdata_release); 4195 } 4196 4197 list_splice(&tmp_list, &ctx->list); 4198 4199 goto again; 4200 } else if (rdata->result) 4201 rc = rdata->result; 4202 4203 /* if there was a short read -- discard anything left */ 4204 if (rdata->got_bytes && rdata->got_bytes < rdata->bytes) 4205 rc = -ENODATA; 4206 4207 ctx->total_len += rdata->got_bytes; 4208 } 4209 list_del_init(&rdata->list); 4210 kref_put(&rdata->refcount, cifs_readdata_release); 4211 } 4212 4213 /* mask nodata case */ 4214 if (rc == -ENODATA) 4215 rc = 0; 4216 4217 ctx->rc = (rc == 0) ? 
(ssize_t)ctx->total_len : rc; 4218 4219 mutex_unlock(&ctx->aio_mutex); 4220 4221 if (ctx->iocb && ctx->iocb->ki_complete) 4222 ctx->iocb->ki_complete(ctx->iocb, ctx->rc); 4223 else 4224 complete(&ctx->done); 4225 } 4226 4227 static ssize_t __cifs_readv( 4228 struct kiocb *iocb, struct iov_iter *to, bool direct) 4229 { 4230 size_t len; 4231 struct file *file = iocb->ki_filp; 4232 struct cifs_sb_info *cifs_sb; 4233 struct cifsFileInfo *cfile; 4234 struct cifs_tcon *tcon; 4235 ssize_t rc, total_read = 0; 4236 loff_t offset = iocb->ki_pos; 4237 struct cifs_aio_ctx *ctx; 4238 4239 len = iov_iter_count(to); 4240 if (!len) 4241 return 0; 4242 4243 cifs_sb = CIFS_FILE_SB(file); 4244 cfile = file->private_data; 4245 tcon = tlink_tcon(cfile->tlink); 4246 4247 if (!tcon->ses->server->ops->async_readv) 4248 return -ENOSYS; 4249 4250 if ((file->f_flags & O_ACCMODE) == O_WRONLY) 4251 cifs_dbg(FYI, "attempting read on write only file instance\n"); 4252 4253 ctx = cifs_aio_ctx_alloc(); 4254 if (!ctx) 4255 return -ENOMEM; 4256 4257 ctx->pos = offset; 4258 ctx->direct_io = direct; 4259 ctx->len = len; 4260 ctx->cfile = cifsFileInfo_get(cfile); 4261 ctx->nr_pinned_pages = 0; 4262 4263 if (!is_sync_kiocb(iocb)) 4264 ctx->iocb = iocb; 4265 4266 if (user_backed_iter(to)) { 4267 /* 4268 * Extract IOVEC/UBUF-type iterators to a BVEC-type iterator as 4269 * they contain references to the calling process's virtual 4270 * memory layout which won't be available in an async worker 4271 * thread. This also takes a pin on every folio involved. 4272 */ 4273 rc = netfs_extract_user_iter(to, iov_iter_count(to), 4274 &ctx->iter, 0); 4275 if (rc < 0) { 4276 kref_put(&ctx->refcount, cifs_aio_ctx_release); 4277 return rc; 4278 } 4279 4280 ctx->nr_pinned_pages = rc; 4281 ctx->bv = (void *)ctx->iter.bvec; 4282 ctx->bv_need_unpin = iov_iter_extract_will_pin(to); 4283 ctx->should_dirty = true; 4284 } else if ((iov_iter_is_bvec(to) || iov_iter_is_kvec(to)) && 4285 !is_sync_kiocb(iocb)) { 4286 /* 4287 * If the op is asynchronous, we need to copy the list attached 4288 * to a BVEC/KVEC-type iterator, but we assume that the storage 4289 * will be retained by the caller; in any case, we may or may 4290 * not be able to pin the pages, so we don't try. 4291 */ 4292 ctx->bv = (void *)dup_iter(&ctx->iter, to, GFP_KERNEL); 4293 if (!ctx->bv) { 4294 kref_put(&ctx->refcount, cifs_aio_ctx_release); 4295 return -ENOMEM; 4296 } 4297 } else { 4298 /* 4299 * Otherwise, we just pass the iterator down as-is and rely on 4300 * the caller to make sure the pages referred to by the 4301 * iterator don't evaporate. 
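		 *
		 * In the synchronous case we wait on ctx->done below before
		 * returning, so the caller's iterator stays live while the
		 * requests are in flight.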
		 */
		ctx->iter = *to;
	}

	if (direct) {
		rc = filemap_write_and_wait_range(file->f_inode->i_mapping,
						  offset, offset + len - 1);
		if (rc) {
			kref_put(&ctx->refcount, cifs_aio_ctx_release);
			return -EAGAIN;
		}
	}

	/* grab a lock here because read response handlers can access ctx */
	mutex_lock(&ctx->aio_mutex);

	rc = cifs_send_async_read(offset, len, cfile, cifs_sb, &ctx->list, ctx);

	/* if at least one read request was successfully sent, then reset rc */
	if (!list_empty(&ctx->list))
		rc = 0;

	mutex_unlock(&ctx->aio_mutex);

	if (rc) {
		kref_put(&ctx->refcount, cifs_aio_ctx_release);
		return rc;
	}

	if (!is_sync_kiocb(iocb)) {
		kref_put(&ctx->refcount, cifs_aio_ctx_release);
		return -EIOCBQUEUED;
	}

	rc = wait_for_completion_killable(&ctx->done);
	if (rc) {
		mutex_lock(&ctx->aio_mutex);
		ctx->rc = rc = -EINTR;
		total_read = ctx->total_len;
		mutex_unlock(&ctx->aio_mutex);
	} else {
		rc = ctx->rc;
		total_read = ctx->total_len;
	}

	kref_put(&ctx->refcount, cifs_aio_ctx_release);

	if (total_read) {
		iocb->ki_pos += total_read;
		return total_read;
	}
	return rc;
}

ssize_t cifs_direct_readv(struct kiocb *iocb, struct iov_iter *to)
{
	return __cifs_readv(iocb, to, true);
}

ssize_t cifs_user_readv(struct kiocb *iocb, struct iov_iter *to)
{
	return __cifs_readv(iocb, to, false);
}

ssize_t
cifs_strict_readv(struct kiocb *iocb, struct iov_iter *to)
{
	struct inode *inode = file_inode(iocb->ki_filp);
	struct cifsInodeInfo *cinode = CIFS_I(inode);
	struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
	struct cifsFileInfo *cfile = (struct cifsFileInfo *)
				     iocb->ki_filp->private_data;
	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
	int rc = -EACCES;

	/*
	 * In strict cache mode we need to read from the server all the time
	 * if we don't have level II oplock because the server can delay mtime
	 * change - so we can't make a decision about inode invalidating.
	 * And we can also fail with page reading if there are mandatory locks
	 * on pages affected by this read but not on the region from pos to
	 * pos+len-1.
	 */
	if (!CIFS_CACHE_READ(cinode))
		return cifs_user_readv(iocb, to);

	if (cap_unix(tcon->ses) &&
	    (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
	    ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
		return generic_file_read_iter(iocb, to);

	/*
	 * We need to hold the sem to be sure nobody modifies lock list
	 * with a brlock that prevents reading.
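	 *
	 * Reads only conflict with exclusive (write) locks, hence the
	 * check against shared_lock_type with CIFS_READ_OP below.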
4396 */ 4397 down_read(&cinode->lock_sem); 4398 if (!cifs_find_lock_conflict(cfile, iocb->ki_pos, iov_iter_count(to), 4399 tcon->ses->server->vals->shared_lock_type, 4400 0, NULL, CIFS_READ_OP)) 4401 rc = generic_file_read_iter(iocb, to); 4402 up_read(&cinode->lock_sem); 4403 return rc; 4404 } 4405 4406 static ssize_t 4407 cifs_read(struct file *file, char *read_data, size_t read_size, loff_t *offset) 4408 { 4409 int rc = -EACCES; 4410 unsigned int bytes_read = 0; 4411 unsigned int total_read; 4412 unsigned int current_read_size; 4413 unsigned int rsize; 4414 struct cifs_sb_info *cifs_sb; 4415 struct cifs_tcon *tcon; 4416 struct TCP_Server_Info *server; 4417 unsigned int xid; 4418 char *cur_offset; 4419 struct cifsFileInfo *open_file; 4420 struct cifs_io_parms io_parms = {0}; 4421 int buf_type = CIFS_NO_BUFFER; 4422 __u32 pid; 4423 4424 xid = get_xid(); 4425 cifs_sb = CIFS_FILE_SB(file); 4426 4427 /* FIXME: set up handlers for larger reads and/or convert to async */ 4428 rsize = min_t(unsigned int, cifs_sb->ctx->rsize, CIFSMaxBufSize); 4429 4430 if (file->private_data == NULL) { 4431 rc = -EBADF; 4432 free_xid(xid); 4433 return rc; 4434 } 4435 open_file = file->private_data; 4436 tcon = tlink_tcon(open_file->tlink); 4437 server = cifs_pick_channel(tcon->ses); 4438 4439 if (!server->ops->sync_read) { 4440 free_xid(xid); 4441 return -ENOSYS; 4442 } 4443 4444 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD) 4445 pid = open_file->pid; 4446 else 4447 pid = current->tgid; 4448 4449 if ((file->f_flags & O_ACCMODE) == O_WRONLY) 4450 cifs_dbg(FYI, "attempting read on write only file instance\n"); 4451 4452 for (total_read = 0, cur_offset = read_data; read_size > total_read; 4453 total_read += bytes_read, cur_offset += bytes_read) { 4454 do { 4455 current_read_size = min_t(uint, read_size - total_read, 4456 rsize); 4457 /* 4458 * For windows me and 9x we do not want to request more 4459 * than it negotiated since it will refuse the read 4460 * then. 4461 */ 4462 if (!(tcon->ses->capabilities & 4463 tcon->ses->server->vals->cap_large_files)) { 4464 current_read_size = min_t(uint, 4465 current_read_size, CIFSMaxBufSize); 4466 } 4467 if (open_file->invalidHandle) { 4468 rc = cifs_reopen_file(open_file, true); 4469 if (rc != 0) 4470 break; 4471 } 4472 io_parms.pid = pid; 4473 io_parms.tcon = tcon; 4474 io_parms.offset = *offset; 4475 io_parms.length = current_read_size; 4476 io_parms.server = server; 4477 rc = server->ops->sync_read(xid, &open_file->fid, &io_parms, 4478 &bytes_read, &cur_offset, 4479 &buf_type); 4480 } while (rc == -EAGAIN); 4481 4482 if (rc || (bytes_read == 0)) { 4483 if (total_read) { 4484 break; 4485 } else { 4486 free_xid(xid); 4487 return rc; 4488 } 4489 } else { 4490 cifs_stats_bytes_read(tcon, total_read); 4491 *offset += bytes_read; 4492 } 4493 } 4494 free_xid(xid); 4495 return total_read; 4496 } 4497 4498 /* 4499 * If the page is mmap'ed into a process' page tables, then we need to make 4500 * sure that it doesn't change while being written back. 4501 */ 4502 static vm_fault_t cifs_page_mkwrite(struct vm_fault *vmf) 4503 { 4504 struct folio *folio = page_folio(vmf->page); 4505 4506 /* Wait for the folio to be written to the cache before we allow it to 4507 * be modified. We then assume the entire folio will need writing back. 
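	 *
	 * Returning VM_FAULT_LOCKED with the folio still locked lets the
	 * fault path mark the folio dirty before anyone else can touch it.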
	 */
#ifdef CONFIG_CIFS_FSCACHE
	if (folio_test_fscache(folio) &&
	    folio_wait_fscache_killable(folio) < 0)
		return VM_FAULT_RETRY;
#endif

	folio_wait_writeback(folio);

	if (folio_lock_killable(folio) < 0)
		return VM_FAULT_RETRY;
	return VM_FAULT_LOCKED;
}

static const struct vm_operations_struct cifs_file_vm_ops = {
	.fault = filemap_fault,
	.map_pages = filemap_map_pages,
	.page_mkwrite = cifs_page_mkwrite,
};

int cifs_file_strict_mmap(struct file *file, struct vm_area_struct *vma)
{
	int xid, rc = 0;
	struct inode *inode = file_inode(file);

	xid = get_xid();

	if (!CIFS_CACHE_READ(CIFS_I(inode)))
		rc = cifs_zap_mapping(inode);
	if (!rc)
		rc = generic_file_mmap(file, vma);
	if (!rc)
		vma->vm_ops = &cifs_file_vm_ops;

	free_xid(xid);
	return rc;
}

int cifs_file_mmap(struct file *file, struct vm_area_struct *vma)
{
	int rc, xid;

	xid = get_xid();

	rc = cifs_revalidate_file(file);
	if (rc)
		cifs_dbg(FYI, "Validation prior to mmap failed, error=%d\n",
			 rc);
	if (!rc)
		rc = generic_file_mmap(file, vma);
	if (!rc)
		vma->vm_ops = &cifs_file_vm_ops;

	free_xid(xid);
	return rc;
}

/*
 * Unlock a bunch of folios in the pagecache.
 */
static void cifs_unlock_folios(struct address_space *mapping, pgoff_t first, pgoff_t last)
{
	struct folio *folio;
	XA_STATE(xas, &mapping->i_pages, first);

	rcu_read_lock();
	xas_for_each(&xas, folio, last) {
		folio_unlock(folio);
	}
	rcu_read_unlock();
}

static void cifs_readahead_complete(struct work_struct *work)
{
	struct cifs_readdata *rdata = container_of(work,
						   struct cifs_readdata, work);
	struct folio *folio;
	pgoff_t last;
	bool good = rdata->result == 0 || (rdata->result == -EAGAIN && rdata->got_bytes);

	XA_STATE(xas, &rdata->mapping->i_pages, rdata->offset / PAGE_SIZE);

	if (good)
		cifs_readahead_to_fscache(rdata->mapping->host,
					  rdata->offset, rdata->bytes);

	if (iov_iter_count(&rdata->iter) > 0)
		iov_iter_zero(iov_iter_count(&rdata->iter), &rdata->iter);

	last = (rdata->offset + rdata->bytes - 1) / PAGE_SIZE;

	rcu_read_lock();
	xas_for_each(&xas, folio, last) {
		if (good) {
			flush_dcache_folio(folio);
			folio_mark_uptodate(folio);
		}
		folio_unlock(folio);
	}
	rcu_read_unlock();

	kref_put(&rdata->refcount, cifs_readdata_release);
}

static void cifs_readahead(struct readahead_control *ractl)
{
	struct cifsFileInfo *open_file = ractl->file->private_data;
	struct cifs_sb_info *cifs_sb = CIFS_FILE_SB(ractl->file);
	struct TCP_Server_Info *server;
	unsigned int xid, nr_pages, cache_nr_pages = 0;
	unsigned int ra_pages;
	pgoff_t next_cached = ULONG_MAX, ra_index;
	bool caching = fscache_cookie_enabled(cifs_inode_cookie(ractl->mapping->host)) &&
		cifs_inode_cookie(ractl->mapping->host)->cache_priv;
	bool check_cache = caching;
	pid_t pid;
	int rc = 0;

	/* Note that readahead_count() lags behind our dequeuing of pages from
	 * the ractl, so we have to keep track for ourselves.
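	 * ra_pages and ra_index are our own running view of how much of
	 * the window remains and where it currently starts.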
4628 */ 4629 ra_pages = readahead_count(ractl); 4630 ra_index = readahead_index(ractl); 4631 4632 xid = get_xid(); 4633 4634 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD) 4635 pid = open_file->pid; 4636 else 4637 pid = current->tgid; 4638 4639 server = cifs_pick_channel(tlink_tcon(open_file->tlink)->ses); 4640 4641 cifs_dbg(FYI, "%s: file=%p mapping=%p num_pages=%u\n", 4642 __func__, ractl->file, ractl->mapping, ra_pages); 4643 4644 /* 4645 * Chop the readahead request up into rsize-sized read requests. 4646 */ 4647 while ((nr_pages = ra_pages)) { 4648 unsigned int i, rsize; 4649 struct cifs_readdata *rdata; 4650 struct cifs_credits credits_on_stack; 4651 struct cifs_credits *credits = &credits_on_stack; 4652 struct folio *folio; 4653 pgoff_t fsize; 4654 4655 /* 4656 * Find out if we have anything cached in the range of 4657 * interest, and if so, where the next chunk of cached data is. 4658 */ 4659 if (caching) { 4660 if (check_cache) { 4661 rc = cifs_fscache_query_occupancy( 4662 ractl->mapping->host, ra_index, nr_pages, 4663 &next_cached, &cache_nr_pages); 4664 if (rc < 0) 4665 caching = false; 4666 check_cache = false; 4667 } 4668 4669 if (ra_index == next_cached) { 4670 /* 4671 * TODO: Send a whole batch of pages to be read 4672 * by the cache. 4673 */ 4674 folio = readahead_folio(ractl); 4675 fsize = folio_nr_pages(folio); 4676 ra_pages -= fsize; 4677 ra_index += fsize; 4678 if (cifs_readpage_from_fscache(ractl->mapping->host, 4679 &folio->page) < 0) { 4680 /* 4681 * TODO: Deal with cache read failure 4682 * here, but for the moment, delegate 4683 * that to readpage. 4684 */ 4685 caching = false; 4686 } 4687 folio_unlock(folio); 4688 next_cached += fsize; 4689 cache_nr_pages -= fsize; 4690 if (cache_nr_pages == 0) 4691 check_cache = true; 4692 continue; 4693 } 4694 } 4695 4696 if (open_file->invalidHandle) { 4697 rc = cifs_reopen_file(open_file, true); 4698 if (rc) { 4699 if (rc == -EAGAIN) 4700 continue; 4701 break; 4702 } 4703 } 4704 4705 if (cifs_sb->ctx->rsize == 0) 4706 cifs_sb->ctx->rsize = 4707 server->ops->negotiate_rsize(tlink_tcon(open_file->tlink), 4708 cifs_sb->ctx); 4709 4710 rc = server->ops->wait_mtu_credits(server, cifs_sb->ctx->rsize, 4711 &rsize, credits); 4712 if (rc) 4713 break; 4714 nr_pages = min_t(size_t, rsize / PAGE_SIZE, ra_pages); 4715 if (next_cached != ULONG_MAX) 4716 nr_pages = min_t(size_t, nr_pages, next_cached - ra_index); 4717 4718 /* 4719 * Give up immediately if rsize is too small to read an entire 4720 * page. The VFS will fall back to readpage. We should never 4721 * reach this point however since we set ra_pages to 0 when the 4722 * rsize is smaller than a cache page. 
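		 *
		 * add_credits_and_wake_if() hands the just-reserved credits
		 * back so that other in-flight requests can make progress.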
4723 */ 4724 if (unlikely(!nr_pages)) { 4725 add_credits_and_wake_if(server, credits, 0); 4726 break; 4727 } 4728 4729 rdata = cifs_readdata_alloc(cifs_readahead_complete); 4730 if (!rdata) { 4731 /* best to give up if we're out of mem */ 4732 add_credits_and_wake_if(server, credits, 0); 4733 break; 4734 } 4735 4736 rdata->offset = ra_index * PAGE_SIZE; 4737 rdata->bytes = nr_pages * PAGE_SIZE; 4738 rdata->cfile = cifsFileInfo_get(open_file); 4739 rdata->server = server; 4740 rdata->mapping = ractl->mapping; 4741 rdata->pid = pid; 4742 rdata->credits = credits_on_stack; 4743 4744 for (i = 0; i < nr_pages; i++) { 4745 if (!readahead_folio(ractl)) 4746 WARN_ON(1); 4747 } 4748 ra_pages -= nr_pages; 4749 ra_index += nr_pages; 4750 4751 iov_iter_xarray(&rdata->iter, ITER_DEST, &rdata->mapping->i_pages, 4752 rdata->offset, rdata->bytes); 4753 4754 rc = adjust_credits(server, &rdata->credits, rdata->bytes); 4755 if (!rc) { 4756 if (rdata->cfile->invalidHandle) 4757 rc = -EAGAIN; 4758 else 4759 rc = server->ops->async_readv(rdata); 4760 } 4761 4762 if (rc) { 4763 add_credits_and_wake_if(server, &rdata->credits, 0); 4764 cifs_unlock_folios(rdata->mapping, 4765 rdata->offset / PAGE_SIZE, 4766 (rdata->offset + rdata->bytes - 1) / PAGE_SIZE); 4767 /* Fallback to the readpage in error/reconnect cases */ 4768 kref_put(&rdata->refcount, cifs_readdata_release); 4769 break; 4770 } 4771 4772 kref_put(&rdata->refcount, cifs_readdata_release); 4773 } 4774 4775 free_xid(xid); 4776 } 4777 4778 /* 4779 * cifs_readpage_worker must be called with the page pinned 4780 */ 4781 static int cifs_readpage_worker(struct file *file, struct page *page, 4782 loff_t *poffset) 4783 { 4784 struct inode *inode = file_inode(file); 4785 struct timespec64 atime, mtime; 4786 char *read_data; 4787 int rc; 4788 4789 /* Is the page cached? 
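	 * Trying fscache first avoids a round trip to the server when the
	 * data is already resident in the local cache.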
*/ 4790 rc = cifs_readpage_from_fscache(inode, page); 4791 if (rc == 0) 4792 goto read_complete; 4793 4794 read_data = kmap(page); 4795 /* for reads over a certain size could initiate async read ahead */ 4796 4797 rc = cifs_read(file, read_data, PAGE_SIZE, poffset); 4798 4799 if (rc < 0) 4800 goto io_error; 4801 else 4802 cifs_dbg(FYI, "Bytes read %d\n", rc); 4803 4804 /* we do not want atime to be less than mtime, it broke some apps */ 4805 atime = inode_set_atime_to_ts(inode, current_time(inode)); 4806 mtime = inode_get_mtime(inode); 4807 if (timespec64_compare(&atime, &mtime) < 0) 4808 inode_set_atime_to_ts(inode, inode_get_mtime(inode)); 4809 4810 if (PAGE_SIZE > rc) 4811 memset(read_data + rc, 0, PAGE_SIZE - rc); 4812 4813 flush_dcache_page(page); 4814 SetPageUptodate(page); 4815 rc = 0; 4816 4817 io_error: 4818 kunmap(page); 4819 4820 read_complete: 4821 unlock_page(page); 4822 return rc; 4823 } 4824 4825 static int cifs_read_folio(struct file *file, struct folio *folio) 4826 { 4827 struct page *page = &folio->page; 4828 loff_t offset = page_file_offset(page); 4829 int rc = -EACCES; 4830 unsigned int xid; 4831 4832 xid = get_xid(); 4833 4834 if (file->private_data == NULL) { 4835 rc = -EBADF; 4836 free_xid(xid); 4837 return rc; 4838 } 4839 4840 cifs_dbg(FYI, "read_folio %p at offset %d 0x%x\n", 4841 page, (int)offset, (int)offset); 4842 4843 rc = cifs_readpage_worker(file, page, &offset); 4844 4845 free_xid(xid); 4846 return rc; 4847 } 4848 4849 static int is_inode_writable(struct cifsInodeInfo *cifs_inode) 4850 { 4851 struct cifsFileInfo *open_file; 4852 4853 spin_lock(&cifs_inode->open_file_lock); 4854 list_for_each_entry(open_file, &cifs_inode->openFileList, flist) { 4855 if (OPEN_FMODE(open_file->f_flags) & FMODE_WRITE) { 4856 spin_unlock(&cifs_inode->open_file_lock); 4857 return 1; 4858 } 4859 } 4860 spin_unlock(&cifs_inode->open_file_lock); 4861 return 0; 4862 } 4863 4864 /* We do not want to update the file size from server for inodes 4865 open for write - to avoid races with writepage extending 4866 the file - in the future we could consider allowing 4867 refreshing the inode only on increases in the file size 4868 but this is tricky to do without racing with writebehind 4869 page caching in the current Linux kernel design */ 4870 bool is_size_safe_to_change(struct cifsInodeInfo *cifsInode, __u64 end_of_file, 4871 bool from_readdir) 4872 { 4873 if (!cifsInode) 4874 return true; 4875 4876 if (is_inode_writable(cifsInode) || 4877 ((cifsInode->oplock & CIFS_CACHE_RW_FLG) != 0 && from_readdir)) { 4878 /* This inode is open for write at least once */ 4879 struct cifs_sb_info *cifs_sb; 4880 4881 cifs_sb = CIFS_SB(cifsInode->netfs.inode.i_sb); 4882 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DIRECT_IO) { 4883 /* since no page cache to corrupt on directio 4884 we can change size safely */ 4885 return true; 4886 } 4887 4888 if (i_size_read(&cifsInode->netfs.inode) < end_of_file) 4889 return true; 4890 4891 return false; 4892 } else 4893 return true; 4894 } 4895 4896 static int cifs_write_begin(struct file *file, struct address_space *mapping, 4897 loff_t pos, unsigned len, 4898 struct page **pagep, void **fsdata) 4899 { 4900 int oncethru = 0; 4901 pgoff_t index = pos >> PAGE_SHIFT; 4902 loff_t offset = pos & (PAGE_SIZE - 1); 4903 loff_t page_start = pos & PAGE_MASK; 4904 loff_t i_size; 4905 struct page *page; 4906 int rc = 0; 4907 4908 cifs_dbg(FYI, "write_begin from %lld len %d\n", (long long)pos, len); 4909 4910 start: 4911 page = grab_cache_page_write_begin(mapping, index); 4912 if 
(!page) { 4913 rc = -ENOMEM; 4914 goto out; 4915 } 4916 4917 if (PageUptodate(page)) 4918 goto out; 4919 4920 /* 4921 * If we write a full page it will be up to date, no need to read from 4922 * the server. If the write is short, we'll end up doing a sync write 4923 * instead. 4924 */ 4925 if (len == PAGE_SIZE) 4926 goto out; 4927 4928 /* 4929 * optimize away the read when we have an oplock, and we're not 4930 * expecting to use any of the data we'd be reading in. That 4931 * is, when the page lies beyond the EOF, or straddles the EOF 4932 * and the write will cover all of the existing data. 4933 */ 4934 if (CIFS_CACHE_READ(CIFS_I(mapping->host))) { 4935 i_size = i_size_read(mapping->host); 4936 if (page_start >= i_size || 4937 (offset == 0 && (pos + len) >= i_size)) { 4938 zero_user_segments(page, 0, offset, 4939 offset + len, 4940 PAGE_SIZE); 4941 /* 4942 * PageChecked means that the parts of the page 4943 * to which we're not writing are considered up 4944 * to date. Once the data is copied to the 4945 * page, it can be set uptodate. 4946 */ 4947 SetPageChecked(page); 4948 goto out; 4949 } 4950 } 4951 4952 if ((file->f_flags & O_ACCMODE) != O_WRONLY && !oncethru) { 4953 /* 4954 * might as well read a page, it is fast enough. If we get 4955 * an error, we don't need to return it. cifs_write_end will 4956 * do a sync write instead since PG_uptodate isn't set. 4957 */ 4958 cifs_readpage_worker(file, page, &page_start); 4959 put_page(page); 4960 oncethru = 1; 4961 goto start; 4962 } else { 4963 /* we could try using another file handle if there is one - 4964 but how would we lock it to prevent close of that handle 4965 racing with this read? In any case 4966 this will be written out by write_end so is fine */ 4967 } 4968 out: 4969 *pagep = page; 4970 return rc; 4971 } 4972 4973 static bool cifs_release_folio(struct folio *folio, gfp_t gfp) 4974 { 4975 if (folio_test_private(folio)) 4976 return 0; 4977 if (folio_test_fscache(folio)) { 4978 if (current_is_kswapd() || !(gfp & __GFP_FS)) 4979 return false; 4980 folio_wait_fscache(folio); 4981 } 4982 fscache_note_page_release(cifs_inode_cookie(folio->mapping->host)); 4983 return true; 4984 } 4985 4986 static void cifs_invalidate_folio(struct folio *folio, size_t offset, 4987 size_t length) 4988 { 4989 folio_wait_fscache(folio); 4990 } 4991 4992 static int cifs_launder_folio(struct folio *folio) 4993 { 4994 int rc = 0; 4995 loff_t range_start = folio_pos(folio); 4996 loff_t range_end = range_start + folio_size(folio); 4997 struct writeback_control wbc = { 4998 .sync_mode = WB_SYNC_ALL, 4999 .nr_to_write = 0, 5000 .range_start = range_start, 5001 .range_end = range_end, 5002 }; 5003 5004 cifs_dbg(FYI, "Launder page: %lu\n", folio->index); 5005 5006 if (folio_clear_dirty_for_io(folio)) 5007 rc = cifs_writepage_locked(&folio->page, &wbc); 5008 5009 folio_wait_fscache(folio); 5010 return rc; 5011 } 5012 5013 void cifs_oplock_break(struct work_struct *work) 5014 { 5015 struct cifsFileInfo *cfile = container_of(work, struct cifsFileInfo, 5016 oplock_break); 5017 struct inode *inode = d_inode(cfile->dentry); 5018 struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb); 5019 struct cifsInodeInfo *cinode = CIFS_I(inode); 5020 struct cifs_tcon *tcon; 5021 struct TCP_Server_Info *server; 5022 struct tcon_link *tlink; 5023 int rc = 0; 5024 bool purge_cache = false, oplock_break_cancelled; 5025 __u64 persistent_fid, volatile_fid; 5026 __u16 net_fid; 5027 5028 wait_on_bit(&cinode->flags, CIFS_INODE_PENDING_WRITERS, 5029 TASK_UNINTERRUPTIBLE); 5030 5031 tlink = 
cifs_sb_tlink(cifs_sb);
	if (IS_ERR(tlink))
		goto out;
	tcon = tlink_tcon(tlink);
	server = tcon->ses->server;

	server->ops->downgrade_oplock(server, cinode, cfile->oplock_level,
				      cfile->oplock_epoch, &purge_cache);

	if (!CIFS_CACHE_WRITE(cinode) && CIFS_CACHE_READ(cinode) &&
	    cifs_has_mand_locks(cinode)) {
		cifs_dbg(FYI, "Reset oplock to None for inode=%p due to mand locks\n",
			 inode);
		cinode->oplock = 0;
	}

	if (inode && S_ISREG(inode->i_mode)) {
		if (CIFS_CACHE_READ(cinode))
			break_lease(inode, O_RDONLY);
		else
			break_lease(inode, O_WRONLY);
		rc = filemap_fdatawrite(inode->i_mapping);
		if (!CIFS_CACHE_READ(cinode) || purge_cache) {
			rc = filemap_fdatawait(inode->i_mapping);
			mapping_set_error(inode->i_mapping, rc);
			cifs_zap_mapping(inode);
		}
		cifs_dbg(FYI, "Oplock flush inode %p rc %d\n", inode, rc);
		if (CIFS_CACHE_WRITE(cinode))
			goto oplock_break_ack;
	}

	rc = cifs_push_locks(cfile);
	if (rc)
		cifs_dbg(VFS, "Push locks rc = %d\n", rc);

oplock_break_ack:
	/*
	 * When an oplock break is received and there are no active file
	 * handles, only cached ones, schedule the deferred close immediately
	 * so that a new open will not use the cached handle.
	 */
	if (!CIFS_CACHE_HANDLE(cinode) && !list_empty(&cinode->deferred_closes))
		cifs_close_deferred_file(cinode);

	persistent_fid = cfile->fid.persistent_fid;
	volatile_fid = cfile->fid.volatile_fid;
	net_fid = cfile->fid.netfid;
	oplock_break_cancelled = cfile->oplock_break_cancelled;

	_cifsFileInfo_put(cfile, false /* do not wait for ourself */, false);
	/*
	 * MS-SMB2 3.2.5.19.1 and 3.2.5.19.2 (and MS-CIFS 3.2.5.42) do not require
	 * an acknowledgment to be sent when the file has already been closed.
	 */
	spin_lock(&cinode->open_file_lock);
	/* check list empty since can race with kill_sb calling tree disconnect */
	if (!oplock_break_cancelled && !list_empty(&cinode->openFileList)) {
		spin_unlock(&cinode->open_file_lock);
		rc = server->ops->oplock_response(tcon, persistent_fid,
						  volatile_fid, net_fid, cinode);
		cifs_dbg(FYI, "Oplock release rc = %d\n", rc);
	} else
		spin_unlock(&cinode->open_file_lock);

	cifs_put_tlink(tlink);
out:
	cifs_done_oplock_break(cinode);
}

/*
 * The presence of cifs_direct_io() in the address space ops vector
 * allows open() O_DIRECT flags which would have failed otherwise.
 *
 * In the non-cached mode (mount with cache=none), we shunt off direct
 * read and write requests so this method should never be called.
 *
 * Direct IO is not yet supported in the cached mode.
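 *
 * A non-NULL ->direct_IO entry is what the VFS checks when a file is
 * opened with O_DIRECT, so the stub below only needs to exist; on
 * cache=none mounts its -EINVAL should never actually be returned.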
5110 */ 5111 static ssize_t 5112 cifs_direct_io(struct kiocb *iocb, struct iov_iter *iter) 5113 { 5114 /* 5115 * FIXME 5116 * Eventually need to support direct IO for non forcedirectio mounts 5117 */ 5118 return -EINVAL; 5119 } 5120 5121 static int cifs_swap_activate(struct swap_info_struct *sis, 5122 struct file *swap_file, sector_t *span) 5123 { 5124 struct cifsFileInfo *cfile = swap_file->private_data; 5125 struct inode *inode = swap_file->f_mapping->host; 5126 unsigned long blocks; 5127 long long isize; 5128 5129 cifs_dbg(FYI, "swap activate\n"); 5130 5131 if (!swap_file->f_mapping->a_ops->swap_rw) 5132 /* Cannot support swap */ 5133 return -EINVAL; 5134 5135 spin_lock(&inode->i_lock); 5136 blocks = inode->i_blocks; 5137 isize = inode->i_size; 5138 spin_unlock(&inode->i_lock); 5139 if (blocks*512 < isize) { 5140 pr_warn("swap activate: swapfile has holes\n"); 5141 return -EINVAL; 5142 } 5143 *span = sis->pages; 5144 5145 pr_warn_once("Swap support over SMB3 is experimental\n"); 5146 5147 /* 5148 * TODO: consider adding ACL (or documenting how) to prevent other 5149 * users (on this or other systems) from reading it 5150 */ 5151 5152 5153 /* TODO: add sk_set_memalloc(inet) or similar */ 5154 5155 if (cfile) 5156 cfile->swapfile = true; 5157 /* 5158 * TODO: Since file already open, we can't open with DENY_ALL here 5159 * but we could add call to grab a byte range lock to prevent others 5160 * from reading or writing the file 5161 */ 5162 5163 sis->flags |= SWP_FS_OPS; 5164 return add_swap_extent(sis, 0, sis->max, 0); 5165 } 5166 5167 static void cifs_swap_deactivate(struct file *file) 5168 { 5169 struct cifsFileInfo *cfile = file->private_data; 5170 5171 cifs_dbg(FYI, "swap deactivate\n"); 5172 5173 /* TODO: undo sk_set_memalloc(inet) will eventually be needed */ 5174 5175 if (cfile) 5176 cfile->swapfile = false; 5177 5178 /* do we need to unpin (or unlock) the file */ 5179 } 5180 5181 /* 5182 * Mark a page as having been made dirty and thus needing writeback. We also 5183 * need to pin the cache object to write back to. 5184 */ 5185 #ifdef CONFIG_CIFS_FSCACHE 5186 static bool cifs_dirty_folio(struct address_space *mapping, struct folio *folio) 5187 { 5188 return fscache_dirty_folio(mapping, folio, 5189 cifs_inode_cookie(mapping->host)); 5190 } 5191 #else 5192 #define cifs_dirty_folio filemap_dirty_folio 5193 #endif 5194 5195 const struct address_space_operations cifs_addr_ops = { 5196 .read_folio = cifs_read_folio, 5197 .readahead = cifs_readahead, 5198 .writepages = cifs_writepages, 5199 .write_begin = cifs_write_begin, 5200 .write_end = cifs_write_end, 5201 .dirty_folio = cifs_dirty_folio, 5202 .release_folio = cifs_release_folio, 5203 .direct_IO = cifs_direct_io, 5204 .invalidate_folio = cifs_invalidate_folio, 5205 .launder_folio = cifs_launder_folio, 5206 .migrate_folio = filemap_migrate_folio, 5207 /* 5208 * TODO: investigate and if useful we could add an is_dirty_writeback 5209 * helper if needed 5210 */ 5211 .swap_activate = cifs_swap_activate, 5212 .swap_deactivate = cifs_swap_deactivate, 5213 }; 5214 5215 /* 5216 * cifs_readahead requires the server to support a buffer large enough to 5217 * contain the header plus one complete page of data. Otherwise, we need 5218 * to leave cifs_readahead out of the address space operations. 
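 *
 * The table below is otherwise the same as cifs_addr_ops minus
 * ->readahead, ->direct_IO and the swap hooks.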
5219 */ 5220 const struct address_space_operations cifs_addr_ops_smallbuf = { 5221 .read_folio = cifs_read_folio, 5222 .writepages = cifs_writepages, 5223 .write_begin = cifs_write_begin, 5224 .write_end = cifs_write_end, 5225 .dirty_folio = cifs_dirty_folio, 5226 .release_folio = cifs_release_folio, 5227 .invalidate_folio = cifs_invalidate_folio, 5228 .launder_folio = cifs_launder_folio, 5229 .migrate_folio = filemap_migrate_folio, 5230 }; 5231
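
/*
 * Note: the choice between cifs_addr_ops and cifs_addr_ops_smallbuf is
 * made per-inode when the address space is set up (see cifs_set_ops()
 * in inode.c). As a rough sketch, not a verbatim copy of that code:
 *
 *	if (server->maxBuf < PAGE_SIZE + MAX_CIFS_HDR_SIZE)
 *		inode->i_data.a_ops = &cifs_addr_ops_smallbuf;
 *	else
 *		inode->i_data.a_ops = &cifs_addr_ops;
 */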