// SPDX-License-Identifier: LGPL-2.1
/*
 *
 *   vfs operations that deal with files
 *
 *   Copyright (C) International Business Machines Corp., 2002,2010
 *   Author(s): Steve French (sfrench@us.ibm.com)
 *              Jeremy Allison (jra@samba.org)
 *
 */
#include <linux/fs.h>
#include <linux/filelock.h>
#include <linux/backing-dev.h>
#include <linux/stat.h>
#include <linux/fcntl.h>
#include <linux/pagemap.h>
#include <linux/pagevec.h>
#include <linux/writeback.h>
#include <linux/task_io_accounting_ops.h>
#include <linux/delay.h>
#include <linux/mount.h>
#include <linux/slab.h>
#include <linux/swap.h>
#include <linux/mm.h>
#include <asm/div64.h>
#include "cifsfs.h"
#include "cifspdu.h"
#include "cifsglob.h"
#include "cifsproto.h"
#include "smb2proto.h"
#include "cifs_unicode.h"
#include "cifs_debug.h"
#include "cifs_fs_sb.h"
#include "fscache.h"
#include "smbdirect.h"
#include "fs_context.h"
#include "cifs_ioctl.h"
#include "cached_dir.h"

/*
 * Remove the dirty flags from a span of pages.
 */
static void cifs_undirty_folios(struct inode *inode, loff_t start, unsigned int len)
{
	struct address_space *mapping = inode->i_mapping;
	struct folio *folio;
	pgoff_t end;

	XA_STATE(xas, &mapping->i_pages, start / PAGE_SIZE);

	rcu_read_lock();

	end = (start + len - 1) / PAGE_SIZE;
	xas_for_each_marked(&xas, folio, end, PAGECACHE_TAG_DIRTY) {
		if (xas_retry(&xas, folio))
			continue;
		xas_pause(&xas);
		rcu_read_unlock();
		folio_lock(folio);
		folio_clear_dirty_for_io(folio);
		folio_unlock(folio);
		rcu_read_lock();
	}

	rcu_read_unlock();
}

/*
 * Completion of write to server.
 */
void cifs_pages_written_back(struct inode *inode, loff_t start, unsigned int len)
{
	struct address_space *mapping = inode->i_mapping;
	struct folio *folio;
	pgoff_t end;

	XA_STATE(xas, &mapping->i_pages, start / PAGE_SIZE);

	if (!len)
		return;

	rcu_read_lock();

	end = (start + len - 1) / PAGE_SIZE;
	xas_for_each(&xas, folio, end) {
		if (xas_retry(&xas, folio))
			continue;
		if (!folio_test_writeback(folio)) {
			WARN_ONCE(1, "bad %x @%llx page %lx %lx\n",
				  len, start, folio->index, end);
			continue;
		}

		folio_detach_private(folio);
		folio_end_writeback(folio);
	}

	rcu_read_unlock();
}

/*
 * Failure of write to server.
 */
void cifs_pages_write_failed(struct inode *inode, loff_t start, unsigned int len)
{
	struct address_space *mapping = inode->i_mapping;
	struct folio *folio;
	pgoff_t end;

	XA_STATE(xas, &mapping->i_pages, start / PAGE_SIZE);

	if (!len)
		return;

	rcu_read_lock();

	end = (start + len - 1) / PAGE_SIZE;
	xas_for_each(&xas, folio, end) {
		if (xas_retry(&xas, folio))
			continue;
		if (!folio_test_writeback(folio)) {
			WARN_ONCE(1, "bad %x @%llx page %lx %lx\n",
				  len, start, folio->index, end);
			continue;
		}

		folio_set_error(folio);
		folio_end_writeback(folio);
	}

	rcu_read_unlock();
}

/*
 * Redirty pages after a temporary failure.
 */
void cifs_pages_write_redirty(struct inode *inode, loff_t start, unsigned int len)
{
	struct address_space *mapping = inode->i_mapping;
	struct folio *folio;
	pgoff_t end;

	XA_STATE(xas, &mapping->i_pages, start / PAGE_SIZE);

	if (!len)
		return;

	rcu_read_lock();

	end = (start + len - 1) / PAGE_SIZE;
	xas_for_each(&xas, folio, end) {
		if (!folio_test_writeback(folio)) {
			WARN_ONCE(1, "bad %x @%llx page %lx %lx\n",
				  len, start, folio->index, end);
			continue;
		}

		filemap_dirty_folio(folio->mapping, folio);
		folio_end_writeback(folio);
	}

	rcu_read_unlock();
}

/*
 * Mark all open files on the tree connection as invalid, since they were
 * closed when the session to the server was lost.
 */
void
cifs_mark_open_files_invalid(struct cifs_tcon *tcon)
{
	struct cifsFileInfo *open_file = NULL;
	struct list_head *tmp;
	struct list_head *tmp1;

	/* only send once per connect */
	spin_lock(&tcon->tc_lock);
	if (tcon->need_reconnect)
		tcon->status = TID_NEED_RECON;

	if (tcon->status != TID_NEED_RECON) {
		spin_unlock(&tcon->tc_lock);
		return;
	}
	tcon->status = TID_IN_FILES_INVALIDATE;
	spin_unlock(&tcon->tc_lock);

	/* list all files open on tree connection and mark them invalid */
	spin_lock(&tcon->open_file_lock);
	list_for_each_safe(tmp, tmp1, &tcon->openFileList) {
		open_file = list_entry(tmp, struct cifsFileInfo, tlist);
		open_file->invalidHandle = true;
		open_file->oplock_break_cancelled = true;
	}
	spin_unlock(&tcon->open_file_lock);

	invalidate_all_cached_dirs(tcon);
	spin_lock(&tcon->tc_lock);
	if (tcon->status == TID_IN_FILES_INVALIDATE)
		tcon->status = TID_NEED_TCON;
	spin_unlock(&tcon->tc_lock);

	/*
	 * BB Add call to invalidate_inodes(sb) for all superblocks mounted
	 * to this tcon.
	 */
}
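
/*
 * Map POSIX open flags to an NT desired-access mask. When the handle may
 * back the local fscache (rdwr_for_fscache == 1), a write-only open is
 * widened to read/write so cached data can be filled in around partial
 * writes.
 */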
static inline int cifs_convert_flags(unsigned int flags, int rdwr_for_fscache)
{
	if ((flags & O_ACCMODE) == O_RDONLY)
		return GENERIC_READ;
	else if ((flags & O_ACCMODE) == O_WRONLY)
		return rdwr_for_fscache == 1 ? (GENERIC_READ | GENERIC_WRITE) : GENERIC_WRITE;
	else if ((flags & O_ACCMODE) == O_RDWR) {
		/* GENERIC_ALL is too much permission to request; it
		   can cause unnecessary access denied on create */
		/* return GENERIC_ALL; */
		return (GENERIC_READ | GENERIC_WRITE);
	}

	return (READ_CONTROL | FILE_WRITE_ATTRIBUTES | FILE_READ_ATTRIBUTES |
		FILE_WRITE_EA | FILE_APPEND_DATA | FILE_WRITE_DATA |
		FILE_READ_DATA);
}

#ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
static u32 cifs_posix_convert_flags(unsigned int flags)
{
	u32 posix_flags = 0;

	if ((flags & O_ACCMODE) == O_RDONLY)
		posix_flags = SMB_O_RDONLY;
	else if ((flags & O_ACCMODE) == O_WRONLY)
		posix_flags = SMB_O_WRONLY;
	else if ((flags & O_ACCMODE) == O_RDWR)
		posix_flags = SMB_O_RDWR;

	if (flags & O_CREAT) {
		posix_flags |= SMB_O_CREAT;
		if (flags & O_EXCL)
			posix_flags |= SMB_O_EXCL;
	} else if (flags & O_EXCL)
		cifs_dbg(FYI, "Application %s pid %d has incorrectly set O_EXCL flag but not O_CREAT on file open. Ignoring O_EXCL\n",
			 current->comm, current->tgid);

	if (flags & O_TRUNC)
		posix_flags |= SMB_O_TRUNC;
	/* be safe and imply O_SYNC for O_DSYNC */
	if (flags & O_DSYNC)
		posix_flags |= SMB_O_SYNC;
	if (flags & O_DIRECTORY)
		posix_flags |= SMB_O_DIRECTORY;
	if (flags & O_NOFOLLOW)
		posix_flags |= SMB_O_NOFOLLOW;
	if (flags & O_DIRECT)
		posix_flags |= SMB_O_DIRECT;

	return posix_flags;
}
#endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */

static inline int cifs_get_disposition(unsigned int flags)
{
	if ((flags & (O_CREAT | O_EXCL)) == (O_CREAT | O_EXCL))
		return FILE_CREATE;
	else if ((flags & (O_CREAT | O_TRUNC)) == (O_CREAT | O_TRUNC))
		return FILE_OVERWRITE_IF;
	else if ((flags & O_CREAT) == O_CREAT)
		return FILE_OPEN_IF;
	else if ((flags & O_TRUNC) == O_TRUNC)
		return FILE_OVERWRITE;
	else
		return FILE_OPEN;
}

#ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
int cifs_posix_open(const char *full_path, struct inode **pinode,
		    struct super_block *sb, int mode, unsigned int f_flags,
		    __u32 *poplock, __u16 *pnetfid, unsigned int xid)
{
	int rc;
	FILE_UNIX_BASIC_INFO *presp_data;
	__u32 posix_flags = 0;
	struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
	struct cifs_fattr fattr;
	struct tcon_link *tlink;
	struct cifs_tcon *tcon;

	cifs_dbg(FYI, "posix open %s\n", full_path);

	presp_data = kzalloc(sizeof(FILE_UNIX_BASIC_INFO), GFP_KERNEL);
	if (presp_data == NULL)
		return -ENOMEM;

	tlink = cifs_sb_tlink(cifs_sb);
	if (IS_ERR(tlink)) {
		rc = PTR_ERR(tlink);
		goto posix_open_ret;
	}

	tcon = tlink_tcon(tlink);
	mode &= ~current_umask();

	posix_flags = cifs_posix_convert_flags(f_flags);
	rc = CIFSPOSIXCreate(xid, tcon, posix_flags, mode, pnetfid, presp_data,
			     poplock, full_path, cifs_sb->local_nls,
			     cifs_remap(cifs_sb));
	cifs_put_tlink(tlink);

	if (rc)
		goto posix_open_ret;

	if (presp_data->Type == cpu_to_le32(-1))
		goto posix_open_ret; /* open ok, caller does qpathinfo */

	if (!pinode)
		goto posix_open_ret; /* caller does not need info */

	cifs_unix_basic_to_fattr(&fattr, presp_data, cifs_sb);

	/* get new inode and set it up */
	if (*pinode == NULL) {
		cifs_fill_uniqueid(sb, &fattr);
		*pinode = cifs_iget(sb, &fattr);
		if (!*pinode) {
			rc = -ENOMEM;
			goto posix_open_ret;
		}
	} else {
		cifs_revalidate_mapping(*pinode);
		rc = cifs_fattr_to_inode(*pinode, &fattr, false);
	}

posix_open_ret:
	kfree(presp_data);
	return rc;
}
#endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
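
/*
 * Open the file over the wire with NT semantics: convert the POSIX open
 * flags to a desired access mask and create disposition, issue the open,
 * and then query inode info on the new handle. If a write-only open was
 * widened for fscache and the server denies it, retry with the access
 * the caller actually asked for.
 */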
static int cifs_nt_open(const char *full_path, struct inode *inode, struct cifs_sb_info *cifs_sb,
			struct cifs_tcon *tcon, unsigned int f_flags, __u32 *oplock,
			struct cifs_fid *fid, unsigned int xid, struct cifs_open_info_data *buf)
{
	int rc;
	int desired_access;
	int disposition;
	int create_options = CREATE_NOT_DIR;
	struct TCP_Server_Info *server = tcon->ses->server;
	struct cifs_open_parms oparms;
	int rdwr_for_fscache = 0;

	if (!server->ops->open)
		return -ENOSYS;

	/* If we're caching, we need to be able to fill in around partial writes. */
	if (cifs_fscache_enabled(inode) && (f_flags & O_ACCMODE) == O_WRONLY)
		rdwr_for_fscache = 1;

	desired_access = cifs_convert_flags(f_flags, rdwr_for_fscache);

/*********************************************************************
 *  open flag mapping table:
 *
 *	POSIX Flag            CIFS Disposition
 *	----------            ----------------
 *	O_CREAT               FILE_OPEN_IF
 *	O_CREAT | O_EXCL      FILE_CREATE
 *	O_CREAT | O_TRUNC     FILE_OVERWRITE_IF
 *	O_TRUNC               FILE_OVERWRITE
 *	none of the above     FILE_OPEN
 *
 *	Note that there is not a direct match between disposition
 *	FILE_SUPERSEDE (ie create whether or not file exists although
 *	O_CREAT | O_TRUNC is similar but truncates the existing
 *	file rather than creating a new file as FILE_SUPERSEDE does
 *	(which uses the attributes / metadata passed in on open call)
 *
 *	O_SYNC is a reasonable match to CIFS writethrough flag
 *	and the read write flags match reasonably.  O_LARGEFILE
 *	is irrelevant because largefile support is always used
 *	by this client. Flags O_APPEND, O_DIRECT, O_DIRECTORY,
 *	O_FASYNC, O_NOFOLLOW, O_NONBLOCK need further investigation
 *********************************************************************/

	disposition = cifs_get_disposition(f_flags);

	/* BB pass O_SYNC flag through on file attributes .. BB */

	/* O_SYNC also has bit for O_DSYNC so following check picks up either */
	if (f_flags & O_SYNC)
		create_options |= CREATE_WRITE_THROUGH;

	if (f_flags & O_DIRECT)
		create_options |= CREATE_NO_BUFFER;

retry_open:
	oparms = (struct cifs_open_parms) {
		.tcon = tcon,
		.cifs_sb = cifs_sb,
		.desired_access = desired_access,
		.create_options = cifs_create_options(cifs_sb, create_options),
		.disposition = disposition,
		.path = full_path,
		.fid = fid,
	};

	rc = server->ops->open(xid, &oparms, oplock, buf);
	if (rc) {
		if (rc == -EACCES && rdwr_for_fscache == 1) {
			desired_access = cifs_convert_flags(f_flags, 0);
			rdwr_for_fscache = 2;
			goto retry_open;
		}
		return rc;
	}
	if (rdwr_for_fscache == 2)
		cifs_invalidate_cache(inode, FSCACHE_INVAL_DIO_WRITE);

	/* TODO: Add support for calling posix query info but with passing in fid */
	if (tcon->unix_ext)
		rc = cifs_get_inode_info_unix(&inode, full_path, inode->i_sb,
					      xid);
	else
		rc = cifs_get_inode_info(&inode, full_path, buf, inode->i_sb,
					 xid, fid);

	if (rc) {
		server->ops->close(xid, tcon, fid);
		if (rc == -ESTALE)
			rc = -EOPENSTALE;
	}

	return rc;
}

static bool
cifs_has_mand_locks(struct cifsInodeInfo *cinode)
{
	struct cifs_fid_locks *cur;
	bool has_locks = false;

	down_read(&cinode->lock_sem);
	list_for_each_entry(cur, &cinode->llist, llist) {
		if (!list_empty(&cur->locks)) {
			has_locks = true;
			break;
		}
	}
	up_read(&cinode->lock_sem);
	return has_locks;
}

void
cifs_down_write(struct rw_semaphore *sem)
{
	while (!down_write_trylock(sem))
		msleep(10);
}

static void cifsFileInfo_put_work(struct work_struct *work);
void serverclose_work(struct work_struct *work);
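
/*
 * Allocate and initialize the client-side state for a freshly opened file:
 * take references on the dentry and tcon link, hook the handle into the
 * per-inode and per-tcon open-file lists, and apply the oplock the server
 * granted (downgrading it if mandatory brlocks are already present).
 */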
struct cifsFileInfo *cifs_new_fileinfo(struct cifs_fid *fid, struct file *file,
				       struct tcon_link *tlink, __u32 oplock,
				       const char *symlink_target)
{
	struct dentry *dentry = file_dentry(file);
	struct inode *inode = d_inode(dentry);
	struct cifsInodeInfo *cinode = CIFS_I(inode);
	struct cifsFileInfo *cfile;
	struct cifs_fid_locks *fdlocks;
	struct cifs_tcon *tcon = tlink_tcon(tlink);
	struct TCP_Server_Info *server = tcon->ses->server;

	cfile = kzalloc(sizeof(struct cifsFileInfo), GFP_KERNEL);
	if (cfile == NULL)
		return cfile;

	fdlocks = kzalloc(sizeof(struct cifs_fid_locks), GFP_KERNEL);
	if (!fdlocks) {
		kfree(cfile);
		return NULL;
	}

	if (symlink_target) {
		cfile->symlink_target = kstrdup(symlink_target, GFP_KERNEL);
		if (!cfile->symlink_target) {
			kfree(fdlocks);
			kfree(cfile);
			return NULL;
		}
	}

	INIT_LIST_HEAD(&fdlocks->locks);
	fdlocks->cfile = cfile;
	cfile->llist = fdlocks;

	cfile->count = 1;
	cfile->pid = current->tgid;
	cfile->uid = current_fsuid();
	cfile->dentry = dget(dentry);
	cfile->f_flags = file->f_flags;
	cfile->invalidHandle = false;
	cfile->deferred_close_scheduled = false;
	cfile->tlink = cifs_get_tlink(tlink);
	INIT_WORK(&cfile->oplock_break, cifs_oplock_break);
	INIT_WORK(&cfile->put, cifsFileInfo_put_work);
	INIT_WORK(&cfile->serverclose, serverclose_work);
	INIT_DELAYED_WORK(&cfile->deferred, smb2_deferred_work_close);
	mutex_init(&cfile->fh_mutex);
	spin_lock_init(&cfile->file_info_lock);

	cifs_sb_active(inode->i_sb);

	/*
	 * If the server returned a read oplock and we have mandatory brlocks,
	 * set oplock level to None.
	 */
	if (server->ops->is_read_op(oplock) && cifs_has_mand_locks(cinode)) {
		cifs_dbg(FYI, "Reset oplock val from read to None due to mand locks\n");
		oplock = 0;
	}

	cifs_down_write(&cinode->lock_sem);
	list_add(&fdlocks->llist, &cinode->llist);
	up_write(&cinode->lock_sem);

	spin_lock(&tcon->open_file_lock);
	if (fid->pending_open->oplock != CIFS_OPLOCK_NO_CHANGE && oplock)
		oplock = fid->pending_open->oplock;
	list_del(&fid->pending_open->olist);

	fid->purge_cache = false;
	server->ops->set_fid(cfile, fid, oplock);

	list_add(&cfile->tlist, &tcon->openFileList);
	atomic_inc(&tcon->num_local_opens);

	/* if readable file instance, put it first in the list */
	spin_lock(&cinode->open_file_lock);
	if (file->f_mode & FMODE_READ)
		list_add(&cfile->flist, &cinode->openFileList);
	else
		list_add_tail(&cfile->flist, &cinode->openFileList);
	spin_unlock(&cinode->open_file_lock);
	spin_unlock(&tcon->open_file_lock);

	if (fid->purge_cache)
		cifs_zap_mapping(inode);

	file->private_data = cfile;
	return cfile;
}

struct cifsFileInfo *
cifsFileInfo_get(struct cifsFileInfo *cifs_file)
{
	spin_lock(&cifs_file->file_info_lock);
	cifsFileInfo_get_locked(cifs_file);
	spin_unlock(&cifs_file->file_info_lock);
	return cifs_file;
}
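
/*
 * Final teardown once the last reference to the file info is gone: discard
 * any remaining byte-range lock records, then drop the tlink, dentry and
 * superblock references taken at open time.
 */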
static void cifsFileInfo_put_final(struct cifsFileInfo *cifs_file)
{
	struct inode *inode = d_inode(cifs_file->dentry);
	struct cifsInodeInfo *cifsi = CIFS_I(inode);
	struct cifsLockInfo *li, *tmp;
	struct super_block *sb = inode->i_sb;

	/*
	 * Delete any outstanding lock records. We'll lose them when the file
	 * is closed anyway.
	 */
	cifs_down_write(&cifsi->lock_sem);
	list_for_each_entry_safe(li, tmp, &cifs_file->llist->locks, llist) {
		list_del(&li->llist);
		cifs_del_lock_waiters(li);
		kfree(li);
	}
	list_del(&cifs_file->llist->llist);
	kfree(cifs_file->llist);
	up_write(&cifsi->lock_sem);

	cifs_put_tlink(cifs_file->tlink);
	dput(cifs_file->dentry);
	cifs_sb_deactive(sb);
	kfree(cifs_file->symlink_target);
	kfree(cifs_file);
}

static void cifsFileInfo_put_work(struct work_struct *work)
{
	struct cifsFileInfo *cifs_file = container_of(work,
			struct cifsFileInfo, put);

	cifsFileInfo_put_final(cifs_file);
}

void serverclose_work(struct work_struct *work)
{
	struct cifsFileInfo *cifs_file = container_of(work,
			struct cifsFileInfo, serverclose);
	struct cifs_tcon *tcon = tlink_tcon(cifs_file->tlink);
	struct TCP_Server_Info *server = tcon->ses->server;
	int rc = 0;
	int retries = 0;
	int MAX_RETRIES = 4;

	do {
		if (server->ops->close_getattr)
			rc = server->ops->close_getattr(0, tcon, cifs_file);
		else if (server->ops->close)
			rc = server->ops->close(0, tcon, &cifs_file->fid);

		if (rc == -EBUSY || rc == -EAGAIN) {
			retries++;
			msleep(250);
		}
	} while ((rc == -EBUSY || rc == -EAGAIN) && (retries < MAX_RETRIES));

	if (retries == MAX_RETRIES)
		pr_warn("Serverclose failed %d times, giving up\n", MAX_RETRIES);

	if (cifs_file->offload)
		queue_work(fileinfo_put_wq, &cifs_file->put);
	else
		cifsFileInfo_put_final(cifs_file);
}

/**
 * cifsFileInfo_put - release a reference of file priv data
 *
 * Always potentially wait for oplock handler. See _cifsFileInfo_put().
 *
 * @cifs_file: cifs/smb3 specific info (eg refcounts) for an open file
 */
void cifsFileInfo_put(struct cifsFileInfo *cifs_file)
{
	_cifsFileInfo_put(cifs_file, true, true);
}

/**
 * _cifsFileInfo_put - release a reference of file priv data
 *
 * This may involve closing the filehandle @cifs_file out on the
 * server. Must be called without holding tcon->open_file_lock,
 * cinode->open_file_lock and cifs_file->file_info_lock.
 *
 * If @wait_for_oplock_handler is true and we are releasing the last
 * reference, wait for any running oplock break handler of the file
 * and cancel any pending one.
 *
 * @cifs_file:	cifs/smb3 specific info (eg refcounts) for an open file
 * @wait_oplock_handler: must be false if called from oplock_break_handler
 * @offload:	not offloaded on close and oplock breaks
 *
 */
void _cifsFileInfo_put(struct cifsFileInfo *cifs_file,
		       bool wait_oplock_handler, bool offload)
{
	struct inode *inode = d_inode(cifs_file->dentry);
	struct cifs_tcon *tcon = tlink_tcon(cifs_file->tlink);
	struct TCP_Server_Info *server = tcon->ses->server;
	struct cifsInodeInfo *cifsi = CIFS_I(inode);
	struct super_block *sb = inode->i_sb;
	struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
	struct cifs_fid fid = {};
	struct cifs_pending_open open;
	bool oplock_break_cancelled;
	bool serverclose_offloaded = false;

	spin_lock(&tcon->open_file_lock);
	spin_lock(&cifsi->open_file_lock);
	spin_lock(&cifs_file->file_info_lock);

	cifs_file->offload = offload;
	if (--cifs_file->count > 0) {
		spin_unlock(&cifs_file->file_info_lock);
		spin_unlock(&cifsi->open_file_lock);
		spin_unlock(&tcon->open_file_lock);
		return;
	}
	spin_unlock(&cifs_file->file_info_lock);

	if (server->ops->get_lease_key)
		server->ops->get_lease_key(inode, &fid);

	/* store open in pending opens to make sure we don't miss lease break */
	cifs_add_pending_open_locked(&fid, cifs_file->tlink, &open);

	/* remove it from the lists */
	list_del(&cifs_file->flist);
	list_del(&cifs_file->tlist);
	atomic_dec(&tcon->num_local_opens);

	if (list_empty(&cifsi->openFileList)) {
		cifs_dbg(FYI, "closing last open instance for inode %p\n",
			 d_inode(cifs_file->dentry));
		/*
		 * In strict cache mode we need to invalidate the mapping on
		 * the last close because it may cause an error when we open
		 * this file again and get at least a level II oplock.
		 */
		if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_STRICT_IO)
			set_bit(CIFS_INO_INVALID_MAPPING, &cifsi->flags);
		cifs_set_oplock_level(cifsi, 0);
	}

	spin_unlock(&cifsi->open_file_lock);
	spin_unlock(&tcon->open_file_lock);

	oplock_break_cancelled = wait_oplock_handler ?
		cancel_work_sync(&cifs_file->oplock_break) : false;

	if (!tcon->need_reconnect && !cifs_file->invalidHandle) {
		struct TCP_Server_Info *server = tcon->ses->server;
		unsigned int xid;
		int rc = 0;

		xid = get_xid();
		if (server->ops->close_getattr)
			rc = server->ops->close_getattr(xid, tcon, cifs_file);
		else if (server->ops->close)
			rc = server->ops->close(xid, tcon, &cifs_file->fid);
		_free_xid(xid);

		if (rc == -EBUSY || rc == -EAGAIN) {
			// Server close failed, hence offloading it as an async op
			queue_work(serverclose_wq, &cifs_file->serverclose);
			serverclose_offloaded = true;
		}
	}

	if (oplock_break_cancelled)
		cifs_done_oplock_break(cifsi);

	cifs_del_pending_open(&open);

	// if serverclose has been offloaded to wq (on failure), it will
	// handle offloading put as well. If serverclose not offloaded,
	// we need to handle offloading put here.
	if (!serverclose_offloaded) {
		if (offload)
			queue_work(fileinfo_put_wq, &cifs_file->put);
		else
			cifsFileInfo_put_final(cifs_file);
	}
}
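
/*
 * Open a file on an SMB mount. Reuse a matching deferred-close handle if
 * one is cached; otherwise try the legacy SMB1 POSIX open where the server
 * supports it, and fall back to a regular NT create. On success the new
 * handle is wired up via cifs_new_fileinfo().
 */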
int cifs_open(struct inode *inode, struct file *file)
{
	int rc = -EACCES;
	unsigned int xid;
	__u32 oplock;
	struct cifs_sb_info *cifs_sb;
	struct TCP_Server_Info *server;
	struct cifs_tcon *tcon;
	struct tcon_link *tlink;
	struct cifsFileInfo *cfile = NULL;
	void *page;
	const char *full_path;
	bool posix_open_ok = false;
	struct cifs_fid fid = {};
	struct cifs_pending_open open;
	struct cifs_open_info_data data = {};

	xid = get_xid();

	cifs_sb = CIFS_SB(inode->i_sb);
	if (unlikely(cifs_forced_shutdown(cifs_sb))) {
		free_xid(xid);
		return -EIO;
	}

	tlink = cifs_sb_tlink(cifs_sb);
	if (IS_ERR(tlink)) {
		free_xid(xid);
		return PTR_ERR(tlink);
	}
	tcon = tlink_tcon(tlink);
	server = tcon->ses->server;

	page = alloc_dentry_path();
	full_path = build_path_from_dentry(file_dentry(file), page);
	if (IS_ERR(full_path)) {
		rc = PTR_ERR(full_path);
		goto out;
	}

	cifs_dbg(FYI, "inode = 0x%p file flags are 0x%x for %s\n",
		 inode, file->f_flags, full_path);

	if (file->f_flags & O_DIRECT &&
	    cifs_sb->mnt_cifs_flags & CIFS_MOUNT_STRICT_IO) {
		if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_BRL)
			file->f_op = &cifs_file_direct_nobrl_ops;
		else
			file->f_op = &cifs_file_direct_ops;
	}

	/* Get the cached handle as SMB2 close is deferred */
	rc = cifs_get_readable_path(tcon, full_path, &cfile);
	if (rc == 0) {
		if (file->f_flags == cfile->f_flags) {
			file->private_data = cfile;
			spin_lock(&CIFS_I(inode)->deferred_lock);
			cifs_del_deferred_close(cfile);
			spin_unlock(&CIFS_I(inode)->deferred_lock);
			goto use_cache;
		} else {
			_cifsFileInfo_put(cfile, true, false);
		}
	}

	if (server->oplocks)
		oplock = REQ_OPLOCK;
	else
		oplock = 0;

#ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
	if (!tcon->broken_posix_open && tcon->unix_ext &&
	    cap_unix(tcon->ses) && (CIFS_UNIX_POSIX_PATH_OPS_CAP &
				    le64_to_cpu(tcon->fsUnixInfo.Capability))) {
		/* can not refresh inode info since size could be stale */
		rc = cifs_posix_open(full_path, &inode, inode->i_sb,
				     cifs_sb->ctx->file_mode /* ignored */,
				     file->f_flags, &oplock, &fid.netfid, xid);
		if (rc == 0) {
			cifs_dbg(FYI, "posix open succeeded\n");
			posix_open_ok = true;
		} else if ((rc == -EINVAL) || (rc == -EOPNOTSUPP)) {
			if (tcon->ses->serverNOS)
				cifs_dbg(VFS, "server %s of type %s returned unexpected error on SMB posix open, disabling posix open support. Check if server update available.\n",
					 tcon->ses->ip_addr,
					 tcon->ses->serverNOS);
			tcon->broken_posix_open = true;
		} else if ((rc != -EIO) && (rc != -EREMOTE) &&
			   (rc != -EOPNOTSUPP)) /* path not found or net err */
			goto out;
		/*
		 * Else fallthrough to retry open the old way on network i/o
		 * or DFS errors.
		 */
	}
#endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */

	if (server->ops->get_lease_key)
		server->ops->get_lease_key(inode, &fid);

	cifs_add_pending_open(&fid, tlink, &open);

	if (!posix_open_ok) {
		if (server->ops->get_lease_key)
			server->ops->get_lease_key(inode, &fid);

		rc = cifs_nt_open(full_path, inode, cifs_sb, tcon, file->f_flags, &oplock, &fid,
				  xid, &data);
		if (rc) {
			cifs_del_pending_open(&open);
			goto out;
		}
	}

	cfile = cifs_new_fileinfo(&fid, file, tlink, oplock, data.symlink_target);
	if (cfile == NULL) {
		if (server->ops->close)
			server->ops->close(xid, tcon, &fid);
		cifs_del_pending_open(&open);
		rc = -ENOMEM;
		goto out;
	}

#ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
	if ((oplock & CIFS_CREATE_ACTION) && !posix_open_ok && tcon->unix_ext) {
		/*
		 * Time to set mode which we can not set earlier due to
		 * problems creating new read-only files.
		 */
		struct cifs_unix_set_info_args args = {
			.mode	= inode->i_mode,
			.uid	= INVALID_UID, /* no change */
			.gid	= INVALID_GID, /* no change */
			.ctime	= NO_CHANGE_64,
			.atime	= NO_CHANGE_64,
			.mtime	= NO_CHANGE_64,
			.device	= 0,
		};
		CIFSSMBUnixSetFileInfo(xid, tcon, &args, fid.netfid,
				       cfile->pid);
	}
#endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */

use_cache:
	fscache_use_cookie(cifs_inode_cookie(file_inode(file)),
			   file->f_mode & FMODE_WRITE);
	if (!(file->f_flags & O_DIRECT))
		goto out;
	if ((file->f_flags & (O_ACCMODE | O_APPEND)) == O_RDONLY)
		goto out;
	cifs_invalidate_cache(file_inode(file), FSCACHE_INVAL_DIO_WRITE);

out:
	free_dentry_path(page);
	free_xid(xid);
	cifs_put_tlink(tlink);
	cifs_free_open_info(&data);
	return rc;
}

#ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
static int cifs_push_posix_locks(struct cifsFileInfo *cfile);
#endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */

/*
 * Try to reacquire byte range locks that were released when the session
 * to the server was lost.
 */
static int
cifs_relock_file(struct cifsFileInfo *cfile)
{
	struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
	int rc = 0;
#ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
	struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
#endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */

	down_read_nested(&cinode->lock_sem, SINGLE_DEPTH_NESTING);
	if (cinode->can_cache_brlcks) {
		/* can cache locks - no need to relock */
		up_read(&cinode->lock_sem);
		return rc;
	}

#ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
	if (cap_unix(tcon->ses) &&
	    (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
	    ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
		rc = cifs_push_posix_locks(cfile);
	else
#endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
		rc = tcon->ses->server->ops->push_mand_locks(cfile);

	up_read(&cinode->lock_sem);
	return rc;
}
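
/*
 * Reopen a file that was previously open but whose handle has been
 * invalidated (e.g. after reconnect). Rebuild the open parameters from the
 * flags saved at open time, skip O_CREAT/O_EXCL/O_TRUNC which already had
 * their effect, and, if requested and safe, flush dirty data and refresh
 * the inode before relocking.
 */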
static int
cifs_reopen_file(struct cifsFileInfo *cfile, bool can_flush)
{
	int rc = -EACCES;
	unsigned int xid;
	__u32 oplock;
	struct cifs_sb_info *cifs_sb;
	struct cifs_tcon *tcon;
	struct TCP_Server_Info *server;
	struct cifsInodeInfo *cinode;
	struct inode *inode;
	void *page;
	const char *full_path;
	int desired_access;
	int disposition = FILE_OPEN;
	int create_options = CREATE_NOT_DIR;
	struct cifs_open_parms oparms;
	int rdwr_for_fscache = 0;

	xid = get_xid();
	mutex_lock(&cfile->fh_mutex);
	if (!cfile->invalidHandle) {
		mutex_unlock(&cfile->fh_mutex);
		free_xid(xid);
		return 0;
	}

	inode = d_inode(cfile->dentry);
	cifs_sb = CIFS_SB(inode->i_sb);
	tcon = tlink_tcon(cfile->tlink);
	server = tcon->ses->server;

	/*
	 * Can not grab rename sem here because various ops, including those
	 * that already have the rename sem can end up causing writepage to get
	 * called and if the server was down that means we end up here, and we
	 * can never tell if the caller already has the rename_sem.
	 */
	page = alloc_dentry_path();
	full_path = build_path_from_dentry(cfile->dentry, page);
	if (IS_ERR(full_path)) {
		mutex_unlock(&cfile->fh_mutex);
		free_dentry_path(page);
		free_xid(xid);
		return PTR_ERR(full_path);
	}

	cifs_dbg(FYI, "inode = 0x%p file flags 0x%x for %s\n",
		 inode, cfile->f_flags, full_path);

	if (tcon->ses->server->oplocks)
		oplock = REQ_OPLOCK;
	else
		oplock = 0;

#ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
	if (tcon->unix_ext && cap_unix(tcon->ses) &&
	    (CIFS_UNIX_POSIX_PATH_OPS_CAP &
	     le64_to_cpu(tcon->fsUnixInfo.Capability))) {
		/*
		 * O_CREAT, O_EXCL and O_TRUNC already had their effect on the
		 * original open. Must mask them off for a reopen.
		 */
		unsigned int oflags = cfile->f_flags &
			~(O_CREAT | O_EXCL | O_TRUNC);

		rc = cifs_posix_open(full_path, NULL, inode->i_sb,
				     cifs_sb->ctx->file_mode /* ignored */,
				     oflags, &oplock, &cfile->fid.netfid, xid);
		if (rc == 0) {
			cifs_dbg(FYI, "posix reopen succeeded\n");
			oparms.reconnect = true;
			goto reopen_success;
		}
		/*
		 * fallthrough to retry open the old way on errors, especially
		 * in the reconnect path it is important to retry hard
		 */
	}
#endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */

	/* If we're caching, we need to be able to fill in around partial writes. */
	if (cifs_fscache_enabled(inode) && (cfile->f_flags & O_ACCMODE) == O_WRONLY)
		rdwr_for_fscache = 1;

	desired_access = cifs_convert_flags(cfile->f_flags, rdwr_for_fscache);

	/* O_SYNC also has bit for O_DSYNC so following check picks up either */
	if (cfile->f_flags & O_SYNC)
		create_options |= CREATE_WRITE_THROUGH;

	if (cfile->f_flags & O_DIRECT)
		create_options |= CREATE_NO_BUFFER;

	if (server->ops->get_lease_key)
		server->ops->get_lease_key(inode, &cfile->fid);

retry_open:
	oparms = (struct cifs_open_parms) {
		.tcon = tcon,
		.cifs_sb = cifs_sb,
		.desired_access = desired_access,
		.create_options = cifs_create_options(cifs_sb, create_options),
		.disposition = disposition,
		.path = full_path,
		.fid = &cfile->fid,
		.reconnect = true,
	};

	/*
	 * Can not refresh inode by passing in file_info buf to be returned by
	 * ops->open and then calling get_inode_info with returned buf since
	 * file might have write behind data that needs to be flushed and server
	 * version of file size can be stale. If we knew for sure that inode was
	 * not dirty locally we could do this.
	 */
	rc = server->ops->open(xid, &oparms, &oplock, NULL);
	if (rc == -ENOENT && oparms.reconnect == false) {
		/* durable handle timeout is expired - open the file again */
		rc = server->ops->open(xid, &oparms, &oplock, NULL);
		/* indicate that we need to relock the file */
		oparms.reconnect = true;
	}
	if (rc == -EACCES && rdwr_for_fscache == 1) {
		desired_access = cifs_convert_flags(cfile->f_flags, 0);
		rdwr_for_fscache = 2;
		goto retry_open;
	}

	if (rc) {
		mutex_unlock(&cfile->fh_mutex);
		cifs_dbg(FYI, "cifs_reopen returned 0x%x\n", rc);
		cifs_dbg(FYI, "oplock: %d\n", oplock);
		goto reopen_error_exit;
	}

	if (rdwr_for_fscache == 2)
		cifs_invalidate_cache(inode, FSCACHE_INVAL_DIO_WRITE);

#ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
reopen_success:
#endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
	cfile->invalidHandle = false;
	mutex_unlock(&cfile->fh_mutex);
	cinode = CIFS_I(inode);

	if (can_flush) {
		rc = filemap_write_and_wait(inode->i_mapping);
		if (!is_interrupt_error(rc))
			mapping_set_error(inode->i_mapping, rc);

		if (tcon->posix_extensions) {
			rc = smb311_posix_get_inode_info(&inode, full_path,
							 NULL, inode->i_sb, xid);
		} else if (tcon->unix_ext) {
			rc = cifs_get_inode_info_unix(&inode, full_path,
						      inode->i_sb, xid);
		} else {
			rc = cifs_get_inode_info(&inode, full_path, NULL,
						 inode->i_sb, xid, NULL);
		}
	}
	/*
	 * Else we are writing out data to server already and could deadlock if
	 * we tried to flush data, and since we do not know if we have data that
	 * would invalidate the current end of file on the server we can not go
	 * to the server to get the new inode info.
	 */

	/*
	 * If the server returned a read oplock and we have mandatory brlocks,
	 * set oplock level to None.
	 */
	if (server->ops->is_read_op(oplock) && cifs_has_mand_locks(cinode)) {
		cifs_dbg(FYI, "Reset oplock val from read to None due to mand locks\n");
		oplock = 0;
	}

	server->ops->set_fid(cfile, &cfile->fid, oplock);
	if (oparms.reconnect)
		cifs_relock_file(cfile);

reopen_error_exit:
	free_dentry_path(page);
	free_xid(xid);
	return rc;
}
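
/*
 * Work item that runs when a deferred close times out: drop the
 * deferred-close tracking entry and release the reference that was kept so
 * the handle could be reused by a quick re-open.
 */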
void smb2_deferred_work_close(struct work_struct *work)
{
	struct cifsFileInfo *cfile = container_of(work,
			struct cifsFileInfo, deferred.work);

	spin_lock(&CIFS_I(d_inode(cfile->dentry))->deferred_lock);
	cifs_del_deferred_close(cfile);
	cfile->deferred_close_scheduled = false;
	spin_unlock(&CIFS_I(d_inode(cfile->dentry))->deferred_lock);
	_cifsFileInfo_put(cfile, true, false);
}

int cifs_close(struct inode *inode, struct file *file)
{
	struct cifsFileInfo *cfile;
	struct cifsInodeInfo *cinode = CIFS_I(inode);
	struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
	struct cifs_deferred_close *dclose;

	cifs_fscache_unuse_inode_cookie(inode, file->f_mode & FMODE_WRITE);

	if (file->private_data != NULL) {
		cfile = file->private_data;
		file->private_data = NULL;
		dclose = kmalloc(sizeof(struct cifs_deferred_close), GFP_KERNEL);
		if ((cifs_sb->ctx->closetimeo && cinode->oplock == CIFS_CACHE_RHW_FLG)
		    && cinode->lease_granted &&
		    !test_bit(CIFS_INO_CLOSE_ON_LOCK, &cinode->flags) &&
		    dclose) {
			if (test_and_clear_bit(CIFS_INO_MODIFIED_ATTR, &cinode->flags)) {
				inode_set_mtime_to_ts(inode,
						      inode_set_ctime_current(inode));
			}
			spin_lock(&cinode->deferred_lock);
			cifs_add_deferred_close(cfile, dclose);
			if (cfile->deferred_close_scheduled &&
			    delayed_work_pending(&cfile->deferred)) {
				/*
				 * If there is no pending work, mod_delayed_work queues new work.
				 * So, increase the ref count to avoid use-after-free.
				 */
				if (!mod_delayed_work(deferredclose_wq,
						      &cfile->deferred, cifs_sb->ctx->closetimeo))
					cifsFileInfo_get(cfile);
			} else {
				/* Deferred close for files */
				queue_delayed_work(deferredclose_wq,
						   &cfile->deferred, cifs_sb->ctx->closetimeo);
				cfile->deferred_close_scheduled = true;
				spin_unlock(&cinode->deferred_lock);
				return 0;
			}
			spin_unlock(&cinode->deferred_lock);
			_cifsFileInfo_put(cfile, true, false);
		} else {
			_cifsFileInfo_put(cfile, true, false);
			kfree(dclose);
		}
	}

	/* return code from the ->release op is always ignored */
	return 0;
}

void
cifs_reopen_persistent_handles(struct cifs_tcon *tcon)
{
	struct cifsFileInfo *open_file, *tmp;
	struct list_head tmp_list;

	if (!tcon->use_persistent || !tcon->need_reopen_files)
		return;

	tcon->need_reopen_files = false;

	cifs_dbg(FYI, "Reopen persistent handles\n");
	INIT_LIST_HEAD(&tmp_list);

	/* list all files open on tree connection, reopen resilient handles */
	spin_lock(&tcon->open_file_lock);
	list_for_each_entry(open_file, &tcon->openFileList, tlist) {
		if (!open_file->invalidHandle)
			continue;
		cifsFileInfo_get(open_file);
		list_add_tail(&open_file->rlist, &tmp_list);
	}
	spin_unlock(&tcon->open_file_lock);

	list_for_each_entry_safe(open_file, tmp, &tmp_list, rlist) {
		if (cifs_reopen_file(open_file, false /* do not flush */))
			tcon->need_reopen_files = true;
		list_del_init(&open_file->rlist);
		cifsFileInfo_put(open_file);
	}
}
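
/*
 * Close a directory handle. If an enumeration is still in progress the
 * server-side search handle is closed here, and any network buffer still
 * held for the search results is released.
 */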
int cifs_closedir(struct inode *inode, struct file *file)
{
	int rc = 0;
	unsigned int xid;
	struct cifsFileInfo *cfile = file->private_data;
	struct cifs_tcon *tcon;
	struct TCP_Server_Info *server;
	char *buf;

	cifs_dbg(FYI, "Closedir inode = 0x%p\n", inode);

	if (cfile == NULL)
		return rc;

	xid = get_xid();
	tcon = tlink_tcon(cfile->tlink);
	server = tcon->ses->server;

	cifs_dbg(FYI, "Freeing private data in close dir\n");
	spin_lock(&cfile->file_info_lock);
	if (server->ops->dir_needs_close(cfile)) {
		cfile->invalidHandle = true;
		spin_unlock(&cfile->file_info_lock);
		if (server->ops->close_dir)
			rc = server->ops->close_dir(xid, tcon, &cfile->fid);
		else
			rc = -ENOSYS;
		cifs_dbg(FYI, "Closing uncompleted readdir with rc %d\n", rc);
		/* not much we can do if it fails anyway, ignore rc */
		rc = 0;
	} else
		spin_unlock(&cfile->file_info_lock);

	buf = cfile->srch_inf.ntwrk_buf_start;
	if (buf) {
		cifs_dbg(FYI, "closedir free smb buf in srch struct\n");
		cfile->srch_inf.ntwrk_buf_start = NULL;
		if (cfile->srch_inf.smallBuf)
			cifs_small_buf_release(buf);
		else
			cifs_buf_release(buf);
	}

	cifs_put_tlink(cfile->tlink);
	kfree(file->private_data);
	file->private_data = NULL;
	/* BB can we lock the filestruct while this is going on? */
	free_xid(xid);
	return rc;
}

static struct cifsLockInfo *
cifs_lock_init(__u64 offset, __u64 length, __u8 type, __u16 flags)
{
	struct cifsLockInfo *lock =
		kmalloc(sizeof(struct cifsLockInfo), GFP_KERNEL);
	if (!lock)
		return lock;
	lock->offset = offset;
	lock->length = length;
	lock->type = type;
	lock->pid = current->tgid;
	lock->flags = flags;
	INIT_LIST_HEAD(&lock->blist);
	init_waitqueue_head(&lock->block_q);
	return lock;
}

void
cifs_del_lock_waiters(struct cifsLockInfo *lock)
{
	struct cifsLockInfo *li, *tmp;
	list_for_each_entry_safe(li, tmp, &lock->blist, blist) {
		list_del_init(&li->blist);
		wake_up(&li->block_q);
	}
}

#define CIFS_LOCK_OP	0
#define CIFS_READ_OP	1
#define CIFS_WRITE_OP	2

/* @rw_check : 0 - no op, 1 - read, 2 - write */
static bool
cifs_find_fid_lock_conflict(struct cifs_fid_locks *fdlocks, __u64 offset,
			    __u64 length, __u8 type, __u16 flags,
			    struct cifsFileInfo *cfile,
			    struct cifsLockInfo **conf_lock, int rw_check)
{
	struct cifsLockInfo *li;
	struct cifsFileInfo *cur_cfile = fdlocks->cfile;
	struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;

	list_for_each_entry(li, &fdlocks->locks, llist) {
		if (offset + length <= li->offset ||
		    offset >= li->offset + li->length)
			continue;
		if (rw_check != CIFS_LOCK_OP && current->tgid == li->pid &&
		    server->ops->compare_fids(cfile, cur_cfile)) {
			/* shared lock prevents write op through the same fid */
			if (!(li->type & server->vals->shared_lock_type) ||
			    rw_check != CIFS_WRITE_OP)
				continue;
		}
		if ((type & server->vals->shared_lock_type) &&
		    ((server->ops->compare_fids(cfile, cur_cfile) &&
		      current->tgid == li->pid) || type == li->type))
			continue;
		if (rw_check == CIFS_LOCK_OP &&
		    (flags & FL_OFDLCK) && (li->flags & FL_OFDLCK) &&
		    server->ops->compare_fids(cfile, cur_cfile))
			continue;
		if (conf_lock)
			*conf_lock = li;
		return true;
	}
	return false;
}
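
/*
 * Check every fid opened against this inode for a byte-range lock that
 * conflicts with the requested range; returns the conflicting lock through
 * @conf_lock when one is found.
 */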
bool
cifs_find_lock_conflict(struct cifsFileInfo *cfile, __u64 offset, __u64 length,
			__u8 type, __u16 flags,
			struct cifsLockInfo **conf_lock, int rw_check)
{
	bool rc = false;
	struct cifs_fid_locks *cur;
	struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));

	list_for_each_entry(cur, &cinode->llist, llist) {
		rc = cifs_find_fid_lock_conflict(cur, offset, length, type,
						 flags, cfile, conf_lock,
						 rw_check);
		if (rc)
			break;
	}

	return rc;
}

/*
 * Check if there is another lock that prevents us from setting the lock
 * (mandatory style). If such a lock exists, update the flock structure with
 * its properties. Otherwise, set the flock type to F_UNLCK if we can cache
 * brlocks or leave it the same if we can't. Returns 0 if we don't need to
 * request to the server or 1 otherwise.
 */
static int
cifs_lock_test(struct cifsFileInfo *cfile, __u64 offset, __u64 length,
	       __u8 type, struct file_lock *flock)
{
	int rc = 0;
	struct cifsLockInfo *conf_lock;
	struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
	struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;
	bool exist;

	down_read(&cinode->lock_sem);

	exist = cifs_find_lock_conflict(cfile, offset, length, type,
					flock->fl_flags, &conf_lock,
					CIFS_LOCK_OP);
	if (exist) {
		flock->fl_start = conf_lock->offset;
		flock->fl_end = conf_lock->offset + conf_lock->length - 1;
		flock->fl_pid = conf_lock->pid;
		if (conf_lock->type & server->vals->shared_lock_type)
			flock->fl_type = F_RDLCK;
		else
			flock->fl_type = F_WRLCK;
	} else if (!cinode->can_cache_brlcks)
		rc = 1;
	else
		flock->fl_type = F_UNLCK;

	up_read(&cinode->lock_sem);
	return rc;
}

static void
cifs_lock_add(struct cifsFileInfo *cfile, struct cifsLockInfo *lock)
{
	struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
	cifs_down_write(&cinode->lock_sem);
	list_add_tail(&lock->llist, &cfile->llist->locks);
	up_write(&cinode->lock_sem);
}

/*
 * Set the byte-range lock (mandatory style). Returns:
 * 1) 0, if we set the lock and don't need to request to the server;
 * 2) 1, if no locks prevent us but we need to request to the server;
 * 3) -EACCES, if there is a lock that prevents us and wait is false.
 */
static int
cifs_lock_add_if(struct cifsFileInfo *cfile, struct cifsLockInfo *lock,
		 bool wait)
{
	struct cifsLockInfo *conf_lock;
	struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
	bool exist;
	int rc = 0;

try_again:
	exist = false;
	cifs_down_write(&cinode->lock_sem);

	exist = cifs_find_lock_conflict(cfile, lock->offset, lock->length,
					lock->type, lock->flags, &conf_lock,
					CIFS_LOCK_OP);
	if (!exist && cinode->can_cache_brlcks) {
		list_add_tail(&lock->llist, &cfile->llist->locks);
		up_write(&cinode->lock_sem);
		return rc;
	}

	if (!exist)
		rc = 1;
	else if (!wait)
		rc = -EACCES;
	else {
		list_add_tail(&lock->blist, &conf_lock->blist);
		up_write(&cinode->lock_sem);
		rc = wait_event_interruptible(lock->block_q,
					      (lock->blist.prev == &lock->blist) &&
					      (lock->blist.next == &lock->blist));
		if (!rc)
			goto try_again;
		cifs_down_write(&cinode->lock_sem);
		list_del_init(&lock->blist);
	}

	up_write(&cinode->lock_sem);
	return rc;
}

#ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
/*
 * Check if there is another lock that prevents us from setting the lock
 * (posix style). If such a lock exists, update the flock structure with
 * its properties. Otherwise, set the flock type to F_UNLCK if we can cache
 * brlocks or leave it the same if we can't. Returns 0 if we don't need to
 * request to the server or 1 otherwise.
 */
static int
cifs_posix_lock_test(struct file *file, struct file_lock *flock)
{
	int rc = 0;
	struct cifsInodeInfo *cinode = CIFS_I(file_inode(file));
	unsigned char saved_type = flock->fl_type;

	if ((flock->fl_flags & FL_POSIX) == 0)
		return 1;

	down_read(&cinode->lock_sem);
	posix_test_lock(file, flock);

	if (flock->fl_type == F_UNLCK && !cinode->can_cache_brlcks) {
		flock->fl_type = saved_type;
		rc = 1;
	}

	up_read(&cinode->lock_sem);
	return rc;
}

/*
 * Set the byte-range lock (posix style). Returns:
 * 1) <0, if an error occurs while setting the lock;
 * 2) 0, if we set the lock and don't need to request to the server;
 * 3) FILE_LOCK_DEFERRED, if we will wait for some other file_lock;
 * 4) FILE_LOCK_DEFERRED + 1, if we need to request to the server.
 */
static int
cifs_posix_lock_set(struct file *file, struct file_lock *flock)
{
	struct cifsInodeInfo *cinode = CIFS_I(file_inode(file));
	int rc = FILE_LOCK_DEFERRED + 1;

	if ((flock->fl_flags & FL_POSIX) == 0)
		return rc;

	cifs_down_write(&cinode->lock_sem);
	if (!cinode->can_cache_brlcks) {
		up_write(&cinode->lock_sem);
		return rc;
	}

	rc = posix_lock_file(file, flock, NULL);
	up_write(&cinode->lock_sem);
	return rc;
}
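
/*
 * Push all cached mandatory byte-range locks for this fid to the server,
 * batching as many LOCKING_ANDX ranges into each request as the negotiated
 * buffer size allows. Exclusive and shared locks are sent in separate
 * passes.
 */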
int
cifs_push_mandatory_locks(struct cifsFileInfo *cfile)
{
	unsigned int xid;
	int rc = 0, stored_rc;
	struct cifsLockInfo *li, *tmp;
	struct cifs_tcon *tcon;
	unsigned int num, max_num, max_buf;
	LOCKING_ANDX_RANGE *buf, *cur;
	static const int types[] = {
		LOCKING_ANDX_LARGE_FILES,
		LOCKING_ANDX_SHARED_LOCK | LOCKING_ANDX_LARGE_FILES
	};
	int i;

	xid = get_xid();
	tcon = tlink_tcon(cfile->tlink);

	/*
	 * Accessing maxBuf is racy with cifs_reconnect - need to store value
	 * and check it before using.
	 */
	max_buf = tcon->ses->server->maxBuf;
	if (max_buf < (sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE))) {
		free_xid(xid);
		return -EINVAL;
	}

	BUILD_BUG_ON(sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE) >
		     PAGE_SIZE);
	max_buf = min_t(unsigned int, max_buf - sizeof(struct smb_hdr),
			PAGE_SIZE);
	max_num = (max_buf - sizeof(struct smb_hdr)) /
						sizeof(LOCKING_ANDX_RANGE);
	buf = kcalloc(max_num, sizeof(LOCKING_ANDX_RANGE), GFP_KERNEL);
	if (!buf) {
		free_xid(xid);
		return -ENOMEM;
	}

	for (i = 0; i < 2; i++) {
		cur = buf;
		num = 0;
		list_for_each_entry_safe(li, tmp, &cfile->llist->locks, llist) {
			if (li->type != types[i])
				continue;
			cur->Pid = cpu_to_le16(li->pid);
			cur->LengthLow = cpu_to_le32((u32)li->length);
			cur->LengthHigh = cpu_to_le32((u32)(li->length>>32));
			cur->OffsetLow = cpu_to_le32((u32)li->offset);
			cur->OffsetHigh = cpu_to_le32((u32)(li->offset>>32));
			if (++num == max_num) {
				stored_rc = cifs_lockv(xid, tcon,
						       cfile->fid.netfid,
						       (__u8)li->type, 0, num,
						       buf);
				if (stored_rc)
					rc = stored_rc;
				cur = buf;
				num = 0;
			} else
				cur++;
		}

		if (num) {
			stored_rc = cifs_lockv(xid, tcon, cfile->fid.netfid,
					       (__u8)types[i], 0, num, buf);
			if (stored_rc)
				rc = stored_rc;
		}
	}

	kfree(buf);
	free_xid(xid);
	return rc;
}

static __u32
hash_lockowner(fl_owner_t owner)
{
	return cifs_lock_secret ^ hash32_ptr((const void *)owner);
}
#endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */

struct lock_to_push {
	struct list_head llist;
	__u64 offset;
	__u64 length;
	__u32 pid;
	__u16 netfid;
	__u8 type;
};

#ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
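/*
 * Replay the inode's cached POSIX (fcntl) locks to the server. The number
 * of locks is counted under flc_lock, matching push structures are
 * preallocated (no new FL_POSIX locks can appear while lock_sem is held),
 * and each lock is then sent via CIFSSMBPosixLock().
 */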
static int
cifs_push_posix_locks(struct cifsFileInfo *cfile)
{
	struct inode *inode = d_inode(cfile->dentry);
	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
	struct file_lock *flock;
	struct file_lock_context *flctx = locks_inode_context(inode);
	unsigned int count = 0, i;
	int rc = 0, xid, type;
	struct list_head locks_to_send, *el;
	struct lock_to_push *lck, *tmp;
	__u64 length;

	xid = get_xid();

	if (!flctx)
		goto out;

	spin_lock(&flctx->flc_lock);
	list_for_each(el, &flctx->flc_posix) {
		count++;
	}
	spin_unlock(&flctx->flc_lock);

	INIT_LIST_HEAD(&locks_to_send);

	/*
	 * Allocating count locks is enough because no FL_POSIX locks can be
	 * added to the list while we are holding cinode->lock_sem that
	 * protects locking operations of this inode.
	 */
	for (i = 0; i < count; i++) {
		lck = kmalloc(sizeof(struct lock_to_push), GFP_KERNEL);
		if (!lck) {
			rc = -ENOMEM;
			goto err_out;
		}
		list_add_tail(&lck->llist, &locks_to_send);
	}

	el = locks_to_send.next;
	spin_lock(&flctx->flc_lock);
	list_for_each_entry(flock, &flctx->flc_posix, fl_list) {
		if (el == &locks_to_send) {
			/*
			 * The list ended. We don't have enough allocated
			 * structures - something is really wrong.
			 */
			cifs_dbg(VFS, "Can't push all brlocks!\n");
			break;
		}
		length = cifs_flock_len(flock);
		if (flock->fl_type == F_RDLCK || flock->fl_type == F_SHLCK)
			type = CIFS_RDLCK;
		else
			type = CIFS_WRLCK;
		lck = list_entry(el, struct lock_to_push, llist);
		lck->pid = hash_lockowner(flock->fl_owner);
		lck->netfid = cfile->fid.netfid;
		lck->length = length;
		lck->type = type;
		lck->offset = flock->fl_start;
		el = el->next;
	}
	spin_unlock(&flctx->flc_lock);

	list_for_each_entry_safe(lck, tmp, &locks_to_send, llist) {
		int stored_rc;

		stored_rc = CIFSSMBPosixLock(xid, tcon, lck->netfid, lck->pid,
					     lck->offset, lck->length, NULL,
					     lck->type, 0);
		if (stored_rc)
			rc = stored_rc;
		list_del(&lck->llist);
		kfree(lck);
	}

out:
	free_xid(xid);
	return rc;
err_out:
	list_for_each_entry_safe(lck, tmp, &locks_to_send, llist) {
		list_del(&lck->llist);
		kfree(lck);
	}
	goto out;
}
#endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */

static int
cifs_push_locks(struct cifsFileInfo *cfile)
{
	struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
	int rc = 0;
#ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
	struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
#endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */

	/* we are going to update can_cache_brlcks here - need a write access */
	cifs_down_write(&cinode->lock_sem);
	if (!cinode->can_cache_brlcks) {
		up_write(&cinode->lock_sem);
		return rc;
	}

#ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
	if (cap_unix(tcon->ses) &&
	    (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
	    ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
		rc = cifs_push_posix_locks(cfile);
	else
#endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
		rc = tcon->ses->server->ops->push_mand_locks(cfile);

	cinode->can_cache_brlcks = false;
	up_write(&cinode->lock_sem);
	return rc;
}
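
/*
 * Decode the VFS file_lock into the lock type and lock/unlock/wait flags
 * the server ops expect, logging any flag combinations that are not
 * handled.
 */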
static void
cifs_read_flock(struct file_lock *flock, __u32 *type, int *lock, int *unlock,
		bool *wait_flag, struct TCP_Server_Info *server)
{
	if (flock->fl_flags & FL_POSIX)
		cifs_dbg(FYI, "Posix\n");
	if (flock->fl_flags & FL_FLOCK)
		cifs_dbg(FYI, "Flock\n");
	if (flock->fl_flags & FL_SLEEP) {
		cifs_dbg(FYI, "Blocking lock\n");
		*wait_flag = true;
	}
	if (flock->fl_flags & FL_ACCESS)
		cifs_dbg(FYI, "Process suspended by mandatory locking - not implemented yet\n");
	if (flock->fl_flags & FL_LEASE)
		cifs_dbg(FYI, "Lease on file - not implemented yet\n");
	if (flock->fl_flags &
	    (~(FL_POSIX | FL_FLOCK | FL_SLEEP |
	       FL_ACCESS | FL_LEASE | FL_CLOSE | FL_OFDLCK)))
		cifs_dbg(FYI, "Unknown lock flags 0x%x\n", flock->fl_flags);

	*type = server->vals->large_lock_type;
	if (flock->fl_type == F_WRLCK) {
		cifs_dbg(FYI, "F_WRLCK\n");
		*type |= server->vals->exclusive_lock_type;
		*lock = 1;
	} else if (flock->fl_type == F_UNLCK) {
		cifs_dbg(FYI, "F_UNLCK\n");
		*type |= server->vals->unlock_lock_type;
		*unlock = 1;
		/* Check if unlock includes more than one lock range */
	} else if (flock->fl_type == F_RDLCK) {
		cifs_dbg(FYI, "F_RDLCK\n");
		*type |= server->vals->shared_lock_type;
		*lock = 1;
	} else if (flock->fl_type == F_EXLCK) {
		cifs_dbg(FYI, "F_EXLCK\n");
		*type |= server->vals->exclusive_lock_type;
		*lock = 1;
	} else if (flock->fl_type == F_SHLCK) {
		cifs_dbg(FYI, "F_SHLCK\n");
		*type |= server->vals->shared_lock_type;
		*lock = 1;
	} else
		cifs_dbg(FYI, "Unknown type of lock\n");
}

static int
cifs_getlk(struct file *file, struct file_lock *flock, __u32 type,
	   bool wait_flag, bool posix_lck, unsigned int xid)
{
	int rc = 0;
	__u64 length = cifs_flock_len(flock);
	struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
	struct TCP_Server_Info *server = tcon->ses->server;
#ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
	__u16 netfid = cfile->fid.netfid;

	if (posix_lck) {
		int posix_lock_type;

		rc = cifs_posix_lock_test(file, flock);
		if (!rc)
			return rc;

		if (type & server->vals->shared_lock_type)
			posix_lock_type = CIFS_RDLCK;
		else
			posix_lock_type = CIFS_WRLCK;
		rc = CIFSSMBPosixLock(xid, tcon, netfid,
				      hash_lockowner(flock->fl_owner),
				      flock->fl_start, length, flock,
				      posix_lock_type, wait_flag);
		return rc;
	}
#endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */

	rc = cifs_lock_test(cfile, flock->fl_start, length, type, flock);
	if (!rc)
		return rc;

	/* BB we could chain these into one lock request BB */
	rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length, type,
				    1, 0, false);
	if (rc == 0) {
		rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
					    type, 0, 1, false);
		flock->fl_type = F_UNLCK;
		if (rc != 0)
			cifs_dbg(VFS, "Error unlocking previously locked range %d during test of lock\n",
				 rc);
		return 0;
	}

	if (type & server->vals->shared_lock_type) {
		flock->fl_type = F_WRLCK;
		return 0;
	}

	type &= ~server->vals->exclusive_lock_type;

	rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
				    type | server->vals->shared_lock_type,
				    1, 0, false);
	if (rc == 0) {
		rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
					    type | server->vals->shared_lock_type, 0, 1, false);
		flock->fl_type = F_RDLCK;
		if (rc != 0)
			cifs_dbg(VFS, "Error unlocking previously locked range %d during test of lock\n",
				 rc);
	} else
		flock->fl_type = F_WRLCK;

	return 0;
}

void
cifs_move_llist(struct list_head *source, struct list_head *dest)
{
	struct list_head *li, *tmp;
	list_for_each_safe(li, tmp, source)
		list_move(li, dest);
}

void
cifs_free_llist(struct list_head *llist)
{
	struct cifsLockInfo *li, *tmp;
	list_for_each_entry_safe(li, tmp, llist, llist) {
		cifs_del_lock_waiters(li);
		list_del(&li->llist);
		kfree(li);
	}
}

#ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
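/*
 * Unlock a byte range for SMB1 mandatory locking: walk the fid's cached
 * locks, batch every range covered by the request into LOCKING_ANDX
 * arrays, and restore the locks to the file's list if the server rejects
 * an unlock batch.
 */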
cifs_flock_len(flock); 1898 struct list_head tmp_llist; 1899 1900 INIT_LIST_HEAD(&tmp_llist); 1901 1902 /* 1903 * Accessing maxBuf is racy with cifs_reconnect - need to store value 1904 * and check it before using. 1905 */ 1906 max_buf = tcon->ses->server->maxBuf; 1907 if (max_buf < (sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE))) 1908 return -EINVAL; 1909 1910 BUILD_BUG_ON(sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE) > 1911 PAGE_SIZE); 1912 max_buf = min_t(unsigned int, max_buf - sizeof(struct smb_hdr), 1913 PAGE_SIZE); 1914 max_num = (max_buf - sizeof(struct smb_hdr)) / 1915 sizeof(LOCKING_ANDX_RANGE); 1916 buf = kcalloc(max_num, sizeof(LOCKING_ANDX_RANGE), GFP_KERNEL); 1917 if (!buf) 1918 return -ENOMEM; 1919 1920 cifs_down_write(&cinode->lock_sem); 1921 for (i = 0; i < 2; i++) { 1922 cur = buf; 1923 num = 0; 1924 list_for_each_entry_safe(li, tmp, &cfile->llist->locks, llist) { 1925 if (flock->fl_start > li->offset || 1926 (flock->fl_start + length) < 1927 (li->offset + li->length)) 1928 continue; 1929 if (current->tgid != li->pid) 1930 continue; 1931 if (types[i] != li->type) 1932 continue; 1933 if (cinode->can_cache_brlcks) { 1934 /* 1935 * We can cache brlock requests - simply remove 1936 * a lock from the file's list. 1937 */ 1938 list_del(&li->llist); 1939 cifs_del_lock_waiters(li); 1940 kfree(li); 1941 continue; 1942 } 1943 cur->Pid = cpu_to_le16(li->pid); 1944 cur->LengthLow = cpu_to_le32((u32)li->length); 1945 cur->LengthHigh = cpu_to_le32((u32)(li->length>>32)); 1946 cur->OffsetLow = cpu_to_le32((u32)li->offset); 1947 cur->OffsetHigh = cpu_to_le32((u32)(li->offset>>32)); 1948 /* 1949 * We need to save a lock here to let us add it again to 1950 * the file's list if the unlock range request fails on 1951 * the server. 1952 */ 1953 list_move(&li->llist, &tmp_llist); 1954 if (++num == max_num) { 1955 stored_rc = cifs_lockv(xid, tcon, 1956 cfile->fid.netfid, 1957 li->type, num, 0, buf); 1958 if (stored_rc) { 1959 /* 1960 * We failed on the unlock range 1961 * request - add all locks from the tmp 1962 * list to the head of the file's list. 1963 */ 1964 cifs_move_llist(&tmp_llist, 1965 &cfile->llist->locks); 1966 rc = stored_rc; 1967 } else 1968 /* 1969 * The unlock range request succeed - 1970 * free the tmp list. 
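* (cifs_free_llist() also removes and wakes any waiters queued on those ranges)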
1971 */ 1972 cifs_free_llist(&tmp_llist); 1973 cur = buf; 1974 num = 0; 1975 } else 1976 cur++; 1977 } 1978 if (num) { 1979 stored_rc = cifs_lockv(xid, tcon, cfile->fid.netfid, 1980 types[i], num, 0, buf); 1981 if (stored_rc) { 1982 cifs_move_llist(&tmp_llist, 1983 &cfile->llist->locks); 1984 rc = stored_rc; 1985 } else 1986 cifs_free_llist(&tmp_llist); 1987 } 1988 } 1989 1990 up_write(&cinode->lock_sem); 1991 kfree(buf); 1992 return rc; 1993 } 1994 #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */ 1995 1996 static int 1997 cifs_setlk(struct file *file, struct file_lock *flock, __u32 type, 1998 bool wait_flag, bool posix_lck, int lock, int unlock, 1999 unsigned int xid) 2000 { 2001 int rc = 0; 2002 __u64 length = cifs_flock_len(flock); 2003 struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data; 2004 struct cifs_tcon *tcon = tlink_tcon(cfile->tlink); 2005 struct TCP_Server_Info *server = tcon->ses->server; 2006 struct inode *inode = d_inode(cfile->dentry); 2007 2008 #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY 2009 if (posix_lck) { 2010 int posix_lock_type; 2011 2012 rc = cifs_posix_lock_set(file, flock); 2013 if (rc <= FILE_LOCK_DEFERRED) 2014 return rc; 2015 2016 if (type & server->vals->shared_lock_type) 2017 posix_lock_type = CIFS_RDLCK; 2018 else 2019 posix_lock_type = CIFS_WRLCK; 2020 2021 if (unlock == 1) 2022 posix_lock_type = CIFS_UNLCK; 2023 2024 rc = CIFSSMBPosixLock(xid, tcon, cfile->fid.netfid, 2025 hash_lockowner(flock->fl_owner), 2026 flock->fl_start, length, 2027 NULL, posix_lock_type, wait_flag); 2028 goto out; 2029 } 2030 #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */ 2031 if (lock) { 2032 struct cifsLockInfo *lock; 2033 2034 lock = cifs_lock_init(flock->fl_start, length, type, 2035 flock->fl_flags); 2036 if (!lock) 2037 return -ENOMEM; 2038 2039 rc = cifs_lock_add_if(cfile, lock, wait_flag); 2040 if (rc < 0) { 2041 kfree(lock); 2042 return rc; 2043 } 2044 if (!rc) 2045 goto out; 2046 2047 /* 2048 * Windows 7 server can delay breaking lease from read to None 2049 * if we set a byte-range lock on a file - break it explicitly 2050 * before sending the lock to the server to be sure the next 2051 * read won't conflict with non-overlapted locks due to 2052 * pagereading. 
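* (presumably: reads are issued at page granularity, so a cached read may touch byte ranges outside the range the application actually locked)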
2053 */ 2054 if (!CIFS_CACHE_WRITE(CIFS_I(inode)) && 2055 CIFS_CACHE_READ(CIFS_I(inode))) { 2056 cifs_zap_mapping(inode); 2057 cifs_dbg(FYI, "Set no oplock for inode=%p due to mand locks\n", 2058 inode); 2059 CIFS_I(inode)->oplock = 0; 2060 } 2061 2062 rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length, 2063 type, 1, 0, wait_flag); 2064 if (rc) { 2065 kfree(lock); 2066 return rc; 2067 } 2068 2069 cifs_lock_add(cfile, lock); 2070 } else if (unlock) 2071 rc = server->ops->mand_unlock_range(cfile, flock, xid); 2072 2073 out: 2074 if ((flock->fl_flags & FL_POSIX) || (flock->fl_flags & FL_FLOCK)) { 2075 /* 2076 * If this is a request to remove all locks because we 2077 * are closing the file, it doesn't matter if the 2078 * unlocking failed as both cifs.ko and the SMB server 2079 * remove the lock on file close 2080 */ 2081 if (rc) { 2082 cifs_dbg(VFS, "%s failed rc=%d\n", __func__, rc); 2083 if (!(flock->fl_flags & FL_CLOSE)) 2084 return rc; 2085 } 2086 rc = locks_lock_file_wait(file, flock); 2087 } 2088 return rc; 2089 } 2090 2091 int cifs_flock(struct file *file, int cmd, struct file_lock *fl) 2092 { 2093 int rc, xid; 2094 int lock = 0, unlock = 0; 2095 bool wait_flag = false; 2096 bool posix_lck = false; 2097 struct cifs_sb_info *cifs_sb; 2098 struct cifs_tcon *tcon; 2099 struct cifsFileInfo *cfile; 2100 __u32 type; 2101 2102 xid = get_xid(); 2103 2104 if (!(fl->fl_flags & FL_FLOCK)) { 2105 rc = -ENOLCK; 2106 free_xid(xid); 2107 return rc; 2108 } 2109 2110 cfile = (struct cifsFileInfo *)file->private_data; 2111 tcon = tlink_tcon(cfile->tlink); 2112 2113 cifs_read_flock(fl, &type, &lock, &unlock, &wait_flag, 2114 tcon->ses->server); 2115 cifs_sb = CIFS_FILE_SB(file); 2116 2117 if (cap_unix(tcon->ses) && 2118 (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) && 2119 ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0)) 2120 posix_lck = true; 2121 2122 if (!lock && !unlock) { 2123 /* 2124 * if no lock or unlock then nothing to do since we do not 2125 * know what it is 2126 */ 2127 rc = -EOPNOTSUPP; 2128 free_xid(xid); 2129 return rc; 2130 } 2131 2132 rc = cifs_setlk(file, fl, type, wait_flag, posix_lck, lock, unlock, 2133 xid); 2134 free_xid(xid); 2135 return rc; 2136 2137 2138 } 2139 2140 int cifs_lock(struct file *file, int cmd, struct file_lock *flock) 2141 { 2142 int rc, xid; 2143 int lock = 0, unlock = 0; 2144 bool wait_flag = false; 2145 bool posix_lck = false; 2146 struct cifs_sb_info *cifs_sb; 2147 struct cifs_tcon *tcon; 2148 struct cifsFileInfo *cfile; 2149 __u32 type; 2150 2151 rc = -EACCES; 2152 xid = get_xid(); 2153 2154 cifs_dbg(FYI, "%s: %pD2 cmd=0x%x type=0x%x flags=0x%x r=%lld:%lld\n", __func__, file, cmd, 2155 flock->fl_type, flock->fl_flags, (long long)flock->fl_start, 2156 (long long)flock->fl_end); 2157 2158 cfile = (struct cifsFileInfo *)file->private_data; 2159 tcon = tlink_tcon(cfile->tlink); 2160 2161 cifs_read_flock(flock, &type, &lock, &unlock, &wait_flag, 2162 tcon->ses->server); 2163 cifs_sb = CIFS_FILE_SB(file); 2164 set_bit(CIFS_INO_CLOSE_ON_LOCK, &CIFS_I(d_inode(cfile->dentry))->flags); 2165 2166 if (cap_unix(tcon->ses) && 2167 (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) && 2168 ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0)) 2169 posix_lck = true; 2170 /* 2171 * BB add code here to normalize offset and length to account for 2172 * negative length which we cannot accept over the wire.
2173 */ 2174 if (IS_GETLK(cmd)) { 2175 rc = cifs_getlk(file, flock, type, wait_flag, posix_lck, xid); 2176 free_xid(xid); 2177 return rc; 2178 } 2179 2180 if (!lock && !unlock) { 2181 /* 2182 * if no lock or unlock then nothing to do since we do not 2183 * know what it is 2184 */ 2185 free_xid(xid); 2186 return -EOPNOTSUPP; 2187 } 2188 2189 rc = cifs_setlk(file, flock, type, wait_flag, posix_lck, lock, unlock, 2190 xid); 2191 free_xid(xid); 2192 return rc; 2193 } 2194 2195 /* 2196 * update the file size (if needed) after a write. Should be called with 2197 * the inode->i_lock held 2198 */ 2199 void 2200 cifs_update_eof(struct cifsInodeInfo *cifsi, loff_t offset, 2201 unsigned int bytes_written) 2202 { 2203 loff_t end_of_write = offset + bytes_written; 2204 2205 if (end_of_write > cifsi->server_eof) 2206 cifsi->server_eof = end_of_write; 2207 } 2208 2209 static ssize_t 2210 cifs_write(struct cifsFileInfo *open_file, __u32 pid, const char *write_data, 2211 size_t write_size, loff_t *offset) 2212 { 2213 int rc = 0; 2214 unsigned int bytes_written = 0; 2215 unsigned int total_written; 2216 struct cifs_tcon *tcon; 2217 struct TCP_Server_Info *server; 2218 unsigned int xid; 2219 struct dentry *dentry = open_file->dentry; 2220 struct cifsInodeInfo *cifsi = CIFS_I(d_inode(dentry)); 2221 struct cifs_io_parms io_parms = {0}; 2222 2223 cifs_dbg(FYI, "write %zd bytes to offset %lld of %pd\n", 2224 write_size, *offset, dentry); 2225 2226 tcon = tlink_tcon(open_file->tlink); 2227 server = tcon->ses->server; 2228 2229 if (!server->ops->sync_write) 2230 return -ENOSYS; 2231 2232 xid = get_xid(); 2233 2234 for (total_written = 0; write_size > total_written; 2235 total_written += bytes_written) { 2236 rc = -EAGAIN; 2237 while (rc == -EAGAIN) { 2238 struct kvec iov[2]; 2239 unsigned int len; 2240 2241 if (open_file->invalidHandle) { 2242 /* we could deadlock if we called 2243 filemap_fdatawait from here so tell 2244 reopen_file not to flush data to 2245 server now */ 2246 rc = cifs_reopen_file(open_file, false); 2247 if (rc != 0) 2248 break; 2249 } 2250 2251 len = min(server->ops->wp_retry_size(d_inode(dentry)), 2252 (unsigned int)write_size - total_written); 2253 /* iov[0] is reserved for smb header */ 2254 iov[1].iov_base = (char *)write_data + total_written; 2255 iov[1].iov_len = len; 2256 io_parms.pid = pid; 2257 io_parms.tcon = tcon; 2258 io_parms.offset = *offset; 2259 io_parms.length = len; 2260 rc = server->ops->sync_write(xid, &open_file->fid, 2261 &io_parms, &bytes_written, iov, 1); 2262 } 2263 if (rc || (bytes_written == 0)) { 2264 if (total_written) 2265 break; 2266 else { 2267 free_xid(xid); 2268 return rc; 2269 } 2270 } else { 2271 spin_lock(&d_inode(dentry)->i_lock); 2272 cifs_update_eof(cifsi, *offset, bytes_written); 2273 spin_unlock(&d_inode(dentry)->i_lock); 2274 *offset += bytes_written; 2275 } 2276 } 2277 2278 cifs_stats_bytes_written(tcon, total_written); 2279 2280 if (total_written > 0) { 2281 spin_lock(&d_inode(dentry)->i_lock); 2282 if (*offset > d_inode(dentry)->i_size) { 2283 i_size_write(d_inode(dentry), *offset); 2284 d_inode(dentry)->i_blocks = (512 - 1 + *offset) >> 9; 2285 } 2286 spin_unlock(&d_inode(dentry)->i_lock); 2287 } 2288 mark_inode_dirty_sync(d_inode(dentry)); 2289 free_xid(xid); 2290 return total_written; 2291 } 2292 2293 struct cifsFileInfo *find_readable_file(struct cifsInodeInfo *cifs_inode, 2294 bool fsuid_only) 2295 { 2296 struct cifsFileInfo *open_file = NULL; 2297 struct cifs_sb_info *cifs_sb = CIFS_SB(cifs_inode->netfs.inode.i_sb); 2298 2299 /* only filter by 
fsuid on multiuser mounts */ 2300 if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER)) 2301 fsuid_only = false; 2302 2303 spin_lock(&cifs_inode->open_file_lock); 2304 /* we could simply get the first_list_entry since write-only entries 2305 are always at the end of the list but since the first entry might 2306 have a close pending, we go through the whole list */ 2307 list_for_each_entry(open_file, &cifs_inode->openFileList, flist) { 2308 if (fsuid_only && !uid_eq(open_file->uid, current_fsuid())) 2309 continue; 2310 if (OPEN_FMODE(open_file->f_flags) & FMODE_READ) { 2311 if (!open_file->invalidHandle) { 2312 /* found a good file */ 2313 /* lock it so it will not be closed on us */ 2314 cifsFileInfo_get(open_file); 2315 spin_unlock(&cifs_inode->open_file_lock); 2316 return open_file; 2317 } /* else might as well continue, and look for 2318 another, or simply have the caller reopen it 2319 again rather than trying to fix this handle */ 2320 } else /* write only file */ 2321 break; /* write only files are last so must be done */ 2322 } 2323 spin_unlock(&cifs_inode->open_file_lock); 2324 return NULL; 2325 } 2326 2327 /* Return -EBADF if no handle is found and general rc otherwise */ 2328 int 2329 cifs_get_writable_file(struct cifsInodeInfo *cifs_inode, int flags, 2330 struct cifsFileInfo **ret_file) 2331 { 2332 struct cifsFileInfo *open_file, *inv_file = NULL; 2333 struct cifs_sb_info *cifs_sb; 2334 bool any_available = false; 2335 int rc = -EBADF; 2336 unsigned int refind = 0; 2337 bool fsuid_only = flags & FIND_WR_FSUID_ONLY; 2338 bool with_delete = flags & FIND_WR_WITH_DELETE; 2339 *ret_file = NULL; 2340 2341 /* 2342 * Having a null inode here (because mapping->host was set to zero by 2343 * the VFS or MM) should not happen but we had reports of an oops (due 2344 * to it being zero) during stress testcases so we need to check for it 2345 */ 2346 2347 if (cifs_inode == NULL) { 2348 cifs_dbg(VFS, "Null inode passed to cifs_get_writable_file\n"); 2349 dump_stack(); 2350 return rc; 2351 } 2352 2353 cifs_sb = CIFS_SB(cifs_inode->netfs.inode.i_sb); 2354 2355 /* only filter by fsuid on multiuser mounts */ 2356 if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER)) 2357 fsuid_only = false; 2358 2359 spin_lock(&cifs_inode->open_file_lock); 2360 refind_writable: 2361 if (refind > MAX_REOPEN_ATT) { 2362 spin_unlock(&cifs_inode->open_file_lock); 2363 return rc; 2364 } 2365 list_for_each_entry(open_file, &cifs_inode->openFileList, flist) { 2366 if (!any_available && open_file->pid != current->tgid) 2367 continue; 2368 if (fsuid_only && !uid_eq(open_file->uid, current_fsuid())) 2369 continue; 2370 if (with_delete && !(open_file->fid.access & DELETE)) 2371 continue; 2372 if (OPEN_FMODE(open_file->f_flags) & FMODE_WRITE) { 2373 if (!open_file->invalidHandle) { 2374 /* found a good writable file */ 2375 cifsFileInfo_get(open_file); 2376 spin_unlock(&cifs_inode->open_file_lock); 2377 *ret_file = open_file; 2378 return 0; 2379 } else { 2380 if (!inv_file) 2381 inv_file = open_file; 2382 } 2383 } 2384 } 2385 /* couldn't find usable FH with same pid, try any available */ 2386 if (!any_available) { 2387 any_available = true; 2388 goto refind_writable; 2389 } 2390 2391 if (inv_file) { 2392 any_available = false; 2393 cifsFileInfo_get(inv_file); 2394 } 2395 2396 spin_unlock(&cifs_inode->open_file_lock); 2397 2398 if (inv_file) { 2399 rc = cifs_reopen_file(inv_file, false); 2400 if (!rc) { 2401 *ret_file = inv_file; 2402 return 0; 2403 } 2404 2405 spin_lock(&cifs_inode->open_file_lock); 2406
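/* Reopen failed - move this handle to the back of the list so the rescan below tries other handles first. */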
list_move_tail(&inv_file->flist, &cifs_inode->openFileList); 2407 spin_unlock(&cifs_inode->open_file_lock); 2408 cifsFileInfo_put(inv_file); 2409 ++refind; 2410 inv_file = NULL; 2411 spin_lock(&cifs_inode->open_file_lock); 2412 goto refind_writable; 2413 } 2414 2415 return rc; 2416 } 2417 2418 struct cifsFileInfo * 2419 find_writable_file(struct cifsInodeInfo *cifs_inode, int flags) 2420 { 2421 struct cifsFileInfo *cfile; 2422 int rc; 2423 2424 rc = cifs_get_writable_file(cifs_inode, flags, &cfile); 2425 if (rc) 2426 cifs_dbg(FYI, "Couldn't find writable handle rc=%d\n", rc); 2427 2428 return cfile; 2429 } 2430 2431 int 2432 cifs_get_writable_path(struct cifs_tcon *tcon, const char *name, 2433 int flags, 2434 struct cifsFileInfo **ret_file) 2435 { 2436 struct cifsFileInfo *cfile; 2437 void *page = alloc_dentry_path(); 2438 2439 *ret_file = NULL; 2440 2441 spin_lock(&tcon->open_file_lock); 2442 list_for_each_entry(cfile, &tcon->openFileList, tlist) { 2443 struct cifsInodeInfo *cinode; 2444 const char *full_path = build_path_from_dentry(cfile->dentry, page); 2445 if (IS_ERR(full_path)) { 2446 spin_unlock(&tcon->open_file_lock); 2447 free_dentry_path(page); 2448 return PTR_ERR(full_path); 2449 } 2450 if (strcmp(full_path, name)) 2451 continue; 2452 2453 cinode = CIFS_I(d_inode(cfile->dentry)); 2454 spin_unlock(&tcon->open_file_lock); 2455 free_dentry_path(page); 2456 return cifs_get_writable_file(cinode, flags, ret_file); 2457 } 2458 2459 spin_unlock(&tcon->open_file_lock); 2460 free_dentry_path(page); 2461 return -ENOENT; 2462 } 2463 2464 int 2465 cifs_get_readable_path(struct cifs_tcon *tcon, const char *name, 2466 struct cifsFileInfo **ret_file) 2467 { 2468 struct cifsFileInfo *cfile; 2469 void *page = alloc_dentry_path(); 2470 2471 *ret_file = NULL; 2472 2473 spin_lock(&tcon->open_file_lock); 2474 list_for_each_entry(cfile, &tcon->openFileList, tlist) { 2475 struct cifsInodeInfo *cinode; 2476 const char *full_path = build_path_from_dentry(cfile->dentry, page); 2477 if (IS_ERR(full_path)) { 2478 spin_unlock(&tcon->open_file_lock); 2479 free_dentry_path(page); 2480 return PTR_ERR(full_path); 2481 } 2482 if (strcmp(full_path, name)) 2483 continue; 2484 2485 cinode = CIFS_I(d_inode(cfile->dentry)); 2486 spin_unlock(&tcon->open_file_lock); 2487 free_dentry_path(page); 2488 *ret_file = find_readable_file(cinode, 0); 2489 return *ret_file ? 0 : -ENOENT; 2490 } 2491 2492 spin_unlock(&tcon->open_file_lock); 2493 free_dentry_path(page); 2494 return -ENOENT; 2495 } 2496 2497 void 2498 cifs_writedata_release(struct kref *refcount) 2499 { 2500 struct cifs_writedata *wdata = container_of(refcount, 2501 struct cifs_writedata, refcount); 2502 #ifdef CONFIG_CIFS_SMB_DIRECT 2503 if (wdata->mr) { 2504 smbd_deregister_mr(wdata->mr); 2505 wdata->mr = NULL; 2506 } 2507 #endif 2508 2509 if (wdata->cfile) 2510 cifsFileInfo_put(wdata->cfile); 2511 2512 kfree(wdata); 2513 } 2514 2515 /* 2516 * Write failed with a retryable error. Resend the write request. It's also 2517 * possible that the page was redirtied so re-clean the page. 
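* (for xarray-backed requests, cifs_undirty_folios() below clears the dirty flag again)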
2518 */ 2519 static void 2520 cifs_writev_requeue(struct cifs_writedata *wdata) 2521 { 2522 int rc = 0; 2523 struct inode *inode = d_inode(wdata->cfile->dentry); 2524 struct TCP_Server_Info *server; 2525 unsigned int rest_len = wdata->bytes; 2526 loff_t fpos = wdata->offset; 2527 2528 server = tlink_tcon(wdata->cfile->tlink)->ses->server; 2529 do { 2530 struct cifs_writedata *wdata2; 2531 unsigned int wsize, cur_len; 2532 2533 wsize = server->ops->wp_retry_size(inode); 2534 if (wsize < rest_len) { 2535 if (wsize < PAGE_SIZE) { 2536 rc = -EOPNOTSUPP; 2537 break; 2538 } 2539 cur_len = min(round_down(wsize, PAGE_SIZE), rest_len); 2540 } else { 2541 cur_len = rest_len; 2542 } 2543 2544 wdata2 = cifs_writedata_alloc(cifs_writev_complete); 2545 if (!wdata2) { 2546 rc = -ENOMEM; 2547 break; 2548 } 2549 2550 wdata2->sync_mode = wdata->sync_mode; 2551 wdata2->offset = fpos; 2552 wdata2->bytes = cur_len; 2553 wdata2->iter = wdata->iter; 2554 2555 iov_iter_advance(&wdata2->iter, fpos - wdata->offset); 2556 iov_iter_truncate(&wdata2->iter, wdata2->bytes); 2557 2558 if (iov_iter_is_xarray(&wdata2->iter)) 2559 /* Check for pages having been redirtied and clean 2560 * them. We can do this by walking the xarray. If 2561 * it's not an xarray, then it's a DIO and we shouldn't 2562 * be mucking around with the page bits. 2563 */ 2564 cifs_undirty_folios(inode, fpos, cur_len); 2565 2566 rc = cifs_get_writable_file(CIFS_I(inode), FIND_WR_ANY, 2567 &wdata2->cfile); 2568 if (!wdata2->cfile) { 2569 cifs_dbg(VFS, "No writable handle to retry writepages rc=%d\n", 2570 rc); 2571 if (!is_retryable_error(rc)) 2572 rc = -EBADF; 2573 } else { 2574 wdata2->pid = wdata2->cfile->pid; 2575 rc = server->ops->async_writev(wdata2, 2576 cifs_writedata_release); 2577 } 2578 2579 kref_put(&wdata2->refcount, cifs_writedata_release); 2580 if (rc) { 2581 if (is_retryable_error(rc)) 2582 continue; 2583 fpos += cur_len; 2584 rest_len -= cur_len; 2585 break; 2586 } 2587 2588 fpos += cur_len; 2589 rest_len -= cur_len; 2590 } while (rest_len > 0); 2591 2592 /* Clean up remaining pages from the original wdata */ 2593 if (iov_iter_is_xarray(&wdata->iter)) 2594 cifs_pages_write_failed(inode, fpos, rest_len); 2595 2596 if (rc != 0 && !is_retryable_error(rc)) 2597 mapping_set_error(inode->i_mapping, rc); 2598 kref_put(&wdata->refcount, cifs_writedata_release); 2599 } 2600 2601 void 2602 cifs_writev_complete(struct work_struct *work) 2603 { 2604 struct cifs_writedata *wdata = container_of(work, 2605 struct cifs_writedata, work); 2606 struct inode *inode = d_inode(wdata->cfile->dentry); 2607 2608 if (wdata->result == 0) { 2609 spin_lock(&inode->i_lock); 2610 cifs_update_eof(CIFS_I(inode), wdata->offset, wdata->bytes); 2611 spin_unlock(&inode->i_lock); 2612 cifs_stats_bytes_written(tlink_tcon(wdata->cfile->tlink), 2613 wdata->bytes); 2614 } else if (wdata->sync_mode == WB_SYNC_ALL && wdata->result == -EAGAIN) 2615 return cifs_writev_requeue(wdata); 2616 2617 if (wdata->result == -EAGAIN) 2618 cifs_pages_write_redirty(inode, wdata->offset, wdata->bytes); 2619 else if (wdata->result < 0) 2620 cifs_pages_write_failed(inode, wdata->offset, wdata->bytes); 2621 else 2622 cifs_pages_written_back(inode, wdata->offset, wdata->bytes); 2623 2624 if (wdata->result != -EAGAIN) 2625 mapping_set_error(inode->i_mapping, wdata->result); 2626 kref_put(&wdata->refcount, cifs_writedata_release); 2627 } 2628 2629 struct cifs_writedata *cifs_writedata_alloc(work_func_t complete) 2630 { 2631 struct cifs_writedata *wdata; 2632 2633 wdata = kzalloc(sizeof(*wdata), 
GFP_NOFS); 2634 if (wdata != NULL) { 2635 kref_init(&wdata->refcount); 2636 INIT_LIST_HEAD(&wdata->list); 2637 init_completion(&wdata->done); 2638 INIT_WORK(&wdata->work, complete); 2639 } 2640 return wdata; 2641 } 2642 2643 static int cifs_partialpagewrite(struct page *page, unsigned from, unsigned to) 2644 { 2645 struct address_space *mapping = page->mapping; 2646 loff_t offset = (loff_t)page->index << PAGE_SHIFT; 2647 char *write_data; 2648 int rc = -EFAULT; 2649 int bytes_written = 0; 2650 struct inode *inode; 2651 struct cifsFileInfo *open_file; 2652 2653 if (!mapping || !mapping->host) 2654 return -EFAULT; 2655 2656 inode = page->mapping->host; 2657 2658 offset += (loff_t)from; 2659 write_data = kmap(page); 2660 write_data += from; 2661 2662 if ((to > PAGE_SIZE) || (from > to)) { 2663 kunmap(page); 2664 return -EIO; 2665 } 2666 2667 /* racing with truncate? */ 2668 if (offset > mapping->host->i_size) { 2669 kunmap(page); 2670 return 0; /* don't care */ 2671 } 2672 2673 /* check to make sure that we are not extending the file */ 2674 if (mapping->host->i_size - offset < (loff_t)to) 2675 to = (unsigned)(mapping->host->i_size - offset); 2676 2677 rc = cifs_get_writable_file(CIFS_I(mapping->host), FIND_WR_ANY, 2678 &open_file); 2679 if (!rc) { 2680 bytes_written = cifs_write(open_file, open_file->pid, 2681 write_data, to - from, &offset); 2682 cifsFileInfo_put(open_file); 2683 /* Does mm or vfs already set times? */ 2684 simple_inode_init_ts(inode); 2685 if ((bytes_written > 0) && (offset)) 2686 rc = 0; 2687 else if (bytes_written < 0) 2688 rc = bytes_written; 2689 else 2690 rc = -EFAULT; 2691 } else { 2692 cifs_dbg(FYI, "No writable handle for write page rc=%d\n", rc); 2693 if (!is_retryable_error(rc)) 2694 rc = -EIO; 2695 } 2696 2697 kunmap(page); 2698 return rc; 2699 } 2700 2701 /* 2702 * Extend the region to be written back to include subsequent contiguously 2703 * dirty pages if possible, but don't sleep while doing so. 2704 */ 2705 static void cifs_extend_writeback(struct address_space *mapping, 2706 struct xa_state *xas, 2707 long *_count, 2708 loff_t start, 2709 int max_pages, 2710 loff_t max_len, 2711 size_t *_len) 2712 { 2713 struct folio_batch batch; 2714 struct folio *folio; 2715 unsigned int nr_pages; 2716 pgoff_t index = (start + *_len) / PAGE_SIZE; 2717 size_t len; 2718 bool stop = true; 2719 unsigned int i; 2720 2721 folio_batch_init(&batch); 2722 2723 do { 2724 /* Firstly, we gather up a batch of contiguous dirty pages 2725 * under the RCU read lock - but we can't clear the dirty flags 2726 * there if any of those pages are mapped. 2727 */ 2728 rcu_read_lock(); 2729 2730 xas_for_each(xas, folio, ULONG_MAX) { 2731 stop = true; 2732 if (xas_retry(xas, folio)) 2733 continue; 2734 if (xa_is_value(folio)) 2735 break; 2736 if (folio->index != index) { 2737 xas_reset(xas); 2738 break; 2739 } 2740 2741 if (!folio_try_get_rcu(folio)) { 2742 xas_reset(xas); 2743 continue; 2744 } 2745 nr_pages = folio_nr_pages(folio); 2746 if (nr_pages > max_pages) { 2747 xas_reset(xas); 2748 break; 2749 } 2750 2751 /* Has the page moved or been split? 
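* xas_reload() re-fetches the entry under the RCU read lock so we can detect that here.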
*/ 2752 if (unlikely(folio != xas_reload(xas))) { 2753 folio_put(folio); 2754 xas_reset(xas); 2755 break; 2756 } 2757 2758 if (!folio_trylock(folio)) { 2759 folio_put(folio); 2760 xas_reset(xas); 2761 break; 2762 } 2763 if (!folio_test_dirty(folio) || 2764 folio_test_writeback(folio)) { 2765 folio_unlock(folio); 2766 folio_put(folio); 2767 xas_reset(xas); 2768 break; 2769 } 2770 2771 max_pages -= nr_pages; 2772 len = folio_size(folio); 2773 stop = false; 2774 2775 index += nr_pages; 2776 *_count -= nr_pages; 2777 *_len += len; 2778 if (max_pages <= 0 || *_len >= max_len || *_count <= 0) 2779 stop = true; 2780 2781 if (!folio_batch_add(&batch, folio)) 2782 break; 2783 if (stop) 2784 break; 2785 } 2786 2787 xas_pause(xas); 2788 rcu_read_unlock(); 2789 2790 /* Now, if we obtained any pages, we can shift them to being 2791 * writable and mark them for caching. 2792 */ 2793 if (!folio_batch_count(&batch)) 2794 break; 2795 2796 for (i = 0; i < folio_batch_count(&batch); i++) { 2797 folio = batch.folios[i]; 2798 /* The folio should be locked, dirty and not undergoing 2799 * writeback from the loop above. 2800 */ 2801 if (!folio_clear_dirty_for_io(folio)) 2802 WARN_ON(1); 2803 folio_start_writeback(folio); 2804 folio_unlock(folio); 2805 } 2806 2807 folio_batch_release(&batch); 2808 cond_resched(); 2809 } while (!stop); 2810 } 2811 2812 /* 2813 * Write back the locked page and any subsequent non-locked dirty pages. 2814 */ 2815 static ssize_t cifs_write_back_from_locked_folio(struct address_space *mapping, 2816 struct writeback_control *wbc, 2817 struct xa_state *xas, 2818 struct folio *folio, 2819 unsigned long long start, 2820 unsigned long long end) 2821 { 2822 struct inode *inode = mapping->host; 2823 struct TCP_Server_Info *server; 2824 struct cifs_writedata *wdata; 2825 struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb); 2826 struct cifs_credits credits_on_stack; 2827 struct cifs_credits *credits = &credits_on_stack; 2828 struct cifsFileInfo *cfile = NULL; 2829 unsigned long long i_size = i_size_read(inode), max_len; 2830 unsigned int xid, wsize; 2831 size_t len = folio_size(folio); 2832 long count = wbc->nr_to_write; 2833 int rc; 2834 2835 /* The folio should be locked, dirty and not undergoing writeback. */ 2836 if (!folio_clear_dirty_for_io(folio)) 2837 WARN_ON_ONCE(1); 2838 folio_start_writeback(folio); 2839 2840 count -= folio_nr_pages(folio); 2841 2842 xid = get_xid(); 2843 server = cifs_pick_channel(cifs_sb_master_tcon(cifs_sb)->ses); 2844 2845 rc = cifs_get_writable_file(CIFS_I(inode), FIND_WR_ANY, &cfile); 2846 if (rc) { 2847 cifs_dbg(VFS, "No writable handle in writepages rc=%d\n", rc); 2848 goto err_xid; 2849 } 2850 2851 rc = server->ops->wait_mtu_credits(server, cifs_sb->ctx->wsize, 2852 &wsize, credits); 2853 if (rc != 0) 2854 goto err_close; 2855 2856 wdata = cifs_writedata_alloc(cifs_writev_complete); 2857 if (!wdata) { 2858 rc = -ENOMEM; 2859 goto err_uncredit; 2860 } 2861 2862 wdata->sync_mode = wbc->sync_mode; 2863 wdata->offset = folio_pos(folio); 2864 wdata->pid = cfile->pid; 2865 wdata->credits = credits_on_stack; 2866 wdata->cfile = cfile; 2867 wdata->server = server; 2868 cfile = NULL; 2869 2870 /* Find all consecutive lockable dirty pages that have contiguous 2871 * written regions, stopping when we find a page that is not 2872 * immediately lockable, is not dirty or is missing, or we reach the 2873 * end of the range. 2874 */ 2875 if (start < i_size) { 2876 /* Trim the write to the EOF; the extra data is ignored. 
Also 2877 * put an upper limit on the size of a single storedata op. 2878 */ 2879 max_len = wsize; 2880 max_len = min_t(unsigned long long, max_len, end - start + 1); 2881 max_len = min_t(unsigned long long, max_len, i_size - start); 2882 2883 if (len < max_len) { 2884 int max_pages = INT_MAX; 2885 2886 #ifdef CONFIG_CIFS_SMB_DIRECT 2887 if (server->smbd_conn) 2888 max_pages = server->smbd_conn->max_frmr_depth; 2889 #endif 2890 max_pages -= folio_nr_pages(folio); 2891 2892 if (max_pages > 0) 2893 cifs_extend_writeback(mapping, xas, &count, start, 2894 max_pages, max_len, &len); 2895 } 2896 } 2897 len = min_t(unsigned long long, len, i_size - start); 2898 2899 /* We now have a contiguous set of dirty pages, each with writeback 2900 * set; the first page is still locked at this point, but all the rest 2901 * have been unlocked. 2902 */ 2903 folio_unlock(folio); 2904 wdata->bytes = len; 2905 2906 if (start < i_size) { 2907 iov_iter_xarray(&wdata->iter, ITER_SOURCE, &mapping->i_pages, 2908 start, len); 2909 2910 rc = adjust_credits(wdata->server, &wdata->credits, wdata->bytes); 2911 if (rc) 2912 goto err_wdata; 2913 2914 if (wdata->cfile->invalidHandle) 2915 rc = -EAGAIN; 2916 else 2917 rc = wdata->server->ops->async_writev(wdata, 2918 cifs_writedata_release); 2919 if (rc >= 0) { 2920 kref_put(&wdata->refcount, cifs_writedata_release); 2921 goto err_close; 2922 } 2923 } else { 2924 /* The dirty region was entirely beyond the EOF. */ 2925 cifs_pages_written_back(inode, start, len); 2926 rc = 0; 2927 } 2928 2929 err_wdata: 2930 kref_put(&wdata->refcount, cifs_writedata_release); 2931 err_uncredit: 2932 add_credits_and_wake_if(server, credits, 0); 2933 err_close: 2934 if (cfile) 2935 cifsFileInfo_put(cfile); 2936 err_xid: 2937 free_xid(xid); 2938 if (rc == 0) { 2939 wbc->nr_to_write = count; 2940 rc = len; 2941 } else if (is_retryable_error(rc)) { 2942 cifs_pages_write_redirty(inode, start, len); 2943 } else { 2944 cifs_pages_write_failed(inode, start, len); 2945 mapping_set_error(mapping, rc); 2946 } 2947 /* Indication to update ctime and mtime as close is deferred */ 2948 set_bit(CIFS_INO_MODIFIED_ATTR, &CIFS_I(inode)->flags); 2949 return rc; 2950 } 2951 2952 /* 2953 * write a region of pages back to the server 2954 */ 2955 static ssize_t cifs_writepages_begin(struct address_space *mapping, 2956 struct writeback_control *wbc, 2957 struct xa_state *xas, 2958 unsigned long long *_start, 2959 unsigned long long end) 2960 { 2961 struct folio *folio; 2962 unsigned long long start = *_start; 2963 ssize_t ret; 2964 int skips = 0; 2965 2966 search_again: 2967 /* Find the first dirty page. 
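* We only take a reference on it under the RCU read lock; locking the folio (and writing it back) happens after rcu_read_unlock().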
*/ 2968 rcu_read_lock(); 2969 2970 for (;;) { 2971 folio = xas_find_marked(xas, end / PAGE_SIZE, PAGECACHE_TAG_DIRTY); 2972 if (xas_retry(xas, folio) || xa_is_value(folio)) 2973 continue; 2974 if (!folio) 2975 break; 2976 2977 if (!folio_try_get_rcu(folio)) { 2978 xas_reset(xas); 2979 continue; 2980 } 2981 2982 if (unlikely(folio != xas_reload(xas))) { 2983 folio_put(folio); 2984 xas_reset(xas); 2985 continue; 2986 } 2987 2988 xas_pause(xas); 2989 break; 2990 } 2991 rcu_read_unlock(); 2992 if (!folio) 2993 return 0; 2994 2995 start = folio_pos(folio); /* May regress with THPs */ 2996 2997 /* At this point we hold neither the i_pages lock nor the page lock: 2998 * the page may be truncated or invalidated (changing page->mapping to 2999 * NULL), or even swizzled back from swapper_space to tmpfs file 3000 * mapping 3001 */ 3002 lock_again: 3003 if (wbc->sync_mode != WB_SYNC_NONE) { 3004 ret = folio_lock_killable(folio); 3005 if (ret < 0) 3006 return ret; 3007 } else { 3008 if (!folio_trylock(folio)) 3009 goto search_again; 3010 } 3011 3012 if (folio->mapping != mapping || 3013 !folio_test_dirty(folio)) { 3014 start += folio_size(folio); 3015 folio_unlock(folio); 3016 goto search_again; 3017 } 3018 3019 if (folio_test_writeback(folio) || 3020 folio_test_fscache(folio)) { 3021 folio_unlock(folio); 3022 if (wbc->sync_mode != WB_SYNC_NONE) { 3023 folio_wait_writeback(folio); 3024 #ifdef CONFIG_CIFS_FSCACHE 3025 folio_wait_fscache(folio); 3026 #endif 3027 goto lock_again; 3028 } 3029 3030 start += folio_size(folio); 3031 if (wbc->sync_mode == WB_SYNC_NONE) { 3032 if (skips >= 5 || need_resched()) { 3033 ret = 0; 3034 goto out; 3035 } 3036 skips++; 3037 } 3038 goto search_again; 3039 } 3040 3041 ret = cifs_write_back_from_locked_folio(mapping, wbc, xas, folio, start, end); 3042 out: 3043 if (ret > 0) 3044 *_start = start + ret; 3045 return ret; 3046 } 3047 3048 /* 3049 * Write a region of pages back to the server 3050 */ 3051 static int cifs_writepages_region(struct address_space *mapping, 3052 struct writeback_control *wbc, 3053 unsigned long long *_start, 3054 unsigned long long end) 3055 { 3056 ssize_t ret; 3057 3058 XA_STATE(xas, &mapping->i_pages, *_start / PAGE_SIZE); 3059 3060 do { 3061 ret = cifs_writepages_begin(mapping, wbc, &xas, _start, end); 3062 if (ret > 0 && wbc->nr_to_write > 0) 3063 cond_resched(); 3064 } while (ret > 0 && wbc->nr_to_write > 0); 3065 3066 return ret > 0 ? 0 : ret; 3067 } 3068 3069 /* 3070 * Write some of the pending data back to the server 3071 */ 3072 static int cifs_writepages(struct address_space *mapping, 3073 struct writeback_control *wbc) 3074 { 3075 loff_t start, end; 3076 int ret; 3077 3078 /* We have to be careful as we can end up racing with setattr() 3079 * truncating the pagecache since the caller doesn't take a lock here 3080 * to prevent it. 
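* For range_cyclic writeback we start at writeback_index, write through to the end of the file, then wrap around to cover the start.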
3081 */ 3082 3083 if (wbc->range_cyclic && mapping->writeback_index) { 3084 start = mapping->writeback_index * PAGE_SIZE; 3085 ret = cifs_writepages_region(mapping, wbc, &start, LLONG_MAX); 3086 if (ret < 0) 3087 goto out; 3088 3089 if (wbc->nr_to_write <= 0) { 3090 mapping->writeback_index = start / PAGE_SIZE; 3091 goto out; 3092 } 3093 3094 start = 0; 3095 end = mapping->writeback_index * PAGE_SIZE; 3096 mapping->writeback_index = 0; 3097 ret = cifs_writepages_region(mapping, wbc, &start, end); 3098 if (ret == 0) 3099 mapping->writeback_index = start / PAGE_SIZE; 3100 } else if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX) { 3101 start = 0; 3102 ret = cifs_writepages_region(mapping, wbc, &start, LLONG_MAX); 3103 if (wbc->nr_to_write > 0 && ret == 0) 3104 mapping->writeback_index = start / PAGE_SIZE; 3105 } else { 3106 start = wbc->range_start; 3107 ret = cifs_writepages_region(mapping, wbc, &start, wbc->range_end); 3108 } 3109 3110 out: 3111 return ret; 3112 } 3113 3114 static int 3115 cifs_writepage_locked(struct page *page, struct writeback_control *wbc) 3116 { 3117 int rc; 3118 unsigned int xid; 3119 3120 xid = get_xid(); 3121 /* BB add check for wbc flags */ 3122 get_page(page); 3123 if (!PageUptodate(page)) 3124 cifs_dbg(FYI, "ppw - page not up to date\n"); 3125 3126 /* 3127 * Set the "writeback" flag, and clear "dirty" in the radix tree. 3128 * 3129 * A writepage() implementation always needs to do either this, 3130 * or re-dirty the page with "redirty_page_for_writepage()" in 3131 * the case of a failure. 3132 * 3133 * Just unlocking the page will cause the radix tree tag-bits 3134 * to fail to update with the state of the page correctly. 3135 */ 3136 set_page_writeback(page); 3137 retry_write: 3138 rc = cifs_partialpagewrite(page, 0, PAGE_SIZE); 3139 if (is_retryable_error(rc)) { 3140 if (wbc->sync_mode == WB_SYNC_ALL && rc == -EAGAIN) 3141 goto retry_write; 3142 redirty_page_for_writepage(wbc, page); 3143 } else if (rc != 0) { 3144 SetPageError(page); 3145 mapping_set_error(page->mapping, rc); 3146 } else { 3147 SetPageUptodate(page); 3148 } 3149 end_page_writeback(page); 3150 put_page(page); 3151 free_xid(xid); 3152 return rc; 3153 } 3154 3155 static int cifs_write_end(struct file *file, struct address_space *mapping, 3156 loff_t pos, unsigned len, unsigned copied, 3157 struct page *page, void *fsdata) 3158 { 3159 int rc; 3160 struct inode *inode = mapping->host; 3161 struct cifsFileInfo *cfile = file->private_data; 3162 struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb); 3163 struct folio *folio = page_folio(page); 3164 __u32 pid; 3165 3166 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD) 3167 pid = cfile->pid; 3168 else 3169 pid = current->tgid; 3170 3171 cifs_dbg(FYI, "write_end for page %p from pos %lld with %d bytes\n", 3172 page, pos, copied); 3173 3174 if (folio_test_checked(folio)) { 3175 if (copied == len) 3176 folio_mark_uptodate(folio); 3177 folio_clear_checked(folio); 3178 } else if (!folio_test_uptodate(folio) && copied == PAGE_SIZE) 3179 folio_mark_uptodate(folio); 3180 3181 if (!folio_test_uptodate(folio)) { 3182 char *page_data; 3183 unsigned offset = pos & (PAGE_SIZE - 1); 3184 unsigned int xid; 3185 3186 xid = get_xid(); 3187 /* this is probably better than directly calling 3188 partialpage_write since in this function the file handle is 3189 known which we might as well leverage */ 3190 /* BB check if anything else missing out of ppw 3191 such as updating last write time */ 3192 page_data = kmap(page); 3193 rc = cifs_write(cfile, pid, 
page_data + offset, copied, &pos); 3194 /* if (rc < 0) should we set writebehind rc? */ 3195 kunmap(page); 3196 3197 free_xid(xid); 3198 } else { 3199 rc = copied; 3200 pos += copied; 3201 set_page_dirty(page); 3202 } 3203 3204 if (rc > 0) { 3205 spin_lock(&inode->i_lock); 3206 if (pos > inode->i_size) { 3207 loff_t additional_blocks = (512 - 1 + copied) >> 9; 3208 3209 i_size_write(inode, pos); 3210 /* 3211 * Estimate new allocation size based on the amount written. 3212 * This will be updated from server on close (and on queryinfo) 3213 */ 3214 inode->i_blocks = min_t(blkcnt_t, (512 - 1 + pos) >> 9, 3215 inode->i_blocks + additional_blocks); 3216 } 3217 spin_unlock(&inode->i_lock); 3218 } 3219 3220 unlock_page(page); 3221 put_page(page); 3222 /* Indication to update ctime and mtime as close is deferred */ 3223 set_bit(CIFS_INO_MODIFIED_ATTR, &CIFS_I(inode)->flags); 3224 3225 return rc; 3226 } 3227 3228 int cifs_strict_fsync(struct file *file, loff_t start, loff_t end, 3229 int datasync) 3230 { 3231 unsigned int xid; 3232 int rc = 0; 3233 struct cifs_tcon *tcon; 3234 struct TCP_Server_Info *server; 3235 struct cifsFileInfo *smbfile = file->private_data; 3236 struct inode *inode = file_inode(file); 3237 struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb); 3238 3239 rc = file_write_and_wait_range(file, start, end); 3240 if (rc) { 3241 trace_cifs_fsync_err(inode->i_ino, rc); 3242 return rc; 3243 } 3244 3245 xid = get_xid(); 3246 3247 cifs_dbg(FYI, "Sync file - name: %pD datasync: 0x%x\n", 3248 file, datasync); 3249 3250 if (!CIFS_CACHE_READ(CIFS_I(inode))) { 3251 rc = cifs_zap_mapping(inode); 3252 if (rc) { 3253 cifs_dbg(FYI, "rc: %d during invalidate phase\n", rc); 3254 rc = 0; /* don't care about it in fsync */ 3255 } 3256 } 3257 3258 tcon = tlink_tcon(smbfile->tlink); 3259 if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC)) { 3260 server = tcon->ses->server; 3261 if (server->ops->flush == NULL) { 3262 rc = -ENOSYS; 3263 goto strict_fsync_exit; 3264 } 3265 3266 if ((OPEN_FMODE(smbfile->f_flags) & FMODE_WRITE) == 0) { 3267 smbfile = find_writable_file(CIFS_I(inode), FIND_WR_ANY); 3268 if (smbfile) { 3269 rc = server->ops->flush(xid, tcon, &smbfile->fid); 3270 cifsFileInfo_put(smbfile); 3271 } else 3272 cifs_dbg(FYI, "ignore fsync for file not open for write\n"); 3273 } else 3274 rc = server->ops->flush(xid, tcon, &smbfile->fid); 3275 } 3276 3277 strict_fsync_exit: 3278 free_xid(xid); 3279 return rc; 3280 } 3281 3282 int cifs_fsync(struct file *file, loff_t start, loff_t end, int datasync) 3283 { 3284 unsigned int xid; 3285 int rc = 0; 3286 struct cifs_tcon *tcon; 3287 struct TCP_Server_Info *server; 3288 struct cifsFileInfo *smbfile = file->private_data; 3289 struct inode *inode = file_inode(file); 3290 struct cifs_sb_info *cifs_sb = CIFS_FILE_SB(file); 3291 3292 rc = file_write_and_wait_range(file, start, end); 3293 if (rc) { 3294 trace_cifs_fsync_err(file_inode(file)->i_ino, rc); 3295 return rc; 3296 } 3297 3298 xid = get_xid(); 3299 3300 cifs_dbg(FYI, "Sync file - name: %pD datasync: 0x%x\n", 3301 file, datasync); 3302 3303 tcon = tlink_tcon(smbfile->tlink); 3304 if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC)) { 3305 server = tcon->ses->server; 3306 if (server->ops->flush == NULL) { 3307 rc = -ENOSYS; 3308 goto fsync_exit; 3309 } 3310 3311 if ((OPEN_FMODE(smbfile->f_flags) & FMODE_WRITE) == 0) { 3312 smbfile = find_writable_file(CIFS_I(inode), FIND_WR_ANY); 3313 if (smbfile) { 3314 rc = server->ops->flush(xid, tcon, &smbfile->fid); 3315 cifsFileInfo_put(smbfile); 3316 } else 3317 
cifs_dbg(FYI, "ignore fsync for file not open for write\n"); 3318 } else 3319 rc = server->ops->flush(xid, tcon, &smbfile->fid); 3320 } 3321 3322 fsync_exit: 3323 free_xid(xid); 3324 return rc; 3325 } 3326 3327 /* 3328 * As file closes, flush all cached write data for this inode checking 3329 * for write behind errors. 3330 */ 3331 int cifs_flush(struct file *file, fl_owner_t id) 3332 { 3333 struct inode *inode = file_inode(file); 3334 int rc = 0; 3335 3336 if (file->f_mode & FMODE_WRITE) 3337 rc = filemap_write_and_wait(inode->i_mapping); 3338 3339 cifs_dbg(FYI, "Flush inode %p file %p rc %d\n", inode, file, rc); 3340 if (rc) { 3341 /* get more nuanced writeback errors */ 3342 rc = filemap_check_wb_err(file->f_mapping, 0); 3343 trace_cifs_flush_err(inode->i_ino, rc); 3344 } 3345 return rc; 3346 } 3347 3348 static void 3349 cifs_uncached_writedata_release(struct kref *refcount) 3350 { 3351 struct cifs_writedata *wdata = container_of(refcount, 3352 struct cifs_writedata, refcount); 3353 3354 kref_put(&wdata->ctx->refcount, cifs_aio_ctx_release); 3355 cifs_writedata_release(refcount); 3356 } 3357 3358 static void collect_uncached_write_data(struct cifs_aio_ctx *ctx); 3359 3360 static void 3361 cifs_uncached_writev_complete(struct work_struct *work) 3362 { 3363 struct cifs_writedata *wdata = container_of(work, 3364 struct cifs_writedata, work); 3365 struct inode *inode = d_inode(wdata->cfile->dentry); 3366 struct cifsInodeInfo *cifsi = CIFS_I(inode); 3367 3368 spin_lock(&inode->i_lock); 3369 cifs_update_eof(cifsi, wdata->offset, wdata->bytes); 3370 if (cifsi->server_eof > inode->i_size) 3371 i_size_write(inode, cifsi->server_eof); 3372 spin_unlock(&inode->i_lock); 3373 3374 complete(&wdata->done); 3375 collect_uncached_write_data(wdata->ctx); 3376 /* the below call can possibly free the last ref to aio ctx */ 3377 kref_put(&wdata->refcount, cifs_uncached_writedata_release); 3378 } 3379 3380 static int 3381 cifs_resend_wdata(struct cifs_writedata *wdata, struct list_head *wdata_list, 3382 struct cifs_aio_ctx *ctx) 3383 { 3384 unsigned int wsize; 3385 struct cifs_credits credits; 3386 int rc; 3387 struct TCP_Server_Info *server = wdata->server; 3388 3389 do { 3390 if (wdata->cfile->invalidHandle) { 3391 rc = cifs_reopen_file(wdata->cfile, false); 3392 if (rc == -EAGAIN) 3393 continue; 3394 else if (rc) 3395 break; 3396 } 3397 3398 3399 /* 3400 * Wait for credits to resend this wdata. 
3401 * Note: we are attempting to resend the whole wdata not in 3402 * segments 3403 */ 3404 do { 3405 rc = server->ops->wait_mtu_credits(server, wdata->bytes, 3406 &wsize, &credits); 3407 if (rc) 3408 goto fail; 3409 3410 if (wsize < wdata->bytes) { 3411 add_credits_and_wake_if(server, &credits, 0); 3412 msleep(1000); 3413 } 3414 } while (wsize < wdata->bytes); 3415 wdata->credits = credits; 3416 3417 rc = adjust_credits(server, &wdata->credits, wdata->bytes); 3418 3419 if (!rc) { 3420 if (wdata->cfile->invalidHandle) 3421 rc = -EAGAIN; 3422 else { 3423 wdata->replay = true; 3424 #ifdef CONFIG_CIFS_SMB_DIRECT 3425 if (wdata->mr) { 3426 wdata->mr->need_invalidate = true; 3427 smbd_deregister_mr(wdata->mr); 3428 wdata->mr = NULL; 3429 } 3430 #endif 3431 rc = server->ops->async_writev(wdata, 3432 cifs_uncached_writedata_release); 3433 } 3434 } 3435 3436 /* If the write was successfully sent, we are done */ 3437 if (!rc) { 3438 list_add_tail(&wdata->list, wdata_list); 3439 return 0; 3440 } 3441 3442 /* Roll back credits and retry if needed */ 3443 add_credits_and_wake_if(server, &wdata->credits, 0); 3444 } while (rc == -EAGAIN); 3445 3446 fail: 3447 kref_put(&wdata->refcount, cifs_uncached_writedata_release); 3448 return rc; 3449 } 3450 3451 /* 3452 * Select span of a bvec iterator we're going to use. Limit it by both maximum 3453 * size and maximum number of segments. 3454 */ 3455 static size_t cifs_limit_bvec_subset(const struct iov_iter *iter, size_t max_size, 3456 size_t max_segs, unsigned int *_nsegs) 3457 { 3458 const struct bio_vec *bvecs = iter->bvec; 3459 unsigned int nbv = iter->nr_segs, ix = 0, nsegs = 0; 3460 size_t len, span = 0, n = iter->count; 3461 size_t skip = iter->iov_offset; 3462 3463 if (WARN_ON(!iov_iter_is_bvec(iter)) || n == 0) 3464 return 0; 3465 3466 while (n && ix < nbv && skip) { 3467 len = bvecs[ix].bv_len; 3468 if (skip < len) 3469 break; 3470 skip -= len; 3471 n -= len; 3472 ix++; 3473 } 3474 3475 while (n && ix < nbv) { 3476 len = min3(n, bvecs[ix].bv_len - skip, max_size); 3477 span += len; 3478 max_size -= len; 3479 nsegs++; 3480 ix++; 3481 if (max_size == 0 || nsegs >= max_segs) 3482 break; 3483 skip = 0; 3484 n -= len; 3485 } 3486 3487 *_nsegs = nsegs; 3488 return span; 3489 } 3490 3491 static int 3492 cifs_write_from_iter(loff_t fpos, size_t len, struct iov_iter *from, 3493 struct cifsFileInfo *open_file, 3494 struct cifs_sb_info *cifs_sb, struct list_head *wdata_list, 3495 struct cifs_aio_ctx *ctx) 3496 { 3497 int rc = 0; 3498 size_t cur_len, max_len; 3499 struct cifs_writedata *wdata; 3500 pid_t pid; 3501 struct TCP_Server_Info *server; 3502 unsigned int xid, max_segs = INT_MAX; 3503 3504 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD) 3505 pid = open_file->pid; 3506 else 3507 pid = current->tgid; 3508 3509 server = cifs_pick_channel(tlink_tcon(open_file->tlink)->ses); 3510 xid = get_xid(); 3511 3512 #ifdef CONFIG_CIFS_SMB_DIRECT 3513 if (server->smbd_conn) 3514 max_segs = server->smbd_conn->max_frmr_depth; 3515 #endif 3516 3517 do { 3518 struct cifs_credits credits_on_stack; 3519 struct cifs_credits *credits = &credits_on_stack; 3520 unsigned int wsize, nsegs = 0; 3521 3522 if (signal_pending(current)) { 3523 rc = -EINTR; 3524 break; 3525 } 3526 3527 if (open_file->invalidHandle) { 3528 rc = cifs_reopen_file(open_file, false); 3529 if (rc == -EAGAIN) 3530 continue; 3531 else if (rc) 3532 break; 3533 } 3534 3535 rc = server->ops->wait_mtu_credits(server, cifs_sb->ctx->wsize, 3536 &wsize, credits); 3537 if (rc) 3538 break; 3539 3540 max_len = 
min_t(const size_t, len, wsize); 3541 if (!max_len) { 3542 rc = -EAGAIN; 3543 add_credits_and_wake_if(server, credits, 0); 3544 break; 3545 } 3546 3547 cur_len = cifs_limit_bvec_subset(from, max_len, max_segs, &nsegs); 3548 cifs_dbg(FYI, "write_from_iter len=%zx/%zx nsegs=%u/%lu/%u\n", 3549 cur_len, max_len, nsegs, from->nr_segs, max_segs); 3550 if (cur_len == 0) { 3551 rc = -EIO; 3552 add_credits_and_wake_if(server, credits, 0); 3553 break; 3554 } 3555 3556 wdata = cifs_writedata_alloc(cifs_uncached_writev_complete); 3557 if (!wdata) { 3558 rc = -ENOMEM; 3559 add_credits_and_wake_if(server, credits, 0); 3560 break; 3561 } 3562 3563 wdata->sync_mode = WB_SYNC_ALL; 3564 wdata->offset = (__u64)fpos; 3565 wdata->cfile = cifsFileInfo_get(open_file); 3566 wdata->server = server; 3567 wdata->pid = pid; 3568 wdata->bytes = cur_len; 3569 wdata->credits = credits_on_stack; 3570 wdata->iter = *from; 3571 wdata->ctx = ctx; 3572 kref_get(&ctx->refcount); 3573 3574 iov_iter_truncate(&wdata->iter, cur_len); 3575 3576 rc = adjust_credits(server, &wdata->credits, wdata->bytes); 3577 3578 if (!rc) { 3579 if (wdata->cfile->invalidHandle) 3580 rc = -EAGAIN; 3581 else 3582 rc = server->ops->async_writev(wdata, 3583 cifs_uncached_writedata_release); 3584 } 3585 3586 if (rc) { 3587 add_credits_and_wake_if(server, &wdata->credits, 0); 3588 kref_put(&wdata->refcount, 3589 cifs_uncached_writedata_release); 3590 if (rc == -EAGAIN) 3591 continue; 3592 break; 3593 } 3594 3595 list_add_tail(&wdata->list, wdata_list); 3596 iov_iter_advance(from, cur_len); 3597 fpos += cur_len; 3598 len -= cur_len; 3599 } while (len > 0); 3600 3601 free_xid(xid); 3602 return rc; 3603 } 3604 3605 static void collect_uncached_write_data(struct cifs_aio_ctx *ctx) 3606 { 3607 struct cifs_writedata *wdata, *tmp; 3608 struct cifs_tcon *tcon; 3609 struct cifs_sb_info *cifs_sb; 3610 struct dentry *dentry = ctx->cfile->dentry; 3611 ssize_t rc; 3612 3613 tcon = tlink_tcon(ctx->cfile->tlink); 3614 cifs_sb = CIFS_SB(dentry->d_sb); 3615 3616 mutex_lock(&ctx->aio_mutex); 3617 3618 if (list_empty(&ctx->list)) { 3619 mutex_unlock(&ctx->aio_mutex); 3620 return; 3621 } 3622 3623 rc = ctx->rc; 3624 /* 3625 * Wait for and collect replies for any successful sends in order of 3626 * increasing offset. Once an error is hit, then return without waiting 3627 * for any more replies. 
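* Writes that failed with -EAGAIN are resent (as a whole for direct I/O, re-split from the source iterator otherwise) and the list scan restarts.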
3628 */ 3629 restart_loop: 3630 list_for_each_entry_safe(wdata, tmp, &ctx->list, list) { 3631 if (!rc) { 3632 if (!try_wait_for_completion(&wdata->done)) { 3633 mutex_unlock(&ctx->aio_mutex); 3634 return; 3635 } 3636 3637 if (wdata->result) 3638 rc = wdata->result; 3639 else 3640 ctx->total_len += wdata->bytes; 3641 3642 /* resend call if it's a retryable error */ 3643 if (rc == -EAGAIN) { 3644 struct list_head tmp_list; 3645 struct iov_iter tmp_from = ctx->iter; 3646 3647 INIT_LIST_HEAD(&tmp_list); 3648 list_del_init(&wdata->list); 3649 3650 if (ctx->direct_io) 3651 rc = cifs_resend_wdata( 3652 wdata, &tmp_list, ctx); 3653 else { 3654 iov_iter_advance(&tmp_from, 3655 wdata->offset - ctx->pos); 3656 3657 rc = cifs_write_from_iter(wdata->offset, 3658 wdata->bytes, &tmp_from, 3659 ctx->cfile, cifs_sb, &tmp_list, 3660 ctx); 3661 3662 kref_put(&wdata->refcount, 3663 cifs_uncached_writedata_release); 3664 } 3665 3666 list_splice(&tmp_list, &ctx->list); 3667 goto restart_loop; 3668 } 3669 } 3670 list_del_init(&wdata->list); 3671 kref_put(&wdata->refcount, cifs_uncached_writedata_release); 3672 } 3673 3674 cifs_stats_bytes_written(tcon, ctx->total_len); 3675 set_bit(CIFS_INO_INVALID_MAPPING, &CIFS_I(dentry->d_inode)->flags); 3676 3677 ctx->rc = (rc == 0) ? ctx->total_len : rc; 3678 3679 mutex_unlock(&ctx->aio_mutex); 3680 3681 if (ctx->iocb && ctx->iocb->ki_complete) 3682 ctx->iocb->ki_complete(ctx->iocb, ctx->rc); 3683 else 3684 complete(&ctx->done); 3685 } 3686 3687 static ssize_t __cifs_writev( 3688 struct kiocb *iocb, struct iov_iter *from, bool direct) 3689 { 3690 struct file *file = iocb->ki_filp; 3691 ssize_t total_written = 0; 3692 struct cifsFileInfo *cfile; 3693 struct cifs_tcon *tcon; 3694 struct cifs_sb_info *cifs_sb; 3695 struct cifs_aio_ctx *ctx; 3696 int rc; 3697 3698 rc = generic_write_checks(iocb, from); 3699 if (rc <= 0) 3700 return rc; 3701 3702 cifs_sb = CIFS_FILE_SB(file); 3703 cfile = file->private_data; 3704 tcon = tlink_tcon(cfile->tlink); 3705 3706 if (!tcon->ses->server->ops->async_writev) 3707 return -ENOSYS; 3708 3709 ctx = cifs_aio_ctx_alloc(); 3710 if (!ctx) 3711 return -ENOMEM; 3712 3713 ctx->cfile = cifsFileInfo_get(cfile); 3714 3715 if (!is_sync_kiocb(iocb)) 3716 ctx->iocb = iocb; 3717 3718 ctx->pos = iocb->ki_pos; 3719 ctx->direct_io = direct; 3720 ctx->nr_pinned_pages = 0; 3721 3722 if (user_backed_iter(from)) { 3723 /* 3724 * Extract IOVEC/UBUF-type iterators to a BVEC-type iterator as 3725 * they contain references to the calling process's virtual 3726 * memory layout which won't be available in an async worker 3727 * thread. This also takes a pin on every folio involved. 3728 */ 3729 rc = netfs_extract_user_iter(from, iov_iter_count(from), 3730 &ctx->iter, 0); 3731 if (rc < 0) { 3732 kref_put(&ctx->refcount, cifs_aio_ctx_release); 3733 return rc; 3734 } 3735 3736 ctx->nr_pinned_pages = rc; 3737 ctx->bv = (void *)ctx->iter.bvec; 3738 ctx->bv_need_unpin = iov_iter_extract_will_pin(from); 3739 } else if ((iov_iter_is_bvec(from) || iov_iter_is_kvec(from)) && 3740 !is_sync_kiocb(iocb)) { 3741 /* 3742 * If the op is asynchronous, we need to copy the list attached 3743 * to a BVEC/KVEC-type iterator, but we assume that the storage 3744 * will be pinned by the caller; in any case, we may or may not 3745 * be able to pin the pages, so we don't try. 
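* dup_iter() duplicates the segment array itself, so the copy stays valid after the caller's stack frame is gone.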
3746 */ 3747 ctx->bv = (void *)dup_iter(&ctx->iter, from, GFP_KERNEL); 3748 if (!ctx->bv) { 3749 kref_put(&ctx->refcount, cifs_aio_ctx_release); 3750 return -ENOMEM; 3751 } 3752 } else { 3753 /* 3754 * Otherwise, we just pass the iterator down as-is and rely on 3755 * the caller to make sure the pages referred to by the 3756 * iterator don't evaporate. 3757 */ 3758 ctx->iter = *from; 3759 } 3760 3761 ctx->len = iov_iter_count(&ctx->iter); 3762 3763 /* grab a lock here due to read response handlers can access ctx */ 3764 mutex_lock(&ctx->aio_mutex); 3765 3766 rc = cifs_write_from_iter(iocb->ki_pos, ctx->len, &ctx->iter, 3767 cfile, cifs_sb, &ctx->list, ctx); 3768 3769 /* 3770 * If at least one write was successfully sent, then discard any rc 3771 * value from the later writes. If the other write succeeds, then 3772 * we'll end up returning whatever was written. If it fails, then 3773 * we'll get a new rc value from that. 3774 */ 3775 if (!list_empty(&ctx->list)) 3776 rc = 0; 3777 3778 mutex_unlock(&ctx->aio_mutex); 3779 3780 if (rc) { 3781 kref_put(&ctx->refcount, cifs_aio_ctx_release); 3782 return rc; 3783 } 3784 3785 if (!is_sync_kiocb(iocb)) { 3786 kref_put(&ctx->refcount, cifs_aio_ctx_release); 3787 return -EIOCBQUEUED; 3788 } 3789 3790 rc = wait_for_completion_killable(&ctx->done); 3791 if (rc) { 3792 mutex_lock(&ctx->aio_mutex); 3793 ctx->rc = rc = -EINTR; 3794 total_written = ctx->total_len; 3795 mutex_unlock(&ctx->aio_mutex); 3796 } else { 3797 rc = ctx->rc; 3798 total_written = ctx->total_len; 3799 } 3800 3801 kref_put(&ctx->refcount, cifs_aio_ctx_release); 3802 3803 if (unlikely(!total_written)) 3804 return rc; 3805 3806 iocb->ki_pos += total_written; 3807 return total_written; 3808 } 3809 3810 ssize_t cifs_direct_writev(struct kiocb *iocb, struct iov_iter *from) 3811 { 3812 struct file *file = iocb->ki_filp; 3813 3814 cifs_revalidate_mapping(file->f_inode); 3815 return __cifs_writev(iocb, from, true); 3816 } 3817 3818 ssize_t cifs_user_writev(struct kiocb *iocb, struct iov_iter *from) 3819 { 3820 return __cifs_writev(iocb, from, false); 3821 } 3822 3823 static ssize_t 3824 cifs_writev(struct kiocb *iocb, struct iov_iter *from) 3825 { 3826 struct file *file = iocb->ki_filp; 3827 struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data; 3828 struct inode *inode = file->f_mapping->host; 3829 struct cifsInodeInfo *cinode = CIFS_I(inode); 3830 struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server; 3831 ssize_t rc; 3832 3833 inode_lock(inode); 3834 /* 3835 * We need to hold the sem to be sure nobody modifies lock list 3836 * with a brlock that prevents writing. 
3837 */ 3838 down_read(&cinode->lock_sem); 3839 3840 rc = generic_write_checks(iocb, from); 3841 if (rc <= 0) 3842 goto out; 3843 3844 if (!cifs_find_lock_conflict(cfile, iocb->ki_pos, iov_iter_count(from), 3845 server->vals->exclusive_lock_type, 0, 3846 NULL, CIFS_WRITE_OP)) 3847 rc = __generic_file_write_iter(iocb, from); 3848 else 3849 rc = -EACCES; 3850 out: 3851 up_read(&cinode->lock_sem); 3852 inode_unlock(inode); 3853 3854 if (rc > 0) 3855 rc = generic_write_sync(iocb, rc); 3856 return rc; 3857 } 3858 3859 ssize_t 3860 cifs_strict_writev(struct kiocb *iocb, struct iov_iter *from) 3861 { 3862 struct inode *inode = file_inode(iocb->ki_filp); 3863 struct cifsInodeInfo *cinode = CIFS_I(inode); 3864 struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb); 3865 struct cifsFileInfo *cfile = (struct cifsFileInfo *) 3866 iocb->ki_filp->private_data; 3867 struct cifs_tcon *tcon = tlink_tcon(cfile->tlink); 3868 ssize_t written; 3869 3870 written = cifs_get_writer(cinode); 3871 if (written) 3872 return written; 3873 3874 if (CIFS_CACHE_WRITE(cinode)) { 3875 if (cap_unix(tcon->ses) && 3876 (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) 3877 && ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0)) { 3878 written = generic_file_write_iter(iocb, from); 3879 goto out; 3880 } 3881 written = cifs_writev(iocb, from); 3882 goto out; 3883 } 3884 /* 3885 * For non-oplocked files in strict cache mode we need to write the data 3886 * to the server exactly from pos to pos+len-1 rather than flush all 3887 * affected pages because it may cause an error with mandatory locks on 3888 * these pages but not on the region from pos to pos+len-1. 3889 */ 3890 written = cifs_user_writev(iocb, from); 3891 if (CIFS_CACHE_READ(cinode)) { 3892 /* 3893 * We have read level caching and we have just sent a write 3894 * request to the server thus making data in the cache stale. 3895 * Zap the cache and set oplock/lease level to NONE to avoid 3896 * reading stale data from the cache. All subsequent read 3897 * operations will read new data from the server.
3898 */ 3899 cifs_zap_mapping(inode); 3900 cifs_dbg(FYI, "Set Oplock/Lease to NONE for inode=%p after write\n", 3901 inode); 3902 cinode->oplock = 0; 3903 } 3904 out: 3905 cifs_put_writer(cinode); 3906 return written; 3907 } 3908 3909 static struct cifs_readdata *cifs_readdata_alloc(work_func_t complete) 3910 { 3911 struct cifs_readdata *rdata; 3912 3913 rdata = kzalloc(sizeof(*rdata), GFP_KERNEL); 3914 if (rdata) { 3915 kref_init(&rdata->refcount); 3916 INIT_LIST_HEAD(&rdata->list); 3917 init_completion(&rdata->done); 3918 INIT_WORK(&rdata->work, complete); 3919 } 3920 3921 return rdata; 3922 } 3923 3924 void 3925 cifs_readdata_release(struct kref *refcount) 3926 { 3927 struct cifs_readdata *rdata = container_of(refcount, 3928 struct cifs_readdata, refcount); 3929 3930 if (rdata->ctx) 3931 kref_put(&rdata->ctx->refcount, cifs_aio_ctx_release); 3932 #ifdef CONFIG_CIFS_SMB_DIRECT 3933 if (rdata->mr) { 3934 smbd_deregister_mr(rdata->mr); 3935 rdata->mr = NULL; 3936 } 3937 #endif 3938 if (rdata->cfile) 3939 cifsFileInfo_put(rdata->cfile); 3940 3941 kfree(rdata); 3942 } 3943 3944 static void collect_uncached_read_data(struct cifs_aio_ctx *ctx); 3945 3946 static void 3947 cifs_uncached_readv_complete(struct work_struct *work) 3948 { 3949 struct cifs_readdata *rdata = container_of(work, 3950 struct cifs_readdata, work); 3951 3952 complete(&rdata->done); 3953 collect_uncached_read_data(rdata->ctx); 3954 /* the below call can possibly free the last ref to aio ctx */ 3955 kref_put(&rdata->refcount, cifs_readdata_release); 3956 } 3957 3958 static int cifs_resend_rdata(struct cifs_readdata *rdata, 3959 struct list_head *rdata_list, 3960 struct cifs_aio_ctx *ctx) 3961 { 3962 unsigned int rsize; 3963 struct cifs_credits credits; 3964 int rc; 3965 struct TCP_Server_Info *server; 3966 3967 /* XXX: should we pick a new channel here? */ 3968 server = rdata->server; 3969 3970 do { 3971 if (rdata->cfile->invalidHandle) { 3972 rc = cifs_reopen_file(rdata->cfile, true); 3973 if (rc == -EAGAIN) 3974 continue; 3975 else if (rc) 3976 break; 3977 } 3978 3979 /* 3980 * Wait for credits to resend this rdata. 
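		 * (wait_mtu_credits() may grant fewer credits than requested,
		 * so the loop below retries, sleeping between attempts, until
		 * the granted rsize covers the whole of rdata->bytes.)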
3981 * Note: we are attempting to resend the whole rdata not in 3982 * segments 3983 */ 3984 do { 3985 rc = server->ops->wait_mtu_credits(server, rdata->bytes, 3986 &rsize, &credits); 3987 3988 if (rc) 3989 goto fail; 3990 3991 if (rsize < rdata->bytes) { 3992 add_credits_and_wake_if(server, &credits, 0); 3993 msleep(1000); 3994 } 3995 } while (rsize < rdata->bytes); 3996 rdata->credits = credits; 3997 3998 rc = adjust_credits(server, &rdata->credits, rdata->bytes); 3999 if (!rc) { 4000 if (rdata->cfile->invalidHandle) 4001 rc = -EAGAIN; 4002 else { 4003 #ifdef CONFIG_CIFS_SMB_DIRECT 4004 if (rdata->mr) { 4005 rdata->mr->need_invalidate = true; 4006 smbd_deregister_mr(rdata->mr); 4007 rdata->mr = NULL; 4008 } 4009 #endif 4010 rc = server->ops->async_readv(rdata); 4011 } 4012 } 4013 4014 /* If the read was successfully sent, we are done */ 4015 if (!rc) { 4016 /* Add to aio pending list */ 4017 list_add_tail(&rdata->list, rdata_list); 4018 return 0; 4019 } 4020 4021 /* Roll back credits and retry if needed */ 4022 add_credits_and_wake_if(server, &rdata->credits, 0); 4023 } while (rc == -EAGAIN); 4024 4025 fail: 4026 kref_put(&rdata->refcount, cifs_readdata_release); 4027 return rc; 4028 } 4029 4030 static int 4031 cifs_send_async_read(loff_t fpos, size_t len, struct cifsFileInfo *open_file, 4032 struct cifs_sb_info *cifs_sb, struct list_head *rdata_list, 4033 struct cifs_aio_ctx *ctx) 4034 { 4035 struct cifs_readdata *rdata; 4036 unsigned int rsize, nsegs, max_segs = INT_MAX; 4037 struct cifs_credits credits_on_stack; 4038 struct cifs_credits *credits = &credits_on_stack; 4039 size_t cur_len, max_len; 4040 int rc; 4041 pid_t pid; 4042 struct TCP_Server_Info *server; 4043 4044 server = cifs_pick_channel(tlink_tcon(open_file->tlink)->ses); 4045 4046 #ifdef CONFIG_CIFS_SMB_DIRECT 4047 if (server->smbd_conn) 4048 max_segs = server->smbd_conn->max_frmr_depth; 4049 #endif 4050 4051 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD) 4052 pid = open_file->pid; 4053 else 4054 pid = current->tgid; 4055 4056 do { 4057 if (open_file->invalidHandle) { 4058 rc = cifs_reopen_file(open_file, true); 4059 if (rc == -EAGAIN) 4060 continue; 4061 else if (rc) 4062 break; 4063 } 4064 4065 if (cifs_sb->ctx->rsize == 0) 4066 cifs_sb->ctx->rsize = 4067 server->ops->negotiate_rsize(tlink_tcon(open_file->tlink), 4068 cifs_sb->ctx); 4069 4070 rc = server->ops->wait_mtu_credits(server, cifs_sb->ctx->rsize, 4071 &rsize, credits); 4072 if (rc) 4073 break; 4074 4075 max_len = min_t(size_t, len, rsize); 4076 4077 cur_len = cifs_limit_bvec_subset(&ctx->iter, max_len, 4078 max_segs, &nsegs); 4079 cifs_dbg(FYI, "read-to-iter len=%zx/%zx nsegs=%u/%lu/%u\n", 4080 cur_len, max_len, nsegs, ctx->iter.nr_segs, max_segs); 4081 if (cur_len == 0) { 4082 rc = -EIO; 4083 add_credits_and_wake_if(server, credits, 0); 4084 break; 4085 } 4086 4087 rdata = cifs_readdata_alloc(cifs_uncached_readv_complete); 4088 if (!rdata) { 4089 add_credits_and_wake_if(server, credits, 0); 4090 rc = -ENOMEM; 4091 break; 4092 } 4093 4094 rdata->server = server; 4095 rdata->cfile = cifsFileInfo_get(open_file); 4096 rdata->offset = fpos; 4097 rdata->bytes = cur_len; 4098 rdata->pid = pid; 4099 rdata->credits = credits_on_stack; 4100 rdata->ctx = ctx; 4101 kref_get(&ctx->refcount); 4102 4103 rdata->iter = ctx->iter; 4104 iov_iter_truncate(&rdata->iter, cur_len); 4105 4106 rc = adjust_credits(server, &rdata->credits, rdata->bytes); 4107 4108 if (!rc) { 4109 if (rdata->cfile->invalidHandle) 4110 rc = -EAGAIN; 4111 else 4112 rc = 
server->ops->async_readv(rdata); 4113 } 4114 4115 if (rc) { 4116 add_credits_and_wake_if(server, &rdata->credits, 0); 4117 kref_put(&rdata->refcount, cifs_readdata_release); 4118 if (rc == -EAGAIN) 4119 continue; 4120 break; 4121 } 4122 4123 list_add_tail(&rdata->list, rdata_list); 4124 iov_iter_advance(&ctx->iter, cur_len); 4125 fpos += cur_len; 4126 len -= cur_len; 4127 } while (len > 0); 4128 4129 return rc; 4130 } 4131 4132 static void 4133 collect_uncached_read_data(struct cifs_aio_ctx *ctx) 4134 { 4135 struct cifs_readdata *rdata, *tmp; 4136 struct cifs_sb_info *cifs_sb; 4137 int rc; 4138 4139 cifs_sb = CIFS_SB(ctx->cfile->dentry->d_sb); 4140 4141 mutex_lock(&ctx->aio_mutex); 4142 4143 if (list_empty(&ctx->list)) { 4144 mutex_unlock(&ctx->aio_mutex); 4145 return; 4146 } 4147 4148 rc = ctx->rc; 4149 /* the loop below should proceed in the order of increasing offsets */ 4150 again: 4151 list_for_each_entry_safe(rdata, tmp, &ctx->list, list) { 4152 if (!rc) { 4153 if (!try_wait_for_completion(&rdata->done)) { 4154 mutex_unlock(&ctx->aio_mutex); 4155 return; 4156 } 4157 4158 if (rdata->result == -EAGAIN) { 4159 /* resend call if it's a retryable error */ 4160 struct list_head tmp_list; 4161 unsigned int got_bytes = rdata->got_bytes; 4162 4163 list_del_init(&rdata->list); 4164 INIT_LIST_HEAD(&tmp_list); 4165 4166 if (ctx->direct_io) { 4167 /* 4168 * Re-use rdata as this is a 4169 * direct I/O 4170 */ 4171 rc = cifs_resend_rdata( 4172 rdata, 4173 &tmp_list, ctx); 4174 } else { 4175 rc = cifs_send_async_read( 4176 rdata->offset + got_bytes, 4177 rdata->bytes - got_bytes, 4178 rdata->cfile, cifs_sb, 4179 &tmp_list, ctx); 4180 4181 kref_put(&rdata->refcount, 4182 cifs_readdata_release); 4183 } 4184 4185 list_splice(&tmp_list, &ctx->list); 4186 4187 goto again; 4188 } else if (rdata->result) 4189 rc = rdata->result; 4190 4191 /* if there was a short read -- discard anything left */ 4192 if (rdata->got_bytes && rdata->got_bytes < rdata->bytes) 4193 rc = -ENODATA; 4194 4195 ctx->total_len += rdata->got_bytes; 4196 } 4197 list_del_init(&rdata->list); 4198 kref_put(&rdata->refcount, cifs_readdata_release); 4199 } 4200 4201 /* mask nodata case */ 4202 if (rc == -ENODATA) 4203 rc = 0; 4204 4205 ctx->rc = (rc == 0) ? 
(ssize_t)ctx->total_len : rc; 4206 4207 mutex_unlock(&ctx->aio_mutex); 4208 4209 if (ctx->iocb && ctx->iocb->ki_complete) 4210 ctx->iocb->ki_complete(ctx->iocb, ctx->rc); 4211 else 4212 complete(&ctx->done); 4213 } 4214 4215 static ssize_t __cifs_readv( 4216 struct kiocb *iocb, struct iov_iter *to, bool direct) 4217 { 4218 size_t len; 4219 struct file *file = iocb->ki_filp; 4220 struct cifs_sb_info *cifs_sb; 4221 struct cifsFileInfo *cfile; 4222 struct cifs_tcon *tcon; 4223 ssize_t rc, total_read = 0; 4224 loff_t offset = iocb->ki_pos; 4225 struct cifs_aio_ctx *ctx; 4226 4227 len = iov_iter_count(to); 4228 if (!len) 4229 return 0; 4230 4231 cifs_sb = CIFS_FILE_SB(file); 4232 cfile = file->private_data; 4233 tcon = tlink_tcon(cfile->tlink); 4234 4235 if (!tcon->ses->server->ops->async_readv) 4236 return -ENOSYS; 4237 4238 if ((file->f_flags & O_ACCMODE) == O_WRONLY) 4239 cifs_dbg(FYI, "attempting read on write only file instance\n"); 4240 4241 ctx = cifs_aio_ctx_alloc(); 4242 if (!ctx) 4243 return -ENOMEM; 4244 4245 ctx->pos = offset; 4246 ctx->direct_io = direct; 4247 ctx->len = len; 4248 ctx->cfile = cifsFileInfo_get(cfile); 4249 ctx->nr_pinned_pages = 0; 4250 4251 if (!is_sync_kiocb(iocb)) 4252 ctx->iocb = iocb; 4253 4254 if (user_backed_iter(to)) { 4255 /* 4256 * Extract IOVEC/UBUF-type iterators to a BVEC-type iterator as 4257 * they contain references to the calling process's virtual 4258 * memory layout which won't be available in an async worker 4259 * thread. This also takes a pin on every folio involved. 4260 */ 4261 rc = netfs_extract_user_iter(to, iov_iter_count(to), 4262 &ctx->iter, 0); 4263 if (rc < 0) { 4264 kref_put(&ctx->refcount, cifs_aio_ctx_release); 4265 return rc; 4266 } 4267 4268 ctx->nr_pinned_pages = rc; 4269 ctx->bv = (void *)ctx->iter.bvec; 4270 ctx->bv_need_unpin = iov_iter_extract_will_pin(to); 4271 ctx->should_dirty = true; 4272 } else if ((iov_iter_is_bvec(to) || iov_iter_is_kvec(to)) && 4273 !is_sync_kiocb(iocb)) { 4274 /* 4275 * If the op is asynchronous, we need to copy the list attached 4276 * to a BVEC/KVEC-type iterator, but we assume that the storage 4277 * will be retained by the caller; in any case, we may or may 4278 * not be able to pin the pages, so we don't try. 4279 */ 4280 ctx->bv = (void *)dup_iter(&ctx->iter, to, GFP_KERNEL); 4281 if (!ctx->bv) { 4282 kref_put(&ctx->refcount, cifs_aio_ctx_release); 4283 return -ENOMEM; 4284 } 4285 } else { 4286 /* 4287 * Otherwise, we just pass the iterator down as-is and rely on 4288 * the caller to make sure the pages referred to by the 4289 * iterator don't evaporate. 
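		 * (This is typically the synchronous, kernel-backed iterator
		 * case: the caller stays blocked until the I/O completes, so
		 * the backing pages remain live for the duration.)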
4290 		 */
4291 		ctx->iter = *to;
4292 	}
4293 
4294 	if (direct) {
4295 		rc = filemap_write_and_wait_range(file->f_inode->i_mapping,
4296 						  offset, offset + len - 1);
4297 		if (rc) {
4298 			kref_put(&ctx->refcount, cifs_aio_ctx_release);
4299 			return -EAGAIN;
4300 		}
4301 	}
4302 
4303 	/* grab the mutex here because read response handlers can access ctx */
4304 	mutex_lock(&ctx->aio_mutex);
4305 
4306 	rc = cifs_send_async_read(offset, len, cfile, cifs_sb, &ctx->list, ctx);
4307 
4308 	/* if at least one read request was successfully sent, then reset rc */
4309 	if (!list_empty(&ctx->list))
4310 		rc = 0;
4311 
4312 	mutex_unlock(&ctx->aio_mutex);
4313 
4314 	if (rc) {
4315 		kref_put(&ctx->refcount, cifs_aio_ctx_release);
4316 		return rc;
4317 	}
4318 
4319 	if (!is_sync_kiocb(iocb)) {
4320 		kref_put(&ctx->refcount, cifs_aio_ctx_release);
4321 		return -EIOCBQUEUED;
4322 	}
4323 
4324 	rc = wait_for_completion_killable(&ctx->done);
4325 	if (rc) {
4326 		mutex_lock(&ctx->aio_mutex);
4327 		ctx->rc = rc = -EINTR;
4328 		total_read = ctx->total_len;
4329 		mutex_unlock(&ctx->aio_mutex);
4330 	} else {
4331 		rc = ctx->rc;
4332 		total_read = ctx->total_len;
4333 	}
4334 
4335 	kref_put(&ctx->refcount, cifs_aio_ctx_release);
4336 
4337 	if (total_read) {
4338 		iocb->ki_pos += total_read;
4339 		return total_read;
4340 	}
4341 	return rc;
4342 }
4343 
4344 ssize_t cifs_direct_readv(struct kiocb *iocb, struct iov_iter *to)
4345 {
4346 	return __cifs_readv(iocb, to, true);
4347 }
4348 
4349 ssize_t cifs_user_readv(struct kiocb *iocb, struct iov_iter *to)
4350 {
4351 	return __cifs_readv(iocb, to, false);
4352 }
4353 
4354 ssize_t
4355 cifs_strict_readv(struct kiocb *iocb, struct iov_iter *to)
4356 {
4357 	struct inode *inode = file_inode(iocb->ki_filp);
4358 	struct cifsInodeInfo *cinode = CIFS_I(inode);
4359 	struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
4360 	struct cifsFileInfo *cfile = (struct cifsFileInfo *)
4361 				     iocb->ki_filp->private_data;
4362 	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
4363 	int rc = -EACCES;
4364 
4365 	/*
4366 	 * In strict cache mode we need to read from the server all the time
4367 	 * if we don't have a level II oplock, because the server can delay
4368 	 * mtime change - so we can't make a decision about inode invalidating.
4369 	 * And we can also fail when reading pages if there are mandatory locks
4370 	 * on pages affected by this read but not on the region from pos to
4371 	 * pos+len-1.
4372 	 */
4373 	if (!CIFS_CACHE_READ(cinode))
4374 		return cifs_user_readv(iocb, to);
4375 
4376 	if (cap_unix(tcon->ses) &&
4377 	    (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
4378 	    ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
4379 		return generic_file_read_iter(iocb, to);
4380 
4381 	/*
4382 	 * We need to hold the sem to be sure nobody modifies the lock list
4383 	 * with a brlock that prevents reading.
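	 * (A read only conflicts with exclusive, i.e. write-denying,
	 * brlocks, hence shared_lock_type is passed below; the strict
	 * write path above checks against exclusive_lock_type instead.)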
4384 */ 4385 down_read(&cinode->lock_sem); 4386 if (!cifs_find_lock_conflict(cfile, iocb->ki_pos, iov_iter_count(to), 4387 tcon->ses->server->vals->shared_lock_type, 4388 0, NULL, CIFS_READ_OP)) 4389 rc = generic_file_read_iter(iocb, to); 4390 up_read(&cinode->lock_sem); 4391 return rc; 4392 } 4393 4394 static ssize_t 4395 cifs_read(struct file *file, char *read_data, size_t read_size, loff_t *offset) 4396 { 4397 int rc = -EACCES; 4398 unsigned int bytes_read = 0; 4399 unsigned int total_read; 4400 unsigned int current_read_size; 4401 unsigned int rsize; 4402 struct cifs_sb_info *cifs_sb; 4403 struct cifs_tcon *tcon; 4404 struct TCP_Server_Info *server; 4405 unsigned int xid; 4406 char *cur_offset; 4407 struct cifsFileInfo *open_file; 4408 struct cifs_io_parms io_parms = {0}; 4409 int buf_type = CIFS_NO_BUFFER; 4410 __u32 pid; 4411 4412 xid = get_xid(); 4413 cifs_sb = CIFS_FILE_SB(file); 4414 4415 /* FIXME: set up handlers for larger reads and/or convert to async */ 4416 rsize = min_t(unsigned int, cifs_sb->ctx->rsize, CIFSMaxBufSize); 4417 4418 if (file->private_data == NULL) { 4419 rc = -EBADF; 4420 free_xid(xid); 4421 return rc; 4422 } 4423 open_file = file->private_data; 4424 tcon = tlink_tcon(open_file->tlink); 4425 server = cifs_pick_channel(tcon->ses); 4426 4427 if (!server->ops->sync_read) { 4428 free_xid(xid); 4429 return -ENOSYS; 4430 } 4431 4432 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD) 4433 pid = open_file->pid; 4434 else 4435 pid = current->tgid; 4436 4437 if ((file->f_flags & O_ACCMODE) == O_WRONLY) 4438 cifs_dbg(FYI, "attempting read on write only file instance\n"); 4439 4440 for (total_read = 0, cur_offset = read_data; read_size > total_read; 4441 total_read += bytes_read, cur_offset += bytes_read) { 4442 do { 4443 current_read_size = min_t(uint, read_size - total_read, 4444 rsize); 4445 /* 4446 * For windows me and 9x we do not want to request more 4447 * than it negotiated since it will refuse the read 4448 * then. 4449 */ 4450 if (!(tcon->ses->capabilities & 4451 tcon->ses->server->vals->cap_large_files)) { 4452 current_read_size = min_t(uint, 4453 current_read_size, CIFSMaxBufSize); 4454 } 4455 if (open_file->invalidHandle) { 4456 rc = cifs_reopen_file(open_file, true); 4457 if (rc != 0) 4458 break; 4459 } 4460 io_parms.pid = pid; 4461 io_parms.tcon = tcon; 4462 io_parms.offset = *offset; 4463 io_parms.length = current_read_size; 4464 io_parms.server = server; 4465 rc = server->ops->sync_read(xid, &open_file->fid, &io_parms, 4466 &bytes_read, &cur_offset, 4467 &buf_type); 4468 } while (rc == -EAGAIN); 4469 4470 if (rc || (bytes_read == 0)) { 4471 if (total_read) { 4472 break; 4473 } else { 4474 free_xid(xid); 4475 return rc; 4476 } 4477 } else { 4478 cifs_stats_bytes_read(tcon, total_read); 4479 *offset += bytes_read; 4480 } 4481 } 4482 free_xid(xid); 4483 return total_read; 4484 } 4485 4486 /* 4487 * If the page is mmap'ed into a process' page tables, then we need to make 4488 * sure that it doesn't change while being written back. 4489 */ 4490 static vm_fault_t cifs_page_mkwrite(struct vm_fault *vmf) 4491 { 4492 struct folio *folio = page_folio(vmf->page); 4493 4494 /* Wait for the folio to be written to the cache before we allow it to 4495 * be modified. We then assume the entire folio will need writing back. 
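	 * Returning VM_FAULT_LOCKED tells the fault handler that the folio is
	 * locked and may now be dirtied; VM_FAULT_RETRY asks it to repeat the
	 * fault when one of the killable waits below is interrupted.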
4496 	 */
4497 #ifdef CONFIG_CIFS_FSCACHE
4498 	if (folio_test_fscache(folio) &&
4499 	    folio_wait_fscache_killable(folio) < 0)
4500 		return VM_FAULT_RETRY;
4501 #endif
4502 
4503 	folio_wait_writeback(folio);
4504 
4505 	if (folio_lock_killable(folio) < 0)
4506 		return VM_FAULT_RETRY;
4507 	return VM_FAULT_LOCKED;
4508 }
4509 
4510 static const struct vm_operations_struct cifs_file_vm_ops = {
4511 	.fault = filemap_fault,
4512 	.map_pages = filemap_map_pages,
4513 	.page_mkwrite = cifs_page_mkwrite,
4514 };
4515 
4516 int cifs_file_strict_mmap(struct file *file, struct vm_area_struct *vma)
4517 {
4518 	int xid, rc = 0;
4519 	struct inode *inode = file_inode(file);
4520 
4521 	xid = get_xid();
4522 
4523 	if (!CIFS_CACHE_READ(CIFS_I(inode)))
4524 		rc = cifs_zap_mapping(inode);
4525 	if (!rc)
4526 		rc = generic_file_mmap(file, vma);
4527 	if (!rc)
4528 		vma->vm_ops = &cifs_file_vm_ops;
4529 
4530 	free_xid(xid);
4531 	return rc;
4532 }
4533 
4534 int cifs_file_mmap(struct file *file, struct vm_area_struct *vma)
4535 {
4536 	int rc, xid;
4537 
4538 	xid = get_xid();
4539 
4540 	rc = cifs_revalidate_file(file);
4541 	if (rc)
4542 		cifs_dbg(FYI, "Validation prior to mmap failed, error=%d\n",
4543 			 rc);
4544 	if (!rc)
4545 		rc = generic_file_mmap(file, vma);
4546 	if (!rc)
4547 		vma->vm_ops = &cifs_file_vm_ops;
4548 
4549 	free_xid(xid);
4550 	return rc;
4551 }
4552 
4553 /*
4554  * Unlock a bunch of folios in the pagecache.
4555  */
4556 static void cifs_unlock_folios(struct address_space *mapping, pgoff_t first, pgoff_t last)
4557 {
4558 	struct folio *folio;
4559 	XA_STATE(xas, &mapping->i_pages, first);
4560 
4561 	rcu_read_lock();
4562 	xas_for_each(&xas, folio, last) {
4563 		folio_unlock(folio);
4564 	}
4565 	rcu_read_unlock();
4566 }
4567 
4568 static void cifs_readahead_complete(struct work_struct *work)
4569 {
4570 	struct cifs_readdata *rdata = container_of(work,
4571 						   struct cifs_readdata, work);
4572 	struct folio *folio;
4573 	pgoff_t last;
4574 	bool good = rdata->result == 0 || (rdata->result == -EAGAIN && rdata->got_bytes);
4575 
4576 	XA_STATE(xas, &rdata->mapping->i_pages, rdata->offset / PAGE_SIZE);
4577 
4578 	if (good)
4579 		cifs_readahead_to_fscache(rdata->mapping->host,
4580 					  rdata->offset, rdata->bytes);
4581 
4582 	if (iov_iter_count(&rdata->iter) > 0)
4583 		iov_iter_zero(iov_iter_count(&rdata->iter), &rdata->iter);
4584 
4585 	last = (rdata->offset + rdata->bytes - 1) / PAGE_SIZE;
4586 
4587 	rcu_read_lock();
4588 	xas_for_each(&xas, folio, last) {
4589 		if (good) {
4590 			flush_dcache_folio(folio);
4591 			folio_mark_uptodate(folio);
4592 		}
4593 		folio_unlock(folio);
4594 	}
4595 	rcu_read_unlock();
4596 
4597 	kref_put(&rdata->refcount, cifs_readdata_release);
4598 }
4599 
4600 static void cifs_readahead(struct readahead_control *ractl)
4601 {
4602 	struct cifsFileInfo *open_file = ractl->file->private_data;
4603 	struct cifs_sb_info *cifs_sb = CIFS_FILE_SB(ractl->file);
4604 	struct TCP_Server_Info *server;
4605 	unsigned int xid, nr_pages, cache_nr_pages = 0;
4606 	unsigned int ra_pages;
4607 	pgoff_t next_cached = ULONG_MAX, ra_index;
4608 	bool caching = fscache_cookie_enabled(cifs_inode_cookie(ractl->mapping->host)) &&
4609 		cifs_inode_cookie(ractl->mapping->host)->cache_priv;
4610 	bool check_cache = caching;
4611 	pid_t pid;
4612 	int rc = 0;
4613 
4614 	/* Note that readahead_count() lags behind our dequeuing of pages from
4615 	 * the ractl, so we have to keep track for ourselves.
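	 * (ra_pages/ra_index below are our own running counters, advanced
	 * as folios are pulled off the ractl.)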
4616 */ 4617 ra_pages = readahead_count(ractl); 4618 ra_index = readahead_index(ractl); 4619 4620 xid = get_xid(); 4621 4622 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD) 4623 pid = open_file->pid; 4624 else 4625 pid = current->tgid; 4626 4627 server = cifs_pick_channel(tlink_tcon(open_file->tlink)->ses); 4628 4629 cifs_dbg(FYI, "%s: file=%p mapping=%p num_pages=%u\n", 4630 __func__, ractl->file, ractl->mapping, ra_pages); 4631 4632 /* 4633 * Chop the readahead request up into rsize-sized read requests. 4634 */ 4635 while ((nr_pages = ra_pages)) { 4636 unsigned int i, rsize; 4637 struct cifs_readdata *rdata; 4638 struct cifs_credits credits_on_stack; 4639 struct cifs_credits *credits = &credits_on_stack; 4640 struct folio *folio; 4641 pgoff_t fsize; 4642 4643 /* 4644 * Find out if we have anything cached in the range of 4645 * interest, and if so, where the next chunk of cached data is. 4646 */ 4647 if (caching) { 4648 if (check_cache) { 4649 rc = cifs_fscache_query_occupancy( 4650 ractl->mapping->host, ra_index, nr_pages, 4651 &next_cached, &cache_nr_pages); 4652 if (rc < 0) 4653 caching = false; 4654 check_cache = false; 4655 } 4656 4657 if (ra_index == next_cached) { 4658 /* 4659 * TODO: Send a whole batch of pages to be read 4660 * by the cache. 4661 */ 4662 folio = readahead_folio(ractl); 4663 fsize = folio_nr_pages(folio); 4664 ra_pages -= fsize; 4665 ra_index += fsize; 4666 if (cifs_readpage_from_fscache(ractl->mapping->host, 4667 &folio->page) < 0) { 4668 /* 4669 * TODO: Deal with cache read failure 4670 * here, but for the moment, delegate 4671 * that to readpage. 4672 */ 4673 caching = false; 4674 } 4675 folio_unlock(folio); 4676 next_cached += fsize; 4677 cache_nr_pages -= fsize; 4678 if (cache_nr_pages == 0) 4679 check_cache = true; 4680 continue; 4681 } 4682 } 4683 4684 if (open_file->invalidHandle) { 4685 rc = cifs_reopen_file(open_file, true); 4686 if (rc) { 4687 if (rc == -EAGAIN) 4688 continue; 4689 break; 4690 } 4691 } 4692 4693 if (cifs_sb->ctx->rsize == 0) 4694 cifs_sb->ctx->rsize = 4695 server->ops->negotiate_rsize(tlink_tcon(open_file->tlink), 4696 cifs_sb->ctx); 4697 4698 rc = server->ops->wait_mtu_credits(server, cifs_sb->ctx->rsize, 4699 &rsize, credits); 4700 if (rc) 4701 break; 4702 nr_pages = min_t(size_t, rsize / PAGE_SIZE, ra_pages); 4703 if (next_cached != ULONG_MAX) 4704 nr_pages = min_t(size_t, nr_pages, next_cached - ra_index); 4705 4706 /* 4707 * Give up immediately if rsize is too small to read an entire 4708 * page. The VFS will fall back to readpage. We should never 4709 * reach this point however since we set ra_pages to 0 when the 4710 * rsize is smaller than a cache page. 
4711 */ 4712 if (unlikely(!nr_pages)) { 4713 add_credits_and_wake_if(server, credits, 0); 4714 break; 4715 } 4716 4717 rdata = cifs_readdata_alloc(cifs_readahead_complete); 4718 if (!rdata) { 4719 /* best to give up if we're out of mem */ 4720 add_credits_and_wake_if(server, credits, 0); 4721 break; 4722 } 4723 4724 rdata->offset = ra_index * PAGE_SIZE; 4725 rdata->bytes = nr_pages * PAGE_SIZE; 4726 rdata->cfile = cifsFileInfo_get(open_file); 4727 rdata->server = server; 4728 rdata->mapping = ractl->mapping; 4729 rdata->pid = pid; 4730 rdata->credits = credits_on_stack; 4731 4732 for (i = 0; i < nr_pages; i++) { 4733 if (!readahead_folio(ractl)) 4734 WARN_ON(1); 4735 } 4736 ra_pages -= nr_pages; 4737 ra_index += nr_pages; 4738 4739 iov_iter_xarray(&rdata->iter, ITER_DEST, &rdata->mapping->i_pages, 4740 rdata->offset, rdata->bytes); 4741 4742 rc = adjust_credits(server, &rdata->credits, rdata->bytes); 4743 if (!rc) { 4744 if (rdata->cfile->invalidHandle) 4745 rc = -EAGAIN; 4746 else 4747 rc = server->ops->async_readv(rdata); 4748 } 4749 4750 if (rc) { 4751 add_credits_and_wake_if(server, &rdata->credits, 0); 4752 cifs_unlock_folios(rdata->mapping, 4753 rdata->offset / PAGE_SIZE, 4754 (rdata->offset + rdata->bytes - 1) / PAGE_SIZE); 4755 /* Fallback to the readpage in error/reconnect cases */ 4756 kref_put(&rdata->refcount, cifs_readdata_release); 4757 break; 4758 } 4759 4760 kref_put(&rdata->refcount, cifs_readdata_release); 4761 } 4762 4763 free_xid(xid); 4764 } 4765 4766 /* 4767 * cifs_readpage_worker must be called with the page pinned 4768 */ 4769 static int cifs_readpage_worker(struct file *file, struct page *page, 4770 loff_t *poffset) 4771 { 4772 struct inode *inode = file_inode(file); 4773 struct timespec64 atime, mtime; 4774 char *read_data; 4775 int rc; 4776 4777 /* Is the page cached? 
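	 * cifs_readpage_from_fscache() returns 0 on a cache hit, in which
	 * case the page is already populated and the network read below is
	 * skipped.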
*/ 4778 rc = cifs_readpage_from_fscache(inode, page); 4779 if (rc == 0) 4780 goto read_complete; 4781 4782 read_data = kmap(page); 4783 /* for reads over a certain size could initiate async read ahead */ 4784 4785 rc = cifs_read(file, read_data, PAGE_SIZE, poffset); 4786 4787 if (rc < 0) 4788 goto io_error; 4789 else 4790 cifs_dbg(FYI, "Bytes read %d\n", rc); 4791 4792 /* we do not want atime to be less than mtime, it broke some apps */ 4793 atime = inode_set_atime_to_ts(inode, current_time(inode)); 4794 mtime = inode_get_mtime(inode); 4795 if (timespec64_compare(&atime, &mtime) < 0) 4796 inode_set_atime_to_ts(inode, inode_get_mtime(inode)); 4797 4798 if (PAGE_SIZE > rc) 4799 memset(read_data + rc, 0, PAGE_SIZE - rc); 4800 4801 flush_dcache_page(page); 4802 SetPageUptodate(page); 4803 rc = 0; 4804 4805 io_error: 4806 kunmap(page); 4807 4808 read_complete: 4809 unlock_page(page); 4810 return rc; 4811 } 4812 4813 static int cifs_read_folio(struct file *file, struct folio *folio) 4814 { 4815 struct page *page = &folio->page; 4816 loff_t offset = page_file_offset(page); 4817 int rc = -EACCES; 4818 unsigned int xid; 4819 4820 xid = get_xid(); 4821 4822 if (file->private_data == NULL) { 4823 rc = -EBADF; 4824 free_xid(xid); 4825 return rc; 4826 } 4827 4828 cifs_dbg(FYI, "read_folio %p at offset %d 0x%x\n", 4829 page, (int)offset, (int)offset); 4830 4831 rc = cifs_readpage_worker(file, page, &offset); 4832 4833 free_xid(xid); 4834 return rc; 4835 } 4836 4837 static int is_inode_writable(struct cifsInodeInfo *cifs_inode) 4838 { 4839 struct cifsFileInfo *open_file; 4840 4841 spin_lock(&cifs_inode->open_file_lock); 4842 list_for_each_entry(open_file, &cifs_inode->openFileList, flist) { 4843 if (OPEN_FMODE(open_file->f_flags) & FMODE_WRITE) { 4844 spin_unlock(&cifs_inode->open_file_lock); 4845 return 1; 4846 } 4847 } 4848 spin_unlock(&cifs_inode->open_file_lock); 4849 return 0; 4850 } 4851 4852 /* We do not want to update the file size from server for inodes 4853 open for write - to avoid races with writepage extending 4854 the file - in the future we could consider allowing 4855 refreshing the inode only on increases in the file size 4856 but this is tricky to do without racing with writebehind 4857 page caching in the current Linux kernel design */ 4858 bool is_size_safe_to_change(struct cifsInodeInfo *cifsInode, __u64 end_of_file, 4859 bool from_readdir) 4860 { 4861 if (!cifsInode) 4862 return true; 4863 4864 if (is_inode_writable(cifsInode) || 4865 ((cifsInode->oplock & CIFS_CACHE_RW_FLG) != 0 && from_readdir)) { 4866 /* This inode is open for write at least once */ 4867 struct cifs_sb_info *cifs_sb; 4868 4869 cifs_sb = CIFS_SB(cifsInode->netfs.inode.i_sb); 4870 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DIRECT_IO) { 4871 /* since no page cache to corrupt on directio 4872 we can change size safely */ 4873 return true; 4874 } 4875 4876 if (i_size_read(&cifsInode->netfs.inode) < end_of_file) 4877 return true; 4878 4879 return false; 4880 } else 4881 return true; 4882 } 4883 4884 static int cifs_write_begin(struct file *file, struct address_space *mapping, 4885 loff_t pos, unsigned len, 4886 struct page **pagep, void **fsdata) 4887 { 4888 int oncethru = 0; 4889 pgoff_t index = pos >> PAGE_SHIFT; 4890 loff_t offset = pos & (PAGE_SIZE - 1); 4891 loff_t page_start = pos & PAGE_MASK; 4892 loff_t i_size; 4893 struct page *page; 4894 int rc = 0; 4895 4896 cifs_dbg(FYI, "write_begin from %lld len %d\n", (long long)pos, len); 4897 4898 start: 4899 page = grab_cache_page_write_begin(mapping, index); 4900 if 
(!page) { 4901 rc = -ENOMEM; 4902 goto out; 4903 } 4904 4905 if (PageUptodate(page)) 4906 goto out; 4907 4908 /* 4909 * If we write a full page it will be up to date, no need to read from 4910 * the server. If the write is short, we'll end up doing a sync write 4911 * instead. 4912 */ 4913 if (len == PAGE_SIZE) 4914 goto out; 4915 4916 /* 4917 * optimize away the read when we have an oplock, and we're not 4918 * expecting to use any of the data we'd be reading in. That 4919 * is, when the page lies beyond the EOF, or straddles the EOF 4920 * and the write will cover all of the existing data. 4921 */ 4922 if (CIFS_CACHE_READ(CIFS_I(mapping->host))) { 4923 i_size = i_size_read(mapping->host); 4924 if (page_start >= i_size || 4925 (offset == 0 && (pos + len) >= i_size)) { 4926 zero_user_segments(page, 0, offset, 4927 offset + len, 4928 PAGE_SIZE); 4929 /* 4930 * PageChecked means that the parts of the page 4931 * to which we're not writing are considered up 4932 * to date. Once the data is copied to the 4933 * page, it can be set uptodate. 4934 */ 4935 SetPageChecked(page); 4936 goto out; 4937 } 4938 } 4939 4940 if ((file->f_flags & O_ACCMODE) != O_WRONLY && !oncethru) { 4941 /* 4942 * might as well read a page, it is fast enough. If we get 4943 * an error, we don't need to return it. cifs_write_end will 4944 * do a sync write instead since PG_uptodate isn't set. 4945 */ 4946 cifs_readpage_worker(file, page, &page_start); 4947 put_page(page); 4948 oncethru = 1; 4949 goto start; 4950 } else { 4951 /* we could try using another file handle if there is one - 4952 but how would we lock it to prevent close of that handle 4953 racing with this read? In any case 4954 this will be written out by write_end so is fine */ 4955 } 4956 out: 4957 *pagep = page; 4958 return rc; 4959 } 4960 4961 static bool cifs_release_folio(struct folio *folio, gfp_t gfp) 4962 { 4963 if (folio_test_private(folio)) 4964 return 0; 4965 if (folio_test_fscache(folio)) { 4966 if (current_is_kswapd() || !(gfp & __GFP_FS)) 4967 return false; 4968 folio_wait_fscache(folio); 4969 } 4970 fscache_note_page_release(cifs_inode_cookie(folio->mapping->host)); 4971 return true; 4972 } 4973 4974 static void cifs_invalidate_folio(struct folio *folio, size_t offset, 4975 size_t length) 4976 { 4977 folio_wait_fscache(folio); 4978 } 4979 4980 static int cifs_launder_folio(struct folio *folio) 4981 { 4982 int rc = 0; 4983 loff_t range_start = folio_pos(folio); 4984 loff_t range_end = range_start + folio_size(folio); 4985 struct writeback_control wbc = { 4986 .sync_mode = WB_SYNC_ALL, 4987 .nr_to_write = 0, 4988 .range_start = range_start, 4989 .range_end = range_end, 4990 }; 4991 4992 cifs_dbg(FYI, "Launder page: %lu\n", folio->index); 4993 4994 if (folio_clear_dirty_for_io(folio)) 4995 rc = cifs_writepage_locked(&folio->page, &wbc); 4996 4997 folio_wait_fscache(folio); 4998 return rc; 4999 } 5000 5001 void cifs_oplock_break(struct work_struct *work) 5002 { 5003 struct cifsFileInfo *cfile = container_of(work, struct cifsFileInfo, 5004 oplock_break); 5005 struct inode *inode = d_inode(cfile->dentry); 5006 struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb); 5007 struct cifsInodeInfo *cinode = CIFS_I(inode); 5008 struct cifs_tcon *tcon; 5009 struct TCP_Server_Info *server; 5010 struct tcon_link *tlink; 5011 int rc = 0; 5012 bool purge_cache = false, oplock_break_cancelled; 5013 __u64 persistent_fid, volatile_fid; 5014 __u16 net_fid; 5015 5016 wait_on_bit(&cinode->flags, CIFS_INODE_PENDING_WRITERS, 5017 TASK_UNINTERRUPTIBLE); 5018 5019 tlink = 
cifs_sb_tlink(cifs_sb);
5020 	if (IS_ERR(tlink))
5021 		goto out;
5022 	tcon = tlink_tcon(tlink);
5023 	server = tcon->ses->server;
5024 
5025 	server->ops->downgrade_oplock(server, cinode, cfile->oplock_level,
5026 				      cfile->oplock_epoch, &purge_cache);
5027 
5028 	if (!CIFS_CACHE_WRITE(cinode) && CIFS_CACHE_READ(cinode) &&
5029 	    cifs_has_mand_locks(cinode)) {
5030 		cifs_dbg(FYI, "Reset oplock to None for inode=%p due to mand locks\n",
5031 			 inode);
5032 		cinode->oplock = 0;
5033 	}
5034 
5035 	if (inode && S_ISREG(inode->i_mode)) {
5036 		if (CIFS_CACHE_READ(cinode))
5037 			break_lease(inode, O_RDONLY);
5038 		else
5039 			break_lease(inode, O_WRONLY);
5040 		rc = filemap_fdatawrite(inode->i_mapping);
5041 		if (!CIFS_CACHE_READ(cinode) || purge_cache) {
5042 			rc = filemap_fdatawait(inode->i_mapping);
5043 			mapping_set_error(inode->i_mapping, rc);
5044 			cifs_zap_mapping(inode);
5045 		}
5046 		cifs_dbg(FYI, "Oplock flush inode %p rc %d\n", inode, rc);
5047 		if (CIFS_CACHE_WRITE(cinode))
5048 			goto oplock_break_ack;
5049 	}
5050 
5051 	rc = cifs_push_locks(cfile);
5052 	if (rc)
5053 		cifs_dbg(VFS, "Push locks rc = %d\n", rc);
5054 
5055 oplock_break_ack:
5056 	/*
5057 	 * If an oplock break is received while there are no active file
5058 	 * handles, only cached ones, schedule the deferred close immediately
5059 	 * so that a new open will not use the cached handle.
5060 	 */
5061 
5062 	if (!CIFS_CACHE_HANDLE(cinode) && !list_empty(&cinode->deferred_closes))
5063 		cifs_close_deferred_file(cinode);
5064 
5065 	persistent_fid = cfile->fid.persistent_fid;
5066 	volatile_fid = cfile->fid.volatile_fid;
5067 	net_fid = cfile->fid.netfid;
5068 	oplock_break_cancelled = cfile->oplock_break_cancelled;
5069 
5070 	_cifsFileInfo_put(cfile, false /* do not wait for ourselves */, false);
5071 	/*
5072 	 * MS-SMB2 3.2.5.19.1 and 3.2.5.19.2 (and MS-CIFS 3.2.5.42) do not require
5073 	 * an acknowledgment to be sent when the file has already been closed.
5074 	 */
5075 	spin_lock(&cinode->open_file_lock);
5076 	/* check that the list is empty, since this can race with kill_sb calling tree disconnect */
5077 	if (!oplock_break_cancelled && !list_empty(&cinode->openFileList)) {
5078 		spin_unlock(&cinode->open_file_lock);
5079 		rc = server->ops->oplock_response(tcon, persistent_fid,
5080 						  volatile_fid, net_fid, cinode);
5081 		cifs_dbg(FYI, "Oplock release rc = %d\n", rc);
5082 	} else
5083 		spin_unlock(&cinode->open_file_lock);
5084 
5085 	cifs_put_tlink(tlink);
5086 out:
5087 	cifs_done_oplock_break(cinode);
5088 }
5089 
5090 /*
5091  * The presence of cifs_direct_io() in the address space ops vector
5092  * allows open() O_DIRECT flags which would have failed otherwise.
5093  *
5094  * In the non-cached mode (mount with cache=none), we shunt off direct read and write requests
5095  * so this method should never be called.
5096  *
5097  * Direct IO is not yet supported in the cached mode.
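 *
 * For example, an open(path, O_RDWR | O_DIRECT) on a cache=none mount
 * succeeds only because ->direct_IO is non-NULL; the I/O itself is then
 * routed through cifs_direct_readv()/cifs_direct_writev() above rather
 * than through this method.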
5098 */ 5099 static ssize_t 5100 cifs_direct_io(struct kiocb *iocb, struct iov_iter *iter) 5101 { 5102 /* 5103 * FIXME 5104 * Eventually need to support direct IO for non forcedirectio mounts 5105 */ 5106 return -EINVAL; 5107 } 5108 5109 static int cifs_swap_activate(struct swap_info_struct *sis, 5110 struct file *swap_file, sector_t *span) 5111 { 5112 struct cifsFileInfo *cfile = swap_file->private_data; 5113 struct inode *inode = swap_file->f_mapping->host; 5114 unsigned long blocks; 5115 long long isize; 5116 5117 cifs_dbg(FYI, "swap activate\n"); 5118 5119 if (!swap_file->f_mapping->a_ops->swap_rw) 5120 /* Cannot support swap */ 5121 return -EINVAL; 5122 5123 spin_lock(&inode->i_lock); 5124 blocks = inode->i_blocks; 5125 isize = inode->i_size; 5126 spin_unlock(&inode->i_lock); 5127 if (blocks*512 < isize) { 5128 pr_warn("swap activate: swapfile has holes\n"); 5129 return -EINVAL; 5130 } 5131 *span = sis->pages; 5132 5133 pr_warn_once("Swap support over SMB3 is experimental\n"); 5134 5135 /* 5136 * TODO: consider adding ACL (or documenting how) to prevent other 5137 * users (on this or other systems) from reading it 5138 */ 5139 5140 5141 /* TODO: add sk_set_memalloc(inet) or similar */ 5142 5143 if (cfile) 5144 cfile->swapfile = true; 5145 /* 5146 * TODO: Since file already open, we can't open with DENY_ALL here 5147 * but we could add call to grab a byte range lock to prevent others 5148 * from reading or writing the file 5149 */ 5150 5151 sis->flags |= SWP_FS_OPS; 5152 return add_swap_extent(sis, 0, sis->max, 0); 5153 } 5154 5155 static void cifs_swap_deactivate(struct file *file) 5156 { 5157 struct cifsFileInfo *cfile = file->private_data; 5158 5159 cifs_dbg(FYI, "swap deactivate\n"); 5160 5161 /* TODO: undo sk_set_memalloc(inet) will eventually be needed */ 5162 5163 if (cfile) 5164 cfile->swapfile = false; 5165 5166 /* do we need to unpin (or unlock) the file */ 5167 } 5168 5169 /* 5170 * Mark a page as having been made dirty and thus needing writeback. We also 5171 * need to pin the cache object to write back to. 5172 */ 5173 #ifdef CONFIG_CIFS_FSCACHE 5174 static bool cifs_dirty_folio(struct address_space *mapping, struct folio *folio) 5175 { 5176 return fscache_dirty_folio(mapping, folio, 5177 cifs_inode_cookie(mapping->host)); 5178 } 5179 #else 5180 #define cifs_dirty_folio filemap_dirty_folio 5181 #endif 5182 5183 const struct address_space_operations cifs_addr_ops = { 5184 .read_folio = cifs_read_folio, 5185 .readahead = cifs_readahead, 5186 .writepages = cifs_writepages, 5187 .write_begin = cifs_write_begin, 5188 .write_end = cifs_write_end, 5189 .dirty_folio = cifs_dirty_folio, 5190 .release_folio = cifs_release_folio, 5191 .direct_IO = cifs_direct_io, 5192 .invalidate_folio = cifs_invalidate_folio, 5193 .launder_folio = cifs_launder_folio, 5194 .migrate_folio = filemap_migrate_folio, 5195 /* 5196 * TODO: investigate and if useful we could add an is_dirty_writeback 5197 * helper if needed 5198 */ 5199 .swap_activate = cifs_swap_activate, 5200 .swap_deactivate = cifs_swap_deactivate, 5201 }; 5202 5203 /* 5204 * cifs_readahead requires the server to support a buffer large enough to 5205 * contain the header plus one complete page of data. Otherwise, we need 5206 * to leave cifs_readahead out of the address space operations. 
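 * (cifs_addr_ops_smallbuf below matches cifs_addr_ops except that
 * .readahead, .direct_IO and the swap hooks are left unset.)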
5207 */ 5208 const struct address_space_operations cifs_addr_ops_smallbuf = { 5209 .read_folio = cifs_read_folio, 5210 .writepages = cifs_writepages, 5211 .write_begin = cifs_write_begin, 5212 .write_end = cifs_write_end, 5213 .dirty_folio = cifs_dirty_folio, 5214 .release_folio = cifs_release_folio, 5215 .invalidate_folio = cifs_invalidate_folio, 5216 .launder_folio = cifs_launder_folio, 5217 .migrate_folio = filemap_migrate_folio, 5218 }; 5219
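
/*
 * Illustration (a sketch, not part of the driver): how an inode setup
 * path can pick between the two address_space ops tables above based on
 * the negotiated read size. The field and constant names used here
 * (max_read, MAX_SMB_HDR_SIZE) are assumed for the example; the real
 * selection is done in the inode initialization code.
 */
#if 0	/* example only */
static void cifs_select_aops_example(struct inode *inode,
				     struct TCP_Server_Info *server)
{
	/* Readahead needs room for a protocol header plus one whole page. */
	if (server->max_read < PAGE_SIZE + MAX_SMB_HDR_SIZE)
		inode->i_data.a_ops = &cifs_addr_ops_smallbuf;
	else
		inode->i_data.a_ops = &cifs_addr_ops;
}
#endif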