1 // SPDX-License-Identifier: LGPL-2.1 2 /* 3 * 4 * vfs operations that deal with files 5 * 6 * Copyright (C) International Business Machines Corp., 2002,2010 7 * Author(s): Steve French (sfrench@us.ibm.com) 8 * Jeremy Allison (jra@samba.org) 9 * 10 */ 11 #include <linux/fs.h> 12 #include <linux/filelock.h> 13 #include <linux/backing-dev.h> 14 #include <linux/stat.h> 15 #include <linux/fcntl.h> 16 #include <linux/pagemap.h> 17 #include <linux/pagevec.h> 18 #include <linux/writeback.h> 19 #include <linux/task_io_accounting_ops.h> 20 #include <linux/delay.h> 21 #include <linux/mount.h> 22 #include <linux/slab.h> 23 #include <linux/swap.h> 24 #include <linux/mm.h> 25 #include <asm/div64.h> 26 #include "cifsfs.h" 27 #include "cifspdu.h" 28 #include "cifsglob.h" 29 #include "cifsproto.h" 30 #include "smb2proto.h" 31 #include "cifs_unicode.h" 32 #include "cifs_debug.h" 33 #include "cifs_fs_sb.h" 34 #include "fscache.h" 35 #include "smbdirect.h" 36 #include "fs_context.h" 37 #include "cifs_ioctl.h" 38 #include "cached_dir.h" 39 40 /* 41 * Remove the dirty flags from a span of pages. 42 */ 43 static void cifs_undirty_folios(struct inode *inode, loff_t start, unsigned int len) 44 { 45 struct address_space *mapping = inode->i_mapping; 46 struct folio *folio; 47 pgoff_t end; 48 49 XA_STATE(xas, &mapping->i_pages, start / PAGE_SIZE); 50 51 rcu_read_lock(); 52 53 end = (start + len - 1) / PAGE_SIZE; 54 xas_for_each_marked(&xas, folio, end, PAGECACHE_TAG_DIRTY) { 55 if (xas_retry(&xas, folio)) 56 continue; 57 xas_pause(&xas); 58 rcu_read_unlock(); 59 folio_lock(folio); 60 folio_clear_dirty_for_io(folio); 61 folio_unlock(folio); 62 rcu_read_lock(); 63 } 64 65 rcu_read_unlock(); 66 } 67 68 /* 69 * Completion of write to server. 
 */
void cifs_pages_written_back(struct inode *inode, loff_t start, unsigned int len)
{
	struct address_space *mapping = inode->i_mapping;
	struct folio *folio;
	pgoff_t end;

	XA_STATE(xas, &mapping->i_pages, start / PAGE_SIZE);

	if (!len)
		return;

	rcu_read_lock();

	end = (start + len - 1) / PAGE_SIZE;
	xas_for_each(&xas, folio, end) {
		/* skip xarray retry entries - they are not folios */
		if (xas_retry(&xas, folio))
			continue;
		if (!folio_test_writeback(folio)) {
			WARN_ONCE(1, "bad %x @%llx page %lx %lx\n",
				  len, start, folio->index, end);
			continue;
		}

		/* drop the folio's private data and clear its writeback flag */
		folio_detach_private(folio);
		folio_end_writeback(folio);
	}

	rcu_read_unlock();
}

/*
 * Failure of write to server.
 */
void cifs_pages_write_failed(struct inode *inode, loff_t start, unsigned int len)
{
	struct address_space *mapping = inode->i_mapping;
	struct folio *folio;
	pgoff_t end;

	XA_STATE(xas, &mapping->i_pages, start / PAGE_SIZE);

	if (!len)
		return;

	rcu_read_lock();

	end = (start + len - 1) / PAGE_SIZE;
	xas_for_each(&xas, folio, end) {
		/* skip xarray retry entries - they are not folios */
		if (xas_retry(&xas, folio))
			continue;
		if (!folio_test_writeback(folio)) {
			WARN_ONCE(1, "bad %x @%llx page %lx %lx\n",
				  len, start, folio->index, end);
			continue;
		}

		/* flag the error on the folio, then complete writeback */
		folio_set_error(folio);
		folio_end_writeback(folio);
	}

	rcu_read_unlock();
}

/*
 * Redirty pages after a temporary failure.
136 */ 137 void cifs_pages_write_redirty(struct inode *inode, loff_t start, unsigned int len) 138 { 139 struct address_space *mapping = inode->i_mapping; 140 struct folio *folio; 141 pgoff_t end; 142 143 XA_STATE(xas, &mapping->i_pages, start / PAGE_SIZE); 144 145 if (!len) 146 return; 147 148 rcu_read_lock(); 149 150 end = (start + len - 1) / PAGE_SIZE; 151 xas_for_each(&xas, folio, end) { 152 if (!folio_test_writeback(folio)) { 153 WARN_ONCE(1, "bad %x @%llx page %lx %lx\n", 154 len, start, folio->index, end); 155 continue; 156 } 157 158 filemap_dirty_folio(folio->mapping, folio); 159 folio_end_writeback(folio); 160 } 161 162 rcu_read_unlock(); 163 } 164 165 /* 166 * Mark as invalid, all open files on tree connections since they 167 * were closed when session to server was lost. 168 */ 169 void 170 cifs_mark_open_files_invalid(struct cifs_tcon *tcon) 171 { 172 struct cifsFileInfo *open_file = NULL; 173 struct list_head *tmp; 174 struct list_head *tmp1; 175 176 /* only send once per connect */ 177 spin_lock(&tcon->tc_lock); 178 if (tcon->need_reconnect) 179 tcon->status = TID_NEED_RECON; 180 181 if (tcon->status != TID_NEED_RECON) { 182 spin_unlock(&tcon->tc_lock); 183 return; 184 } 185 tcon->status = TID_IN_FILES_INVALIDATE; 186 spin_unlock(&tcon->tc_lock); 187 188 /* list all files open on tree connection and mark them invalid */ 189 spin_lock(&tcon->open_file_lock); 190 list_for_each_safe(tmp, tmp1, &tcon->openFileList) { 191 open_file = list_entry(tmp, struct cifsFileInfo, tlist); 192 open_file->invalidHandle = true; 193 open_file->oplock_break_cancelled = true; 194 } 195 spin_unlock(&tcon->open_file_lock); 196 197 invalidate_all_cached_dirs(tcon); 198 spin_lock(&tcon->tc_lock); 199 if (tcon->status == TID_IN_FILES_INVALIDATE) 200 tcon->status = TID_NEED_TCON; 201 spin_unlock(&tcon->tc_lock); 202 203 /* 204 * BB Add call to invalidate_inodes(sb) for all superblocks mounted 205 * to this tcon. 
 */
}

/*
 * Map POSIX open flags to the SMB desired-access bits for the open.
 * When rdwr_for_fscache == 1, a write-only open is widened to
 * GENERIC_READ | GENERIC_WRITE so the local cache can read back data.
 */
static inline int cifs_convert_flags(unsigned int flags, int rdwr_for_fscache)
{
	if ((flags & O_ACCMODE) == O_RDONLY)
		return GENERIC_READ;
	else if ((flags & O_ACCMODE) == O_WRONLY)
		return rdwr_for_fscache == 1 ? (GENERIC_READ | GENERIC_WRITE) : GENERIC_WRITE;
	else if ((flags & O_ACCMODE) == O_RDWR) {
		/* GENERIC_ALL is too much permission to request
		   can cause unnecessary access denied on create */
		/* return GENERIC_ALL; */
		return (GENERIC_READ | GENERIC_WRITE);
	}

	return (READ_CONTROL | FILE_WRITE_ATTRIBUTES | FILE_READ_ATTRIBUTES |
		FILE_WRITE_EA | FILE_APPEND_DATA | FILE_WRITE_DATA |
		FILE_READ_DATA);
}

#ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
/* Map POSIX open flags to the SMB_O_* flags used by the legacy POSIX open. */
static u32 cifs_posix_convert_flags(unsigned int flags)
{
	u32 posix_flags = 0;

	if ((flags & O_ACCMODE) == O_RDONLY)
		posix_flags = SMB_O_RDONLY;
	else if ((flags & O_ACCMODE) == O_WRONLY)
		posix_flags = SMB_O_WRONLY;
	else if ((flags & O_ACCMODE) == O_RDWR)
		posix_flags = SMB_O_RDWR;

	if (flags & O_CREAT) {
		posix_flags |= SMB_O_CREAT;
		if (flags & O_EXCL)
			posix_flags |= SMB_O_EXCL;
	} else if (flags & O_EXCL)
		cifs_dbg(FYI, "Application %s pid %d has incorrectly set O_EXCL flag but not O_CREAT on file open. Ignoring O_EXCL\n",
			 current->comm, current->tgid);

	if (flags & O_TRUNC)
		posix_flags |= SMB_O_TRUNC;
	/* be safe and imply O_SYNC for O_DSYNC */
	if (flags & O_DSYNC)
		posix_flags |= SMB_O_SYNC;
	if (flags & O_DIRECTORY)
		posix_flags |= SMB_O_DIRECTORY;
	if (flags & O_NOFOLLOW)
		posix_flags |= SMB_O_NOFOLLOW;
	if (flags & O_DIRECT)
		posix_flags |= SMB_O_DIRECT;

	return posix_flags;
}
#endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */

/* Translate O_CREAT/O_EXCL/O_TRUNC combinations to an SMB create disposition. */
static inline int cifs_get_disposition(unsigned int flags)
{
	if ((flags & (O_CREAT | O_EXCL)) == (O_CREAT | O_EXCL))
		return FILE_CREATE;
	else if ((flags & (O_CREAT | O_TRUNC)) == (O_CREAT | O_TRUNC))
		return FILE_OVERWRITE_IF;
	else if ((flags & O_CREAT) == O_CREAT)
		return FILE_OPEN_IF;
	else if ((flags & O_TRUNC) == O_TRUNC)
		return FILE_OVERWRITE;
	else
		return FILE_OPEN;
}

#ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
/*
 * Open a file via the legacy CIFS POSIX extensions.  On success, if
 * @pinode is supplied, either instantiates a new inode (*pinode == NULL)
 * or refreshes the existing one from the returned attributes.
 */
int cifs_posix_open(const char *full_path, struct inode **pinode,
		    struct super_block *sb, int mode, unsigned int f_flags,
		    __u32 *poplock, __u16 *pnetfid, unsigned int xid)
{
	int rc;
	FILE_UNIX_BASIC_INFO *presp_data;
	__u32 posix_flags = 0;
	struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
	struct cifs_fattr fattr;
	struct tcon_link *tlink;
	struct cifs_tcon *tcon;

	cifs_dbg(FYI, "posix open %s\n", full_path);

	presp_data = kzalloc(sizeof(FILE_UNIX_BASIC_INFO), GFP_KERNEL);
	if (presp_data == NULL)
		return -ENOMEM;

	tlink = cifs_sb_tlink(cifs_sb);
	if (IS_ERR(tlink)) {
		rc = PTR_ERR(tlink);
		goto posix_open_ret;
	}

	tcon = tlink_tcon(tlink);
	mode &= ~current_umask();

	posix_flags = cifs_posix_convert_flags(f_flags);
	rc = CIFSPOSIXCreate(xid, tcon, posix_flags, mode, pnetfid, presp_data,
			     poplock, full_path, cifs_sb->local_nls,
			     cifs_remap(cifs_sb));
	cifs_put_tlink(tlink);

	if (rc)
		goto posix_open_ret;

	if (presp_data->Type == cpu_to_le32(-1))
		goto posix_open_ret; /* open ok, caller does qpathinfo */

	if (!pinode)
		goto posix_open_ret; /* caller does not need info */

	cifs_unix_basic_to_fattr(&fattr, presp_data, cifs_sb);

	/* get new inode and set it up */
	if (*pinode == NULL) {
		cifs_fill_uniqueid(sb, &fattr);
		*pinode = cifs_iget(sb, &fattr);
		if (!*pinode) {
			rc = -ENOMEM;
			goto posix_open_ret;
		}
	} else {
		cifs_revalidate_mapping(*pinode);
		rc = cifs_fattr_to_inode(*pinode, &fattr, false);
	}

posix_open_ret:
	kfree(presp_data);
	return rc;
}
#endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */

/*
 * Open @full_path on the server via the standard (NT-style) create call,
 * then refresh the inode metadata for the opened path.
 */
static int cifs_nt_open(const char *full_path, struct inode *inode, struct cifs_sb_info *cifs_sb,
			struct cifs_tcon *tcon, unsigned int f_flags, __u32 *oplock,
			struct cifs_fid *fid, unsigned int xid, struct cifs_open_info_data *buf)
{
	int rc;
	int desired_access;
	int disposition;
	int create_options = CREATE_NOT_DIR;
	struct TCP_Server_Info *server = tcon->ses->server;
	struct cifs_open_parms oparms;
	int rdwr_for_fscache = 0;

	if (!server->ops->open)
		return -ENOSYS;

	/* If we're caching, we need to be able to fill in around partial writes.
 */
	if (cifs_fscache_enabled(inode) && (f_flags & O_ACCMODE) == O_WRONLY)
		rdwr_for_fscache = 1;

	desired_access = cifs_convert_flags(f_flags, rdwr_for_fscache);

/*********************************************************************
 *  open flag mapping table:
 *
 *	POSIX Flag            CIFS Disposition
 *	----------            ----------------
 *	O_CREAT               FILE_OPEN_IF
 *	O_CREAT | O_EXCL      FILE_CREATE
 *	O_CREAT | O_TRUNC     FILE_OVERWRITE_IF
 *	O_TRUNC               FILE_OVERWRITE
 *	none of the above     FILE_OPEN
 *
 *	Note that there is not a direct match between disposition
 *	FILE_SUPERSEDE (ie create whether or not file exists although
 *	O_CREAT | O_TRUNC is similar but truncates the existing
 *	file rather than creating a new file as FILE_SUPERSEDE does
 *	(which uses the attributes / metadata passed in on open call)
 *?
 *?  O_SYNC is a reasonable match to CIFS writethrough flag
 *?  and the read write flags match reasonably.  O_LARGEFILE
 *?  is irrelevant because largefile support is always used
 *?  by this client. Flags O_APPEND, O_DIRECT, O_DIRECTORY,
 *	 O_FASYNC, O_NOFOLLOW, O_NONBLOCK need further investigation
 *********************************************************************/

	disposition = cifs_get_disposition(f_flags);

	/* BB pass O_SYNC flag through on file attributes ..
BB */

	/* O_SYNC also has bit for O_DSYNC so following check picks up either */
	if (f_flags & O_SYNC)
		create_options |= CREATE_WRITE_THROUGH;

	if (f_flags & O_DIRECT)
		create_options |= CREATE_NO_BUFFER;

retry_open:
	oparms = (struct cifs_open_parms) {
		.tcon = tcon,
		.cifs_sb = cifs_sb,
		.desired_access = desired_access,
		.create_options = cifs_create_options(cifs_sb, create_options),
		.disposition = disposition,
		.path = full_path,
		.fid = fid,
	};

	rc = server->ops->open(xid, &oparms, oplock, buf);
	if (rc) {
		/*
		 * If the access widened for fscache was refused, retry once
		 * with only the access the caller actually requested.
		 */
		if (rc == -EACCES && rdwr_for_fscache == 1) {
			desired_access = cifs_convert_flags(f_flags, 0);
			rdwr_for_fscache = 2;
			goto retry_open;
		}
		return rc;
	}
	if (rdwr_for_fscache == 2)
		cifs_invalidate_cache(inode, FSCACHE_INVAL_DIO_WRITE);

	/* TODO: Add support for calling posix query info but with passing in fid */
	if (tcon->unix_ext)
		rc = cifs_get_inode_info_unix(&inode, full_path, inode->i_sb,
					      xid);
	else
		rc = cifs_get_inode_info(&inode, full_path, buf, inode->i_sb,
					 xid, fid);

	if (rc) {
		/* metadata refresh failed - close the just-opened handle */
		server->ops->close(xid, tcon, fid);
		if (rc == -ESTALE)
			rc = -EOPENSTALE;
	}

	return rc;
}

/* Return true if any handle on this inode holds byte-range locks. */
static bool
cifs_has_mand_locks(struct cifsInodeInfo *cinode)
{
	struct cifs_fid_locks *cur;
	bool has_locks = false;

	down_read(&cinode->lock_sem);
	list_for_each_entry(cur, &cinode->llist, llist) {
		if (!list_empty(&cur->locks)) {
			has_locks = true;
			break;
		}
	}
	up_read(&cinode->lock_sem);
	return has_locks;
}

/*
 * Acquire @sem for writing by polling down_write_trylock() with short
 * sleeps instead of blocking in down_write().
 */
void
cifs_down_write(struct rw_semaphore *sem)
{
	while (!down_write_trylock(sem))
		msleep(10);
}

static void cifsFileInfo_put_work(struct work_struct *work);
void serverclose_work(struct work_struct *work);

/*
 * Allocate and initialise the per-open-file private data (cifsFileInfo),
 * attach it to @file and link it onto the tcon and inode open-file lists.
 * Returns NULL on allocation failure.
 */
struct cifsFileInfo *cifs_new_fileinfo(struct cifs_fid *fid, struct file *file,
				       struct tcon_link *tlink, __u32 oplock,
				       const char *symlink_target)
{
	struct dentry *dentry = file_dentry(file);
	struct inode *inode = d_inode(dentry);
	struct cifsInodeInfo *cinode = CIFS_I(inode);
	struct cifsFileInfo *cfile;
	struct cifs_fid_locks *fdlocks;
	struct cifs_tcon *tcon = tlink_tcon(tlink);
	struct TCP_Server_Info *server = tcon->ses->server;

	cfile = kzalloc(sizeof(struct cifsFileInfo), GFP_KERNEL);
	if (cfile == NULL)
		return cfile;

	fdlocks = kzalloc(sizeof(struct cifs_fid_locks), GFP_KERNEL);
	if (!fdlocks) {
		kfree(cfile);
		return NULL;
	}

	if (symlink_target) {
		cfile->symlink_target = kstrdup(symlink_target, GFP_KERNEL);
		if (!cfile->symlink_target) {
			kfree(fdlocks);
			kfree(cfile);
			return NULL;
		}
	}

	INIT_LIST_HEAD(&fdlocks->locks);
	fdlocks->cfile = cfile;
	cfile->llist = fdlocks;

	cfile->count = 1;
	cfile->pid = current->tgid;
	cfile->uid = current_fsuid();
	cfile->dentry = dget(dentry);
	cfile->f_flags = file->f_flags;
	cfile->invalidHandle = false;
	cfile->deferred_close_scheduled = false;
	cfile->tlink = cifs_get_tlink(tlink);
	INIT_WORK(&cfile->oplock_break, cifs_oplock_break);
	INIT_WORK(&cfile->put, cifsFileInfo_put_work);
	INIT_WORK(&cfile->serverclose, serverclose_work);
	INIT_DELAYED_WORK(&cfile->deferred, smb2_deferred_work_close);
	mutex_init(&cfile->fh_mutex);
	spin_lock_init(&cfile->file_info_lock);

	cifs_sb_active(inode->i_sb);

	/*
	 * If the server returned a read oplock and we have mandatory brlocks,
	 * set oplock level to None.
 */
	if (server->ops->is_read_op(oplock) && cifs_has_mand_locks(cinode)) {
		cifs_dbg(FYI, "Reset oplock val from read to None due to mand locks\n");
		oplock = 0;
	}

	cifs_down_write(&cinode->lock_sem);
	list_add(&fdlocks->llist, &cinode->llist);
	up_write(&cinode->lock_sem);

	spin_lock(&tcon->open_file_lock);
	/* honour an oplock level recorded on the pending open, if any */
	if (fid->pending_open->oplock != CIFS_OPLOCK_NO_CHANGE && oplock)
		oplock = fid->pending_open->oplock;
	list_del(&fid->pending_open->olist);

	fid->purge_cache = false;
	server->ops->set_fid(cfile, fid, oplock);

	list_add(&cfile->tlist, &tcon->openFileList);
	atomic_inc(&tcon->num_local_opens);

	/* if readable file instance put first in list*/
	spin_lock(&cinode->open_file_lock);
	if (file->f_mode & FMODE_READ)
		list_add(&cfile->flist, &cinode->openFileList);
	else
		list_add_tail(&cfile->flist, &cinode->openFileList);
	spin_unlock(&cinode->open_file_lock);
	spin_unlock(&tcon->open_file_lock);

	/* set_fid() may have requested a cache purge; do it outside the locks */
	if (fid->purge_cache)
		cifs_zap_mapping(inode);

	file->private_data = cfile;
	return cfile;
}

/* Take an extra reference on @cifs_file; paired with cifsFileInfo_put(). */
struct cifsFileInfo *
cifsFileInfo_get(struct cifsFileInfo *cifs_file)
{
	spin_lock(&cifs_file->file_info_lock);
	cifsFileInfo_get_locked(cifs_file);
	spin_unlock(&cifs_file->file_info_lock);
	return cifs_file;
}

/* Final teardown once the last reference to @cifs_file has been dropped. */
static void cifsFileInfo_put_final(struct cifsFileInfo *cifs_file)
{
	struct inode *inode = d_inode(cifs_file->dentry);
	struct cifsInodeInfo *cifsi = CIFS_I(inode);
	struct cifsLockInfo *li, *tmp;
	struct super_block *sb = inode->i_sb;

	/*
	 * Delete any outstanding lock records. We'll lose them when the file
	 * is closed anyway.
 */
	cifs_down_write(&cifsi->lock_sem);
	list_for_each_entry_safe(li, tmp, &cifs_file->llist->locks, llist) {
		list_del(&li->llist);
		cifs_del_lock_waiters(li);
		kfree(li);
	}
	list_del(&cifs_file->llist->llist);
	kfree(cifs_file->llist);
	up_write(&cifsi->lock_sem);

	cifs_put_tlink(cifs_file->tlink);
	dput(cifs_file->dentry);
	cifs_sb_deactive(sb);
	kfree(cifs_file->symlink_target);
	kfree(cifs_file);
}

/* Workqueue wrapper so the final put can run off the caller's context. */
static void cifsFileInfo_put_work(struct work_struct *work)
{
	struct cifsFileInfo *cifs_file = container_of(work,
			struct cifsFileInfo, put);

	cifsFileInfo_put_final(cifs_file);
}

/*
 * Workqueue handler that retries the server-side close a few times when
 * it keeps failing with -EBUSY/-EAGAIN, then performs the final put
 * (inline, or offloaded again if the file was flagged for offload).
 */
void serverclose_work(struct work_struct *work)
{
	struct cifsFileInfo *cifs_file = container_of(work,
			struct cifsFileInfo, serverclose);

	struct cifs_tcon *tcon = tlink_tcon(cifs_file->tlink);

	struct TCP_Server_Info *server = tcon->ses->server;
	int rc = 0;
	int retries = 0;
	int MAX_RETRIES = 4;

	do {
		if (server->ops->close_getattr)
			rc = server->ops->close_getattr(0, tcon, cifs_file);
		else if (server->ops->close)
			rc = server->ops->close(0, tcon, &cifs_file->fid);

		if (rc == -EBUSY || rc == -EAGAIN) {
			retries++;
			msleep(250);
		}
	} while ((rc == -EBUSY || rc == -EAGAIN) && (retries < MAX_RETRIES)
	);

	if (retries == MAX_RETRIES)
		pr_warn("Serverclose failed %d times, giving up\n", MAX_RETRIES);

	if (cifs_file->offload)
		queue_work(fileinfo_put_wq, &cifs_file->put);
	else
		cifsFileInfo_put_final(cifs_file);
}

/**
 * cifsFileInfo_put - release a reference of file priv data
 *
 * Always potentially wait for oplock handler. See _cifsFileInfo_put().
 *
 * @cifs_file:	cifs/smb3 specific info (eg refcounts) for an open file
 */
void cifsFileInfo_put(struct cifsFileInfo *cifs_file)
{
	_cifsFileInfo_put(cifs_file, true, true);
}

/**
 * _cifsFileInfo_put - release a reference of file priv data
 *
 * This may involve closing the filehandle @cifs_file out on the
 * server. Must be called without holding tcon->open_file_lock,
 * cinode->open_file_lock and cifs_file->file_info_lock.
 *
 * If @wait_oplock_handler is true and we are releasing the last
 * reference, wait for any running oplock break handler of the file
 * and cancel any pending one.
 *
 * @cifs_file: cifs/smb3 specific info (eg refcounts) for an open file
 * @wait_oplock_handler: must be false if called from oplock_break_handler
 * @offload: if true, perform the final put on a workqueue rather than
 *	inline (must be false on the close and oplock-break paths)
 *
 */
void _cifsFileInfo_put(struct cifsFileInfo *cifs_file,
		       bool wait_oplock_handler, bool offload)
{
	struct inode *inode = d_inode(cifs_file->dentry);
	struct cifs_tcon *tcon = tlink_tcon(cifs_file->tlink);
	struct TCP_Server_Info *server = tcon->ses->server;
	struct cifsInodeInfo *cifsi = CIFS_I(inode);
	struct super_block *sb = inode->i_sb;
	struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
	struct cifs_fid fid = {};
	struct cifs_pending_open open;
	bool oplock_break_cancelled;
	bool serverclose_offloaded = false;

	spin_lock(&tcon->open_file_lock);
	spin_lock(&cifsi->open_file_lock);
	spin_lock(&cifs_file->file_info_lock);

	cifs_file->offload = offload;
	/* not the last reference - just drop the count and return */
	if (--cifs_file->count > 0) {
		spin_unlock(&cifs_file->file_info_lock);
		spin_unlock(&cifsi->open_file_lock);
		spin_unlock(&tcon->open_file_lock);
		return;
	}
	spin_unlock(&cifs_file->file_info_lock);

	if (server->ops->get_lease_key)
		server->ops->get_lease_key(inode, &fid);

	/* store open in pending opens to make sure we don't miss lease break */
694 cifs_add_pending_open_locked(&fid, cifs_file->tlink, &open); 695 696 /* remove it from the lists */ 697 list_del(&cifs_file->flist); 698 list_del(&cifs_file->tlist); 699 atomic_dec(&tcon->num_local_opens); 700 701 if (list_empty(&cifsi->openFileList)) { 702 cifs_dbg(FYI, "closing last open instance for inode %p\n", 703 d_inode(cifs_file->dentry)); 704 /* 705 * In strict cache mode we need invalidate mapping on the last 706 * close because it may cause a error when we open this file 707 * again and get at least level II oplock. 708 */ 709 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_STRICT_IO) 710 set_bit(CIFS_INO_INVALID_MAPPING, &cifsi->flags); 711 cifs_set_oplock_level(cifsi, 0); 712 } 713 714 spin_unlock(&cifsi->open_file_lock); 715 spin_unlock(&tcon->open_file_lock); 716 717 oplock_break_cancelled = wait_oplock_handler ? 718 cancel_work_sync(&cifs_file->oplock_break) : false; 719 720 if (!tcon->need_reconnect && !cifs_file->invalidHandle) { 721 struct TCP_Server_Info *server = tcon->ses->server; 722 unsigned int xid; 723 int rc = 0; 724 725 xid = get_xid(); 726 if (server->ops->close_getattr) 727 rc = server->ops->close_getattr(xid, tcon, cifs_file); 728 else if (server->ops->close) 729 rc = server->ops->close(xid, tcon, &cifs_file->fid); 730 _free_xid(xid); 731 732 if (rc == -EBUSY || rc == -EAGAIN) { 733 // Server close failed, hence offloading it as an async op 734 queue_work(serverclose_wq, &cifs_file->serverclose); 735 serverclose_offloaded = true; 736 } 737 } 738 739 if (oplock_break_cancelled) 740 cifs_done_oplock_break(cifsi); 741 742 cifs_del_pending_open(&open); 743 744 // if serverclose has been offloaded to wq (on failure), it will 745 // handle offloading put as well. If serverclose not offloaded, 746 // we need to handle offloading put here. 
	if (!serverclose_offloaded) {
		if (offload)
			queue_work(fileinfo_put_wq, &cifs_file->put);
		else
			cifsFileInfo_put_final(cifs_file);
	}
}

int cifs_open(struct inode *inode, struct file *file)

{
	int rc = -EACCES;
	unsigned int xid;
	__u32 oplock;
	struct cifs_sb_info *cifs_sb;
	struct TCP_Server_Info *server;
	struct cifs_tcon *tcon;
	struct tcon_link *tlink;
	struct cifsFileInfo *cfile = NULL;
	void *page;
	const char *full_path;
	bool posix_open_ok = false;
	struct cifs_fid fid = {};
	struct cifs_pending_open open;
	struct cifs_open_info_data data = {};

	xid = get_xid();

	cifs_sb = CIFS_SB(inode->i_sb);
	if (unlikely(cifs_forced_shutdown(cifs_sb))) {
		free_xid(xid);
		return -EIO;
	}

	tlink = cifs_sb_tlink(cifs_sb);
	if (IS_ERR(tlink)) {
		free_xid(xid);
		return PTR_ERR(tlink);
	}
	tcon = tlink_tcon(tlink);
	server = tcon->ses->server;

	page = alloc_dentry_path();
	full_path = build_path_from_dentry(file_dentry(file), page);
	if (IS_ERR(full_path)) {
		rc = PTR_ERR(full_path);
		goto out;
	}

	cifs_dbg(FYI, "inode = 0x%p file flags are 0x%x for %s\n",
		 inode, file->f_flags, full_path);

	/* switch to the O_DIRECT file_operations variant in strict cache mode */
	if (file->f_flags & O_DIRECT &&
	    cifs_sb->mnt_cifs_flags & CIFS_MOUNT_STRICT_IO) {
		if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_BRL)
			file->f_op = &cifs_file_direct_nobrl_ops;
		else
			file->f_op = &cifs_file_direct_ops;
	}

	/* Get the cached handle as SMB2 close is deferred */
	rc = cifs_get_readable_path(tcon, full_path, &cfile);
	if (rc == 0) {
		if (file->f_flags == cfile->f_flags) {
			/* reuse the deferred-close handle instead of reopening */
			file->private_data = cfile;
			spin_lock(&CIFS_I(inode)->deferred_lock);
			cifs_del_deferred_close(cfile);
			spin_unlock(&CIFS_I(inode)->deferred_lock);
			goto use_cache;
		} else {
			_cifsFileInfo_put(cfile, true, false);
		}
	}

	if (server->oplocks)
		oplock = REQ_OPLOCK;
	else
		oplock = 0;

#ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
	if (!tcon->broken_posix_open && tcon->unix_ext &&
	    cap_unix(tcon->ses) && (CIFS_UNIX_POSIX_PATH_OPS_CAP &
				le64_to_cpu(tcon->fsUnixInfo.Capability))) {
		/* can not refresh inode info since size could be stale */
		rc = cifs_posix_open(full_path, &inode, inode->i_sb,
				cifs_sb->ctx->file_mode /* ignored */,
				file->f_flags, &oplock, &fid.netfid, xid);
		if (rc == 0) {
			cifs_dbg(FYI, "posix open succeeded\n");
			posix_open_ok = true;
		} else if ((rc == -EINVAL) || (rc == -EOPNOTSUPP)) {
			/* server advertises POSIX ops but can't do them -
			   remember that and stop trying posix opens */
			if (tcon->ses->serverNOS)
				cifs_dbg(VFS, "server %s of type %s returned unexpected error on SMB posix open, disabling posix open support. Check if server update available.\n",
					 tcon->ses->ip_addr,
					 tcon->ses->serverNOS);
			tcon->broken_posix_open = true;
		} else if ((rc != -EIO) && (rc != -EREMOTE) &&
			 (rc != -EOPNOTSUPP)) /* path not found or net err */
			goto out;
		/*
		 * Else fallthrough to retry open the old way on network i/o
		 * or DFS errors.
 */
	}
#endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */

	if (server->ops->get_lease_key)
		server->ops->get_lease_key(inode, &fid);

	/* record the open so a lease break arriving now is not missed */
	cifs_add_pending_open(&fid, tlink, &open);

	if (!posix_open_ok) {
		if (server->ops->get_lease_key)
			server->ops->get_lease_key(inode, &fid);

		rc = cifs_nt_open(full_path, inode, cifs_sb, tcon, file->f_flags, &oplock, &fid,
				  xid, &data);
		if (rc) {
			cifs_del_pending_open(&open);
			goto out;
		}
	}

	cfile = cifs_new_fileinfo(&fid, file, tlink, oplock, data.symlink_target);
	if (cfile == NULL) {
		/* could not attach private data - undo the server open */
		if (server->ops->close)
			server->ops->close(xid, tcon, &fid);
		cifs_del_pending_open(&open);
		rc = -ENOMEM;
		goto out;
	}

#ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
	if ((oplock & CIFS_CREATE_ACTION) && !posix_open_ok && tcon->unix_ext) {
		/*
		 * Time to set mode which we can not set earlier due to
		 * problems creating new read-only files.
		 */
		struct cifs_unix_set_info_args args = {
			.mode	= inode->i_mode,
			.uid	= INVALID_UID, /* no change */
			.gid	= INVALID_GID, /* no change */
			.ctime	= NO_CHANGE_64,
			.atime	= NO_CHANGE_64,
			.mtime	= NO_CHANGE_64,
			.device	= 0,
		};
		CIFSSMBUnixSetFileInfo(xid, tcon, &args, fid.netfid,
				       cfile->pid);
	}
#endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */

use_cache:
	fscache_use_cookie(cifs_inode_cookie(file_inode(file)),
			   file->f_mode & FMODE_WRITE);
	/* for O_DIRECT opens with write access, drop any cached data */
	if (!(file->f_flags & O_DIRECT))
		goto out;
	if ((file->f_flags & (O_ACCMODE | O_APPEND)) == O_RDONLY)
		goto out;
	cifs_invalidate_cache(file_inode(file), FSCACHE_INVAL_DIO_WRITE);

out:
	free_dentry_path(page);
	free_xid(xid);
	cifs_put_tlink(tlink);
	cifs_free_open_info(&data);
	return rc;
}

#ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
static int cifs_push_posix_locks(struct cifsFileInfo *cfile);
#endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */

/*
 * Try to reacquire byte range locks that were released when session
 * to server was lost.
 */
static int
cifs_relock_file(struct cifsFileInfo *cfile)
{
	struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
	int rc = 0;
#ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
	struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
#endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */

	down_read_nested(&cinode->lock_sem, SINGLE_DEPTH_NESTING);
	if (cinode->can_cache_brlcks) {
		/* can cache locks - no need to relock */
		up_read(&cinode->lock_sem);
		return rc;
	}

#ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
	/* prefer POSIX byte-range locks when the server supports them */
	if (cap_unix(tcon->ses) &&
	    (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
	    ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
		rc = cifs_push_posix_locks(cfile);
	else
#endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
		rc = tcon->ses->server->ops->push_mand_locks(cfile);

	up_read(&cinode->lock_sem);
	return rc;
}

/*
 * Reopen @cfile after its server handle was invalidated (e.g. following
 * a reconnect); optionally flush dirty data and refresh inode metadata.
 */
static int
cifs_reopen_file(struct cifsFileInfo *cfile, bool can_flush)
{
	int rc = -EACCES;
	unsigned int xid;
	__u32 oplock;
	struct cifs_sb_info *cifs_sb;
	struct cifs_tcon *tcon;
	struct TCP_Server_Info *server;
	struct cifsInodeInfo *cinode;
	struct inode *inode;
	void *page;
	const char *full_path;
	int desired_access;
	int disposition = FILE_OPEN;
	int create_options = CREATE_NOT_DIR;
	struct cifs_open_parms oparms;
	int rdwr_for_fscache = 0;

	xid = get_xid();
	mutex_lock(&cfile->fh_mutex);
	if (!cfile->invalidHandle) {
		/* someone else already reopened it - nothing to do */
		mutex_unlock(&cfile->fh_mutex);
		free_xid(xid);
		return 0;
	}

	inode = d_inode(cfile->dentry);
	cifs_sb = CIFS_SB(inode->i_sb);
	tcon = tlink_tcon(cfile->tlink);
	server = tcon->ses->server;

	/*
	 * Can not grab rename sem here because various ops,
including those
 * that already have the rename sem can end up causing writepage to get
 * called and if the server was down that means we end up here, and we
 * can never tell if the caller already has the rename_sem.
 */
	page = alloc_dentry_path();
	full_path = build_path_from_dentry(cfile->dentry, page);
	if (IS_ERR(full_path)) {
		mutex_unlock(&cfile->fh_mutex);
		free_dentry_path(page);
		free_xid(xid);
		return PTR_ERR(full_path);
	}

	cifs_dbg(FYI, "inode = 0x%p file flags 0x%x for %s\n",
		 inode, cfile->f_flags, full_path);

	if (tcon->ses->server->oplocks)
		oplock = REQ_OPLOCK;
	else
		oplock = 0;

#ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
	if (tcon->unix_ext && cap_unix(tcon->ses) &&
	    (CIFS_UNIX_POSIX_PATH_OPS_CAP &
	     le64_to_cpu(tcon->fsUnixInfo.Capability))) {
		/*
		 * O_CREAT, O_EXCL and O_TRUNC already had their effect on the
		 * original open. Must mask them off for a reopen.
		 */
		unsigned int oflags = cfile->f_flags &
			~(O_CREAT | O_EXCL | O_TRUNC);

		rc = cifs_posix_open(full_path, NULL, inode->i_sb,
				     cifs_sb->ctx->file_mode /* ignored */,
				     oflags, &oplock, &cfile->fid.netfid, xid);
		if (rc == 0) {
			cifs_dbg(FYI, "posix reopen succeeded\n");
			oparms.reconnect = true;
			goto reopen_success;
		}
		/*
		 * fallthrough to retry open the old way on errors, especially
		 * in the reconnect path it is important to retry hard
		 */
	}
#endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */

	/* If we're caching, we need to be able to fill in around partial writes.
 */
	if (cifs_fscache_enabled(inode) && (cfile->f_flags & O_ACCMODE) == O_WRONLY)
		rdwr_for_fscache = 1;

	desired_access = cifs_convert_flags(cfile->f_flags, rdwr_for_fscache);

	/* O_SYNC also has bit for O_DSYNC so following check picks up either */
	if (cfile->f_flags & O_SYNC)
		create_options |= CREATE_WRITE_THROUGH;

	if (cfile->f_flags & O_DIRECT)
		create_options |= CREATE_NO_BUFFER;

	if (server->ops->get_lease_key)
		server->ops->get_lease_key(inode, &cfile->fid);

retry_open:
	oparms = (struct cifs_open_parms) {
		.tcon = tcon,
		.cifs_sb = cifs_sb,
		.desired_access = desired_access,
		.create_options = cifs_create_options(cifs_sb, create_options),
		.disposition = disposition,
		.path = full_path,
		.fid = &cfile->fid,
		.reconnect = true,
	};

	/*
	 * Can not refresh inode by passing in file_info buf to be returned by
	 * ops->open and then calling get_inode_info with returned buf since
	 * file might have write behind data that needs to be flushed and server
	 * version of file size can be stale. If we knew for sure that inode was
	 * not dirty locally we could do this.
 */
	rc = server->ops->open(xid, &oparms, &oplock, NULL);
	if (rc == -ENOENT && oparms.reconnect == false) {
		/* durable handle timeout is expired - open the file again */
		rc = server->ops->open(xid, &oparms, &oplock, NULL);
		/* indicate that we need to relock the file */
		oparms.reconnect = true;
	}
	if (rc == -EACCES && rdwr_for_fscache == 1) {
		/* widened fscache open refused - retry with requested access */
		desired_access = cifs_convert_flags(cfile->f_flags, 0);
		rdwr_for_fscache = 2;
		goto retry_open;
	}

	if (rc) {
		mutex_unlock(&cfile->fh_mutex);
		cifs_dbg(FYI, "cifs_reopen returned 0x%x\n", rc);
		cifs_dbg(FYI, "oplock: %d\n", oplock);
		goto reopen_error_exit;
	}

	if (rdwr_for_fscache == 2)
		cifs_invalidate_cache(inode, FSCACHE_INVAL_DIO_WRITE);

#ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
reopen_success:
#endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
	cfile->invalidHandle = false;
	mutex_unlock(&cfile->fh_mutex);
	cinode = CIFS_I(inode);

	if (can_flush) {
		rc = filemap_write_and_wait(inode->i_mapping);
		if (!is_interrupt_error(rc))
			mapping_set_error(inode->i_mapping, rc);

		/* refresh inode metadata now that dirty data is on the server */
		if (tcon->posix_extensions) {
			rc = smb311_posix_get_inode_info(&inode, full_path,
							 NULL, inode->i_sb, xid);
		} else if (tcon->unix_ext) {
			rc = cifs_get_inode_info_unix(&inode, full_path,
						      inode->i_sb, xid);
		} else {
			rc = cifs_get_inode_info(&inode, full_path, NULL,
						 inode->i_sb, xid, NULL);
		}
	}
	/*
	 * Else we are writing out data to server already and could deadlock if
	 * we tried to flush data, and since we do not know if we have data that
	 * would invalidate the current end of file on the server we can not go
	 * to the server to get the new inode info.
	 */

	/*
	 * If the server returned a read oplock and we have mandatory brlocks,
	 * set oplock level to None.
1126 */ 1127 if (server->ops->is_read_op(oplock) && cifs_has_mand_locks(cinode)) { 1128 cifs_dbg(FYI, "Reset oplock val from read to None due to mand locks\n"); 1129 oplock = 0; 1130 } 1131 1132 server->ops->set_fid(cfile, &cfile->fid, oplock); 1133 if (oparms.reconnect) 1134 cifs_relock_file(cfile); 1135 1136 reopen_error_exit: 1137 free_dentry_path(page); 1138 free_xid(xid); 1139 return rc; 1140 } 1141 1142 void smb2_deferred_work_close(struct work_struct *work) 1143 { 1144 struct cifsFileInfo *cfile = container_of(work, 1145 struct cifsFileInfo, deferred.work); 1146 1147 spin_lock(&CIFS_I(d_inode(cfile->dentry))->deferred_lock); 1148 cifs_del_deferred_close(cfile); 1149 cfile->deferred_close_scheduled = false; 1150 spin_unlock(&CIFS_I(d_inode(cfile->dentry))->deferred_lock); 1151 _cifsFileInfo_put(cfile, true, false); 1152 } 1153 1154 int cifs_close(struct inode *inode, struct file *file) 1155 { 1156 struct cifsFileInfo *cfile; 1157 struct cifsInodeInfo *cinode = CIFS_I(inode); 1158 struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb); 1159 struct cifs_deferred_close *dclose; 1160 1161 cifs_fscache_unuse_inode_cookie(inode, file->f_mode & FMODE_WRITE); 1162 1163 if (file->private_data != NULL) { 1164 cfile = file->private_data; 1165 file->private_data = NULL; 1166 dclose = kmalloc(sizeof(struct cifs_deferred_close), GFP_KERNEL); 1167 if ((cifs_sb->ctx->closetimeo && cinode->oplock == CIFS_CACHE_RHW_FLG) 1168 && cinode->lease_granted && 1169 !test_bit(CIFS_INO_CLOSE_ON_LOCK, &cinode->flags) && 1170 dclose) { 1171 if (test_and_clear_bit(CIFS_INO_MODIFIED_ATTR, &cinode->flags)) { 1172 inode_set_mtime_to_ts(inode, 1173 inode_set_ctime_current(inode)); 1174 } 1175 spin_lock(&cinode->deferred_lock); 1176 cifs_add_deferred_close(cfile, dclose); 1177 if (cfile->deferred_close_scheduled && 1178 delayed_work_pending(&cfile->deferred)) { 1179 /* 1180 * If there is no pending work, mod_delayed_work queues new work. 1181 * So, Increase the ref count to avoid use-after-free. 
1182 */ 1183 if (!mod_delayed_work(deferredclose_wq, 1184 &cfile->deferred, cifs_sb->ctx->closetimeo)) 1185 cifsFileInfo_get(cfile); 1186 } else { 1187 /* Deferred close for files */ 1188 queue_delayed_work(deferredclose_wq, 1189 &cfile->deferred, cifs_sb->ctx->closetimeo); 1190 cfile->deferred_close_scheduled = true; 1191 spin_unlock(&cinode->deferred_lock); 1192 return 0; 1193 } 1194 spin_unlock(&cinode->deferred_lock); 1195 _cifsFileInfo_put(cfile, true, false); 1196 } else { 1197 _cifsFileInfo_put(cfile, true, false); 1198 kfree(dclose); 1199 } 1200 } 1201 1202 /* return code from the ->release op is always ignored */ 1203 return 0; 1204 } 1205 1206 void 1207 cifs_reopen_persistent_handles(struct cifs_tcon *tcon) 1208 { 1209 struct cifsFileInfo *open_file, *tmp; 1210 struct list_head tmp_list; 1211 1212 if (!tcon->use_persistent || !tcon->need_reopen_files) 1213 return; 1214 1215 tcon->need_reopen_files = false; 1216 1217 cifs_dbg(FYI, "Reopen persistent handles\n"); 1218 INIT_LIST_HEAD(&tmp_list); 1219 1220 /* list all files open on tree connection, reopen resilient handles */ 1221 spin_lock(&tcon->open_file_lock); 1222 list_for_each_entry(open_file, &tcon->openFileList, tlist) { 1223 if (!open_file->invalidHandle) 1224 continue; 1225 cifsFileInfo_get(open_file); 1226 list_add_tail(&open_file->rlist, &tmp_list); 1227 } 1228 spin_unlock(&tcon->open_file_lock); 1229 1230 list_for_each_entry_safe(open_file, tmp, &tmp_list, rlist) { 1231 if (cifs_reopen_file(open_file, false /* do not flush */)) 1232 tcon->need_reopen_files = true; 1233 list_del_init(&open_file->rlist); 1234 cifsFileInfo_put(open_file); 1235 } 1236 } 1237 1238 int cifs_closedir(struct inode *inode, struct file *file) 1239 { 1240 int rc = 0; 1241 unsigned int xid; 1242 struct cifsFileInfo *cfile = file->private_data; 1243 struct cifs_tcon *tcon; 1244 struct TCP_Server_Info *server; 1245 char *buf; 1246 1247 cifs_dbg(FYI, "Closedir inode = 0x%p\n", inode); 1248 1249 if (cfile == NULL) 1250 return 
rc; 1251 1252 xid = get_xid(); 1253 tcon = tlink_tcon(cfile->tlink); 1254 server = tcon->ses->server; 1255 1256 cifs_dbg(FYI, "Freeing private data in close dir\n"); 1257 spin_lock(&cfile->file_info_lock); 1258 if (server->ops->dir_needs_close(cfile)) { 1259 cfile->invalidHandle = true; 1260 spin_unlock(&cfile->file_info_lock); 1261 if (server->ops->close_dir) 1262 rc = server->ops->close_dir(xid, tcon, &cfile->fid); 1263 else 1264 rc = -ENOSYS; 1265 cifs_dbg(FYI, "Closing uncompleted readdir with rc %d\n", rc); 1266 /* not much we can do if it fails anyway, ignore rc */ 1267 rc = 0; 1268 } else 1269 spin_unlock(&cfile->file_info_lock); 1270 1271 buf = cfile->srch_inf.ntwrk_buf_start; 1272 if (buf) { 1273 cifs_dbg(FYI, "closedir free smb buf in srch struct\n"); 1274 cfile->srch_inf.ntwrk_buf_start = NULL; 1275 if (cfile->srch_inf.smallBuf) 1276 cifs_small_buf_release(buf); 1277 else 1278 cifs_buf_release(buf); 1279 } 1280 1281 cifs_put_tlink(cfile->tlink); 1282 kfree(file->private_data); 1283 file->private_data = NULL; 1284 /* BB can we lock the filestruct while this is going on? 
*/
	free_xid(xid);
	return rc;
}

/*
 * Allocate and initialise a byte-range lock descriptor.  The owner pid is
 * taken from current->tgid and the wait list / wait queue are initialised
 * empty.  Returns NULL on allocation failure; the caller owns (and must
 * free) the returned structure.
 */
static struct cifsLockInfo *
cifs_lock_init(__u64 offset, __u64 length, __u8 type, __u16 flags)
{
	struct cifsLockInfo *lock =
		kmalloc(sizeof(struct cifsLockInfo), GFP_KERNEL);
	if (!lock)
		return lock;
	lock->offset = offset;
	lock->length = length;
	lock->type = type;
	lock->pid = current->tgid;
	lock->flags = flags;
	INIT_LIST_HEAD(&lock->blist);
	init_waitqueue_head(&lock->block_q);
	return lock;
}

/*
 * Unlink and wake every waiter blocked on @lock's blist so that tasks
 * sleeping in cifs_lock_add_if() can re-evaluate their conflict.
 */
void
cifs_del_lock_waiters(struct cifsLockInfo *lock)
{
	struct cifsLockInfo *li, *tmp;
	list_for_each_entry_safe(li, tmp, &lock->blist, blist) {
		list_del_init(&li->blist);
		wake_up(&li->block_q);
	}
}

#define CIFS_LOCK_OP	0
#define CIFS_READ_OP	1
#define CIFS_WRITE_OP	2

/* @rw_check : 0 - no op, 1 - read, 2 - write */
static bool
cifs_find_fid_lock_conflict(struct cifs_fid_locks *fdlocks, __u64 offset,
			    __u64 length, __u8 type, __u16 flags,
			    struct cifsFileInfo *cfile,
			    struct cifsLockInfo **conf_lock, int rw_check)
{
	struct cifsLockInfo *li;
	struct cifsFileInfo *cur_cfile = fdlocks->cfile;
	struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;

	list_for_each_entry(li, &fdlocks->locks, llist) {
		/* ranges that do not overlap can never conflict */
		if (offset + length <= li->offset ||
		    offset >= li->offset + li->length)
			continue;
		if (rw_check != CIFS_LOCK_OP && current->tgid == li->pid &&
		    server->ops->compare_fids(cfile, cur_cfile)) {
			/* shared lock prevents write op through the same fid */
			if (!(li->type & server->vals->shared_lock_type) ||
			    rw_check != CIFS_WRITE_OP)
				continue;
		}
		if ((type & server->vals->shared_lock_type) &&
		    ((server->ops->compare_fids(cfile, cur_cfile) &&
		     current->tgid == li->pid) || type == li->type))
			continue;
		if (rw_check == CIFS_LOCK_OP &&
		    (flags & FL_OFDLCK)
&& (li->flags & FL_OFDLCK) && 1348 server->ops->compare_fids(cfile, cur_cfile)) 1349 continue; 1350 if (conf_lock) 1351 *conf_lock = li; 1352 return true; 1353 } 1354 return false; 1355 } 1356 1357 bool 1358 cifs_find_lock_conflict(struct cifsFileInfo *cfile, __u64 offset, __u64 length, 1359 __u8 type, __u16 flags, 1360 struct cifsLockInfo **conf_lock, int rw_check) 1361 { 1362 bool rc = false; 1363 struct cifs_fid_locks *cur; 1364 struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry)); 1365 1366 list_for_each_entry(cur, &cinode->llist, llist) { 1367 rc = cifs_find_fid_lock_conflict(cur, offset, length, type, 1368 flags, cfile, conf_lock, 1369 rw_check); 1370 if (rc) 1371 break; 1372 } 1373 1374 return rc; 1375 } 1376 1377 /* 1378 * Check if there is another lock that prevents us to set the lock (mandatory 1379 * style). If such a lock exists, update the flock structure with its 1380 * properties. Otherwise, set the flock type to F_UNLCK if we can cache brlocks 1381 * or leave it the same if we can't. Returns 0 if we don't need to request to 1382 * the server or 1 otherwise. 
1383 */ 1384 static int 1385 cifs_lock_test(struct cifsFileInfo *cfile, __u64 offset, __u64 length, 1386 __u8 type, struct file_lock *flock) 1387 { 1388 int rc = 0; 1389 struct cifsLockInfo *conf_lock; 1390 struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry)); 1391 struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server; 1392 bool exist; 1393 1394 down_read(&cinode->lock_sem); 1395 1396 exist = cifs_find_lock_conflict(cfile, offset, length, type, 1397 flock->fl_flags, &conf_lock, 1398 CIFS_LOCK_OP); 1399 if (exist) { 1400 flock->fl_start = conf_lock->offset; 1401 flock->fl_end = conf_lock->offset + conf_lock->length - 1; 1402 flock->fl_pid = conf_lock->pid; 1403 if (conf_lock->type & server->vals->shared_lock_type) 1404 flock->fl_type = F_RDLCK; 1405 else 1406 flock->fl_type = F_WRLCK; 1407 } else if (!cinode->can_cache_brlcks) 1408 rc = 1; 1409 else 1410 flock->fl_type = F_UNLCK; 1411 1412 up_read(&cinode->lock_sem); 1413 return rc; 1414 } 1415 1416 static void 1417 cifs_lock_add(struct cifsFileInfo *cfile, struct cifsLockInfo *lock) 1418 { 1419 struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry)); 1420 cifs_down_write(&cinode->lock_sem); 1421 list_add_tail(&lock->llist, &cfile->llist->locks); 1422 up_write(&cinode->lock_sem); 1423 } 1424 1425 /* 1426 * Set the byte-range lock (mandatory style). Returns: 1427 * 1) 0, if we set the lock and don't need to request to the server; 1428 * 2) 1, if no locks prevent us but we need to request to the server; 1429 * 3) -EACCES, if there is a lock that prevents us and wait is false. 
1430 */ 1431 static int 1432 cifs_lock_add_if(struct cifsFileInfo *cfile, struct cifsLockInfo *lock, 1433 bool wait) 1434 { 1435 struct cifsLockInfo *conf_lock; 1436 struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry)); 1437 bool exist; 1438 int rc = 0; 1439 1440 try_again: 1441 exist = false; 1442 cifs_down_write(&cinode->lock_sem); 1443 1444 exist = cifs_find_lock_conflict(cfile, lock->offset, lock->length, 1445 lock->type, lock->flags, &conf_lock, 1446 CIFS_LOCK_OP); 1447 if (!exist && cinode->can_cache_brlcks) { 1448 list_add_tail(&lock->llist, &cfile->llist->locks); 1449 up_write(&cinode->lock_sem); 1450 return rc; 1451 } 1452 1453 if (!exist) 1454 rc = 1; 1455 else if (!wait) 1456 rc = -EACCES; 1457 else { 1458 list_add_tail(&lock->blist, &conf_lock->blist); 1459 up_write(&cinode->lock_sem); 1460 rc = wait_event_interruptible(lock->block_q, 1461 (lock->blist.prev == &lock->blist) && 1462 (lock->blist.next == &lock->blist)); 1463 if (!rc) 1464 goto try_again; 1465 cifs_down_write(&cinode->lock_sem); 1466 list_del_init(&lock->blist); 1467 } 1468 1469 up_write(&cinode->lock_sem); 1470 return rc; 1471 } 1472 1473 #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY 1474 /* 1475 * Check if there is another lock that prevents us to set the lock (posix 1476 * style). If such a lock exists, update the flock structure with its 1477 * properties. Otherwise, set the flock type to F_UNLCK if we can cache brlocks 1478 * or leave it the same if we can't. Returns 0 if we don't need to request to 1479 * the server or 1 otherwise. 
1480 */ 1481 static int 1482 cifs_posix_lock_test(struct file *file, struct file_lock *flock) 1483 { 1484 int rc = 0; 1485 struct cifsInodeInfo *cinode = CIFS_I(file_inode(file)); 1486 unsigned char saved_type = flock->fl_type; 1487 1488 if ((flock->fl_flags & FL_POSIX) == 0) 1489 return 1; 1490 1491 down_read(&cinode->lock_sem); 1492 posix_test_lock(file, flock); 1493 1494 if (flock->fl_type == F_UNLCK && !cinode->can_cache_brlcks) { 1495 flock->fl_type = saved_type; 1496 rc = 1; 1497 } 1498 1499 up_read(&cinode->lock_sem); 1500 return rc; 1501 } 1502 1503 /* 1504 * Set the byte-range lock (posix style). Returns: 1505 * 1) <0, if the error occurs while setting the lock; 1506 * 2) 0, if we set the lock and don't need to request to the server; 1507 * 3) FILE_LOCK_DEFERRED, if we will wait for some other file_lock; 1508 * 4) FILE_LOCK_DEFERRED + 1, if we need to request to the server. 1509 */ 1510 static int 1511 cifs_posix_lock_set(struct file *file, struct file_lock *flock) 1512 { 1513 struct cifsInodeInfo *cinode = CIFS_I(file_inode(file)); 1514 int rc = FILE_LOCK_DEFERRED + 1; 1515 1516 if ((flock->fl_flags & FL_POSIX) == 0) 1517 return rc; 1518 1519 cifs_down_write(&cinode->lock_sem); 1520 if (!cinode->can_cache_brlcks) { 1521 up_write(&cinode->lock_sem); 1522 return rc; 1523 } 1524 1525 rc = posix_lock_file(file, flock, NULL); 1526 up_write(&cinode->lock_sem); 1527 return rc; 1528 } 1529 1530 int 1531 cifs_push_mandatory_locks(struct cifsFileInfo *cfile) 1532 { 1533 unsigned int xid; 1534 int rc = 0, stored_rc; 1535 struct cifsLockInfo *li, *tmp; 1536 struct cifs_tcon *tcon; 1537 unsigned int num, max_num, max_buf; 1538 LOCKING_ANDX_RANGE *buf, *cur; 1539 static const int types[] = { 1540 LOCKING_ANDX_LARGE_FILES, 1541 LOCKING_ANDX_SHARED_LOCK | LOCKING_ANDX_LARGE_FILES 1542 }; 1543 int i; 1544 1545 xid = get_xid(); 1546 tcon = tlink_tcon(cfile->tlink); 1547 1548 /* 1549 * Accessing maxBuf is racy with cifs_reconnect - need to store value 1550 * and check it 
before using. 1551 */ 1552 max_buf = tcon->ses->server->maxBuf; 1553 if (max_buf < (sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE))) { 1554 free_xid(xid); 1555 return -EINVAL; 1556 } 1557 1558 BUILD_BUG_ON(sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE) > 1559 PAGE_SIZE); 1560 max_buf = min_t(unsigned int, max_buf - sizeof(struct smb_hdr), 1561 PAGE_SIZE); 1562 max_num = (max_buf - sizeof(struct smb_hdr)) / 1563 sizeof(LOCKING_ANDX_RANGE); 1564 buf = kcalloc(max_num, sizeof(LOCKING_ANDX_RANGE), GFP_KERNEL); 1565 if (!buf) { 1566 free_xid(xid); 1567 return -ENOMEM; 1568 } 1569 1570 for (i = 0; i < 2; i++) { 1571 cur = buf; 1572 num = 0; 1573 list_for_each_entry_safe(li, tmp, &cfile->llist->locks, llist) { 1574 if (li->type != types[i]) 1575 continue; 1576 cur->Pid = cpu_to_le16(li->pid); 1577 cur->LengthLow = cpu_to_le32((u32)li->length); 1578 cur->LengthHigh = cpu_to_le32((u32)(li->length>>32)); 1579 cur->OffsetLow = cpu_to_le32((u32)li->offset); 1580 cur->OffsetHigh = cpu_to_le32((u32)(li->offset>>32)); 1581 if (++num == max_num) { 1582 stored_rc = cifs_lockv(xid, tcon, 1583 cfile->fid.netfid, 1584 (__u8)li->type, 0, num, 1585 buf); 1586 if (stored_rc) 1587 rc = stored_rc; 1588 cur = buf; 1589 num = 0; 1590 } else 1591 cur++; 1592 } 1593 1594 if (num) { 1595 stored_rc = cifs_lockv(xid, tcon, cfile->fid.netfid, 1596 (__u8)types[i], 0, num, buf); 1597 if (stored_rc) 1598 rc = stored_rc; 1599 } 1600 } 1601 1602 kfree(buf); 1603 free_xid(xid); 1604 return rc; 1605 } 1606 1607 static __u32 1608 hash_lockowner(fl_owner_t owner) 1609 { 1610 return cifs_lock_secret ^ hash32_ptr((const void *)owner); 1611 } 1612 #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */ 1613 1614 struct lock_to_push { 1615 struct list_head llist; 1616 __u64 offset; 1617 __u64 length; 1618 __u32 pid; 1619 __u16 netfid; 1620 __u8 type; 1621 }; 1622 1623 #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY 1624 static int 1625 cifs_push_posix_locks(struct cifsFileInfo *cfile) 1626 { 1627 struct inode *inode 
= d_inode(cfile->dentry); 1628 struct cifs_tcon *tcon = tlink_tcon(cfile->tlink); 1629 struct file_lock *flock; 1630 struct file_lock_context *flctx = locks_inode_context(inode); 1631 unsigned int count = 0, i; 1632 int rc = 0, xid, type; 1633 struct list_head locks_to_send, *el; 1634 struct lock_to_push *lck, *tmp; 1635 __u64 length; 1636 1637 xid = get_xid(); 1638 1639 if (!flctx) 1640 goto out; 1641 1642 spin_lock(&flctx->flc_lock); 1643 list_for_each(el, &flctx->flc_posix) { 1644 count++; 1645 } 1646 spin_unlock(&flctx->flc_lock); 1647 1648 INIT_LIST_HEAD(&locks_to_send); 1649 1650 /* 1651 * Allocating count locks is enough because no FL_POSIX locks can be 1652 * added to the list while we are holding cinode->lock_sem that 1653 * protects locking operations of this inode. 1654 */ 1655 for (i = 0; i < count; i++) { 1656 lck = kmalloc(sizeof(struct lock_to_push), GFP_KERNEL); 1657 if (!lck) { 1658 rc = -ENOMEM; 1659 goto err_out; 1660 } 1661 list_add_tail(&lck->llist, &locks_to_send); 1662 } 1663 1664 el = locks_to_send.next; 1665 spin_lock(&flctx->flc_lock); 1666 list_for_each_entry(flock, &flctx->flc_posix, fl_list) { 1667 if (el == &locks_to_send) { 1668 /* 1669 * The list ended. We don't have enough allocated 1670 * structures - something is really wrong. 
1671 */ 1672 cifs_dbg(VFS, "Can't push all brlocks!\n"); 1673 break; 1674 } 1675 length = cifs_flock_len(flock); 1676 if (flock->fl_type == F_RDLCK || flock->fl_type == F_SHLCK) 1677 type = CIFS_RDLCK; 1678 else 1679 type = CIFS_WRLCK; 1680 lck = list_entry(el, struct lock_to_push, llist); 1681 lck->pid = hash_lockowner(flock->fl_owner); 1682 lck->netfid = cfile->fid.netfid; 1683 lck->length = length; 1684 lck->type = type; 1685 lck->offset = flock->fl_start; 1686 } 1687 spin_unlock(&flctx->flc_lock); 1688 1689 list_for_each_entry_safe(lck, tmp, &locks_to_send, llist) { 1690 int stored_rc; 1691 1692 stored_rc = CIFSSMBPosixLock(xid, tcon, lck->netfid, lck->pid, 1693 lck->offset, lck->length, NULL, 1694 lck->type, 0); 1695 if (stored_rc) 1696 rc = stored_rc; 1697 list_del(&lck->llist); 1698 kfree(lck); 1699 } 1700 1701 out: 1702 free_xid(xid); 1703 return rc; 1704 err_out: 1705 list_for_each_entry_safe(lck, tmp, &locks_to_send, llist) { 1706 list_del(&lck->llist); 1707 kfree(lck); 1708 } 1709 goto out; 1710 } 1711 #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */ 1712 1713 static int 1714 cifs_push_locks(struct cifsFileInfo *cfile) 1715 { 1716 struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry)); 1717 struct cifs_tcon *tcon = tlink_tcon(cfile->tlink); 1718 int rc = 0; 1719 #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY 1720 struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb); 1721 #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */ 1722 1723 /* we are going to update can_cache_brlcks here - need a write access */ 1724 cifs_down_write(&cinode->lock_sem); 1725 if (!cinode->can_cache_brlcks) { 1726 up_write(&cinode->lock_sem); 1727 return rc; 1728 } 1729 1730 #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY 1731 if (cap_unix(tcon->ses) && 1732 (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) && 1733 ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0)) 1734 rc = cifs_push_posix_locks(cfile); 1735 else 1736 #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */ 
1737 rc = tcon->ses->server->ops->push_mand_locks(cfile); 1738 1739 cinode->can_cache_brlcks = false; 1740 up_write(&cinode->lock_sem); 1741 return rc; 1742 } 1743 1744 static void 1745 cifs_read_flock(struct file_lock *flock, __u32 *type, int *lock, int *unlock, 1746 bool *wait_flag, struct TCP_Server_Info *server) 1747 { 1748 if (flock->fl_flags & FL_POSIX) 1749 cifs_dbg(FYI, "Posix\n"); 1750 if (flock->fl_flags & FL_FLOCK) 1751 cifs_dbg(FYI, "Flock\n"); 1752 if (flock->fl_flags & FL_SLEEP) { 1753 cifs_dbg(FYI, "Blocking lock\n"); 1754 *wait_flag = true; 1755 } 1756 if (flock->fl_flags & FL_ACCESS) 1757 cifs_dbg(FYI, "Process suspended by mandatory locking - not implemented yet\n"); 1758 if (flock->fl_flags & FL_LEASE) 1759 cifs_dbg(FYI, "Lease on file - not implemented yet\n"); 1760 if (flock->fl_flags & 1761 (~(FL_POSIX | FL_FLOCK | FL_SLEEP | 1762 FL_ACCESS | FL_LEASE | FL_CLOSE | FL_OFDLCK))) 1763 cifs_dbg(FYI, "Unknown lock flags 0x%x\n", flock->fl_flags); 1764 1765 *type = server->vals->large_lock_type; 1766 if (flock->fl_type == F_WRLCK) { 1767 cifs_dbg(FYI, "F_WRLCK\n"); 1768 *type |= server->vals->exclusive_lock_type; 1769 *lock = 1; 1770 } else if (flock->fl_type == F_UNLCK) { 1771 cifs_dbg(FYI, "F_UNLCK\n"); 1772 *type |= server->vals->unlock_lock_type; 1773 *unlock = 1; 1774 /* Check if unlock includes more than one lock range */ 1775 } else if (flock->fl_type == F_RDLCK) { 1776 cifs_dbg(FYI, "F_RDLCK\n"); 1777 *type |= server->vals->shared_lock_type; 1778 *lock = 1; 1779 } else if (flock->fl_type == F_EXLCK) { 1780 cifs_dbg(FYI, "F_EXLCK\n"); 1781 *type |= server->vals->exclusive_lock_type; 1782 *lock = 1; 1783 } else if (flock->fl_type == F_SHLCK) { 1784 cifs_dbg(FYI, "F_SHLCK\n"); 1785 *type |= server->vals->shared_lock_type; 1786 *lock = 1; 1787 } else 1788 cifs_dbg(FYI, "Unknown type of lock\n"); 1789 } 1790 1791 static int 1792 cifs_getlk(struct file *file, struct file_lock *flock, __u32 type, 1793 bool wait_flag, bool posix_lck, unsigned int 
xid) 1794 { 1795 int rc = 0; 1796 __u64 length = cifs_flock_len(flock); 1797 struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data; 1798 struct cifs_tcon *tcon = tlink_tcon(cfile->tlink); 1799 struct TCP_Server_Info *server = tcon->ses->server; 1800 #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY 1801 __u16 netfid = cfile->fid.netfid; 1802 1803 if (posix_lck) { 1804 int posix_lock_type; 1805 1806 rc = cifs_posix_lock_test(file, flock); 1807 if (!rc) 1808 return rc; 1809 1810 if (type & server->vals->shared_lock_type) 1811 posix_lock_type = CIFS_RDLCK; 1812 else 1813 posix_lock_type = CIFS_WRLCK; 1814 rc = CIFSSMBPosixLock(xid, tcon, netfid, 1815 hash_lockowner(flock->fl_owner), 1816 flock->fl_start, length, flock, 1817 posix_lock_type, wait_flag); 1818 return rc; 1819 } 1820 #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */ 1821 1822 rc = cifs_lock_test(cfile, flock->fl_start, length, type, flock); 1823 if (!rc) 1824 return rc; 1825 1826 /* BB we could chain these into one lock request BB */ 1827 rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length, type, 1828 1, 0, false); 1829 if (rc == 0) { 1830 rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length, 1831 type, 0, 1, false); 1832 flock->fl_type = F_UNLCK; 1833 if (rc != 0) 1834 cifs_dbg(VFS, "Error unlocking previously locked range %d during test of lock\n", 1835 rc); 1836 return 0; 1837 } 1838 1839 if (type & server->vals->shared_lock_type) { 1840 flock->fl_type = F_WRLCK; 1841 return 0; 1842 } 1843 1844 type &= ~server->vals->exclusive_lock_type; 1845 1846 rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length, 1847 type | server->vals->shared_lock_type, 1848 1, 0, false); 1849 if (rc == 0) { 1850 rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length, 1851 type | server->vals->shared_lock_type, 0, 1, false); 1852 flock->fl_type = F_RDLCK; 1853 if (rc != 0) 1854 cifs_dbg(VFS, "Error unlocking previously locked range %d during test of lock\n", 1855 rc); 1856 } else 
		flock->fl_type = F_WRLCK;

	return 0;
}

/* Splice every entry of @source onto the tail of @dest. */
void
cifs_move_llist(struct list_head *source, struct list_head *dest)
{
	struct list_head *li, *tmp;
	list_for_each_safe(li, tmp, source)
		list_move(li, dest);
}

/*
 * Free every cifsLockInfo on @llist, waking any tasks still blocked on
 * the locks before the entries are released.
 */
void
cifs_free_llist(struct list_head *llist)
{
	struct cifsLockInfo *li, *tmp;
	list_for_each_entry_safe(li, tmp, llist, llist) {
		cifs_del_lock_waiters(li);
		list_del(&li->llist);
		kfree(li);
	}
}

#ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
/*
 * Remove every cached byte-range lock of the current task that falls
 * entirely inside the range described by @flock, sending batched
 * LOCKING_ANDX unlock requests to the server where needed.
 */
int
cifs_unlock_range(struct cifsFileInfo *cfile, struct file_lock *flock,
		  unsigned int xid)
{
	int rc = 0, stored_rc;
	static const int types[] = {
		LOCKING_ANDX_LARGE_FILES,
		LOCKING_ANDX_SHARED_LOCK | LOCKING_ANDX_LARGE_FILES
	};
	unsigned int i;
	unsigned int max_num, num, max_buf;
	LOCKING_ANDX_RANGE *buf, *cur;
	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
	struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
	struct cifsLockInfo *li, *tmp;
	__u64 length = cifs_flock_len(flock);
	struct list_head tmp_llist;

	INIT_LIST_HEAD(&tmp_llist);

	/*
	 * Accessing maxBuf is racy with cifs_reconnect - need to store value
	 * and check it before using.
1905 */ 1906 max_buf = tcon->ses->server->maxBuf; 1907 if (max_buf < (sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE))) 1908 return -EINVAL; 1909 1910 BUILD_BUG_ON(sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE) > 1911 PAGE_SIZE); 1912 max_buf = min_t(unsigned int, max_buf - sizeof(struct smb_hdr), 1913 PAGE_SIZE); 1914 max_num = (max_buf - sizeof(struct smb_hdr)) / 1915 sizeof(LOCKING_ANDX_RANGE); 1916 buf = kcalloc(max_num, sizeof(LOCKING_ANDX_RANGE), GFP_KERNEL); 1917 if (!buf) 1918 return -ENOMEM; 1919 1920 cifs_down_write(&cinode->lock_sem); 1921 for (i = 0; i < 2; i++) { 1922 cur = buf; 1923 num = 0; 1924 list_for_each_entry_safe(li, tmp, &cfile->llist->locks, llist) { 1925 if (flock->fl_start > li->offset || 1926 (flock->fl_start + length) < 1927 (li->offset + li->length)) 1928 continue; 1929 if (current->tgid != li->pid) 1930 continue; 1931 if (types[i] != li->type) 1932 continue; 1933 if (cinode->can_cache_brlcks) { 1934 /* 1935 * We can cache brlock requests - simply remove 1936 * a lock from the file's list. 1937 */ 1938 list_del(&li->llist); 1939 cifs_del_lock_waiters(li); 1940 kfree(li); 1941 continue; 1942 } 1943 cur->Pid = cpu_to_le16(li->pid); 1944 cur->LengthLow = cpu_to_le32((u32)li->length); 1945 cur->LengthHigh = cpu_to_le32((u32)(li->length>>32)); 1946 cur->OffsetLow = cpu_to_le32((u32)li->offset); 1947 cur->OffsetHigh = cpu_to_le32((u32)(li->offset>>32)); 1948 /* 1949 * We need to save a lock here to let us add it again to 1950 * the file's list if the unlock range request fails on 1951 * the server. 1952 */ 1953 list_move(&li->llist, &tmp_llist); 1954 if (++num == max_num) { 1955 stored_rc = cifs_lockv(xid, tcon, 1956 cfile->fid.netfid, 1957 li->type, num, 0, buf); 1958 if (stored_rc) { 1959 /* 1960 * We failed on the unlock range 1961 * request - add all locks from the tmp 1962 * list to the head of the file's list. 
1963 */ 1964 cifs_move_llist(&tmp_llist, 1965 &cfile->llist->locks); 1966 rc = stored_rc; 1967 } else 1968 /* 1969 * The unlock range request succeed - 1970 * free the tmp list. 1971 */ 1972 cifs_free_llist(&tmp_llist); 1973 cur = buf; 1974 num = 0; 1975 } else 1976 cur++; 1977 } 1978 if (num) { 1979 stored_rc = cifs_lockv(xid, tcon, cfile->fid.netfid, 1980 types[i], num, 0, buf); 1981 if (stored_rc) { 1982 cifs_move_llist(&tmp_llist, 1983 &cfile->llist->locks); 1984 rc = stored_rc; 1985 } else 1986 cifs_free_llist(&tmp_llist); 1987 } 1988 } 1989 1990 up_write(&cinode->lock_sem); 1991 kfree(buf); 1992 return rc; 1993 } 1994 #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */ 1995 1996 static int 1997 cifs_setlk(struct file *file, struct file_lock *flock, __u32 type, 1998 bool wait_flag, bool posix_lck, int lock, int unlock, 1999 unsigned int xid) 2000 { 2001 int rc = 0; 2002 __u64 length = cifs_flock_len(flock); 2003 struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data; 2004 struct cifs_tcon *tcon = tlink_tcon(cfile->tlink); 2005 struct TCP_Server_Info *server = tcon->ses->server; 2006 struct inode *inode = d_inode(cfile->dentry); 2007 2008 #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY 2009 if (posix_lck) { 2010 int posix_lock_type; 2011 2012 rc = cifs_posix_lock_set(file, flock); 2013 if (rc <= FILE_LOCK_DEFERRED) 2014 return rc; 2015 2016 if (type & server->vals->shared_lock_type) 2017 posix_lock_type = CIFS_RDLCK; 2018 else 2019 posix_lock_type = CIFS_WRLCK; 2020 2021 if (unlock == 1) 2022 posix_lock_type = CIFS_UNLCK; 2023 2024 rc = CIFSSMBPosixLock(xid, tcon, cfile->fid.netfid, 2025 hash_lockowner(flock->fl_owner), 2026 flock->fl_start, length, 2027 NULL, posix_lock_type, wait_flag); 2028 goto out; 2029 } 2030 #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */ 2031 if (lock) { 2032 struct cifsLockInfo *lock; 2033 2034 lock = cifs_lock_init(flock->fl_start, length, type, 2035 flock->fl_flags); 2036 if (!lock) 2037 return -ENOMEM; 2038 2039 rc = 
cifs_lock_add_if(cfile, lock, wait_flag); 2040 if (rc < 0) { 2041 kfree(lock); 2042 return rc; 2043 } 2044 if (!rc) 2045 goto out; 2046 2047 /* 2048 * Windows 7 server can delay breaking lease from read to None 2049 * if we set a byte-range lock on a file - break it explicitly 2050 * before sending the lock to the server to be sure the next 2051 * read won't conflict with non-overlapted locks due to 2052 * pagereading. 2053 */ 2054 if (!CIFS_CACHE_WRITE(CIFS_I(inode)) && 2055 CIFS_CACHE_READ(CIFS_I(inode))) { 2056 cifs_zap_mapping(inode); 2057 cifs_dbg(FYI, "Set no oplock for inode=%p due to mand locks\n", 2058 inode); 2059 CIFS_I(inode)->oplock = 0; 2060 } 2061 2062 rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length, 2063 type, 1, 0, wait_flag); 2064 if (rc) { 2065 kfree(lock); 2066 return rc; 2067 } 2068 2069 cifs_lock_add(cfile, lock); 2070 } else if (unlock) 2071 rc = server->ops->mand_unlock_range(cfile, flock, xid); 2072 2073 out: 2074 if ((flock->fl_flags & FL_POSIX) || (flock->fl_flags & FL_FLOCK)) { 2075 /* 2076 * If this is a request to remove all locks because we 2077 * are closing the file, it doesn't matter if the 2078 * unlocking failed as both cifs.ko and the SMB server 2079 * remove the lock on file close 2080 */ 2081 if (rc) { 2082 cifs_dbg(VFS, "%s failed rc=%d\n", __func__, rc); 2083 if (!(flock->fl_flags & FL_CLOSE)) 2084 return rc; 2085 } 2086 rc = locks_lock_file_wait(file, flock); 2087 } 2088 return rc; 2089 } 2090 2091 int cifs_flock(struct file *file, int cmd, struct file_lock *fl) 2092 { 2093 int rc, xid; 2094 int lock = 0, unlock = 0; 2095 bool wait_flag = false; 2096 bool posix_lck = false; 2097 struct cifs_sb_info *cifs_sb; 2098 struct cifs_tcon *tcon; 2099 struct cifsFileInfo *cfile; 2100 __u32 type; 2101 2102 xid = get_xid(); 2103 2104 if (!(fl->fl_flags & FL_FLOCK)) { 2105 rc = -ENOLCK; 2106 free_xid(xid); 2107 return rc; 2108 } 2109 2110 cfile = (struct cifsFileInfo *)file->private_data; 2111 tcon = 
	tlink_tcon(cfile->tlink);

	cifs_read_flock(fl, &type, &lock, &unlock, &wait_flag,
			tcon->ses->server);
	cifs_sb = CIFS_FILE_SB(file);

	if (cap_unix(tcon->ses) &&
	    (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
	    ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
		posix_lck = true;

	if (!lock && !unlock) {
		/*
		 * if no lock or unlock then nothing to do since we do not
		 * know what it is
		 */
		rc = -EOPNOTSUPP;
		free_xid(xid);
		return rc;
	}

	rc = cifs_setlk(file, fl, type, wait_flag, posix_lck, lock, unlock,
			xid);
	free_xid(xid);
	return rc;


}

/*
 * ->lock() entry point: translate a VFS byte-range lock request into the
 * matching CIFS get-lock or set-lock operation, using POSIX (unix extension)
 * semantics when the server advertises them and the mount allows it.
 */
int cifs_lock(struct file *file, int cmd, struct file_lock *flock)
{
	int rc, xid;
	int lock = 0, unlock = 0;
	bool wait_flag = false;
	bool posix_lck = false;
	struct cifs_sb_info *cifs_sb;
	struct cifs_tcon *tcon;
	struct cifsFileInfo *cfile;
	__u32 type;

	rc = -EACCES;
	xid = get_xid();

	cifs_dbg(FYI, "%s: %pD2 cmd=0x%x type=0x%x flags=0x%x r=%lld:%lld\n", __func__, file, cmd,
		 flock->fl_flags, flock->fl_type, (long long)flock->fl_start,
		 (long long)flock->fl_end);

	cfile = (struct cifsFileInfo *)file->private_data;
	tcon = tlink_tcon(cfile->tlink);

	cifs_read_flock(flock, &type, &lock, &unlock, &wait_flag,
			tcon->ses->server);
	cifs_sb = CIFS_FILE_SB(file);
	/* mark the inode so deferred close is skipped once locks were taken */
	set_bit(CIFS_INO_CLOSE_ON_LOCK, &CIFS_I(d_inode(cfile->dentry))->flags);

	if (cap_unix(tcon->ses) &&
	    (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
	    ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
		posix_lck = true;
	/*
	 * BB add code here to normalize offset and length to account for
	 * negative length which we can not accept over the wire.
	 */
	if (IS_GETLK(cmd)) {
		rc = cifs_getlk(file, flock, type, wait_flag, posix_lck, xid);
		free_xid(xid);
		return rc;
	}

	if (!lock && !unlock) {
		/*
		 * if no lock or unlock then nothing to do since we do not
		 * know what it is
		 */
		free_xid(xid);
		return -EOPNOTSUPP;
	}

	rc = cifs_setlk(file, flock, type, wait_flag, posix_lck, lock, unlock,
			xid);
	free_xid(xid);
	return rc;
}

/*
 * update the file size (if needed) after a write. Should be called with
 * the inode->i_lock held
 */
void
cifs_update_eof(struct cifsInodeInfo *cifsi, loff_t offset,
		unsigned int bytes_written)
{
	loff_t end_of_write = offset + bytes_written;

	if (end_of_write > cifsi->server_eof)
		cifsi->server_eof = end_of_write;
}

/*
 * Synchronously write @write_size bytes from @write_data to the server at
 * *@offset through @open_file, reopening an invalidated handle and retrying
 * each chunk while the server returns -EAGAIN. *@offset is advanced and the
 * cached server EOF / i_size are updated as data lands. Returns the number
 * of bytes written, or a negative error when nothing could be written.
 */
static ssize_t
cifs_write(struct cifsFileInfo *open_file, __u32 pid, const char *write_data,
	   size_t write_size, loff_t *offset)
{
	int rc = 0;
	unsigned int bytes_written = 0;
	unsigned int total_written;
	struct cifs_tcon *tcon;
	struct TCP_Server_Info *server;
	unsigned int xid;
	struct dentry *dentry = open_file->dentry;
	struct cifsInodeInfo *cifsi = CIFS_I(d_inode(dentry));
	struct cifs_io_parms io_parms = {0};

	cifs_dbg(FYI, "write %zd bytes to offset %lld of %pd\n",
		 write_size, *offset, dentry);

	tcon = tlink_tcon(open_file->tlink);
	server = tcon->ses->server;

	if (!server->ops->sync_write)
		return -ENOSYS;

	xid = get_xid();

	for (total_written = 0; write_size > total_written;
	     total_written += bytes_written) {
		rc = -EAGAIN;
		while (rc == -EAGAIN) {
			struct kvec iov[2];
			unsigned int len;

			if (open_file->invalidHandle) {
				/* we could deadlock if we called
				   filemap_fdatawait from here so tell
				   reopen_file not to flush data to
				   server now */
				rc = cifs_reopen_file(open_file, false);
				if (rc != 0)
					break;
			}

			/* cap each request at the server's retry chunk size */
			len = min(server->ops->wp_retry_size(d_inode(dentry)),
				  (unsigned int)write_size - total_written);
			/* iov[0] is reserved for smb header */
			iov[1].iov_base = (char *)write_data + total_written;
			iov[1].iov_len = len;
			io_parms.pid = pid;
			io_parms.tcon = tcon;
			io_parms.offset = *offset;
			io_parms.length = len;
			rc = server->ops->sync_write(xid, &open_file->fid,
						     &io_parms, &bytes_written, iov, 1);
		}
		if (rc || (bytes_written == 0)) {
			if (total_written)
				break;
			else {
				free_xid(xid);
				return rc;
			}
		} else {
			spin_lock(&d_inode(dentry)->i_lock);
			cifs_update_eof(cifsi, *offset, bytes_written);
			spin_unlock(&d_inode(dentry)->i_lock);
			*offset += bytes_written;
		}
	}

	cifs_stats_bytes_written(tcon, total_written);

	if (total_written > 0) {
		spin_lock(&d_inode(dentry)->i_lock);
		if (*offset > d_inode(dentry)->i_size) {
			i_size_write(d_inode(dentry), *offset);
			d_inode(dentry)->i_blocks = (512 - 1 + *offset) >> 9;
		}
		spin_unlock(&d_inode(dentry)->i_lock);
	}
	mark_inode_dirty_sync(d_inode(dentry));
	free_xid(xid);
	return total_written;
}

/*
 * Find an open handle on @cifs_inode that allows reading, take a reference
 * on it and return it; returns NULL when no usable handle exists.
 */
struct cifsFileInfo *find_readable_file(struct cifsInodeInfo *cifs_inode,
					bool fsuid_only)
{
	struct cifsFileInfo *open_file = NULL;
	struct cifs_sb_info *cifs_sb = CIFS_SB(cifs_inode->netfs.inode.i_sb);

	/* only filter by fsuid on multiuser mounts */
	if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER))
		fsuid_only = false;

	spin_lock(&cifs_inode->open_file_lock);
	/* we could simply get the first_list_entry since write-only entries
	   are always at the end of the list but since the first entry might
	   have a close pending, we go through the whole list */
	list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
		if (fsuid_only && !uid_eq(open_file->uid, current_fsuid()))
			continue;
		if (OPEN_FMODE(open_file->f_flags) & FMODE_READ) {
			if ((!open_file->invalidHandle)) {
				/* found a good file */
				/* lock it so it will not be closed on us */
				cifsFileInfo_get(open_file);
				spin_unlock(&cifs_inode->open_file_lock);
				return open_file;
			} /* else might as well continue, and look for
			     another, or simply have the caller reopen it
			     again rather than trying to fix this handle */
		} else /* write only file */
			break; /* write only files are last so must be done */
	}
	spin_unlock(&cifs_inode->open_file_lock);
	return NULL;
}

/* Return -EBADF if no handle is found and general rc otherwise */
int
cifs_get_writable_file(struct cifsInodeInfo *cifs_inode, int flags,
		       struct cifsFileInfo **ret_file)
{
	struct cifsFileInfo *open_file, *inv_file = NULL;
	struct cifs_sb_info *cifs_sb;
	bool any_available = false;
	int rc = -EBADF;
	unsigned int refind = 0;
	bool fsuid_only = flags & FIND_WR_FSUID_ONLY;
	bool with_delete = flags & FIND_WR_WITH_DELETE;
	*ret_file = NULL;

	/*
	 * Having a null inode here (because mapping->host was set to zero by
	 * the VFS or MM) should not happen but we had reports of on oops (due
	 * to it being zero) during stress testcases so we need to check for it
	 */

	if (cifs_inode == NULL) {
		cifs_dbg(VFS, "Null inode passed to cifs_writeable_file\n");
		dump_stack();
		return rc;
	}

	cifs_sb = CIFS_SB(cifs_inode->netfs.inode.i_sb);

	/* only filter by fsuid on multiuser mounts */
	if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER))
		fsuid_only = false;

	spin_lock(&cifs_inode->open_file_lock);
refind_writable:
	/* give up after MAX_REOPEN_ATT failed reopen attempts */
	if (refind > MAX_REOPEN_ATT) {
		spin_unlock(&cifs_inode->open_file_lock);
		return rc;
	}
	list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
		/* first pass prefers a handle opened by the current thread group */
		if (!any_available && open_file->pid != current->tgid)
			continue;
		if (fsuid_only && !uid_eq(open_file->uid, current_fsuid()))
			continue;
		if (with_delete && !(open_file->fid.access & DELETE))
			continue;
		if (OPEN_FMODE(open_file->f_flags) & FMODE_WRITE) {
			if (!open_file->invalidHandle) {
				/* found a good writable file */
				cifsFileInfo_get(open_file);
				spin_unlock(&cifs_inode->open_file_lock);
				*ret_file = open_file;
				return 0;
			} else {
				/* remember the first invalidated handle as a fallback */
				if (!inv_file)
					inv_file = open_file;
			}
		}
	}
	/* couldn't find useable FH with same pid, try any available */
	if (!any_available) {
		any_available = true;
		goto refind_writable;
	}

	if (inv_file) {
		any_available = false;
		cifsFileInfo_get(inv_file);
	}

	spin_unlock(&cifs_inode->open_file_lock);

	if (inv_file) {
		/* try to revive the invalidated handle */
		rc = cifs_reopen_file(inv_file, false);
		if (!rc) {
			*ret_file = inv_file;
			return 0;
		}

		/* reopen failed: push this handle to the tail and rescan */
		spin_lock(&cifs_inode->open_file_lock);
		list_move_tail(&inv_file->flist, &cifs_inode->openFileList);
		spin_unlock(&cifs_inode->open_file_lock);
		cifsFileInfo_put(inv_file);
		++refind;
		inv_file = NULL;
		spin_lock(&cifs_inode->open_file_lock);
		goto refind_writable;
	}

	return rc;
}

/*
 * Convenience wrapper around cifs_get_writable_file() that returns the
 * referenced handle directly (or NULL) instead of an error code.
 */
struct cifsFileInfo *
find_writable_file(struct cifsInodeInfo *cifs_inode, int flags)
{
	struct cifsFileInfo *cfile;
	int rc;

	rc = cifs_get_writable_file(cifs_inode, flags, &cfile);
	if (rc)
		cifs_dbg(FYI, "Couldn't find writable handle rc=%d\n", rc);

	return cfile;
}

/*
 * Scan @tcon's open-file list for a file whose path matches @name and get a
 * writable handle on its inode. Returns -ENOENT when no open file matches,
 * otherwise the result of cifs_get_writable_file().
 */
int
cifs_get_writable_path(struct cifs_tcon *tcon, const char *name,
		       int flags,
		       struct cifsFileInfo **ret_file)
{
	struct cifsFileInfo *cfile;
	void *page = alloc_dentry_path();

	*ret_file = NULL;

	spin_lock(&tcon->open_file_lock);
	list_for_each_entry(cfile, &tcon->openFileList, tlist) {
		struct cifsInodeInfo *cinode;
		const char *full_path = build_path_from_dentry(cfile->dentry, page);
		if (IS_ERR(full_path)) {
			spin_unlock(&tcon->open_file_lock);
			free_dentry_path(page);
			return PTR_ERR(full_path);
		}
		if (strcmp(full_path, name))
			continue;

		cinode = CIFS_I(d_inode(cfile->dentry));
		spin_unlock(&tcon->open_file_lock);
		free_dentry_path(page);
		return cifs_get_writable_file(cinode, flags, ret_file);
	}

	spin_unlock(&tcon->open_file_lock);
	free_dentry_path(page);
	return -ENOENT;
}

/*
 * Scan @tcon's open-file list for a file whose path matches @name and get a
 * readable handle on its inode. Returns -ENOENT when no match (or no usable
 * readable handle) is found.
 */
int
cifs_get_readable_path(struct cifs_tcon *tcon, const char *name,
		       struct cifsFileInfo **ret_file)
{
	struct cifsFileInfo *cfile;
	void *page = alloc_dentry_path();

	*ret_file = NULL;

	spin_lock(&tcon->open_file_lock);
	list_for_each_entry(cfile, &tcon->openFileList, tlist) {
		struct cifsInodeInfo *cinode;
		const char *full_path = build_path_from_dentry(cfile->dentry, page);
		if (IS_ERR(full_path)) {
			spin_unlock(&tcon->open_file_lock);
			free_dentry_path(page);
			return PTR_ERR(full_path);
		}
		if (strcmp(full_path, name))
			continue;

		cinode = CIFS_I(d_inode(cfile->dentry));
		spin_unlock(&tcon->open_file_lock);
		free_dentry_path(page);
		*ret_file = find_readable_file(cinode, 0);
		return *ret_file ? 0 : -ENOENT;
	}

	spin_unlock(&tcon->open_file_lock);
	free_dentry_path(page);
	return -ENOENT;
}

/*
 * Final kref release for a cifs_writedata: deregister any smbdirect memory
 * registration, drop the file handle reference and free the structure.
 */
void
cifs_writedata_release(struct kref *refcount)
{
	struct cifs_writedata *wdata = container_of(refcount,
					struct cifs_writedata, refcount);
#ifdef CONFIG_CIFS_SMB_DIRECT
	if (wdata->mr) {
		smbd_deregister_mr(wdata->mr);
		wdata->mr = NULL;
	}
#endif

	if (wdata->cfile)
		cifsFileInfo_put(wdata->cfile);

	kfree(wdata);
}

/*
 * Write failed with a retryable error. Resend the write request. It's also
 * possible that the page was redirtied so re-clean the page.
 */
static void
cifs_writev_requeue(struct cifs_writedata *wdata)
{
	int rc = 0;
	struct inode *inode = d_inode(wdata->cfile->dentry);
	struct TCP_Server_Info *server;
	unsigned int rest_len = wdata->bytes;
	loff_t fpos = wdata->offset;

	server = tlink_tcon(wdata->cfile->tlink)->ses->server;
	do {
		struct cifs_writedata *wdata2;
		unsigned int wsize, cur_len;

		/* resend in chunks no larger than the current retry size */
		wsize = server->ops->wp_retry_size(inode);
		if (wsize < rest_len) {
			if (wsize < PAGE_SIZE) {
				rc = -EOPNOTSUPP;
				break;
			}
			cur_len = min(round_down(wsize, PAGE_SIZE), rest_len);
		} else {
			cur_len = rest_len;
		}

		wdata2 = cifs_writedata_alloc(cifs_writev_complete);
		if (!wdata2) {
			rc = -ENOMEM;
			break;
		}

		wdata2->sync_mode = wdata->sync_mode;
		wdata2->offset = fpos;
		wdata2->bytes = cur_len;
		wdata2->iter = wdata->iter;

		iov_iter_advance(&wdata2->iter, fpos - wdata->offset);
		iov_iter_truncate(&wdata2->iter, wdata2->bytes);

		if (iov_iter_is_xarray(&wdata2->iter))
			/* Check for pages having been redirtied and clean
			 * them. We can do this by walking the xarray. If
			 * it's not an xarray, then it's a DIO and we shouldn't
			 * be mucking around with the page bits.
			 */
			cifs_undirty_folios(inode, fpos, cur_len);

		rc = cifs_get_writable_file(CIFS_I(inode), FIND_WR_ANY,
					    &wdata2->cfile);
		if (!wdata2->cfile) {
			cifs_dbg(VFS, "No writable handle to retry writepages rc=%d\n",
				 rc);
			if (!is_retryable_error(rc))
				rc = -EBADF;
		} else {
			wdata2->pid = wdata2->cfile->pid;
			rc = server->ops->async_writev(wdata2,
						       cifs_writedata_release);
		}

		kref_put(&wdata2->refcount, cifs_writedata_release);
		if (rc) {
			if (is_retryable_error(rc))
				continue;
			fpos += cur_len;
			rest_len -= cur_len;
			break;
		}

		fpos += cur_len;
		rest_len -= cur_len;
	} while (rest_len > 0);

	/* Clean up remaining pages from the original wdata */
	if (iov_iter_is_xarray(&wdata->iter))
		cifs_pages_write_failed(inode, fpos, rest_len);

	if (rc != 0 && !is_retryable_error(rc))
		mapping_set_error(inode->i_mapping, rc);
	kref_put(&wdata->refcount, cifs_writedata_release);
}

/*
 * Work-queue completion for an async write: on success update the cached
 * EOF and byte counters; on a sync-mode -EAGAIN hand the request back to
 * cifs_writev_requeue(); otherwise finalise the page writeback state and
 * record any error on the mapping.
 */
void
cifs_writev_complete(struct work_struct *work)
{
	struct cifs_writedata *wdata = container_of(work,
					struct cifs_writedata, work);
	struct inode *inode = d_inode(wdata->cfile->dentry);

	if (wdata->result == 0) {
		spin_lock(&inode->i_lock);
		cifs_update_eof(CIFS_I(inode), wdata->offset, wdata->bytes);
		spin_unlock(&inode->i_lock);
		cifs_stats_bytes_written(tlink_tcon(wdata->cfile->tlink),
					 wdata->bytes);
	} else if (wdata->sync_mode == WB_SYNC_ALL && wdata->result == -EAGAIN)
		return cifs_writev_requeue(wdata);

	if (wdata->result == -EAGAIN)
		cifs_pages_write_redirty(inode, wdata->offset, wdata->bytes);
	else if (wdata->result < 0)
		cifs_pages_write_failed(inode, wdata->offset, wdata->bytes);
	else
		cifs_pages_written_back(inode, wdata->offset, wdata->bytes);

	if (wdata->result != -EAGAIN)
		mapping_set_error(inode->i_mapping, wdata->result);
	kref_put(&wdata->refcount, cifs_writedata_release);
}

/*
 * Allocate and initialise a cifs_writedata with one reference held and the
 * given work-queue completion routine.
 */
struct cifs_writedata *cifs_writedata_alloc(work_func_t complete)
{
	struct cifs_writedata *wdata;

	wdata = kzalloc(sizeof(*wdata), GFP_NOFS);
	if (wdata != NULL) {
		kref_init(&wdata->refcount);
		INIT_LIST_HEAD(&wdata->list);
		init_completion(&wdata->done);
		INIT_WORK(&wdata->work, complete);
	}
	return wdata;
}

/*
 * Synchronously write the byte range [from, to) of a pagecache page to the
 * server, using any available writable handle on the inode. The range is
 * clamped so the write never extends the file.
 */
static int cifs_partialpagewrite(struct page *page, unsigned from, unsigned to)
{
	struct address_space *mapping = page->mapping;
	loff_t offset = (loff_t)page->index << PAGE_SHIFT;
	char *write_data;
	int rc = -EFAULT;
	int bytes_written = 0;
	struct inode *inode;
	struct cifsFileInfo *open_file;

	if (!mapping || !mapping->host)
		return -EFAULT;

	inode = page->mapping->host;

	offset += (loff_t)from;
	write_data = kmap(page);
	write_data += from;

	if ((to > PAGE_SIZE) || (from > to)) {
		kunmap(page);
		return -EIO;
	}

	/* racing with truncate? */
	if (offset > mapping->host->i_size) {
		kunmap(page);
		return 0; /* don't care */
	}

	/* check to make sure that we are not extending the file */
	if (mapping->host->i_size - offset < (loff_t)to)
		to = (unsigned)(mapping->host->i_size - offset);

	rc = cifs_get_writable_file(CIFS_I(mapping->host), FIND_WR_ANY,
				    &open_file);
	if (!rc) {
		bytes_written = cifs_write(open_file, open_file->pid,
					   write_data, to - from, &offset);
		cifsFileInfo_put(open_file);
		/* Does mm or vfs already set times? */
		simple_inode_init_ts(inode);
		if ((bytes_written > 0) && (offset))
			rc = 0;
		else if (bytes_written < 0)
			rc = bytes_written;
		else
			rc = -EFAULT;
	} else {
		cifs_dbg(FYI, "No writable handle for write page rc=%d\n", rc);
		if (!is_retryable_error(rc))
			rc = -EIO;
	}

	kunmap(page);
	return rc;
}

/*
 * Extend the region to be written back to include subsequent contiguously
 * dirty pages if possible, but don't sleep while doing so.
 */
static void cifs_extend_writeback(struct address_space *mapping,
				  struct xa_state *xas,
				  long *_count,
				  loff_t start,
				  int max_pages,
				  loff_t max_len,
				  size_t *_len)
{
	struct folio_batch batch;
	struct folio *folio;
	unsigned int nr_pages;
	pgoff_t index = (start + *_len) / PAGE_SIZE;
	size_t len;
	bool stop = true;
	unsigned int i;

	folio_batch_init(&batch);

	do {
		/* Firstly, we gather up a batch of contiguous dirty pages
		 * under the RCU read lock - but we can't clear the dirty flags
		 * there if any of those pages are mapped.
		 */
		rcu_read_lock();

		xas_for_each(xas, folio, ULONG_MAX) {
			stop = true;
			if (xas_retry(xas, folio))
				continue;
			if (xa_is_value(folio))
				break;
			/* stop at the first gap in the contiguous run */
			if (folio->index != index) {
				xas_reset(xas);
				break;
			}

			if (!folio_try_get_rcu(folio)) {
				xas_reset(xas);
				continue;
			}
			nr_pages = folio_nr_pages(folio);
			if (nr_pages > max_pages) {
				xas_reset(xas);
				break;
			}

			/* Has the page moved or been split? */
			if (unlikely(folio != xas_reload(xas))) {
				folio_put(folio);
				xas_reset(xas);
				break;
			}

			if (!folio_trylock(folio)) {
				folio_put(folio);
				xas_reset(xas);
				break;
			}
			if (!folio_test_dirty(folio) ||
			    folio_test_writeback(folio)) {
				folio_unlock(folio);
				folio_put(folio);
				xas_reset(xas);
				break;
			}

			max_pages -= nr_pages;
			len = folio_size(folio);
			stop = false;

			index += nr_pages;
			*_count -= nr_pages;
			*_len += len;
			if (max_pages <= 0 || *_len >= max_len || *_count <= 0)
				stop = true;

			if (!folio_batch_add(&batch, folio))
				break;
			if (stop)
				break;
		}

		xas_pause(xas);
		rcu_read_unlock();

		/* Now, if we obtained any pages, we can shift them to being
		 * writable and mark them for caching.
		 */
		if (!folio_batch_count(&batch))
			break;

		for (i = 0; i < folio_batch_count(&batch); i++) {
			folio = batch.folios[i];
			/* The folio should be locked, dirty and not undergoing
			 * writeback from the loop above.
			 */
			if (!folio_clear_dirty_for_io(folio))
				WARN_ON(1);
			folio_start_writeback(folio);
			folio_unlock(folio);
		}

		folio_batch_release(&batch);
		cond_resched();
	} while (!stop);
}

/*
 * Write back the locked page and any subsequent non-locked dirty pages.
 */
static ssize_t cifs_write_back_from_locked_folio(struct address_space *mapping,
						 struct writeback_control *wbc,
						 struct xa_state *xas,
						 struct folio *folio,
						 unsigned long long start,
						 unsigned long long end)
{
	struct inode *inode = mapping->host;
	struct TCP_Server_Info *server;
	struct cifs_writedata *wdata;
	struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
	struct cifs_credits credits_on_stack;
	struct cifs_credits *credits = &credits_on_stack;
	struct cifsFileInfo *cfile = NULL;
	unsigned long long i_size = i_size_read(inode), max_len;
	unsigned int xid, wsize;
	size_t len = folio_size(folio);
	long count = wbc->nr_to_write;
	int rc;

	/* The folio should be locked, dirty and not undergoing writeback. */
	if (!folio_clear_dirty_for_io(folio))
		WARN_ON_ONCE(1);
	folio_start_writeback(folio);

	count -= folio_nr_pages(folio);

	xid = get_xid();
	server = cifs_pick_channel(cifs_sb_master_tcon(cifs_sb)->ses);

	rc = cifs_get_writable_file(CIFS_I(inode), FIND_WR_ANY, &cfile);
	if (rc) {
		cifs_dbg(VFS, "No writable handle in writepages rc=%d\n", rc);
		goto err_xid;
	}

	rc = server->ops->wait_mtu_credits(server, cifs_sb->ctx->wsize,
					   &wsize, credits);
	if (rc != 0)
		goto err_close;

	wdata = cifs_writedata_alloc(cifs_writev_complete);
	if (!wdata) {
		rc = -ENOMEM;
		goto err_uncredit;
	}

	wdata->sync_mode = wbc->sync_mode;
	wdata->offset = folio_pos(folio);
	wdata->pid = cfile->pid;
	wdata->credits = credits_on_stack;
	wdata->cfile = cfile;
	wdata->server = server;
	/* ownership of the handle ref moved into wdata; don't put it below */
	cfile = NULL;

	/* Find all consecutive lockable dirty pages that have contiguous
	 * written regions, stopping when we find a page that is not
	 * immediately lockable, is not dirty or is missing, or we reach the
	 * end of the range.
	 */
	if (start < i_size) {
		/* Trim the write to the EOF; the extra data is ignored. Also
		 * put an upper limit on the size of a single storedata op.
		 */
		max_len = wsize;
		max_len = min_t(unsigned long long, max_len, end - start + 1);
		max_len = min_t(unsigned long long, max_len, i_size - start);

		if (len < max_len) {
			int max_pages = INT_MAX;

#ifdef CONFIG_CIFS_SMB_DIRECT
			if (server->smbd_conn)
				max_pages = server->smbd_conn->max_frmr_depth;
#endif
			max_pages -= folio_nr_pages(folio);

			if (max_pages > 0)
				cifs_extend_writeback(mapping, xas, &count, start,
						      max_pages, max_len, &len);
		}
	}
	len = min_t(unsigned long long, len, i_size - start);

	/* We now have a contiguous set of dirty pages, each with writeback
	 * set; the first page is still locked at this point, but all the rest
	 * have been unlocked.
	 */
	folio_unlock(folio);
	wdata->bytes = len;

	if (start < i_size) {
		iov_iter_xarray(&wdata->iter, ITER_SOURCE, &mapping->i_pages,
				start, len);

		rc = adjust_credits(wdata->server, &wdata->credits, wdata->bytes);
		if (rc)
			goto err_wdata;

		if (wdata->cfile->invalidHandle)
			rc = -EAGAIN;
		else
			rc = wdata->server->ops->async_writev(wdata,
							      cifs_writedata_release);
		if (rc >= 0) {
			kref_put(&wdata->refcount, cifs_writedata_release);
			goto err_close;
		}
	} else {
		/* The dirty region was entirely beyond the EOF. */
		cifs_pages_written_back(inode, start, len);
		rc = 0;
	}

err_wdata:
	kref_put(&wdata->refcount, cifs_writedata_release);
err_uncredit:
	add_credits_and_wake_if(server, credits, 0);
err_close:
	if (cfile)
		cifsFileInfo_put(cfile);
err_xid:
	free_xid(xid);
	if (rc == 0) {
		wbc->nr_to_write = count;
		rc = len;
	} else if (is_retryable_error(rc)) {
		cifs_pages_write_redirty(inode, start, len);
	} else {
		cifs_pages_write_failed(inode, start, len);
		mapping_set_error(mapping, rc);
	}
	/* Indication to update ctime and mtime as close is deferred */
	set_bit(CIFS_INO_MODIFIED_ATTR, &CIFS_I(inode)->flags);
	return rc;
}

/*
 * write a region of pages back to the server
 */
static ssize_t cifs_writepages_begin(struct address_space *mapping,
				     struct writeback_control *wbc,
				     struct xa_state *xas,
				     unsigned long long *_start,
				     unsigned long long end)
{
	struct folio *folio;
	unsigned long long start = *_start;
	ssize_t ret;
	int skips = 0;

search_again:
	/* Find the first dirty page. */
	rcu_read_lock();

	for (;;) {
		folio = xas_find_marked(xas, end / PAGE_SIZE, PAGECACHE_TAG_DIRTY);
		if (xas_retry(xas, folio) || xa_is_value(folio))
			continue;
		if (!folio)
			break;

		if (!folio_try_get_rcu(folio)) {
			xas_reset(xas);
			continue;
		}

		if (unlikely(folio != xas_reload(xas))) {
			folio_put(folio);
			xas_reset(xas);
			continue;
		}

		xas_pause(xas);
		break;
	}
	rcu_read_unlock();
	if (!folio)
		return 0;

	start = folio_pos(folio); /* May regress with THPs */

	/* At this point we hold neither the i_pages lock nor the page lock:
	 * the page may be truncated or invalidated (changing page->mapping to
	 * NULL), or even swizzled back from swapper_space to tmpfs file
	 * mapping
	 */
lock_again:
	if (wbc->sync_mode != WB_SYNC_NONE) {
		ret = folio_lock_killable(folio);
		if (ret < 0)
			return ret;
	} else {
		/* NOTE(review): the skip paths below jump back to search_again
		 * without an apparent folio_put() for the reference taken by
		 * folio_try_get_rcu() above — looks like a possible ref leak;
		 * TODO confirm against upstream history before changing.
		 */
		if (!folio_trylock(folio))
			goto search_again;
	}

	if (folio->mapping != mapping ||
	    !folio_test_dirty(folio)) {
		start += folio_size(folio);
		folio_unlock(folio);
		goto search_again;
	}

	if (folio_test_writeback(folio) ||
	    folio_test_fscache(folio)) {
		folio_unlock(folio);
		if (wbc->sync_mode != WB_SYNC_NONE) {
			folio_wait_writeback(folio);
#ifdef CONFIG_CIFS_FSCACHE
			folio_wait_fscache(folio);
#endif
			goto lock_again;
		}

		start += folio_size(folio);
		if (wbc->sync_mode == WB_SYNC_NONE) {
			/* bail out of a non-integrity writeback after a few
			 * busy pages rather than spinning */
			if (skips >= 5 || need_resched()) {
				ret = 0;
				goto out;
			}
			skips++;
		}
		goto search_again;
	}

	ret = cifs_write_back_from_locked_folio(mapping, wbc, xas, folio, start, end);
out:
	if (ret > 0)
		*_start = start + ret;
	return ret;
}

/*
 * Write a region of pages back to the server
 */
static int cifs_writepages_region(struct address_space *mapping,
				  struct writeback_control *wbc,
				  unsigned long long *_start,
				  unsigned long long end)
{
	ssize_t ret;

	XA_STATE(xas, &mapping->i_pages, *_start / PAGE_SIZE);

	do {
		ret = cifs_writepages_begin(mapping, wbc, &xas, _start, end);
		if (ret > 0 && wbc->nr_to_write > 0)
			cond_resched();
	} while (ret > 0 && wbc->nr_to_write > 0);

	return ret > 0 ? 0 : ret;
}

/*
 * Write some of the pending data back to the server
 */
static int cifs_writepages(struct address_space *mapping,
			   struct writeback_control *wbc)
{
	loff_t start, end;
	int ret;

	/* We have to be careful as we can end up racing with setattr()
	 * truncating the pagecache since the caller doesn't take a lock here
	 * to prevent it.
	 */

	if (wbc->range_cyclic && mapping->writeback_index) {
		/* cyclic: resume where the last pass stopped, then wrap */
		start = mapping->writeback_index * PAGE_SIZE;
		ret = cifs_writepages_region(mapping, wbc, &start, LLONG_MAX);
		if (ret < 0)
			goto out;

		if (wbc->nr_to_write <= 0) {
			mapping->writeback_index = start / PAGE_SIZE;
			goto out;
		}

		start = 0;
		end = mapping->writeback_index * PAGE_SIZE;
		mapping->writeback_index = 0;
		ret = cifs_writepages_region(mapping, wbc, &start, end);
		if (ret == 0)
			mapping->writeback_index = start / PAGE_SIZE;
	} else if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX) {
		start = 0;
		ret = cifs_writepages_region(mapping, wbc, &start, LLONG_MAX);
		if (wbc->nr_to_write > 0 && ret == 0)
			mapping->writeback_index = start / PAGE_SIZE;
	} else {
		start = wbc->range_start;
		ret = cifs_writepages_region(mapping, wbc, &start, wbc->range_end);
	}

out:
	return ret;
}

/*
 * Write a single locked page to the server, redirtying it on a retryable
 * failure (or retrying inline for WB_SYNC_ALL -EAGAIN).
 */
static int
cifs_writepage_locked(struct page *page, struct writeback_control *wbc)
{
	int rc;
	unsigned int xid;

	xid = get_xid();
	/* BB add check for wbc flags */
	get_page(page);
	if (!PageUptodate(page))
		cifs_dbg(FYI, "ppw - page not up to date\n");

	/*
	 * Set the "writeback" flag, and clear "dirty" in the radix tree.
	 *
	 * A writepage() implementation always needs to do either this,
	 * or re-dirty the page with "redirty_page_for_writepage()" in
	 * the case of a failure.
	 *
	 * Just unlocking the page will cause the radix tree tag-bits
	 * to fail to update with the state of the page correctly.
	 */
	set_page_writeback(page);
retry_write:
	rc = cifs_partialpagewrite(page, 0, PAGE_SIZE);
	if (is_retryable_error(rc)) {
		if (wbc->sync_mode == WB_SYNC_ALL && rc == -EAGAIN)
			goto retry_write;
		redirty_page_for_writepage(wbc, page);
	} else if (rc != 0) {
		SetPageError(page);
		mapping_set_error(page->mapping, rc);
	} else {
		SetPageUptodate(page);
	}
	end_page_writeback(page);
	put_page(page);
	free_xid(xid);
	return rc;
}

/*
 * ->write_end(): data has been copied into the pagecache page. If the folio
 * is uptodate just mark it dirty for later writeback; otherwise write the
 * copied bytes synchronously through the open handle via cifs_write().
 */
static int cifs_write_end(struct file *file, struct address_space *mapping,
			  loff_t pos, unsigned len, unsigned copied,
			  struct page *page, void *fsdata)
{
	int rc;
	struct inode *inode = mapping->host;
	struct cifsFileInfo *cfile = file->private_data;
	struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
	struct folio *folio = page_folio(page);
	__u32 pid;

	if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
		pid = cfile->pid;
	else
		pid = current->tgid;

	cifs_dbg(FYI, "write_end for page %p from pos %lld with %d bytes\n",
		 page, pos, copied);

	if (folio_test_checked(folio)) {
		if (copied == len)
			folio_mark_uptodate(folio);
		folio_clear_checked(folio);
	} else if (!folio_test_uptodate(folio) && copied == PAGE_SIZE)
		folio_mark_uptodate(folio);

	if (!folio_test_uptodate(folio)) {
		char *page_data;
		unsigned offset = pos & (PAGE_SIZE - 1);
		unsigned int xid;

		xid = get_xid();
		/* this is probably better than directly calling
		   partialpage_write since in this function the file handle is
		   known which we might as well leverage */
		/* BB check if anything else missing out of ppw
		   such as updating last write time */
		page_data = kmap(page);
		rc = cifs_write(cfile, pid, page_data + offset, copied, &pos);
		/* if (rc < 0) should we set writebehind rc? */
		kunmap(page);

		free_xid(xid);
	} else {
		rc = copied;
		pos += copied;
		set_page_dirty(page);
	}

	if (rc > 0) {
		spin_lock(&inode->i_lock);
		if (pos > inode->i_size) {
			i_size_write(inode, pos);
			inode->i_blocks = (512 - 1 + pos) >> 9;
		}
		spin_unlock(&inode->i_lock);
	}

	unlock_page(page);
	put_page(page);
	/* Indication to update ctime and mtime as close is deferred */
	set_bit(CIFS_INO_MODIFIED_ATTR, &CIFS_I(inode)->flags);

	return rc;
}

/*
 * Strict fsync: flush dirty pages for the range, zap the pagecache mapping
 * when the server has not granted a read-caching lease/oplock, then ask the
 * server to flush the file (unless the mount disables server-side sync).
 */
int cifs_strict_fsync(struct file *file, loff_t start, loff_t end,
		      int datasync)
{
	unsigned int xid;
	int rc = 0;
	struct cifs_tcon *tcon;
	struct TCP_Server_Info *server;
	struct cifsFileInfo *smbfile = file->private_data;
	struct inode *inode = file_inode(file);
	struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);

	rc = file_write_and_wait_range(file, start, end);
	if (rc) {
		trace_cifs_fsync_err(inode->i_ino, rc);
		return rc;
	}

	xid = get_xid();

	cifs_dbg(FYI, "Sync file - name: %pD datasync: 0x%x\n",
		 file, datasync);

	if (!CIFS_CACHE_READ(CIFS_I(inode))) {
		rc = cifs_zap_mapping(inode);
		if (rc) {
			cifs_dbg(FYI, "rc: %d during invalidate phase\n", rc);
			rc = 0; /* don't care about it in fsync */
		}
	}

	tcon = tlink_tcon(smbfile->tlink);
	if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC)) {
		server = tcon->ses->server;
		if (server->ops->flush == NULL) {
			rc = -ENOSYS;
			goto strict_fsync_exit;
		}

		if ((OPEN_FMODE(smbfile->f_flags) & FMODE_WRITE) == 0) {
			/* handle is read-only; borrow a writable one to flush */
			smbfile = find_writable_file(CIFS_I(inode), FIND_WR_ANY);
			if (smbfile) {
				rc = server->ops->flush(xid, tcon, &smbfile->fid);
				cifsFileInfo_put(smbfile);
			} else
				cifs_dbg(FYI, "ignore fsync for file not open for write\n");
		} else
			rc = server->ops->flush(xid, tcon, &smbfile->fid);
	}

strict_fsync_exit:
	free_xid(xid);
	return rc;
}

/*
 * Non-strict fsync: flush dirty pages for the range and ask the server to
 * flush the file, without invalidating the local pagecache.
 */
int cifs_fsync(struct file *file, loff_t start, loff_t end, int datasync)
{
	unsigned int xid;
	int rc = 0;
	struct cifs_tcon *tcon;
	struct TCP_Server_Info *server;
	struct cifsFileInfo *smbfile = file->private_data;
	struct inode *inode = file_inode(file);
	struct cifs_sb_info *cifs_sb = CIFS_FILE_SB(file);

	rc = file_write_and_wait_range(file, start, end);
	if (rc) {
		trace_cifs_fsync_err(file_inode(file)->i_ino, rc);
		return rc;
	}

	xid = get_xid();

	cifs_dbg(FYI, "Sync file - name: %pD datasync: 0x%x\n",
		 file, datasync);

	tcon = tlink_tcon(smbfile->tlink);
	if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC)) {
		server = tcon->ses->server;
		if (server->ops->flush == NULL) {
			rc = -ENOSYS;
			goto fsync_exit;
		}

		if ((OPEN_FMODE(smbfile->f_flags) & FMODE_WRITE) == 0) {
			/* handle is read-only; borrow a writable one to flush */
			smbfile = find_writable_file(CIFS_I(inode), FIND_WR_ANY);
			if (smbfile) {
				rc = server->ops->flush(xid, tcon, &smbfile->fid);
				cifsFileInfo_put(smbfile);
			} else
				cifs_dbg(FYI, "ignore fsync for file not open for write\n");
		} else
			rc = server->ops->flush(xid, tcon, &smbfile->fid);
	}

fsync_exit:
	free_xid(xid);
	return rc;
}

/*
 * As file closes, flush all cached write data for this inode checking
 * for write behind errors.
 */
int cifs_flush(struct file *file, fl_owner_t id)
{
	struct inode *inode = file_inode(file);
	int rc = 0;

	if (file->f_mode & FMODE_WRITE)
		rc = filemap_write_and_wait(inode->i_mapping);

	cifs_dbg(FYI, "Flush inode %p file %p rc %d\n", inode, file, rc);
	if (rc) {
		/* get more nuanced writeback errors */
		rc = filemap_check_wb_err(file->f_mapping, 0);
		trace_cifs_flush_err(inode->i_ino, rc);
	}
	return rc;
}

/*
 * Final kref release for a wdata used by uncached (direct) writes: also
 * drops the reference on the owning aio context before the common release.
 */
static void
cifs_uncached_writedata_release(struct kref *refcount)
{
	struct cifs_writedata *wdata = container_of(refcount,
					struct cifs_writedata, refcount);

	kref_put(&wdata->ctx->refcount, cifs_aio_ctx_release);
	cifs_writedata_release(refcount);
}

static void collect_uncached_write_data(struct cifs_aio_ctx *ctx);

/*
 * Work-queue completion for an uncached async write: update the cached
 * server EOF (and i_size if it grew), wake any waiter on wdata->done and
 * hand results to the aio context collector.
 */
static void
cifs_uncached_writev_complete(struct work_struct *work)
{
	struct cifs_writedata *wdata = container_of(work,
					struct cifs_writedata, work);
	struct inode *inode = d_inode(wdata->cfile->dentry);
	struct cifsInodeInfo *cifsi = CIFS_I(inode);

	spin_lock(&inode->i_lock);
	cifs_update_eof(cifsi, wdata->offset, wdata->bytes);
	if (cifsi->server_eof > inode->i_size)
		i_size_write(inode, cifsi->server_eof);
	spin_unlock(&inode->i_lock);

	complete(&wdata->done);
	collect_uncached_write_data(wdata->ctx);
	/* the below call can possibly free the last ref to aio ctx */
	kref_put(&wdata->refcount, cifs_uncached_writedata_release);
}

/*
 * Resend an uncached wdata after a retryable failure: reopen the handle if
 * needed, wait until enough credits are available to cover the whole wdata
 * in one request, and reissue the async write. On success the wdata is
 * re-added to @wdata_list; on permanent failure its reference is dropped.
 */
static int
cifs_resend_wdata(struct cifs_writedata *wdata, struct list_head *wdata_list,
		  struct cifs_aio_ctx *ctx)
{
	unsigned int wsize;
	struct cifs_credits credits;
	int rc;
	struct TCP_Server_Info *server = wdata->server;

	do {
		if (wdata->cfile->invalidHandle) {
			rc = cifs_reopen_file(wdata->cfile, false);
			if (rc == -EAGAIN)
				continue;
			else if (rc)
				break;
		}


		/*
		 * Wait for credits to resend this wdata.
		 * Note: we are attempting to resend the whole wdata not in
		 * segments
		 */
		do {
			rc = server->ops->wait_mtu_credits(server, wdata->bytes,
							   &wsize, &credits);
			if (rc)
				goto fail;

			if (wsize < wdata->bytes) {
				/* not enough credits yet; give them back and wait */
				add_credits_and_wake_if(server, &credits, 0);
				msleep(1000);
			}
		} while (wsize < wdata->bytes);
		wdata->credits = credits;

		rc = adjust_credits(server, &wdata->credits, wdata->bytes);

		if (!rc) {
			if (wdata->cfile->invalidHandle)
				rc = -EAGAIN;
			else {
#ifdef CONFIG_CIFS_SMB_DIRECT
				if (wdata->mr) {
					/* re-register the MR on the next send */
					wdata->mr->need_invalidate = true;
					smbd_deregister_mr(wdata->mr);
					wdata->mr = NULL;
				}
#endif
				rc = server->ops->async_writev(wdata,
							       cifs_uncached_writedata_release);
			}
		}

		/* If the write was successfully sent, we are done */
		if (!rc) {
			list_add_tail(&wdata->list, wdata_list);
			return 0;
		}

		/* Roll back credits and retry if needed */
		add_credits_and_wake_if(server, &wdata->credits, 0);
	} while (rc == -EAGAIN);

fail:
	kref_put(&wdata->refcount, cifs_uncached_writedata_release);
	return rc;
}

/*
 * Select span of a bvec iterator we're going to use. Limit it by both maximum
 * size and maximum number of segments.
3446 */ 3447 static size_t cifs_limit_bvec_subset(const struct iov_iter *iter, size_t max_size, 3448 size_t max_segs, unsigned int *_nsegs) 3449 { 3450 const struct bio_vec *bvecs = iter->bvec; 3451 unsigned int nbv = iter->nr_segs, ix = 0, nsegs = 0; 3452 size_t len, span = 0, n = iter->count; 3453 size_t skip = iter->iov_offset; 3454 3455 if (WARN_ON(!iov_iter_is_bvec(iter)) || n == 0) 3456 return 0; 3457 3458 while (n && ix < nbv && skip) { 3459 len = bvecs[ix].bv_len; 3460 if (skip < len) 3461 break; 3462 skip -= len; 3463 n -= len; 3464 ix++; 3465 } 3466 3467 while (n && ix < nbv) { 3468 len = min3(n, bvecs[ix].bv_len - skip, max_size); 3469 span += len; 3470 max_size -= len; 3471 nsegs++; 3472 ix++; 3473 if (max_size == 0 || nsegs >= max_segs) 3474 break; 3475 skip = 0; 3476 n -= len; 3477 } 3478 3479 *_nsegs = nsegs; 3480 return span; 3481 } 3482 3483 static int 3484 cifs_write_from_iter(loff_t fpos, size_t len, struct iov_iter *from, 3485 struct cifsFileInfo *open_file, 3486 struct cifs_sb_info *cifs_sb, struct list_head *wdata_list, 3487 struct cifs_aio_ctx *ctx) 3488 { 3489 int rc = 0; 3490 size_t cur_len, max_len; 3491 struct cifs_writedata *wdata; 3492 pid_t pid; 3493 struct TCP_Server_Info *server; 3494 unsigned int xid, max_segs = INT_MAX; 3495 3496 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD) 3497 pid = open_file->pid; 3498 else 3499 pid = current->tgid; 3500 3501 server = cifs_pick_channel(tlink_tcon(open_file->tlink)->ses); 3502 xid = get_xid(); 3503 3504 #ifdef CONFIG_CIFS_SMB_DIRECT 3505 if (server->smbd_conn) 3506 max_segs = server->smbd_conn->max_frmr_depth; 3507 #endif 3508 3509 do { 3510 struct cifs_credits credits_on_stack; 3511 struct cifs_credits *credits = &credits_on_stack; 3512 unsigned int wsize, nsegs = 0; 3513 3514 if (signal_pending(current)) { 3515 rc = -EINTR; 3516 break; 3517 } 3518 3519 if (open_file->invalidHandle) { 3520 rc = cifs_reopen_file(open_file, false); 3521 if (rc == -EAGAIN) 3522 continue; 3523 else if (rc) 
				break;
		}

		rc = server->ops->wait_mtu_credits(server, cifs_sb->ctx->wsize,
						   &wsize, credits);
		if (rc)
			break;

		max_len = min_t(const size_t, len, wsize);
		if (!max_len) {
			rc = -EAGAIN;
			add_credits_and_wake_if(server, credits, 0);
			break;
		}

		/* limit this piece by wsize and SMB-Direct segment count */
		cur_len = cifs_limit_bvec_subset(from, max_len, max_segs, &nsegs);
		cifs_dbg(FYI, "write_from_iter len=%zx/%zx nsegs=%u/%lu/%u\n",
			 cur_len, max_len, nsegs, from->nr_segs, max_segs);
		if (cur_len == 0) {
			rc = -EIO;
			add_credits_and_wake_if(server, credits, 0);
			break;
		}

		wdata = cifs_writedata_alloc(cifs_uncached_writev_complete);
		if (!wdata) {
			rc = -ENOMEM;
			add_credits_and_wake_if(server, credits, 0);
			break;
		}

		wdata->sync_mode = WB_SYNC_ALL;
		wdata->offset = (__u64)fpos;
		wdata->cfile = cifsFileInfo_get(open_file);
		wdata->server = server;
		wdata->pid = pid;
		wdata->bytes = cur_len;
		wdata->credits = credits_on_stack;
		wdata->iter = *from;
		wdata->ctx = ctx;
		/* wdata holds a ref on ctx until it is released */
		kref_get(&ctx->refcount);

		iov_iter_truncate(&wdata->iter, cur_len);

		rc = adjust_credits(server, &wdata->credits, wdata->bytes);

		if (!rc) {
			if (wdata->cfile->invalidHandle)
				rc = -EAGAIN;
			else
				rc = server->ops->async_writev(wdata,
					cifs_uncached_writedata_release);
		}

		if (rc) {
			add_credits_and_wake_if(server, &wdata->credits, 0);
			kref_put(&wdata->refcount,
				 cifs_uncached_writedata_release);
			if (rc == -EAGAIN)
				continue;
			break;
		}

		list_add_tail(&wdata->list, wdata_list);
		iov_iter_advance(from, cur_len);
		fpos += cur_len;
		len -= cur_len;
	} while (len > 0);

	free_xid(xid);
	return rc;
}

/*
 * Gather the results of outstanding uncached writes on @ctx.  May be called
 * multiple times (from completion work items and from the issuing thread);
 * returns early, keeping the remainder queued, whenever a wdata is not yet
 * complete.  Retryable (-EAGAIN) failures are resent.  When all wdatas have
 * been consumed, sets ctx->rc and signals the waiter/iocb.
 */
static void collect_uncached_write_data(struct cifs_aio_ctx *ctx)
{
	struct cifs_writedata *wdata, *tmp;
	struct cifs_tcon *tcon;
	struct cifs_sb_info *cifs_sb;
	struct dentry *dentry = ctx->cfile->dentry;
	ssize_t rc;

	tcon = tlink_tcon(ctx->cfile->tlink);
	cifs_sb = CIFS_SB(dentry->d_sb);

	mutex_lock(&ctx->aio_mutex);

	if (list_empty(&ctx->list)) {
		mutex_unlock(&ctx->aio_mutex);
		return;
	}

	rc = ctx->rc;
	/*
	 * Wait for and collect replies for any successful sends in order of
	 * increasing offset. Once an error is hit, then return without waiting
	 * for any more replies.
	 */
restart_loop:
	list_for_each_entry_safe(wdata, tmp, &ctx->list, list) {
		if (!rc) {
			if (!try_wait_for_completion(&wdata->done)) {
				/* not done yet; a later completion re-enters */
				mutex_unlock(&ctx->aio_mutex);
				return;
			}

			if (wdata->result)
				rc = wdata->result;
			else
				ctx->total_len += wdata->bytes;

			/* resend call if it's a retryable error */
			if (rc == -EAGAIN) {
				struct list_head tmp_list;
				struct iov_iter tmp_from = ctx->iter;

				INIT_LIST_HEAD(&tmp_list);
				list_del_init(&wdata->list);

				if (ctx->direct_io)
					rc = cifs_resend_wdata(
						wdata, &tmp_list, ctx);
				else {
					/* rewind to this wdata's file range */
					iov_iter_advance(&tmp_from,
						 wdata->offset - ctx->pos);

					rc = cifs_write_from_iter(wdata->offset,
						wdata->bytes, &tmp_from,
						ctx->cfile, cifs_sb, &tmp_list,
						ctx);

					kref_put(&wdata->refcount,
						cifs_uncached_writedata_release);
				}

				list_splice(&tmp_list, &ctx->list);
				goto restart_loop;
			}
		}
		list_del_init(&wdata->list);
		kref_put(&wdata->refcount, cifs_uncached_writedata_release);
	}

	cifs_stats_bytes_written(tcon, ctx->total_len);
	/* pagecache may now be stale relative to the server copy */
	set_bit(CIFS_INO_INVALID_MAPPING, &CIFS_I(dentry->d_inode)->flags);

	ctx->rc = (rc == 0) ?
		ctx->total_len : rc;

	mutex_unlock(&ctx->aio_mutex);

	if (ctx->iocb && ctx->iocb->ki_complete)
		ctx->iocb->ki_complete(ctx->iocb, ctx->rc);
	else
		complete(&ctx->done);
}

/*
 * Common implementation of uncached/direct write.  Converts the source
 * iterator to a form safe for async worker threads, issues the writes via
 * cifs_write_from_iter(), then either returns -EIOCBQUEUED (async iocb) or
 * waits for completion and returns bytes written or an error.
 */
static ssize_t __cifs_writev(
	struct kiocb *iocb, struct iov_iter *from, bool direct)
{
	struct file *file = iocb->ki_filp;
	ssize_t total_written = 0;
	struct cifsFileInfo *cfile;
	struct cifs_tcon *tcon;
	struct cifs_sb_info *cifs_sb;
	struct cifs_aio_ctx *ctx;
	int rc;

	rc = generic_write_checks(iocb, from);
	if (rc <= 0)
		return rc;

	cifs_sb = CIFS_FILE_SB(file);
	cfile = file->private_data;
	tcon = tlink_tcon(cfile->tlink);

	if (!tcon->ses->server->ops->async_writev)
		return -ENOSYS;

	ctx = cifs_aio_ctx_alloc();
	if (!ctx)
		return -ENOMEM;

	ctx->cfile = cifsFileInfo_get(cfile);

	if (!is_sync_kiocb(iocb))
		ctx->iocb = iocb;

	ctx->pos = iocb->ki_pos;
	ctx->direct_io = direct;
	ctx->nr_pinned_pages = 0;

	if (user_backed_iter(from)) {
		/*
		 * Extract IOVEC/UBUF-type iterators to a BVEC-type iterator as
		 * they contain references to the calling process's virtual
		 * memory layout which won't be available in an async worker
		 * thread. This also takes a pin on every folio involved.
		 */
		rc = netfs_extract_user_iter(from, iov_iter_count(from),
					     &ctx->iter, 0);
		if (rc < 0) {
			kref_put(&ctx->refcount, cifs_aio_ctx_release);
			return rc;
		}

		ctx->nr_pinned_pages = rc;
		ctx->bv = (void *)ctx->iter.bvec;
		ctx->bv_need_unpin = iov_iter_extract_will_pin(from);
	} else if ((iov_iter_is_bvec(from) || iov_iter_is_kvec(from)) &&
		   !is_sync_kiocb(iocb)) {
		/*
		 * If the op is asynchronous, we need to copy the list attached
		 * to a BVEC/KVEC-type iterator, but we assume that the storage
		 * will be pinned by the caller; in any case, we may or may not
		 * be able to pin the pages, so we don't try.
		 */
		ctx->bv = (void *)dup_iter(&ctx->iter, from, GFP_KERNEL);
		if (!ctx->bv) {
			kref_put(&ctx->refcount, cifs_aio_ctx_release);
			return -ENOMEM;
		}
	} else {
		/*
		 * Otherwise, we just pass the iterator down as-is and rely on
		 * the caller to make sure the pages referred to by the
		 * iterator don't evaporate.
		 */
		ctx->iter = *from;
	}

	ctx->len = iov_iter_count(&ctx->iter);

	/* grab a lock here due to read response handlers can access ctx */
	mutex_lock(&ctx->aio_mutex);

	rc = cifs_write_from_iter(iocb->ki_pos, ctx->len, &ctx->iter,
				  cfile, cifs_sb, &ctx->list, ctx);

	/*
	 * If at least one write was successfully sent, then discard any rc
	 * value from the later writes. If the other write succeeds, then
	 * we'll end up returning whatever was written. If it fails, then
	 * we'll get a new rc value from that.
	 */
	if (!list_empty(&ctx->list))
		rc = 0;

	mutex_unlock(&ctx->aio_mutex);

	if (rc) {
		kref_put(&ctx->refcount, cifs_aio_ctx_release);
		return rc;
	}

	if (!is_sync_kiocb(iocb)) {
		/* async: completion path finishes the iocb */
		kref_put(&ctx->refcount, cifs_aio_ctx_release);
		return -EIOCBQUEUED;
	}

	rc = wait_for_completion_killable(&ctx->done);
	if (rc) {
		/* killed while waiting; report partial progress */
		mutex_lock(&ctx->aio_mutex);
		ctx->rc = rc = -EINTR;
		total_written = ctx->total_len;
		mutex_unlock(&ctx->aio_mutex);
	} else {
		rc = ctx->rc;
		total_written = ctx->total_len;
	}

	kref_put(&ctx->refcount, cifs_aio_ctx_release);

	if (unlikely(!total_written))
		return rc;

	iocb->ki_pos += total_written;
	return total_written;
}

/*
 * O_DIRECT write entry point: revalidate the mapping first, then perform an
 * uncached write.
 */
ssize_t cifs_direct_writev(struct kiocb *iocb, struct iov_iter *from)
{
	struct file *file = iocb->ki_filp;

	cifs_revalidate_mapping(file->f_inode);
	return __cifs_writev(iocb, from, true);
}

/* Uncached (cache=none style) write entry point. */
ssize_t cifs_user_writev(struct kiocb *iocb, struct iov_iter *from)
{
	return __cifs_writev(iocb, from, false);
}

/*
 * Cached write with mandatory byte-range lock checking: refuses the write
 * (-EACCES) if a conflicting brlock exists; otherwise writes through the
 * pagecache and syncs per O_SYNC semantics.
 */
static ssize_t
cifs_writev(struct kiocb *iocb, struct iov_iter *from)
{
	struct file *file = iocb->ki_filp;
	struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
	struct inode *inode = file->f_mapping->host;
	struct cifsInodeInfo *cinode = CIFS_I(inode);
	struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;
	ssize_t rc;

	inode_lock(inode);
	/*
	 * We need to hold the sem to be sure nobody modifies lock list
	 * with a brlock that prevents writing.
	 */
	down_read(&cinode->lock_sem);

	rc = generic_write_checks(iocb, from);
	if (rc <= 0)
		goto out;

	if (!cifs_find_lock_conflict(cfile, iocb->ki_pos, iov_iter_count(from),
				     server->vals->exclusive_lock_type, 0,
				     NULL, CIFS_WRITE_OP))
		rc = __generic_file_write_iter(iocb, from);
	else
		rc = -EACCES;
out:
	up_read(&cinode->lock_sem);
	inode_unlock(inode);

	if (rc > 0)
		rc = generic_write_sync(iocb, rc);
	return rc;
}

/*
 * Strict-cache write entry point.  Chooses between cached writes (when we
 * hold a write-caching oplock/lease) and uncached writes, and invalidates
 * read caching state after writing around the cache.
 */
ssize_t
cifs_strict_writev(struct kiocb *iocb, struct iov_iter *from)
{
	struct inode *inode = file_inode(iocb->ki_filp);
	struct cifsInodeInfo *cinode = CIFS_I(inode);
	struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
	struct cifsFileInfo *cfile = (struct cifsFileInfo *)
				     iocb->ki_filp->private_data;
	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
	ssize_t written;

	written = cifs_get_writer(cinode);
	if (written)
		return written;

	if (CIFS_CACHE_WRITE(cinode)) {
		if (cap_unix(tcon->ses) &&
		    (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability))
		    && ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0)) {
			/* POSIX locking: no mandatory brlock check needed */
			written = generic_file_write_iter(iocb, from);
			goto out;
		}
		written = cifs_writev(iocb, from);
		goto out;
	}
	/*
	 * For non-oplocked files in strict cache mode we need to write the data
	 * to the server exactly from the pos to pos+len-1 rather than flush all
	 * affected pages because it may cause a error with mandatory locks on
	 * these pages but not on the region from pos to ppos+len-1.
	 */
	written = cifs_user_writev(iocb, from);
	if (CIFS_CACHE_READ(cinode)) {
		/*
		 * We have read level caching and we have just sent a write
		 * request to the server thus making data in the cache stale.
		 * Zap the cache and set oplock/lease level to NONE to avoid
		 * reading stale data from the cache. All subsequent read
		 * operations will read new data from the server.
		 */
		cifs_zap_mapping(inode);
		cifs_dbg(FYI, "Set Oplock/Lease to NONE for inode=%p after write\n",
			 inode);
		cinode->oplock = 0;
	}
out:
	cifs_put_writer(cinode);
	return written;
}

/*
 * Allocate and initialise a read descriptor whose completion work runs
 * @complete.
 */
static struct cifs_readdata *cifs_readdata_alloc(work_func_t complete)
{
	struct cifs_readdata *rdata;

	rdata = kzalloc(sizeof(*rdata), GFP_KERNEL);
	if (rdata) {
		kref_init(&rdata->refcount);
		INIT_LIST_HEAD(&rdata->list);
		init_completion(&rdata->done);
		INIT_WORK(&rdata->work, complete);
	}

	return rdata;
}

/*
 * Final kref release for a read descriptor: drops the aio-ctx reference,
 * deregisters any SMB-Direct MR, puts the file reference and frees the rdata.
 */
void
cifs_readdata_release(struct kref *refcount)
{
	struct cifs_readdata *rdata = container_of(refcount,
					struct cifs_readdata, refcount);

	if (rdata->ctx)
		kref_put(&rdata->ctx->refcount, cifs_aio_ctx_release);
#ifdef CONFIG_CIFS_SMB_DIRECT
	if (rdata->mr) {
		smbd_deregister_mr(rdata->mr);
		rdata->mr = NULL;
	}
#endif
	if (rdata->cfile)
		cifsFileInfo_put(rdata->cfile);

	kfree(rdata);
}

static void collect_uncached_read_data(struct cifs_aio_ctx *ctx);

/*
 * Work item run when an uncached read completes: signal waiters and kick
 * collection of completed reads for the owning aio context.
 */
static void
cifs_uncached_readv_complete(struct work_struct *work)
{
	struct cifs_readdata *rdata = container_of(work,
						struct cifs_readdata, work);

	complete(&rdata->done);
	collect_uncached_read_data(rdata->ctx);
	/* the below call can possibly free the last ref to aio ctx */
	kref_put(&rdata->refcount, cifs_readdata_release);
}

/*
 * Resend a previously-failed uncached read in its entirety (not split into
 * segments), waiting until enough credits are available.  On success the
 * rdata is queued on @rdata_list; on failure the caller's reference to it
 * is dropped.
 */
static int cifs_resend_rdata(struct cifs_readdata *rdata,
			     struct list_head *rdata_list,
			     struct cifs_aio_ctx *ctx)
{
	unsigned int rsize;
	struct cifs_credits credits;
	int rc;
	struct TCP_Server_Info *server;

	/* XXX: should we pick a new channel here? */
	server = rdata->server;

	do {
		if (rdata->cfile->invalidHandle) {
			rc = cifs_reopen_file(rdata->cfile, true);
			if (rc == -EAGAIN)
				continue;
			else if (rc)
				break;
		}

		/*
		 * Wait for credits to resend this rdata.
		 * Note: we are attempting to resend the whole rdata not in
		 * segments
		 */
		do {
			rc = server->ops->wait_mtu_credits(server, rdata->bytes,
						&rsize, &credits);

			if (rc)
				goto fail;

			if (rsize < rdata->bytes) {
				/* not enough yet; give them back and retry */
				add_credits_and_wake_if(server, &credits, 0);
				msleep(1000);
			}
		} while (rsize < rdata->bytes);
		rdata->credits = credits;

		rc = adjust_credits(server, &rdata->credits, rdata->bytes);
		if (!rc) {
			if (rdata->cfile->invalidHandle)
				rc = -EAGAIN;
			else {
#ifdef CONFIG_CIFS_SMB_DIRECT
				if (rdata->mr) {
					/* re-register the MR for the resend */
					rdata->mr->need_invalidate = true;
					smbd_deregister_mr(rdata->mr);
					rdata->mr = NULL;
				}
#endif
				rc = server->ops->async_readv(rdata);
			}
		}

		/* If the read was successfully sent, we are done */
		if (!rc) {
			/* Add to aio pending list */
			list_add_tail(&rdata->list, rdata_list);
			return 0;
		}

		/* Roll back credits and retry if needed */
		add_credits_and_wake_if(server, &rdata->credits, 0);
	} while (rc == -EAGAIN);

fail:
	kref_put(&rdata->refcount, cifs_readdata_release);
	return rc;
}

/*
 * Issue uncached async reads covering [fpos, fpos+len) into ctx->iter,
 * chopping the range into rsize/segment-limited pieces.  Each successfully
 * sent rdata is queued on @rdata_list; completion is collected via
 * cifs_uncached_readv_complete().
 */
static int
cifs_send_async_read(loff_t fpos, size_t len, struct cifsFileInfo *open_file,
		     struct cifs_sb_info *cifs_sb, struct list_head *rdata_list,
		     struct cifs_aio_ctx *ctx)
{
	struct cifs_readdata *rdata;
	unsigned int rsize, nsegs, max_segs = INT_MAX;
	struct cifs_credits credits_on_stack;
	struct cifs_credits *credits = &credits_on_stack;
	size_t cur_len, max_len;
	int rc;
	pid_t pid;
	struct TCP_Server_Info *server;

	server = cifs_pick_channel(tlink_tcon(open_file->tlink)->ses);

#ifdef CONFIG_CIFS_SMB_DIRECT
	if (server->smbd_conn)
		max_segs = server->smbd_conn->max_frmr_depth;
#endif

	if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
		pid = open_file->pid;
	else
		pid = current->tgid;

	do {
		if (open_file->invalidHandle) {
			rc = cifs_reopen_file(open_file, true);
			if (rc == -EAGAIN)
				continue;
			else if (rc)
				break;
		}

		if (cifs_sb->ctx->rsize == 0)
			cifs_sb->ctx->rsize =
				server->ops->negotiate_rsize(tlink_tcon(open_file->tlink),
							     cifs_sb->ctx);

		rc = server->ops->wait_mtu_credits(server, cifs_sb->ctx->rsize,
						   &rsize, credits);
		if (rc)
			break;

		max_len = min_t(size_t, len, rsize);

		/* limit this piece by rsize and SMB-Direct segment count */
		cur_len = cifs_limit_bvec_subset(&ctx->iter, max_len,
						 max_segs, &nsegs);
		cifs_dbg(FYI, "read-to-iter len=%zx/%zx nsegs=%u/%lu/%u\n",
			 cur_len, max_len, nsegs, ctx->iter.nr_segs, max_segs);
		if (cur_len == 0) {
			rc = -EIO;
			add_credits_and_wake_if(server, credits, 0);
			break;
		}

		rdata = cifs_readdata_alloc(cifs_uncached_readv_complete);
		if (!rdata) {
			add_credits_and_wake_if(server, credits, 0);
			rc = -ENOMEM;
			break;
		}

		rdata->server = server;
		rdata->cfile = cifsFileInfo_get(open_file);
		rdata->offset = fpos;
		rdata->bytes = cur_len;
		rdata->pid = pid;
		rdata->credits = credits_on_stack;
		rdata->ctx = ctx;
		/* rdata holds a ref on ctx until it is released */
		kref_get(&ctx->refcount);

		rdata->iter = ctx->iter;
		iov_iter_truncate(&rdata->iter, cur_len);

		rc = adjust_credits(server, &rdata->credits, rdata->bytes);

		if (!rc) {
			if (rdata->cfile->invalidHandle)
				rc = -EAGAIN;
			else
				rc = server->ops->async_readv(rdata);
		}

		if (rc) {
			add_credits_and_wake_if(server, &rdata->credits, 0);
			kref_put(&rdata->refcount, cifs_readdata_release);
			if (rc == -EAGAIN)
				continue;
			break;
		}

		list_add_tail(&rdata->list, rdata_list);
		iov_iter_advance(&ctx->iter, cur_len);
		fpos += cur_len;
		len -= cur_len;
	} while (len > 0);

	return rc;
}

/*
 * Gather the results of outstanding uncached reads on @ctx.  May be called
 * multiple times; returns early whenever an rdata is not yet complete.
 * Retryable (-EAGAIN) failures are resent; short reads become -ENODATA,
 * which is masked at the end.  When all rdatas are consumed, sets ctx->rc
 * and signals the waiter/iocb.
 */
static void
collect_uncached_read_data(struct cifs_aio_ctx *ctx)
{
	struct cifs_readdata *rdata, *tmp;
	struct cifs_sb_info *cifs_sb;
	int rc;

	cifs_sb = CIFS_SB(ctx->cfile->dentry->d_sb);

	mutex_lock(&ctx->aio_mutex);

	if (list_empty(&ctx->list)) {
		mutex_unlock(&ctx->aio_mutex);
		return;
	}

	rc = ctx->rc;
	/* the loop below should proceed in the order of increasing offsets */
again:
	list_for_each_entry_safe(rdata, tmp, &ctx->list, list) {
		if (!rc) {
			if (!try_wait_for_completion(&rdata->done)) {
				/* not done yet; a later completion re-enters */
				mutex_unlock(&ctx->aio_mutex);
				return;
			}

			if (rdata->result == -EAGAIN) {
				/* resend call if it's a retryable error */
				struct list_head tmp_list;
				unsigned int got_bytes = rdata->got_bytes;

				list_del_init(&rdata->list);
				INIT_LIST_HEAD(&tmp_list);

				if (ctx->direct_io) {
					/*
					 * Re-use rdata as this is a
					 * direct I/O
					 */
					rc = cifs_resend_rdata(
						rdata,
						&tmp_list, ctx);
				} else {
					/* reissue only the unread remainder */
					rc = cifs_send_async_read(
						rdata->offset + got_bytes,
						rdata->bytes - got_bytes,
						rdata->cfile, cifs_sb,
						&tmp_list, ctx);

					kref_put(&rdata->refcount,
						cifs_readdata_release);
				}

				list_splice(&tmp_list, &ctx->list);

				goto again;
			} else if (rdata->result)
				rc = rdata->result;

			/* if there was a short read -- discard anything left */
			if (rdata->got_bytes && rdata->got_bytes < rdata->bytes)
				rc = -ENODATA;

			ctx->total_len += rdata->got_bytes;
		}
		list_del_init(&rdata->list);
		kref_put(&rdata->refcount, cifs_readdata_release);
	}

	/* mask nodata case */
	if (rc == -ENODATA)
		rc = 0;

	ctx->rc = (rc == 0) ? (ssize_t)ctx->total_len : rc;

	mutex_unlock(&ctx->aio_mutex);

	if (ctx->iocb && ctx->iocb->ki_complete)
		ctx->iocb->ki_complete(ctx->iocb, ctx->rc);
	else
		complete(&ctx->done);
}

/*
 * Common implementation of uncached/direct read.  Converts the destination
 * iterator to a form safe for async worker threads, issues the reads via
 * cifs_send_async_read(), then either returns -EIOCBQUEUED (async iocb) or
 * waits for completion and returns bytes read or an error.
 */
static ssize_t __cifs_readv(
	struct kiocb *iocb, struct iov_iter *to, bool direct)
{
	size_t len;
	struct file *file = iocb->ki_filp;
	struct cifs_sb_info *cifs_sb;
	struct cifsFileInfo *cfile;
	struct cifs_tcon *tcon;
	ssize_t rc, total_read = 0;
	loff_t offset = iocb->ki_pos;
	struct cifs_aio_ctx *ctx;

	len = iov_iter_count(to);
	if (!len)
		return 0;

	cifs_sb = CIFS_FILE_SB(file);
	cfile = file->private_data;
	tcon = tlink_tcon(cfile->tlink);

	if (!tcon->ses->server->ops->async_readv)
		return -ENOSYS;

	if ((file->f_flags & O_ACCMODE) == O_WRONLY)
		cifs_dbg(FYI, "attempting read on write only file instance\n");

	ctx = cifs_aio_ctx_alloc();
	if (!ctx)
		return -ENOMEM;

	ctx->pos = offset;
	ctx->direct_io = direct;
	ctx->len = len;
	ctx->cfile = cifsFileInfo_get(cfile);
	ctx->nr_pinned_pages = 0;

	if (!is_sync_kiocb(iocb))
		ctx->iocb = iocb;

	if (user_backed_iter(to)) {
		/*
		 * Extract IOVEC/UBUF-type iterators to a BVEC-type iterator as
		 * they contain references to the calling process's virtual
		 * memory layout which won't be available in an async worker
		 * thread. This also takes a pin on every folio involved.
		 */
		rc = netfs_extract_user_iter(to, iov_iter_count(to),
					     &ctx->iter, 0);
		if (rc < 0) {
			kref_put(&ctx->refcount, cifs_aio_ctx_release);
			return rc;
		}

		ctx->nr_pinned_pages = rc;
		ctx->bv = (void *)ctx->iter.bvec;
		ctx->bv_need_unpin = iov_iter_extract_will_pin(to);
		/* read fills user pages; they must be dirtied on release */
		ctx->should_dirty = true;
	} else if ((iov_iter_is_bvec(to) || iov_iter_is_kvec(to)) &&
		   !is_sync_kiocb(iocb)) {
		/*
		 * If the op is asynchronous, we need to copy the list attached
		 * to a BVEC/KVEC-type iterator, but we assume that the storage
		 * will be retained by the caller; in any case, we may or may
		 * not be able to pin the pages, so we don't try.
		 */
		ctx->bv = (void *)dup_iter(&ctx->iter, to, GFP_KERNEL);
		if (!ctx->bv) {
			kref_put(&ctx->refcount, cifs_aio_ctx_release);
			return -ENOMEM;
		}
	} else {
		/*
		 * Otherwise, we just pass the iterator down as-is and rely on
		 * the caller to make sure the pages referred to by the
		 * iterator don't evaporate.
		 */
		ctx->iter = *to;
	}

	if (direct) {
		/* flush cached dirty data so the direct read sees it */
		rc = filemap_write_and_wait_range(file->f_inode->i_mapping,
						  offset, offset + len - 1);
		if (rc) {
			kref_put(&ctx->refcount, cifs_aio_ctx_release);
			return -EAGAIN;
		}
	}

	/* grab a lock here due to read response handlers can access ctx */
	mutex_lock(&ctx->aio_mutex);

	rc = cifs_send_async_read(offset, len, cfile, cifs_sb, &ctx->list, ctx);

	/* if at least one read request send succeeded, then reset rc */
	if (!list_empty(&ctx->list))
		rc = 0;

	mutex_unlock(&ctx->aio_mutex);

	if (rc) {
		kref_put(&ctx->refcount, cifs_aio_ctx_release);
		return rc;
	}

	if (!is_sync_kiocb(iocb)) {
		/* async: completion path finishes the iocb */
		kref_put(&ctx->refcount, cifs_aio_ctx_release);
		return -EIOCBQUEUED;
	}

	rc = wait_for_completion_killable(&ctx->done);
	if (rc) {
		/* killed while waiting; report partial progress */
		mutex_lock(&ctx->aio_mutex);
		ctx->rc = rc = -EINTR;
		total_read = ctx->total_len;
		mutex_unlock(&ctx->aio_mutex);
	} else {
		rc = ctx->rc;
		total_read = ctx->total_len;
	}

	kref_put(&ctx->refcount, cifs_aio_ctx_release);

	if (total_read) {
		iocb->ki_pos += total_read;
		return total_read;
	}
	return rc;
}

/* O_DIRECT read entry point. */
ssize_t cifs_direct_readv(struct kiocb *iocb, struct iov_iter *to)
{
	return __cifs_readv(iocb, to, true);
}

/* Uncached (cache=none style) read entry point. */
ssize_t cifs_user_readv(struct kiocb *iocb, struct iov_iter *to)
{
	return __cifs_readv(iocb, to, false);
}

/*
 * Strict-cache read entry point.  Reads from the server unless we hold a
 * read-caching oplock/lease; cached reads are checked against mandatory
 * byte-range locks unless POSIX locking is in force.
 */
ssize_t
cifs_strict_readv(struct kiocb *iocb, struct iov_iter *to)
{
	struct inode *inode = file_inode(iocb->ki_filp);
	struct cifsInodeInfo *cinode = CIFS_I(inode);
	struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
	struct cifsFileInfo *cfile = (struct cifsFileInfo *)
				     iocb->ki_filp->private_data;
	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
	int rc = -EACCES;

	/*
	 * In strict cache mode we need to read from the server all the time
	 * if we don't have level II oplock because the server can delay mtime
	 * change - so we can't make a decision about inode invalidating.
	 * And we can also fail with pagereading if there are mandatory locks
	 * on pages affected by this read but not on the region from pos to
	 * pos+len-1.
	 */
	if (!CIFS_CACHE_READ(cinode))
		return cifs_user_readv(iocb, to);

	if (cap_unix(tcon->ses) &&
	    (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
	    ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
		return generic_file_read_iter(iocb, to);

	/*
	 * We need to hold the sem to be sure nobody modifies lock list
	 * with a brlock that prevents reading.
	 */
	down_read(&cinode->lock_sem);
	if (!cifs_find_lock_conflict(cfile, iocb->ki_pos, iov_iter_count(to),
				     tcon->ses->server->vals->shared_lock_type,
				     0, NULL, CIFS_READ_OP))
		rc = generic_file_read_iter(iocb, to);
	up_read(&cinode->lock_sem);
	return rc;
}

/*
 * Legacy synchronous read into a kernel buffer, issuing rsize-bounded
 * sync_read calls until @read_size bytes are read or an error/EOF occurs.
 * Returns the number of bytes read, or a negative error if nothing was read.
 */
static ssize_t
cifs_read(struct file *file, char *read_data, size_t read_size, loff_t *offset)
{
	int rc = -EACCES;
	unsigned int bytes_read = 0;
	unsigned int total_read;
	unsigned int current_read_size;
	unsigned int rsize;
	struct cifs_sb_info *cifs_sb;
	struct cifs_tcon *tcon;
	struct TCP_Server_Info *server;
	unsigned int xid;
	char *cur_offset;
	struct cifsFileInfo *open_file;
	struct cifs_io_parms io_parms = {0};
	int buf_type = CIFS_NO_BUFFER;
	__u32 pid;

	xid = get_xid();
	cifs_sb = CIFS_FILE_SB(file);

	/* FIXME: set up handlers for larger reads and/or convert to async */
	rsize = min_t(unsigned int, cifs_sb->ctx->rsize, CIFSMaxBufSize);

	if (file->private_data == NULL) {
		rc = -EBADF;
		free_xid(xid);
		return rc;
	}
	open_file = file->private_data;
tcon = tlink_tcon(open_file->tlink); 4417 server = cifs_pick_channel(tcon->ses); 4418 4419 if (!server->ops->sync_read) { 4420 free_xid(xid); 4421 return -ENOSYS; 4422 } 4423 4424 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD) 4425 pid = open_file->pid; 4426 else 4427 pid = current->tgid; 4428 4429 if ((file->f_flags & O_ACCMODE) == O_WRONLY) 4430 cifs_dbg(FYI, "attempting read on write only file instance\n"); 4431 4432 for (total_read = 0, cur_offset = read_data; read_size > total_read; 4433 total_read += bytes_read, cur_offset += bytes_read) { 4434 do { 4435 current_read_size = min_t(uint, read_size - total_read, 4436 rsize); 4437 /* 4438 * For windows me and 9x we do not want to request more 4439 * than it negotiated since it will refuse the read 4440 * then. 4441 */ 4442 if (!(tcon->ses->capabilities & 4443 tcon->ses->server->vals->cap_large_files)) { 4444 current_read_size = min_t(uint, 4445 current_read_size, CIFSMaxBufSize); 4446 } 4447 if (open_file->invalidHandle) { 4448 rc = cifs_reopen_file(open_file, true); 4449 if (rc != 0) 4450 break; 4451 } 4452 io_parms.pid = pid; 4453 io_parms.tcon = tcon; 4454 io_parms.offset = *offset; 4455 io_parms.length = current_read_size; 4456 io_parms.server = server; 4457 rc = server->ops->sync_read(xid, &open_file->fid, &io_parms, 4458 &bytes_read, &cur_offset, 4459 &buf_type); 4460 } while (rc == -EAGAIN); 4461 4462 if (rc || (bytes_read == 0)) { 4463 if (total_read) { 4464 break; 4465 } else { 4466 free_xid(xid); 4467 return rc; 4468 } 4469 } else { 4470 cifs_stats_bytes_read(tcon, total_read); 4471 *offset += bytes_read; 4472 } 4473 } 4474 free_xid(xid); 4475 return total_read; 4476 } 4477 4478 /* 4479 * If the page is mmap'ed into a process' page tables, then we need to make 4480 * sure that it doesn't change while being written back. 
4481 */ 4482 static vm_fault_t cifs_page_mkwrite(struct vm_fault *vmf) 4483 { 4484 struct folio *folio = page_folio(vmf->page); 4485 4486 /* Wait for the folio to be written to the cache before we allow it to 4487 * be modified. We then assume the entire folio will need writing back. 4488 */ 4489 #ifdef CONFIG_CIFS_FSCACHE 4490 if (folio_test_fscache(folio) && 4491 folio_wait_fscache_killable(folio) < 0) 4492 return VM_FAULT_RETRY; 4493 #endif 4494 4495 folio_wait_writeback(folio); 4496 4497 if (folio_lock_killable(folio) < 0) 4498 return VM_FAULT_RETRY; 4499 return VM_FAULT_LOCKED; 4500 } 4501 4502 static const struct vm_operations_struct cifs_file_vm_ops = { 4503 .fault = filemap_fault, 4504 .map_pages = filemap_map_pages, 4505 .page_mkwrite = cifs_page_mkwrite, 4506 }; 4507 4508 int cifs_file_strict_mmap(struct file *file, struct vm_area_struct *vma) 4509 { 4510 int xid, rc = 0; 4511 struct inode *inode = file_inode(file); 4512 4513 xid = get_xid(); 4514 4515 if (!CIFS_CACHE_READ(CIFS_I(inode))) 4516 rc = cifs_zap_mapping(inode); 4517 if (!rc) 4518 rc = generic_file_mmap(file, vma); 4519 if (!rc) 4520 vma->vm_ops = &cifs_file_vm_ops; 4521 4522 free_xid(xid); 4523 return rc; 4524 } 4525 4526 int cifs_file_mmap(struct file *file, struct vm_area_struct *vma) 4527 { 4528 int rc, xid; 4529 4530 xid = get_xid(); 4531 4532 rc = cifs_revalidate_file(file); 4533 if (rc) 4534 cifs_dbg(FYI, "Validation prior to mmap failed, error=%d\n", 4535 rc); 4536 if (!rc) 4537 rc = generic_file_mmap(file, vma); 4538 if (!rc) 4539 vma->vm_ops = &cifs_file_vm_ops; 4540 4541 free_xid(xid); 4542 return rc; 4543 } 4544 4545 /* 4546 * Unlock a bunch of folios in the pagecache. 
 */
static void cifs_unlock_folios(struct address_space *mapping, pgoff_t first, pgoff_t last)
{
	struct folio *folio;
	XA_STATE(xas, &mapping->i_pages, first);

	/* The folios in [first, last] were left locked by cifs_readahead(),
	 * so they can't be evicted; the RCU read lock is sufficient to walk
	 * the xarray here.
	 */
	rcu_read_lock();
	xas_for_each(&xas, folio, last) {
		folio_unlock(folio);
	}
	rcu_read_unlock();
}

/*
 * Work-item callback run when an asynchronous readahead RPC completes
 * (queued by the transport for a cifs_readdata allocated in
 * cifs_readahead()).  A result of 0, or -EAGAIN with some bytes received,
 * counts as usable data: it is copied to the local cache (fscache) and the
 * folios are marked uptodate.  Any tail of the request the server did not
 * fill is zeroed first so no stale data is exposed.  In all cases the
 * folios are unlocked and the work item's reference on rdata is dropped.
 */
static void cifs_readahead_complete(struct work_struct *work)
{
	struct cifs_readdata *rdata = container_of(work,
						   struct cifs_readdata, work);
	struct folio *folio;
	pgoff_t last;
	bool good = rdata->result == 0 || (rdata->result == -EAGAIN && rdata->got_bytes);

	XA_STATE(xas, &rdata->mapping->i_pages, rdata->offset / PAGE_SIZE);

	if (good)
		cifs_readahead_to_fscache(rdata->mapping->host,
					  rdata->offset, rdata->bytes);

	/* Zero whatever the server didn't send (short read / error). */
	if (iov_iter_count(&rdata->iter) > 0)
		iov_iter_zero(iov_iter_count(&rdata->iter), &rdata->iter);

	last = (rdata->offset + rdata->bytes - 1) / PAGE_SIZE;

	rcu_read_lock();
	xas_for_each(&xas, folio, last) {
		if (good) {
			flush_dcache_folio(folio);
			folio_mark_uptodate(folio);
		}
		folio_unlock(folio);
	}
	rcu_read_unlock();

	kref_put(&rdata->refcount, cifs_readdata_release);
}

/*
 * ->readahead() for CIFS: carve the readahead window into rsize-sized
 * chunks, serving ranges already present in the local cache (fscache)
 * synchronously and issuing asynchronous read RPCs for the rest.  Folios
 * handed to an async request remain locked until cifs_readahead_complete()
 * runs; on submission failure they are unlocked here so the VFS can fall
 * back to ->read_folio().
 */
static void cifs_readahead(struct readahead_control *ractl)
{
	struct cifsFileInfo *open_file = ractl->file->private_data;
	struct cifs_sb_info *cifs_sb = CIFS_FILE_SB(ractl->file);
	struct TCP_Server_Info *server;
	unsigned int xid, nr_pages, cache_nr_pages = 0;
	unsigned int ra_pages;
	pgoff_t next_cached = ULONG_MAX, ra_index;
	bool caching = fscache_cookie_enabled(cifs_inode_cookie(ractl->mapping->host)) &&
		cifs_inode_cookie(ractl->mapping->host)->cache_priv;
	bool check_cache = caching;
	pid_t pid;
	int rc = 0;

	/* Note that readahead_count() lags behind our dequeuing of pages from
	 * the ractl, so we have to keep track for ourselves.
	 */
	ra_pages = readahead_count(ractl);
	ra_index = readahead_index(ractl);

	xid = get_xid();

	if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
		pid = open_file->pid;
	else
		pid = current->tgid;

	server = cifs_pick_channel(tlink_tcon(open_file->tlink)->ses);

	cifs_dbg(FYI, "%s: file=%p mapping=%p num_pages=%u\n",
		 __func__, ractl->file, ractl->mapping, ra_pages);

	/*
	 * Chop the readahead request up into rsize-sized read requests.
	 */
	while ((nr_pages = ra_pages)) {
		unsigned int i, rsize;
		struct cifs_readdata *rdata;
		struct cifs_credits credits_on_stack;
		struct cifs_credits *credits = &credits_on_stack;
		struct folio *folio;
		pgoff_t fsize;

		/*
		 * Find out if we have anything cached in the range of
		 * interest, and if so, where the next chunk of cached data is.
		 */
		if (caching) {
			if (check_cache) {
				rc = cifs_fscache_query_occupancy(
					ractl->mapping->host, ra_index, nr_pages,
					&next_cached, &cache_nr_pages);
				if (rc < 0)
					caching = false;
				check_cache = false;
			}

			if (ra_index == next_cached) {
				/*
				 * TODO: Send a whole batch of pages to be read
				 * by the cache.
				 */
				folio = readahead_folio(ractl);
				fsize = folio_nr_pages(folio);
				ra_pages -= fsize;
				ra_index += fsize;
				if (cifs_readpage_from_fscache(ractl->mapping->host,
							       &folio->page) < 0) {
					/*
					 * TODO: Deal with cache read failure
					 * here, but for the moment, delegate
					 * that to readpage.
					 */
					caching = false;
				}
				folio_unlock(folio);
				next_cached += fsize;
				cache_nr_pages -= fsize;
				if (cache_nr_pages == 0)
					check_cache = true;
				continue;
			}
		}

		/* Reconnect a stale handle before issuing the read. */
		if (open_file->invalidHandle) {
			rc = cifs_reopen_file(open_file, true);
			if (rc) {
				if (rc == -EAGAIN)
					continue;
				break;
			}
		}

		if (cifs_sb->ctx->rsize == 0)
			cifs_sb->ctx->rsize =
				server->ops->negotiate_rsize(tlink_tcon(open_file->tlink),
							     cifs_sb->ctx);

		/* Block until the server grants credits for an rsize read. */
		rc = server->ops->wait_mtu_credits(server, cifs_sb->ctx->rsize,
						   &rsize, credits);
		if (rc)
			break;
		nr_pages = min_t(size_t, rsize / PAGE_SIZE, ra_pages);
		/* Don't run past the start of the next cached chunk. */
		if (next_cached != ULONG_MAX)
			nr_pages = min_t(size_t, nr_pages, next_cached - ra_index);

		/*
		 * Give up immediately if rsize is too small to read an entire
		 * page. The VFS will fall back to readpage. We should never
		 * reach this point however since we set ra_pages to 0 when the
		 * rsize is smaller than a cache page.
		 */
		if (unlikely(!nr_pages)) {
			add_credits_and_wake_if(server, credits, 0);
			break;
		}

		rdata = cifs_readdata_alloc(cifs_readahead_complete);
		if (!rdata) {
			/* best to give up if we're out of mem */
			add_credits_and_wake_if(server, credits, 0);
			break;
		}

		rdata->offset = ra_index * PAGE_SIZE;
		rdata->bytes = nr_pages * PAGE_SIZE;
		rdata->cfile = cifsFileInfo_get(open_file);
		rdata->server = server;
		rdata->mapping = ractl->mapping;
		rdata->pid = pid;
		rdata->credits = credits_on_stack;

		/* Detach the folios covered by this request from the ractl;
		 * they stay locked until completion (or the error path below).
		 */
		for (i = 0; i < nr_pages; i++) {
			if (!readahead_folio(ractl))
				WARN_ON(1);
		}
		ra_pages -= nr_pages;
		ra_index += nr_pages;

		iov_iter_xarray(&rdata->iter, ITER_DEST, &rdata->mapping->i_pages,
				rdata->offset, rdata->bytes);

		rc = adjust_credits(server, &rdata->credits, rdata->bytes);
		if (!rc) {
			if (rdata->cfile->invalidHandle)
				rc = -EAGAIN;
			else
				rc = server->ops->async_readv(rdata);
		}

		if (rc) {
			add_credits_and_wake_if(server, &rdata->credits, 0);
			cifs_unlock_folios(rdata->mapping,
					   rdata->offset / PAGE_SIZE,
					   (rdata->offset + rdata->bytes - 1) / PAGE_SIZE);
			/* Fallback to the readpage in error/reconnect cases */
			kref_put(&rdata->refcount, cifs_readdata_release);
			break;
		}

		/* Submitted: drop our ref; the I/O path holds its own. */
		kref_put(&rdata->refcount, cifs_readdata_release);
	}

	free_xid(xid);
}

/*
 * cifs_readpage_worker must be called with the page pinned
 */
static int cifs_readpage_worker(struct file *file, struct page *page,
	loff_t *poffset)
{
	struct inode *inode = file_inode(file);
	struct timespec64 atime, mtime;
	char *read_data;
	int rc;

	/* Is the page cached? */
	rc = cifs_readpage_from_fscache(inode, page);
	if (rc == 0)
		goto read_complete;

	read_data = kmap(page);
	/* for reads over a certain size could initiate async read ahead */

	rc = cifs_read(file, read_data, PAGE_SIZE, poffset);

	if (rc < 0)
		goto io_error;
	else
		cifs_dbg(FYI, "Bytes read %d\n", rc);

	/* we do not want atime to be less than mtime, it broke some apps */
	atime = inode_set_atime_to_ts(inode, current_time(inode));
	mtime = inode_get_mtime(inode);
	if (timespec64_compare(&atime, &mtime) < 0)
		inode_set_atime_to_ts(inode, inode_get_mtime(inode));

	/* Zero the tail beyond what was read so no stale data is exposed. */
	if (PAGE_SIZE > rc)
		memset(read_data + rc, 0, PAGE_SIZE - rc);

	flush_dcache_page(page);
	SetPageUptodate(page);
	rc = 0;

io_error:
	kunmap(page);

read_complete:
	/* The page is always unlocked on return, success or failure. */
	unlock_page(page);
	return rc;
}

/* ->read_folio(): synchronously fill one folio, first from the local cache
 * and otherwise with a server read via cifs_readpage_worker().
 */
static int cifs_read_folio(struct file *file, struct folio *folio)
{
	struct page *page = &folio->page;
	loff_t offset = page_file_offset(page);
	int rc = -EACCES;
	unsigned int xid;

	xid = get_xid();

	if (file->private_data == NULL) {
		rc = -EBADF;
		free_xid(xid);
		return rc;
	}

	cifs_dbg(FYI, "read_folio %p at offset %d 0x%x\n",
		 page, (int)offset, (int)offset);

	rc = cifs_readpage_worker(file, page, &offset);

	free_xid(xid);
	return rc;
}

/* Returns 1 if any open handle on the inode has write access, else 0.
 * Walks the inode's open-file list under open_file_lock.
 */
static int is_inode_writable(struct cifsInodeInfo *cifs_inode)
{
	struct cifsFileInfo *open_file;

	spin_lock(&cifs_inode->open_file_lock);
	list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
		if (OPEN_FMODE(open_file->f_flags) & FMODE_WRITE) {
			spin_unlock(&cifs_inode->open_file_lock);
			return 1;
		}
	}
	spin_unlock(&cifs_inode->open_file_lock);
	return 0;
}

/* We do not want to update the file size from server for inodes
   open for
write - to avoid races with writepage extending
   the file - in the future we could consider allowing
   refreshing the inode only on increases in the file size
   but this is tricky to do without racing with writebehind
   page caching in the current Linux kernel design */
bool is_size_safe_to_change(struct cifsInodeInfo *cifsInode, __u64 end_of_file,
			    bool from_readdir)
{
	if (!cifsInode)
		return true;

	if (is_inode_writable(cifsInode) ||
		((cifsInode->oplock & CIFS_CACHE_RW_FLG) != 0 && from_readdir)) {
		/* This inode is open for write at least once */
		struct cifs_sb_info *cifs_sb;

		cifs_sb = CIFS_SB(cifsInode->netfs.inode.i_sb);
		if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DIRECT_IO) {
			/* since no page cache to corrupt on directio
			we can change size safely */
			return true;
		}

		/* Only allow growth; shrinking could invalidate dirty pages. */
		if (i_size_read(&cifsInode->netfs.inode) < end_of_file)
			return true;

		return false;
	} else
		return true;
}

/* ->write_begin(): pin and (if needed) pre-fill the target page so a
 * partial write can be merged with existing data.  *pagep is returned
 * locked; cifs_write_end() completes the operation.
 */
static int cifs_write_begin(struct file *file, struct address_space *mapping,
			    loff_t pos, unsigned len,
			    struct page **pagep, void **fsdata)
{
	int oncethru = 0;
	pgoff_t index = pos >> PAGE_SHIFT;
	loff_t offset = pos & (PAGE_SIZE - 1);
	loff_t page_start = pos & PAGE_MASK;
	loff_t i_size;
	struct page *page;
	int rc = 0;

	cifs_dbg(FYI, "write_begin from %lld len %d\n", (long long)pos, len);

start:
	page = grab_cache_page_write_begin(mapping, index);
	if (!page) {
		rc = -ENOMEM;
		goto out;
	}

	if (PageUptodate(page))
		goto out;

	/*
	 * If we write a full page it will be up to date, no need to read from
	 * the server. If the write is short, we'll end up doing a sync write
	 * instead.
	 */
	if (len == PAGE_SIZE)
		goto out;

	/*
	 * optimize away the read when we have an oplock, and we're not
	 * expecting to use any of the data we'd be reading in. That
	 * is, when the page lies beyond the EOF, or straddles the EOF
	 * and the write will cover all of the existing data.
	 */
	if (CIFS_CACHE_READ(CIFS_I(mapping->host))) {
		i_size = i_size_read(mapping->host);
		if (page_start >= i_size ||
		    (offset == 0 && (pos + len) >= i_size)) {
			zero_user_segments(page, 0, offset,
					   offset + len,
					   PAGE_SIZE);
			/*
			 * PageChecked means that the parts of the page
			 * to which we're not writing are considered up
			 * to date. Once the data is copied to the
			 * page, it can be set uptodate.
			 */
			SetPageChecked(page);
			goto out;
		}
	}

	if ((file->f_flags & O_ACCMODE) != O_WRONLY && !oncethru) {
		/*
		 * might as well read a page, it is fast enough. If we get
		 * an error, we don't need to return it. cifs_write_end will
		 * do a sync write instead since PG_uptodate isn't set.
		 */
		cifs_readpage_worker(file, page, &page_start);
		put_page(page);
		oncethru = 1;
		goto start;
	} else {
		/* we could try using another file handle if there is one -
		   but how would we lock it to prevent close of that handle
		   racing with this read? In any case
		   this will be written out by write_end so is fine */
	}
out:
	*pagep = page;
	return rc;
}

/* ->release_folio(): say whether the folio can be freed.  Refuse while
 * private data is attached or (for kswapd / non-FS allocations) while the
 * cache is still writing it; otherwise note the release to fscache.
 */
static bool cifs_release_folio(struct folio *folio, gfp_t gfp)
{
	if (folio_test_private(folio))
		return 0;
	if (folio_test_fscache(folio)) {
		if (current_is_kswapd() || !(gfp & __GFP_FS))
			return false;
		folio_wait_fscache(folio);
	}
	fscache_note_page_release(cifs_inode_cookie(folio->mapping->host));
	return true;
}

/* ->invalidate_folio(): just wait for any fscache write to the folio to
 * finish before the pagecache drops it.
 */
static void cifs_invalidate_folio(struct folio *folio, size_t offset,
				  size_t length)
{
	folio_wait_fscache(folio);
}

/* ->launder_folio(): synchronously write back one dirty folio that is
 * about to be invalidated.
 */
static int cifs_launder_folio(struct folio *folio)
{
	int rc = 0;
	loff_t range_start = folio_pos(folio);
	loff_t range_end = range_start + folio_size(folio);
	struct writeback_control wbc = {
		.sync_mode = WB_SYNC_ALL,
		.nr_to_write = 0,
		.range_start = range_start,
		.range_end = range_end,
	};

	cifs_dbg(FYI, "Launder page: %lu\n", folio->index);

	if (folio_clear_dirty_for_io(folio))
		rc = cifs_writepage_locked(&folio->page, &wbc);

	folio_wait_fscache(folio);
	return rc;
}

/* Work item handling a server-initiated oplock/lease break: downgrade the
 * cached oplock state, flush/invalidate pagecache as required, push byte-
 * range locks to the server, then (if the file is still open) send the
 * break acknowledgment.  The fid fields are snapshotted before
 * _cifsFileInfo_put() since cfile may be freed by it.
 */
void cifs_oplock_break(struct work_struct *work)
{
	struct cifsFileInfo *cfile = container_of(work, struct cifsFileInfo,
						  oplock_break);
	struct inode *inode = d_inode(cfile->dentry);
	struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
	struct cifsInodeInfo *cinode = CIFS_I(inode);
	struct cifs_tcon *tcon;
	struct TCP_Server_Info *server;
	struct tcon_link *tlink;
	int rc = 0;
	bool purge_cache = false, oplock_break_cancelled;
	__u64 persistent_fid, volatile_fid;
	__u16 net_fid;

	/* Let in-flight writers drain before downgrading the oplock. */
	wait_on_bit(&cinode->flags, CIFS_INODE_PENDING_WRITERS,
			TASK_UNINTERRUPTIBLE);

	tlink = cifs_sb_tlink(cifs_sb);
	if (IS_ERR(tlink))
		goto out;
	tcon = tlink_tcon(tlink);
	server = tcon->ses->server;

	server->ops->downgrade_oplock(server, cinode, cfile->oplock_level,
				      cfile->oplock_epoch, &purge_cache);

	if (!CIFS_CACHE_WRITE(cinode) && CIFS_CACHE_READ(cinode) &&
						cifs_has_mand_locks(cinode)) {
		cifs_dbg(FYI, "Reset oplock to None for inode=%p due to mand locks\n",
			 inode);
		cinode->oplock = 0;
	}

	if (inode && S_ISREG(inode->i_mode)) {
		if (CIFS_CACHE_READ(cinode))
			break_lease(inode, O_RDONLY);
		else
			break_lease(inode, O_WRONLY);
		rc = filemap_fdatawrite(inode->i_mapping);
		if (!CIFS_CACHE_READ(cinode) || purge_cache) {
			rc = filemap_fdatawait(inode->i_mapping);
			mapping_set_error(inode->i_mapping, rc);
			cifs_zap_mapping(inode);
		}
		cifs_dbg(FYI, "Oplock flush inode %p rc %d\n", inode, rc);
		if (CIFS_CACHE_WRITE(cinode))
			goto oplock_break_ack;
	}

	rc = cifs_push_locks(cfile);
	if (rc)
		cifs_dbg(VFS, "Push locks rc = %d\n", rc);

oplock_break_ack:
	/*
	 * When oplock break is received and there are no active
	 * file handles but cached, then schedule deferred close immediately.
	 * So, new open will not use cached handle.
	 */

	if (!CIFS_CACHE_HANDLE(cinode) && !list_empty(&cinode->deferred_closes))
		cifs_close_deferred_file(cinode);

	/* Snapshot before the put below, which may free cfile. */
	persistent_fid = cfile->fid.persistent_fid;
	volatile_fid = cfile->fid.volatile_fid;
	net_fid = cfile->fid.netfid;
	oplock_break_cancelled = cfile->oplock_break_cancelled;

	_cifsFileInfo_put(cfile, false /* do not wait for ourself */, false);
	/*
	 * MS-SMB2 3.2.5.19.1 and 3.2.5.19.2 (and MS-CIFS 3.2.5.42) do not require
	 * an acknowledgment to be sent when the file has already been closed.
	 */
	spin_lock(&cinode->open_file_lock);
	/* check list empty since can race with kill_sb calling tree disconnect */
	if (!oplock_break_cancelled && !list_empty(&cinode->openFileList)) {
		spin_unlock(&cinode->open_file_lock);
		rc = server->ops->oplock_response(tcon, persistent_fid,
						  volatile_fid, net_fid, cinode);
		cifs_dbg(FYI, "Oplock release rc = %d\n", rc);
	} else
		spin_unlock(&cinode->open_file_lock);

	cifs_put_tlink(tlink);
out:
	cifs_done_oplock_break(cinode);
}

/*
 * The presence of cifs_direct_io() in the address space ops vector
 * allows open() O_DIRECT flags which would have failed otherwise.
 *
 * In the non-cached mode (mount with cache=none), we shunt off direct read and write requests
 * so this method should never be called.
 *
 * Direct IO is not yet supported in the cached mode.
 */
static ssize_t
cifs_direct_io(struct kiocb *iocb, struct iov_iter *iter)
{
	/*
	 * FIXME
	 * Eventually need to support direct IO for non forcedirectio mounts
	 */
	return -EINVAL;
}

/* ->swap_activate(): validate that the file can back swap (no holes,
 * swap_rw supported) and mark the open handle as a swapfile.
 */
static int cifs_swap_activate(struct swap_info_struct *sis,
			      struct file *swap_file, sector_t *span)
{
	struct cifsFileInfo *cfile = swap_file->private_data;
	struct inode *inode = swap_file->f_mapping->host;
	unsigned long blocks;
	long long isize;

	cifs_dbg(FYI, "swap activate\n");

	if (!swap_file->f_mapping->a_ops->swap_rw)
		/* Cannot support swap */
		return -EINVAL;

	spin_lock(&inode->i_lock);
	blocks = inode->i_blocks;
	isize = inode->i_size;
	spin_unlock(&inode->i_lock);
	/* A sparse file would deadlock swap-out; reject it up front. */
	if (blocks*512 < isize) {
		pr_warn("swap activate: swapfile has holes\n");
		return -EINVAL;
	}
	*span = sis->pages;

	pr_warn_once("Swap support over SMB3 is experimental\n");

	/*
	 * TODO: consider adding ACL (or documenting how) to prevent other
	 * users (on this or other systems) from reading it
	 */


	/* TODO: add sk_set_memalloc(inet) or similar */

	if (cfile)
		cfile->swapfile = true;
	/*
	 * TODO: Since file already open, we can't open with DENY_ALL here
	 * but we could add call to grab a byte range lock to prevent others
	 * from reading or writing the file
	 */

	sis->flags |= SWP_FS_OPS;
	return add_swap_extent(sis, 0, sis->max, 0);
}

/* ->swap_deactivate(): undo cifs_swap_activate()'s bookkeeping. */
static void cifs_swap_deactivate(struct file *file)
{
	struct cifsFileInfo *cfile = file->private_data;

	cifs_dbg(FYI, "swap deactivate\n");

	/* TODO: undo sk_set_memalloc(inet) will eventually be needed */

	if (cfile)
		cfile->swapfile = false;

	/* do we need to unpin (or unlock) the file */
}

/*
 * Mark a page as having been made dirty and thus needing writeback. We also
 * need to pin the cache object to write back to.
 */
#ifdef CONFIG_CIFS_FSCACHE
static bool cifs_dirty_folio(struct address_space *mapping, struct folio *folio)
{
	return fscache_dirty_folio(mapping, folio,
				   cifs_inode_cookie(mapping->host));
}
#else
#define cifs_dirty_folio filemap_dirty_folio
#endif

/* Address-space operations for servers whose negotiated buffer can hold a
 * full page of data; includes readahead and direct I/O support.
 */
const struct address_space_operations cifs_addr_ops = {
	.read_folio = cifs_read_folio,
	.readahead = cifs_readahead,
	.writepages = cifs_writepages,
	.write_begin = cifs_write_begin,
	.write_end = cifs_write_end,
	.dirty_folio = cifs_dirty_folio,
	.release_folio = cifs_release_folio,
	.direct_IO = cifs_direct_io,
	.invalidate_folio = cifs_invalidate_folio,
	.launder_folio = cifs_launder_folio,
	.migrate_folio = filemap_migrate_folio,
	/*
	 * TODO: investigate and if useful we could add an is_dirty_writeback
	 * helper if needed
	 */
	.swap_activate = cifs_swap_activate,
	.swap_deactivate = cifs_swap_deactivate,
};

/*
 *
 * cifs_readahead requires the server to support a buffer large enough to
 * contain the header plus one complete page of data. Otherwise, we need
 * to leave cifs_readahead out of the address space operations.
 */
/* Same as cifs_addr_ops but without .readahead, .direct_IO or the swap
 * hooks, for servers whose buffers are smaller than one page.
 */
const struct address_space_operations cifs_addr_ops_smallbuf = {
	.read_folio = cifs_read_folio,
	.writepages = cifs_writepages,
	.write_begin = cifs_write_begin,
	.write_end = cifs_write_end,
	.dirty_folio = cifs_dirty_folio,
	.release_folio = cifs_release_folio,
	.invalidate_folio = cifs_invalidate_folio,
	.launder_folio = cifs_launder_folio,
	.migrate_folio = filemap_migrate_folio,
};