// SPDX-License-Identifier: LGPL-2.1
/*
 *
 *   vfs operations that deal with files
 *
 *   Copyright (C) International Business Machines  Corp., 2002,2010
 *   Author(s): Steve French (sfrench@us.ibm.com)
 *              Jeremy Allison (jra@samba.org)
 *
 */
#include <linux/fs.h>
#include <linux/filelock.h>
#include <linux/backing-dev.h>
#include <linux/stat.h>
#include <linux/fcntl.h>
#include <linux/pagemap.h>
#include <linux/pagevec.h>
#include <linux/writeback.h>
#include <linux/task_io_accounting_ops.h>
#include <linux/delay.h>
#include <linux/mount.h>
#include <linux/slab.h>
#include <linux/swap.h>
#include <linux/mm.h>
#include <asm/div64.h>
#include "cifsfs.h"
#include "cifspdu.h"
#include "cifsglob.h"
#include "cifsproto.h"
#include "smb2proto.h"
#include "cifs_unicode.h"
#include "cifs_debug.h"
#include "cifs_fs_sb.h"
#include "fscache.h"
#include "smbdirect.h"
#include "fs_context.h"
#include "cifs_ioctl.h"
#include "cached_dir.h"

/*
 * Remove the dirty flags from a span of pages.
 */
static void cifs_undirty_folios(struct inode *inode, loff_t start, unsigned int len)
{
	struct address_space *mapping = inode->i_mapping;
	struct folio *folio;
	pgoff_t end;

	XA_STATE(xas, &mapping->i_pages, start / PAGE_SIZE);

	rcu_read_lock();

	end = (start + len - 1) / PAGE_SIZE;
	xas_for_each_marked(&xas, folio, end, PAGECACHE_TAG_DIRTY) {
		if (xas_retry(&xas, folio))
			continue;
		xas_pause(&xas);
		rcu_read_unlock();
		folio_lock(folio);
		folio_clear_dirty_for_io(folio);
		folio_unlock(folio);
		rcu_read_lock();
	}

	rcu_read_unlock();
}

/*
 * Completion of write to server.
 */
void cifs_pages_written_back(struct inode *inode, loff_t start, unsigned int len)
{
	struct address_space *mapping = inode->i_mapping;
	struct folio *folio;
	pgoff_t end;

	XA_STATE(xas, &mapping->i_pages, start / PAGE_SIZE);

	if (!len)
		return;

	rcu_read_lock();

	end = (start + len - 1) / PAGE_SIZE;
	xas_for_each(&xas, folio, end) {
		if (xas_retry(&xas, folio))
			continue;
		if (!folio_test_writeback(folio)) {
			WARN_ONCE(1, "bad %x @%llx page %lx %lx\n",
				  len, start, folio->index, end);
			continue;
		}

		folio_detach_private(folio);
		folio_end_writeback(folio);
	}

	rcu_read_unlock();
}

/*
 * Failure of write to server.
 */
void cifs_pages_write_failed(struct inode *inode, loff_t start, unsigned int len)
{
	struct address_space *mapping = inode->i_mapping;
	struct folio *folio;
	pgoff_t end;

	XA_STATE(xas, &mapping->i_pages, start / PAGE_SIZE);

	if (!len)
		return;

	rcu_read_lock();

	end = (start + len - 1) / PAGE_SIZE;
	xas_for_each(&xas, folio, end) {
		if (xas_retry(&xas, folio))
			continue;
		if (!folio_test_writeback(folio)) {
			WARN_ONCE(1, "bad %x @%llx page %lx %lx\n",
				  len, start, folio->index, end);
			continue;
		}

		folio_set_error(folio);
		folio_end_writeback(folio);
	}

	rcu_read_unlock();
}

/*
 * Redirty pages after a temporary failure.
136 */ 137 void cifs_pages_write_redirty(struct inode *inode, loff_t start, unsigned int len) 138 { 139 struct address_space *mapping = inode->i_mapping; 140 struct folio *folio; 141 pgoff_t end; 142 143 XA_STATE(xas, &mapping->i_pages, start / PAGE_SIZE); 144 145 if (!len) 146 return; 147 148 rcu_read_lock(); 149 150 end = (start + len - 1) / PAGE_SIZE; 151 xas_for_each(&xas, folio, end) { 152 if (!folio_test_writeback(folio)) { 153 WARN_ONCE(1, "bad %x @%llx page %lx %lx\n", 154 len, start, folio->index, end); 155 continue; 156 } 157 158 filemap_dirty_folio(folio->mapping, folio); 159 folio_end_writeback(folio); 160 } 161 162 rcu_read_unlock(); 163 } 164 165 /* 166 * Mark as invalid, all open files on tree connections since they 167 * were closed when session to server was lost. 168 */ 169 void 170 cifs_mark_open_files_invalid(struct cifs_tcon *tcon) 171 { 172 struct cifsFileInfo *open_file = NULL; 173 struct list_head *tmp; 174 struct list_head *tmp1; 175 176 /* only send once per connect */ 177 spin_lock(&tcon->tc_lock); 178 if (tcon->need_reconnect) 179 tcon->status = TID_NEED_RECON; 180 181 if (tcon->status != TID_NEED_RECON) { 182 spin_unlock(&tcon->tc_lock); 183 return; 184 } 185 tcon->status = TID_IN_FILES_INVALIDATE; 186 spin_unlock(&tcon->tc_lock); 187 188 /* list all files open on tree connection and mark them invalid */ 189 spin_lock(&tcon->open_file_lock); 190 list_for_each_safe(tmp, tmp1, &tcon->openFileList) { 191 open_file = list_entry(tmp, struct cifsFileInfo, tlist); 192 open_file->invalidHandle = true; 193 open_file->oplock_break_cancelled = true; 194 } 195 spin_unlock(&tcon->open_file_lock); 196 197 invalidate_all_cached_dirs(tcon); 198 spin_lock(&tcon->tc_lock); 199 if (tcon->status == TID_IN_FILES_INVALIDATE) 200 tcon->status = TID_NEED_TCON; 201 spin_unlock(&tcon->tc_lock); 202 203 /* 204 * BB Add call to invalidate_inodes(sb) for all superblocks mounted 205 * to this tcon. 206 */ 207 } 208 209 static inline int cifs_convert_flags(unsigned int flags, int rdwr_for_fscache) 210 { 211 if ((flags & O_ACCMODE) == O_RDONLY) 212 return GENERIC_READ; 213 else if ((flags & O_ACCMODE) == O_WRONLY) 214 return rdwr_for_fscache == 1 ? (GENERIC_READ | GENERIC_WRITE) : GENERIC_WRITE; 215 else if ((flags & O_ACCMODE) == O_RDWR) { 216 /* GENERIC_ALL is too much permission to request 217 can cause unnecessary access denied on create */ 218 /* return GENERIC_ALL; */ 219 return (GENERIC_READ | GENERIC_WRITE); 220 } 221 222 return (READ_CONTROL | FILE_WRITE_ATTRIBUTES | FILE_READ_ATTRIBUTES | 223 FILE_WRITE_EA | FILE_APPEND_DATA | FILE_WRITE_DATA | 224 FILE_READ_DATA); 225 } 226 227 #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY 228 static u32 cifs_posix_convert_flags(unsigned int flags) 229 { 230 u32 posix_flags = 0; 231 232 if ((flags & O_ACCMODE) == O_RDONLY) 233 posix_flags = SMB_O_RDONLY; 234 else if ((flags & O_ACCMODE) == O_WRONLY) 235 posix_flags = SMB_O_WRONLY; 236 else if ((flags & O_ACCMODE) == O_RDWR) 237 posix_flags = SMB_O_RDWR; 238 239 if (flags & O_CREAT) { 240 posix_flags |= SMB_O_CREAT; 241 if (flags & O_EXCL) 242 posix_flags |= SMB_O_EXCL; 243 } else if (flags & O_EXCL) 244 cifs_dbg(FYI, "Application %s pid %d has incorrectly set O_EXCL flag but not O_CREAT on file open. 
Ignoring O_EXCL\n", 245 current->comm, current->tgid); 246 247 if (flags & O_TRUNC) 248 posix_flags |= SMB_O_TRUNC; 249 /* be safe and imply O_SYNC for O_DSYNC */ 250 if (flags & O_DSYNC) 251 posix_flags |= SMB_O_SYNC; 252 if (flags & O_DIRECTORY) 253 posix_flags |= SMB_O_DIRECTORY; 254 if (flags & O_NOFOLLOW) 255 posix_flags |= SMB_O_NOFOLLOW; 256 if (flags & O_DIRECT) 257 posix_flags |= SMB_O_DIRECT; 258 259 return posix_flags; 260 } 261 #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */ 262 263 static inline int cifs_get_disposition(unsigned int flags) 264 { 265 if ((flags & (O_CREAT | O_EXCL)) == (O_CREAT | O_EXCL)) 266 return FILE_CREATE; 267 else if ((flags & (O_CREAT | O_TRUNC)) == (O_CREAT | O_TRUNC)) 268 return FILE_OVERWRITE_IF; 269 else if ((flags & O_CREAT) == O_CREAT) 270 return FILE_OPEN_IF; 271 else if ((flags & O_TRUNC) == O_TRUNC) 272 return FILE_OVERWRITE; 273 else 274 return FILE_OPEN; 275 } 276 277 #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY 278 int cifs_posix_open(const char *full_path, struct inode **pinode, 279 struct super_block *sb, int mode, unsigned int f_flags, 280 __u32 *poplock, __u16 *pnetfid, unsigned int xid) 281 { 282 int rc; 283 FILE_UNIX_BASIC_INFO *presp_data; 284 __u32 posix_flags = 0; 285 struct cifs_sb_info *cifs_sb = CIFS_SB(sb); 286 struct cifs_fattr fattr; 287 struct tcon_link *tlink; 288 struct cifs_tcon *tcon; 289 290 cifs_dbg(FYI, "posix open %s\n", full_path); 291 292 presp_data = kzalloc(sizeof(FILE_UNIX_BASIC_INFO), GFP_KERNEL); 293 if (presp_data == NULL) 294 return -ENOMEM; 295 296 tlink = cifs_sb_tlink(cifs_sb); 297 if (IS_ERR(tlink)) { 298 rc = PTR_ERR(tlink); 299 goto posix_open_ret; 300 } 301 302 tcon = tlink_tcon(tlink); 303 mode &= ~current_umask(); 304 305 posix_flags = cifs_posix_convert_flags(f_flags); 306 rc = CIFSPOSIXCreate(xid, tcon, posix_flags, mode, pnetfid, presp_data, 307 poplock, full_path, cifs_sb->local_nls, 308 cifs_remap(cifs_sb)); 309 cifs_put_tlink(tlink); 310 311 if (rc) 312 goto posix_open_ret; 313 314 if (presp_data->Type == cpu_to_le32(-1)) 315 goto posix_open_ret; /* open ok, caller does qpathinfo */ 316 317 if (!pinode) 318 goto posix_open_ret; /* caller does not need info */ 319 320 cifs_unix_basic_to_fattr(&fattr, presp_data, cifs_sb); 321 322 /* get new inode and set it up */ 323 if (*pinode == NULL) { 324 cifs_fill_uniqueid(sb, &fattr); 325 *pinode = cifs_iget(sb, &fattr); 326 if (!*pinode) { 327 rc = -ENOMEM; 328 goto posix_open_ret; 329 } 330 } else { 331 cifs_revalidate_mapping(*pinode); 332 rc = cifs_fattr_to_inode(*pinode, &fattr, false); 333 } 334 335 posix_open_ret: 336 kfree(presp_data); 337 return rc; 338 } 339 #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */ 340 341 static int cifs_nt_open(const char *full_path, struct inode *inode, struct cifs_sb_info *cifs_sb, 342 struct cifs_tcon *tcon, unsigned int f_flags, __u32 *oplock, 343 struct cifs_fid *fid, unsigned int xid, struct cifs_open_info_data *buf) 344 { 345 int rc; 346 int desired_access; 347 int disposition; 348 int create_options = CREATE_NOT_DIR; 349 struct TCP_Server_Info *server = tcon->ses->server; 350 struct cifs_open_parms oparms; 351 int rdwr_for_fscache = 0; 352 353 if (!server->ops->open) 354 return -ENOSYS; 355 356 /* If we're caching, we need to be able to fill in around partial writes. 
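 * A write-only open is therefore requested as read/write on the wire when
 * fscache is enabled for the inode (see rdwr_for_fscache below), so the
 * cache can still be populated.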
*/ 357 if (cifs_fscache_enabled(inode) && (f_flags & O_ACCMODE) == O_WRONLY) 358 rdwr_for_fscache = 1; 359 360 desired_access = cifs_convert_flags(f_flags, rdwr_for_fscache); 361 362 /********************************************************************* 363 * open flag mapping table: 364 * 365 * POSIX Flag CIFS Disposition 366 * ---------- ---------------- 367 * O_CREAT FILE_OPEN_IF 368 * O_CREAT | O_EXCL FILE_CREATE 369 * O_CREAT | O_TRUNC FILE_OVERWRITE_IF 370 * O_TRUNC FILE_OVERWRITE 371 * none of the above FILE_OPEN 372 * 373 * Note that there is not a direct match between disposition 374 * FILE_SUPERSEDE (ie create whether or not file exists although 375 * O_CREAT | O_TRUNC is similar but truncates the existing 376 * file rather than creating a new file as FILE_SUPERSEDE does 377 * (which uses the attributes / metadata passed in on open call) 378 *? 379 *? O_SYNC is a reasonable match to CIFS writethrough flag 380 *? and the read write flags match reasonably. O_LARGEFILE 381 *? is irrelevant because largefile support is always used 382 *? by this client. Flags O_APPEND, O_DIRECT, O_DIRECTORY, 383 * O_FASYNC, O_NOFOLLOW, O_NONBLOCK need further investigation 384 *********************************************************************/ 385 386 disposition = cifs_get_disposition(f_flags); 387 388 /* BB pass O_SYNC flag through on file attributes .. BB */ 389 390 /* O_SYNC also has bit for O_DSYNC so following check picks up either */ 391 if (f_flags & O_SYNC) 392 create_options |= CREATE_WRITE_THROUGH; 393 394 if (f_flags & O_DIRECT) 395 create_options |= CREATE_NO_BUFFER; 396 397 retry_open: 398 oparms = (struct cifs_open_parms) { 399 .tcon = tcon, 400 .cifs_sb = cifs_sb, 401 .desired_access = desired_access, 402 .create_options = cifs_create_options(cifs_sb, create_options), 403 .disposition = disposition, 404 .path = full_path, 405 .fid = fid, 406 }; 407 408 rc = server->ops->open(xid, &oparms, oplock, buf); 409 if (rc) { 410 if (rc == -EACCES && rdwr_for_fscache == 1) { 411 desired_access = cifs_convert_flags(f_flags, 0); 412 rdwr_for_fscache = 2; 413 goto retry_open; 414 } 415 return rc; 416 } 417 if (rdwr_for_fscache == 2) 418 cifs_invalidate_cache(inode, FSCACHE_INVAL_DIO_WRITE); 419 420 /* TODO: Add support for calling posix query info but with passing in fid */ 421 if (tcon->unix_ext) 422 rc = cifs_get_inode_info_unix(&inode, full_path, inode->i_sb, 423 xid); 424 else 425 rc = cifs_get_inode_info(&inode, full_path, buf, inode->i_sb, 426 xid, fid); 427 428 if (rc) { 429 server->ops->close(xid, tcon, fid); 430 if (rc == -ESTALE) 431 rc = -EOPENSTALE; 432 } 433 434 return rc; 435 } 436 437 static bool 438 cifs_has_mand_locks(struct cifsInodeInfo *cinode) 439 { 440 struct cifs_fid_locks *cur; 441 bool has_locks = false; 442 443 down_read(&cinode->lock_sem); 444 list_for_each_entry(cur, &cinode->llist, llist) { 445 if (!list_empty(&cur->locks)) { 446 has_locks = true; 447 break; 448 } 449 } 450 up_read(&cinode->lock_sem); 451 return has_locks; 452 } 453 454 void 455 cifs_down_write(struct rw_semaphore *sem) 456 { 457 while (!down_write_trylock(sem)) 458 msleep(10); 459 } 460 461 static void cifsFileInfo_put_work(struct work_struct *work); 462 void serverclose_work(struct work_struct *work); 463 464 struct cifsFileInfo *cifs_new_fileinfo(struct cifs_fid *fid, struct file *file, 465 struct tcon_link *tlink, __u32 oplock, 466 const char *symlink_target) 467 { 468 struct dentry *dentry = file_dentry(file); 469 struct inode *inode = d_inode(dentry); 470 struct cifsInodeInfo *cinode = 
CIFS_I(inode); 471 struct cifsFileInfo *cfile; 472 struct cifs_fid_locks *fdlocks; 473 struct cifs_tcon *tcon = tlink_tcon(tlink); 474 struct TCP_Server_Info *server = tcon->ses->server; 475 476 cfile = kzalloc(sizeof(struct cifsFileInfo), GFP_KERNEL); 477 if (cfile == NULL) 478 return cfile; 479 480 fdlocks = kzalloc(sizeof(struct cifs_fid_locks), GFP_KERNEL); 481 if (!fdlocks) { 482 kfree(cfile); 483 return NULL; 484 } 485 486 if (symlink_target) { 487 cfile->symlink_target = kstrdup(symlink_target, GFP_KERNEL); 488 if (!cfile->symlink_target) { 489 kfree(fdlocks); 490 kfree(cfile); 491 return NULL; 492 } 493 } 494 495 INIT_LIST_HEAD(&fdlocks->locks); 496 fdlocks->cfile = cfile; 497 cfile->llist = fdlocks; 498 499 cfile->count = 1; 500 cfile->pid = current->tgid; 501 cfile->uid = current_fsuid(); 502 cfile->dentry = dget(dentry); 503 cfile->f_flags = file->f_flags; 504 cfile->invalidHandle = false; 505 cfile->deferred_close_scheduled = false; 506 cfile->tlink = cifs_get_tlink(tlink); 507 INIT_WORK(&cfile->oplock_break, cifs_oplock_break); 508 INIT_WORK(&cfile->put, cifsFileInfo_put_work); 509 INIT_WORK(&cfile->serverclose, serverclose_work); 510 INIT_DELAYED_WORK(&cfile->deferred, smb2_deferred_work_close); 511 mutex_init(&cfile->fh_mutex); 512 spin_lock_init(&cfile->file_info_lock); 513 514 cifs_sb_active(inode->i_sb); 515 516 /* 517 * If the server returned a read oplock and we have mandatory brlocks, 518 * set oplock level to None. 519 */ 520 if (server->ops->is_read_op(oplock) && cifs_has_mand_locks(cinode)) { 521 cifs_dbg(FYI, "Reset oplock val from read to None due to mand locks\n"); 522 oplock = 0; 523 } 524 525 cifs_down_write(&cinode->lock_sem); 526 list_add(&fdlocks->llist, &cinode->llist); 527 up_write(&cinode->lock_sem); 528 529 spin_lock(&tcon->open_file_lock); 530 if (fid->pending_open->oplock != CIFS_OPLOCK_NO_CHANGE && oplock) 531 oplock = fid->pending_open->oplock; 532 list_del(&fid->pending_open->olist); 533 534 fid->purge_cache = false; 535 server->ops->set_fid(cfile, fid, oplock); 536 537 list_add(&cfile->tlist, &tcon->openFileList); 538 atomic_inc(&tcon->num_local_opens); 539 540 /* if readable file instance put first in list*/ 541 spin_lock(&cinode->open_file_lock); 542 if (file->f_mode & FMODE_READ) 543 list_add(&cfile->flist, &cinode->openFileList); 544 else 545 list_add_tail(&cfile->flist, &cinode->openFileList); 546 spin_unlock(&cinode->open_file_lock); 547 spin_unlock(&tcon->open_file_lock); 548 549 if (fid->purge_cache) 550 cifs_zap_mapping(inode); 551 552 file->private_data = cfile; 553 return cfile; 554 } 555 556 struct cifsFileInfo * 557 cifsFileInfo_get(struct cifsFileInfo *cifs_file) 558 { 559 spin_lock(&cifs_file->file_info_lock); 560 cifsFileInfo_get_locked(cifs_file); 561 spin_unlock(&cifs_file->file_info_lock); 562 return cifs_file; 563 } 564 565 static void cifsFileInfo_put_final(struct cifsFileInfo *cifs_file) 566 { 567 struct inode *inode = d_inode(cifs_file->dentry); 568 struct cifsInodeInfo *cifsi = CIFS_I(inode); 569 struct cifsLockInfo *li, *tmp; 570 struct super_block *sb = inode->i_sb; 571 572 /* 573 * Delete any outstanding lock records. We'll lose them when the file 574 * is closed anyway. 
575 */ 576 cifs_down_write(&cifsi->lock_sem); 577 list_for_each_entry_safe(li, tmp, &cifs_file->llist->locks, llist) { 578 list_del(&li->llist); 579 cifs_del_lock_waiters(li); 580 kfree(li); 581 } 582 list_del(&cifs_file->llist->llist); 583 kfree(cifs_file->llist); 584 up_write(&cifsi->lock_sem); 585 586 cifs_put_tlink(cifs_file->tlink); 587 dput(cifs_file->dentry); 588 cifs_sb_deactive(sb); 589 kfree(cifs_file->symlink_target); 590 kfree(cifs_file); 591 } 592 593 static void cifsFileInfo_put_work(struct work_struct *work) 594 { 595 struct cifsFileInfo *cifs_file = container_of(work, 596 struct cifsFileInfo, put); 597 598 cifsFileInfo_put_final(cifs_file); 599 } 600 601 void serverclose_work(struct work_struct *work) 602 { 603 struct cifsFileInfo *cifs_file = container_of(work, 604 struct cifsFileInfo, serverclose); 605 606 struct cifs_tcon *tcon = tlink_tcon(cifs_file->tlink); 607 608 struct TCP_Server_Info *server = tcon->ses->server; 609 int rc = 0; 610 int retries = 0; 611 int MAX_RETRIES = 4; 612 613 do { 614 if (server->ops->close_getattr) 615 rc = server->ops->close_getattr(0, tcon, cifs_file); 616 else if (server->ops->close) 617 rc = server->ops->close(0, tcon, &cifs_file->fid); 618 619 if (rc == -EBUSY || rc == -EAGAIN) { 620 retries++; 621 msleep(250); 622 } 623 } while ((rc == -EBUSY || rc == -EAGAIN) && (retries < MAX_RETRIES) 624 ); 625 626 if (retries == MAX_RETRIES) 627 pr_warn("Serverclose failed %d times, giving up\n", MAX_RETRIES); 628 629 if (cifs_file->offload) 630 queue_work(fileinfo_put_wq, &cifs_file->put); 631 else 632 cifsFileInfo_put_final(cifs_file); 633 } 634 635 /** 636 * cifsFileInfo_put - release a reference of file priv data 637 * 638 * Always potentially wait for oplock handler. See _cifsFileInfo_put(). 639 * 640 * @cifs_file: cifs/smb3 specific info (eg refcounts) for an open file 641 */ 642 void cifsFileInfo_put(struct cifsFileInfo *cifs_file) 643 { 644 _cifsFileInfo_put(cifs_file, true, true); 645 } 646 647 /** 648 * _cifsFileInfo_put - release a reference of file priv data 649 * 650 * This may involve closing the filehandle @cifs_file out on the 651 * server. Must be called without holding tcon->open_file_lock, 652 * cinode->open_file_lock and cifs_file->file_info_lock. 653 * 654 * If @wait_for_oplock_handler is true and we are releasing the last 655 * reference, wait for any running oplock break handler of the file 656 * and cancel any pending one. 
657 * 658 * @cifs_file: cifs/smb3 specific info (eg refcounts) for an open file 659 * @wait_oplock_handler: must be false if called from oplock_break_handler 660 * @offload: not offloaded on close and oplock breaks 661 * 662 */ 663 void _cifsFileInfo_put(struct cifsFileInfo *cifs_file, 664 bool wait_oplock_handler, bool offload) 665 { 666 struct inode *inode = d_inode(cifs_file->dentry); 667 struct cifs_tcon *tcon = tlink_tcon(cifs_file->tlink); 668 struct TCP_Server_Info *server = tcon->ses->server; 669 struct cifsInodeInfo *cifsi = CIFS_I(inode); 670 struct super_block *sb = inode->i_sb; 671 struct cifs_sb_info *cifs_sb = CIFS_SB(sb); 672 struct cifs_fid fid = {}; 673 struct cifs_pending_open open; 674 bool oplock_break_cancelled; 675 bool serverclose_offloaded = false; 676 677 spin_lock(&tcon->open_file_lock); 678 spin_lock(&cifsi->open_file_lock); 679 spin_lock(&cifs_file->file_info_lock); 680 681 cifs_file->offload = offload; 682 if (--cifs_file->count > 0) { 683 spin_unlock(&cifs_file->file_info_lock); 684 spin_unlock(&cifsi->open_file_lock); 685 spin_unlock(&tcon->open_file_lock); 686 return; 687 } 688 spin_unlock(&cifs_file->file_info_lock); 689 690 if (server->ops->get_lease_key) 691 server->ops->get_lease_key(inode, &fid); 692 693 /* store open in pending opens to make sure we don't miss lease break */ 694 cifs_add_pending_open_locked(&fid, cifs_file->tlink, &open); 695 696 /* remove it from the lists */ 697 list_del(&cifs_file->flist); 698 list_del(&cifs_file->tlist); 699 atomic_dec(&tcon->num_local_opens); 700 701 if (list_empty(&cifsi->openFileList)) { 702 cifs_dbg(FYI, "closing last open instance for inode %p\n", 703 d_inode(cifs_file->dentry)); 704 /* 705 * In strict cache mode we need invalidate mapping on the last 706 * close because it may cause a error when we open this file 707 * again and get at least level II oplock. 708 */ 709 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_STRICT_IO) 710 set_bit(CIFS_INO_INVALID_MAPPING, &cifsi->flags); 711 cifs_set_oplock_level(cifsi, 0); 712 } 713 714 spin_unlock(&cifsi->open_file_lock); 715 spin_unlock(&tcon->open_file_lock); 716 717 oplock_break_cancelled = wait_oplock_handler ? 718 cancel_work_sync(&cifs_file->oplock_break) : false; 719 720 if (!tcon->need_reconnect && !cifs_file->invalidHandle) { 721 struct TCP_Server_Info *server = tcon->ses->server; 722 unsigned int xid; 723 int rc = 0; 724 725 xid = get_xid(); 726 if (server->ops->close_getattr) 727 rc = server->ops->close_getattr(xid, tcon, cifs_file); 728 else if (server->ops->close) 729 rc = server->ops->close(xid, tcon, &cifs_file->fid); 730 _free_xid(xid); 731 732 if (rc == -EBUSY || rc == -EAGAIN) { 733 // Server close failed, hence offloading it as an async op 734 queue_work(serverclose_wq, &cifs_file->serverclose); 735 serverclose_offloaded = true; 736 } 737 } 738 739 if (oplock_break_cancelled) 740 cifs_done_oplock_break(cifsi); 741 742 cifs_del_pending_open(&open); 743 744 // if serverclose has been offloaded to wq (on failure), it will 745 // handle offloading put as well. If serverclose not offloaded, 746 // we need to handle offloading put here. 
747 if (!serverclose_offloaded) { 748 if (offload) 749 queue_work(fileinfo_put_wq, &cifs_file->put); 750 else 751 cifsFileInfo_put_final(cifs_file); 752 } 753 } 754 755 int cifs_open(struct inode *inode, struct file *file) 756 757 { 758 int rc = -EACCES; 759 unsigned int xid; 760 __u32 oplock; 761 struct cifs_sb_info *cifs_sb; 762 struct TCP_Server_Info *server; 763 struct cifs_tcon *tcon; 764 struct tcon_link *tlink; 765 struct cifsFileInfo *cfile = NULL; 766 void *page; 767 const char *full_path; 768 bool posix_open_ok = false; 769 struct cifs_fid fid = {}; 770 struct cifs_pending_open open; 771 struct cifs_open_info_data data = {}; 772 773 xid = get_xid(); 774 775 cifs_sb = CIFS_SB(inode->i_sb); 776 if (unlikely(cifs_forced_shutdown(cifs_sb))) { 777 free_xid(xid); 778 return -EIO; 779 } 780 781 tlink = cifs_sb_tlink(cifs_sb); 782 if (IS_ERR(tlink)) { 783 free_xid(xid); 784 return PTR_ERR(tlink); 785 } 786 tcon = tlink_tcon(tlink); 787 server = tcon->ses->server; 788 789 page = alloc_dentry_path(); 790 full_path = build_path_from_dentry(file_dentry(file), page); 791 if (IS_ERR(full_path)) { 792 rc = PTR_ERR(full_path); 793 goto out; 794 } 795 796 cifs_dbg(FYI, "inode = 0x%p file flags are 0x%x for %s\n", 797 inode, file->f_flags, full_path); 798 799 if (file->f_flags & O_DIRECT && 800 cifs_sb->mnt_cifs_flags & CIFS_MOUNT_STRICT_IO) { 801 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_BRL) 802 file->f_op = &cifs_file_direct_nobrl_ops; 803 else 804 file->f_op = &cifs_file_direct_ops; 805 } 806 807 /* Get the cached handle as SMB2 close is deferred */ 808 rc = cifs_get_readable_path(tcon, full_path, &cfile); 809 if (rc == 0) { 810 if (file->f_flags == cfile->f_flags) { 811 file->private_data = cfile; 812 spin_lock(&CIFS_I(inode)->deferred_lock); 813 cifs_del_deferred_close(cfile); 814 spin_unlock(&CIFS_I(inode)->deferred_lock); 815 goto use_cache; 816 } else { 817 _cifsFileInfo_put(cfile, true, false); 818 } 819 } 820 821 if (server->oplocks) 822 oplock = REQ_OPLOCK; 823 else 824 oplock = 0; 825 826 #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY 827 if (!tcon->broken_posix_open && tcon->unix_ext && 828 cap_unix(tcon->ses) && (CIFS_UNIX_POSIX_PATH_OPS_CAP & 829 le64_to_cpu(tcon->fsUnixInfo.Capability))) { 830 /* can not refresh inode info since size could be stale */ 831 rc = cifs_posix_open(full_path, &inode, inode->i_sb, 832 cifs_sb->ctx->file_mode /* ignored */, 833 file->f_flags, &oplock, &fid.netfid, xid); 834 if (rc == 0) { 835 cifs_dbg(FYI, "posix open succeeded\n"); 836 posix_open_ok = true; 837 } else if ((rc == -EINVAL) || (rc == -EOPNOTSUPP)) { 838 if (tcon->ses->serverNOS) 839 cifs_dbg(VFS, "server %s of type %s returned unexpected error on SMB posix open, disabling posix open support. Check if server update available.\n", 840 tcon->ses->ip_addr, 841 tcon->ses->serverNOS); 842 tcon->broken_posix_open = true; 843 } else if ((rc != -EIO) && (rc != -EREMOTE) && 844 (rc != -EOPNOTSUPP)) /* path not found or net err */ 845 goto out; 846 /* 847 * Else fallthrough to retry open the old way on network i/o 848 * or DFS errors. 
849 */ 850 } 851 #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */ 852 853 if (server->ops->get_lease_key) 854 server->ops->get_lease_key(inode, &fid); 855 856 cifs_add_pending_open(&fid, tlink, &open); 857 858 if (!posix_open_ok) { 859 if (server->ops->get_lease_key) 860 server->ops->get_lease_key(inode, &fid); 861 862 rc = cifs_nt_open(full_path, inode, cifs_sb, tcon, file->f_flags, &oplock, &fid, 863 xid, &data); 864 if (rc) { 865 cifs_del_pending_open(&open); 866 goto out; 867 } 868 } 869 870 cfile = cifs_new_fileinfo(&fid, file, tlink, oplock, data.symlink_target); 871 if (cfile == NULL) { 872 if (server->ops->close) 873 server->ops->close(xid, tcon, &fid); 874 cifs_del_pending_open(&open); 875 rc = -ENOMEM; 876 goto out; 877 } 878 879 #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY 880 if ((oplock & CIFS_CREATE_ACTION) && !posix_open_ok && tcon->unix_ext) { 881 /* 882 * Time to set mode which we can not set earlier due to 883 * problems creating new read-only files. 884 */ 885 struct cifs_unix_set_info_args args = { 886 .mode = inode->i_mode, 887 .uid = INVALID_UID, /* no change */ 888 .gid = INVALID_GID, /* no change */ 889 .ctime = NO_CHANGE_64, 890 .atime = NO_CHANGE_64, 891 .mtime = NO_CHANGE_64, 892 .device = 0, 893 }; 894 CIFSSMBUnixSetFileInfo(xid, tcon, &args, fid.netfid, 895 cfile->pid); 896 } 897 #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */ 898 899 use_cache: 900 fscache_use_cookie(cifs_inode_cookie(file_inode(file)), 901 file->f_mode & FMODE_WRITE); 902 if (!(file->f_flags & O_DIRECT)) 903 goto out; 904 if ((file->f_flags & (O_ACCMODE | O_APPEND)) == O_RDONLY) 905 goto out; 906 cifs_invalidate_cache(file_inode(file), FSCACHE_INVAL_DIO_WRITE); 907 908 out: 909 free_dentry_path(page); 910 free_xid(xid); 911 cifs_put_tlink(tlink); 912 cifs_free_open_info(&data); 913 return rc; 914 } 915 916 #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY 917 static int cifs_push_posix_locks(struct cifsFileInfo *cfile); 918 #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */ 919 920 /* 921 * Try to reacquire byte range locks that were released when session 922 * to server was lost. 
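 * Called from cifs_reopen_file() once the handle has been reestablished
 * (oparms.reconnect).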
923 */ 924 static int 925 cifs_relock_file(struct cifsFileInfo *cfile) 926 { 927 struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry)); 928 struct cifs_tcon *tcon = tlink_tcon(cfile->tlink); 929 int rc = 0; 930 #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY 931 struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb); 932 #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */ 933 934 down_read_nested(&cinode->lock_sem, SINGLE_DEPTH_NESTING); 935 if (cinode->can_cache_brlcks) { 936 /* can cache locks - no need to relock */ 937 up_read(&cinode->lock_sem); 938 return rc; 939 } 940 941 #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY 942 if (cap_unix(tcon->ses) && 943 (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) && 944 ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0)) 945 rc = cifs_push_posix_locks(cfile); 946 else 947 #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */ 948 rc = tcon->ses->server->ops->push_mand_locks(cfile); 949 950 up_read(&cinode->lock_sem); 951 return rc; 952 } 953 954 static int 955 cifs_reopen_file(struct cifsFileInfo *cfile, bool can_flush) 956 { 957 int rc = -EACCES; 958 unsigned int xid; 959 __u32 oplock; 960 struct cifs_sb_info *cifs_sb; 961 struct cifs_tcon *tcon; 962 struct TCP_Server_Info *server; 963 struct cifsInodeInfo *cinode; 964 struct inode *inode; 965 void *page; 966 const char *full_path; 967 int desired_access; 968 int disposition = FILE_OPEN; 969 int create_options = CREATE_NOT_DIR; 970 struct cifs_open_parms oparms; 971 int rdwr_for_fscache = 0; 972 973 xid = get_xid(); 974 mutex_lock(&cfile->fh_mutex); 975 if (!cfile->invalidHandle) { 976 mutex_unlock(&cfile->fh_mutex); 977 free_xid(xid); 978 return 0; 979 } 980 981 inode = d_inode(cfile->dentry); 982 cifs_sb = CIFS_SB(inode->i_sb); 983 tcon = tlink_tcon(cfile->tlink); 984 server = tcon->ses->server; 985 986 /* 987 * Can not grab rename sem here because various ops, including those 988 * that already have the rename sem can end up causing writepage to get 989 * called and if the server was down that means we end up here, and we 990 * can never tell if the caller already has the rename_sem. 991 */ 992 page = alloc_dentry_path(); 993 full_path = build_path_from_dentry(cfile->dentry, page); 994 if (IS_ERR(full_path)) { 995 mutex_unlock(&cfile->fh_mutex); 996 free_dentry_path(page); 997 free_xid(xid); 998 return PTR_ERR(full_path); 999 } 1000 1001 cifs_dbg(FYI, "inode = 0x%p file flags 0x%x for %s\n", 1002 inode, cfile->f_flags, full_path); 1003 1004 if (tcon->ses->server->oplocks) 1005 oplock = REQ_OPLOCK; 1006 else 1007 oplock = 0; 1008 1009 #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY 1010 if (tcon->unix_ext && cap_unix(tcon->ses) && 1011 (CIFS_UNIX_POSIX_PATH_OPS_CAP & 1012 le64_to_cpu(tcon->fsUnixInfo.Capability))) { 1013 /* 1014 * O_CREAT, O_EXCL and O_TRUNC already had their effect on the 1015 * original open. Must mask them off for a reopen. 1016 */ 1017 unsigned int oflags = cfile->f_flags & 1018 ~(O_CREAT | O_EXCL | O_TRUNC); 1019 1020 rc = cifs_posix_open(full_path, NULL, inode->i_sb, 1021 cifs_sb->ctx->file_mode /* ignored */, 1022 oflags, &oplock, &cfile->fid.netfid, xid); 1023 if (rc == 0) { 1024 cifs_dbg(FYI, "posix reopen succeeded\n"); 1025 oparms.reconnect = true; 1026 goto reopen_success; 1027 } 1028 /* 1029 * fallthrough to retry open the old way on errors, especially 1030 * in the reconnect path it is important to retry hard 1031 */ 1032 } 1033 #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */ 1034 1035 /* If we're caching, we need to be able to fill in around partial writes. 
*/ 1036 if (cifs_fscache_enabled(inode) && (cfile->f_flags & O_ACCMODE) == O_WRONLY) 1037 rdwr_for_fscache = 1; 1038 1039 desired_access = cifs_convert_flags(cfile->f_flags, rdwr_for_fscache); 1040 1041 /* O_SYNC also has bit for O_DSYNC so following check picks up either */ 1042 if (cfile->f_flags & O_SYNC) 1043 create_options |= CREATE_WRITE_THROUGH; 1044 1045 if (cfile->f_flags & O_DIRECT) 1046 create_options |= CREATE_NO_BUFFER; 1047 1048 if (server->ops->get_lease_key) 1049 server->ops->get_lease_key(inode, &cfile->fid); 1050 1051 retry_open: 1052 oparms = (struct cifs_open_parms) { 1053 .tcon = tcon, 1054 .cifs_sb = cifs_sb, 1055 .desired_access = desired_access, 1056 .create_options = cifs_create_options(cifs_sb, create_options), 1057 .disposition = disposition, 1058 .path = full_path, 1059 .fid = &cfile->fid, 1060 .reconnect = true, 1061 }; 1062 1063 /* 1064 * Can not refresh inode by passing in file_info buf to be returned by 1065 * ops->open and then calling get_inode_info with returned buf since 1066 * file might have write behind data that needs to be flushed and server 1067 * version of file size can be stale. If we knew for sure that inode was 1068 * not dirty locally we could do this. 1069 */ 1070 rc = server->ops->open(xid, &oparms, &oplock, NULL); 1071 if (rc == -ENOENT && oparms.reconnect == false) { 1072 /* durable handle timeout is expired - open the file again */ 1073 rc = server->ops->open(xid, &oparms, &oplock, NULL); 1074 /* indicate that we need to relock the file */ 1075 oparms.reconnect = true; 1076 } 1077 if (rc == -EACCES && rdwr_for_fscache == 1) { 1078 desired_access = cifs_convert_flags(cfile->f_flags, 0); 1079 rdwr_for_fscache = 2; 1080 goto retry_open; 1081 } 1082 1083 if (rc) { 1084 mutex_unlock(&cfile->fh_mutex); 1085 cifs_dbg(FYI, "cifs_reopen returned 0x%x\n", rc); 1086 cifs_dbg(FYI, "oplock: %d\n", oplock); 1087 goto reopen_error_exit; 1088 } 1089 1090 if (rdwr_for_fscache == 2) 1091 cifs_invalidate_cache(inode, FSCACHE_INVAL_DIO_WRITE); 1092 1093 #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY 1094 reopen_success: 1095 #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */ 1096 cfile->invalidHandle = false; 1097 mutex_unlock(&cfile->fh_mutex); 1098 cinode = CIFS_I(inode); 1099 1100 if (can_flush) { 1101 rc = filemap_write_and_wait(inode->i_mapping); 1102 if (!is_interrupt_error(rc)) 1103 mapping_set_error(inode->i_mapping, rc); 1104 1105 if (tcon->posix_extensions) 1106 rc = smb311_posix_get_inode_info(&inode, full_path, inode->i_sb, xid); 1107 else if (tcon->unix_ext) 1108 rc = cifs_get_inode_info_unix(&inode, full_path, 1109 inode->i_sb, xid); 1110 else 1111 rc = cifs_get_inode_info(&inode, full_path, NULL, 1112 inode->i_sb, xid, NULL); 1113 } 1114 /* 1115 * Else we are writing out data to server already and could deadlock if 1116 * we tried to flush data, and since we do not know if we have data that 1117 * would invalidate the current end of file on the server we can not go 1118 * to the server to get the new inode info. 1119 */ 1120 1121 /* 1122 * If the server returned a read oplock and we have mandatory brlocks, 1123 * set oplock level to None. 
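 * Caching reads under a read oplock could otherwise conflict with the
 * mandatory byte-range locks held on the server.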
1124 */ 1125 if (server->ops->is_read_op(oplock) && cifs_has_mand_locks(cinode)) { 1126 cifs_dbg(FYI, "Reset oplock val from read to None due to mand locks\n"); 1127 oplock = 0; 1128 } 1129 1130 server->ops->set_fid(cfile, &cfile->fid, oplock); 1131 if (oparms.reconnect) 1132 cifs_relock_file(cfile); 1133 1134 reopen_error_exit: 1135 free_dentry_path(page); 1136 free_xid(xid); 1137 return rc; 1138 } 1139 1140 void smb2_deferred_work_close(struct work_struct *work) 1141 { 1142 struct cifsFileInfo *cfile = container_of(work, 1143 struct cifsFileInfo, deferred.work); 1144 1145 spin_lock(&CIFS_I(d_inode(cfile->dentry))->deferred_lock); 1146 cifs_del_deferred_close(cfile); 1147 cfile->deferred_close_scheduled = false; 1148 spin_unlock(&CIFS_I(d_inode(cfile->dentry))->deferred_lock); 1149 _cifsFileInfo_put(cfile, true, false); 1150 } 1151 1152 int cifs_close(struct inode *inode, struct file *file) 1153 { 1154 struct cifsFileInfo *cfile; 1155 struct cifsInodeInfo *cinode = CIFS_I(inode); 1156 struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb); 1157 struct cifs_deferred_close *dclose; 1158 1159 cifs_fscache_unuse_inode_cookie(inode, file->f_mode & FMODE_WRITE); 1160 1161 if (file->private_data != NULL) { 1162 cfile = file->private_data; 1163 file->private_data = NULL; 1164 dclose = kmalloc(sizeof(struct cifs_deferred_close), GFP_KERNEL); 1165 if ((cifs_sb->ctx->closetimeo && cinode->oplock == CIFS_CACHE_RHW_FLG) 1166 && cinode->lease_granted && 1167 !test_bit(CIFS_INO_CLOSE_ON_LOCK, &cinode->flags) && 1168 dclose) { 1169 if (test_and_clear_bit(CIFS_INO_MODIFIED_ATTR, &cinode->flags)) { 1170 inode_set_mtime_to_ts(inode, 1171 inode_set_ctime_current(inode)); 1172 } 1173 spin_lock(&cinode->deferred_lock); 1174 cifs_add_deferred_close(cfile, dclose); 1175 if (cfile->deferred_close_scheduled && 1176 delayed_work_pending(&cfile->deferred)) { 1177 /* 1178 * If there is no pending work, mod_delayed_work queues new work. 1179 * So, Increase the ref count to avoid use-after-free. 
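 * (mod_delayed_work() returns false when it had to queue fresh work rather
 * than modify a pending item, which is exactly the case where the extra
 * reference is taken below.)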
1180 */ 1181 if (!mod_delayed_work(deferredclose_wq, 1182 &cfile->deferred, cifs_sb->ctx->closetimeo)) 1183 cifsFileInfo_get(cfile); 1184 } else { 1185 /* Deferred close for files */ 1186 queue_delayed_work(deferredclose_wq, 1187 &cfile->deferred, cifs_sb->ctx->closetimeo); 1188 cfile->deferred_close_scheduled = true; 1189 spin_unlock(&cinode->deferred_lock); 1190 return 0; 1191 } 1192 spin_unlock(&cinode->deferred_lock); 1193 _cifsFileInfo_put(cfile, true, false); 1194 } else { 1195 _cifsFileInfo_put(cfile, true, false); 1196 kfree(dclose); 1197 } 1198 } 1199 1200 /* return code from the ->release op is always ignored */ 1201 return 0; 1202 } 1203 1204 void 1205 cifs_reopen_persistent_handles(struct cifs_tcon *tcon) 1206 { 1207 struct cifsFileInfo *open_file, *tmp; 1208 struct list_head tmp_list; 1209 1210 if (!tcon->use_persistent || !tcon->need_reopen_files) 1211 return; 1212 1213 tcon->need_reopen_files = false; 1214 1215 cifs_dbg(FYI, "Reopen persistent handles\n"); 1216 INIT_LIST_HEAD(&tmp_list); 1217 1218 /* list all files open on tree connection, reopen resilient handles */ 1219 spin_lock(&tcon->open_file_lock); 1220 list_for_each_entry(open_file, &tcon->openFileList, tlist) { 1221 if (!open_file->invalidHandle) 1222 continue; 1223 cifsFileInfo_get(open_file); 1224 list_add_tail(&open_file->rlist, &tmp_list); 1225 } 1226 spin_unlock(&tcon->open_file_lock); 1227 1228 list_for_each_entry_safe(open_file, tmp, &tmp_list, rlist) { 1229 if (cifs_reopen_file(open_file, false /* do not flush */)) 1230 tcon->need_reopen_files = true; 1231 list_del_init(&open_file->rlist); 1232 cifsFileInfo_put(open_file); 1233 } 1234 } 1235 1236 int cifs_closedir(struct inode *inode, struct file *file) 1237 { 1238 int rc = 0; 1239 unsigned int xid; 1240 struct cifsFileInfo *cfile = file->private_data; 1241 struct cifs_tcon *tcon; 1242 struct TCP_Server_Info *server; 1243 char *buf; 1244 1245 cifs_dbg(FYI, "Closedir inode = 0x%p\n", inode); 1246 1247 if (cfile == NULL) 1248 return rc; 1249 1250 xid = get_xid(); 1251 tcon = tlink_tcon(cfile->tlink); 1252 server = tcon->ses->server; 1253 1254 cifs_dbg(FYI, "Freeing private data in close dir\n"); 1255 spin_lock(&cfile->file_info_lock); 1256 if (server->ops->dir_needs_close(cfile)) { 1257 cfile->invalidHandle = true; 1258 spin_unlock(&cfile->file_info_lock); 1259 if (server->ops->close_dir) 1260 rc = server->ops->close_dir(xid, tcon, &cfile->fid); 1261 else 1262 rc = -ENOSYS; 1263 cifs_dbg(FYI, "Closing uncompleted readdir with rc %d\n", rc); 1264 /* not much we can do if it fails anyway, ignore rc */ 1265 rc = 0; 1266 } else 1267 spin_unlock(&cfile->file_info_lock); 1268 1269 buf = cfile->srch_inf.ntwrk_buf_start; 1270 if (buf) { 1271 cifs_dbg(FYI, "closedir free smb buf in srch struct\n"); 1272 cfile->srch_inf.ntwrk_buf_start = NULL; 1273 if (cfile->srch_inf.smallBuf) 1274 cifs_small_buf_release(buf); 1275 else 1276 cifs_buf_release(buf); 1277 } 1278 1279 cifs_put_tlink(cfile->tlink); 1280 kfree(file->private_data); 1281 file->private_data = NULL; 1282 /* BB can we lock the filestruct while this is going on? 
*/ 1283 free_xid(xid); 1284 return rc; 1285 } 1286 1287 static struct cifsLockInfo * 1288 cifs_lock_init(__u64 offset, __u64 length, __u8 type, __u16 flags) 1289 { 1290 struct cifsLockInfo *lock = 1291 kmalloc(sizeof(struct cifsLockInfo), GFP_KERNEL); 1292 if (!lock) 1293 return lock; 1294 lock->offset = offset; 1295 lock->length = length; 1296 lock->type = type; 1297 lock->pid = current->tgid; 1298 lock->flags = flags; 1299 INIT_LIST_HEAD(&lock->blist); 1300 init_waitqueue_head(&lock->block_q); 1301 return lock; 1302 } 1303 1304 void 1305 cifs_del_lock_waiters(struct cifsLockInfo *lock) 1306 { 1307 struct cifsLockInfo *li, *tmp; 1308 list_for_each_entry_safe(li, tmp, &lock->blist, blist) { 1309 list_del_init(&li->blist); 1310 wake_up(&li->block_q); 1311 } 1312 } 1313 1314 #define CIFS_LOCK_OP 0 1315 #define CIFS_READ_OP 1 1316 #define CIFS_WRITE_OP 2 1317 1318 /* @rw_check : 0 - no op, 1 - read, 2 - write */ 1319 static bool 1320 cifs_find_fid_lock_conflict(struct cifs_fid_locks *fdlocks, __u64 offset, 1321 __u64 length, __u8 type, __u16 flags, 1322 struct cifsFileInfo *cfile, 1323 struct cifsLockInfo **conf_lock, int rw_check) 1324 { 1325 struct cifsLockInfo *li; 1326 struct cifsFileInfo *cur_cfile = fdlocks->cfile; 1327 struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server; 1328 1329 list_for_each_entry(li, &fdlocks->locks, llist) { 1330 if (offset + length <= li->offset || 1331 offset >= li->offset + li->length) 1332 continue; 1333 if (rw_check != CIFS_LOCK_OP && current->tgid == li->pid && 1334 server->ops->compare_fids(cfile, cur_cfile)) { 1335 /* shared lock prevents write op through the same fid */ 1336 if (!(li->type & server->vals->shared_lock_type) || 1337 rw_check != CIFS_WRITE_OP) 1338 continue; 1339 } 1340 if ((type & server->vals->shared_lock_type) && 1341 ((server->ops->compare_fids(cfile, cur_cfile) && 1342 current->tgid == li->pid) || type == li->type)) 1343 continue; 1344 if (rw_check == CIFS_LOCK_OP && 1345 (flags & FL_OFDLCK) && (li->flags & FL_OFDLCK) && 1346 server->ops->compare_fids(cfile, cur_cfile)) 1347 continue; 1348 if (conf_lock) 1349 *conf_lock = li; 1350 return true; 1351 } 1352 return false; 1353 } 1354 1355 bool 1356 cifs_find_lock_conflict(struct cifsFileInfo *cfile, __u64 offset, __u64 length, 1357 __u8 type, __u16 flags, 1358 struct cifsLockInfo **conf_lock, int rw_check) 1359 { 1360 bool rc = false; 1361 struct cifs_fid_locks *cur; 1362 struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry)); 1363 1364 list_for_each_entry(cur, &cinode->llist, llist) { 1365 rc = cifs_find_fid_lock_conflict(cur, offset, length, type, 1366 flags, cfile, conf_lock, 1367 rw_check); 1368 if (rc) 1369 break; 1370 } 1371 1372 return rc; 1373 } 1374 1375 /* 1376 * Check if there is another lock that prevents us to set the lock (mandatory 1377 * style). If such a lock exists, update the flock structure with its 1378 * properties. Otherwise, set the flock type to F_UNLCK if we can cache brlocks 1379 * or leave it the same if we can't. Returns 0 if we don't need to request to 1380 * the server or 1 otherwise. 
1381 */ 1382 static int 1383 cifs_lock_test(struct cifsFileInfo *cfile, __u64 offset, __u64 length, 1384 __u8 type, struct file_lock *flock) 1385 { 1386 int rc = 0; 1387 struct cifsLockInfo *conf_lock; 1388 struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry)); 1389 struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server; 1390 bool exist; 1391 1392 down_read(&cinode->lock_sem); 1393 1394 exist = cifs_find_lock_conflict(cfile, offset, length, type, 1395 flock->fl_flags, &conf_lock, 1396 CIFS_LOCK_OP); 1397 if (exist) { 1398 flock->fl_start = conf_lock->offset; 1399 flock->fl_end = conf_lock->offset + conf_lock->length - 1; 1400 flock->fl_pid = conf_lock->pid; 1401 if (conf_lock->type & server->vals->shared_lock_type) 1402 flock->fl_type = F_RDLCK; 1403 else 1404 flock->fl_type = F_WRLCK; 1405 } else if (!cinode->can_cache_brlcks) 1406 rc = 1; 1407 else 1408 flock->fl_type = F_UNLCK; 1409 1410 up_read(&cinode->lock_sem); 1411 return rc; 1412 } 1413 1414 static void 1415 cifs_lock_add(struct cifsFileInfo *cfile, struct cifsLockInfo *lock) 1416 { 1417 struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry)); 1418 cifs_down_write(&cinode->lock_sem); 1419 list_add_tail(&lock->llist, &cfile->llist->locks); 1420 up_write(&cinode->lock_sem); 1421 } 1422 1423 /* 1424 * Set the byte-range lock (mandatory style). Returns: 1425 * 1) 0, if we set the lock and don't need to request to the server; 1426 * 2) 1, if no locks prevent us but we need to request to the server; 1427 * 3) -EACCES, if there is a lock that prevents us and wait is false. 1428 */ 1429 static int 1430 cifs_lock_add_if(struct cifsFileInfo *cfile, struct cifsLockInfo *lock, 1431 bool wait) 1432 { 1433 struct cifsLockInfo *conf_lock; 1434 struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry)); 1435 bool exist; 1436 int rc = 0; 1437 1438 try_again: 1439 exist = false; 1440 cifs_down_write(&cinode->lock_sem); 1441 1442 exist = cifs_find_lock_conflict(cfile, lock->offset, lock->length, 1443 lock->type, lock->flags, &conf_lock, 1444 CIFS_LOCK_OP); 1445 if (!exist && cinode->can_cache_brlcks) { 1446 list_add_tail(&lock->llist, &cfile->llist->locks); 1447 up_write(&cinode->lock_sem); 1448 return rc; 1449 } 1450 1451 if (!exist) 1452 rc = 1; 1453 else if (!wait) 1454 rc = -EACCES; 1455 else { 1456 list_add_tail(&lock->blist, &conf_lock->blist); 1457 up_write(&cinode->lock_sem); 1458 rc = wait_event_interruptible(lock->block_q, 1459 (lock->blist.prev == &lock->blist) && 1460 (lock->blist.next == &lock->blist)); 1461 if (!rc) 1462 goto try_again; 1463 cifs_down_write(&cinode->lock_sem); 1464 list_del_init(&lock->blist); 1465 } 1466 1467 up_write(&cinode->lock_sem); 1468 return rc; 1469 } 1470 1471 #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY 1472 /* 1473 * Check if there is another lock that prevents us to set the lock (posix 1474 * style). If such a lock exists, update the flock structure with its 1475 * properties. Otherwise, set the flock type to F_UNLCK if we can cache brlocks 1476 * or leave it the same if we can't. Returns 0 if we don't need to request to 1477 * the server or 1 otherwise. 
1478 */ 1479 static int 1480 cifs_posix_lock_test(struct file *file, struct file_lock *flock) 1481 { 1482 int rc = 0; 1483 struct cifsInodeInfo *cinode = CIFS_I(file_inode(file)); 1484 unsigned char saved_type = flock->fl_type; 1485 1486 if ((flock->fl_flags & FL_POSIX) == 0) 1487 return 1; 1488 1489 down_read(&cinode->lock_sem); 1490 posix_test_lock(file, flock); 1491 1492 if (flock->fl_type == F_UNLCK && !cinode->can_cache_brlcks) { 1493 flock->fl_type = saved_type; 1494 rc = 1; 1495 } 1496 1497 up_read(&cinode->lock_sem); 1498 return rc; 1499 } 1500 1501 /* 1502 * Set the byte-range lock (posix style). Returns: 1503 * 1) <0, if the error occurs while setting the lock; 1504 * 2) 0, if we set the lock and don't need to request to the server; 1505 * 3) FILE_LOCK_DEFERRED, if we will wait for some other file_lock; 1506 * 4) FILE_LOCK_DEFERRED + 1, if we need to request to the server. 1507 */ 1508 static int 1509 cifs_posix_lock_set(struct file *file, struct file_lock *flock) 1510 { 1511 struct cifsInodeInfo *cinode = CIFS_I(file_inode(file)); 1512 int rc = FILE_LOCK_DEFERRED + 1; 1513 1514 if ((flock->fl_flags & FL_POSIX) == 0) 1515 return rc; 1516 1517 cifs_down_write(&cinode->lock_sem); 1518 if (!cinode->can_cache_brlcks) { 1519 up_write(&cinode->lock_sem); 1520 return rc; 1521 } 1522 1523 rc = posix_lock_file(file, flock, NULL); 1524 up_write(&cinode->lock_sem); 1525 return rc; 1526 } 1527 1528 int 1529 cifs_push_mandatory_locks(struct cifsFileInfo *cfile) 1530 { 1531 unsigned int xid; 1532 int rc = 0, stored_rc; 1533 struct cifsLockInfo *li, *tmp; 1534 struct cifs_tcon *tcon; 1535 unsigned int num, max_num, max_buf; 1536 LOCKING_ANDX_RANGE *buf, *cur; 1537 static const int types[] = { 1538 LOCKING_ANDX_LARGE_FILES, 1539 LOCKING_ANDX_SHARED_LOCK | LOCKING_ANDX_LARGE_FILES 1540 }; 1541 int i; 1542 1543 xid = get_xid(); 1544 tcon = tlink_tcon(cfile->tlink); 1545 1546 /* 1547 * Accessing maxBuf is racy with cifs_reconnect - need to store value 1548 * and check it before using. 
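 * A reconnect can reset maxBuf underneath us, so snapshot it here and
 * sanity-check the snapshot before sizing the lock buffer.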
1549 */ 1550 max_buf = tcon->ses->server->maxBuf; 1551 if (max_buf < (sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE))) { 1552 free_xid(xid); 1553 return -EINVAL; 1554 } 1555 1556 BUILD_BUG_ON(sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE) > 1557 PAGE_SIZE); 1558 max_buf = min_t(unsigned int, max_buf - sizeof(struct smb_hdr), 1559 PAGE_SIZE); 1560 max_num = (max_buf - sizeof(struct smb_hdr)) / 1561 sizeof(LOCKING_ANDX_RANGE); 1562 buf = kcalloc(max_num, sizeof(LOCKING_ANDX_RANGE), GFP_KERNEL); 1563 if (!buf) { 1564 free_xid(xid); 1565 return -ENOMEM; 1566 } 1567 1568 for (i = 0; i < 2; i++) { 1569 cur = buf; 1570 num = 0; 1571 list_for_each_entry_safe(li, tmp, &cfile->llist->locks, llist) { 1572 if (li->type != types[i]) 1573 continue; 1574 cur->Pid = cpu_to_le16(li->pid); 1575 cur->LengthLow = cpu_to_le32((u32)li->length); 1576 cur->LengthHigh = cpu_to_le32((u32)(li->length>>32)); 1577 cur->OffsetLow = cpu_to_le32((u32)li->offset); 1578 cur->OffsetHigh = cpu_to_le32((u32)(li->offset>>32)); 1579 if (++num == max_num) { 1580 stored_rc = cifs_lockv(xid, tcon, 1581 cfile->fid.netfid, 1582 (__u8)li->type, 0, num, 1583 buf); 1584 if (stored_rc) 1585 rc = stored_rc; 1586 cur = buf; 1587 num = 0; 1588 } else 1589 cur++; 1590 } 1591 1592 if (num) { 1593 stored_rc = cifs_lockv(xid, tcon, cfile->fid.netfid, 1594 (__u8)types[i], 0, num, buf); 1595 if (stored_rc) 1596 rc = stored_rc; 1597 } 1598 } 1599 1600 kfree(buf); 1601 free_xid(xid); 1602 return rc; 1603 } 1604 1605 static __u32 1606 hash_lockowner(fl_owner_t owner) 1607 { 1608 return cifs_lock_secret ^ hash32_ptr((const void *)owner); 1609 } 1610 #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */ 1611 1612 struct lock_to_push { 1613 struct list_head llist; 1614 __u64 offset; 1615 __u64 length; 1616 __u32 pid; 1617 __u16 netfid; 1618 __u8 type; 1619 }; 1620 1621 #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY 1622 static int 1623 cifs_push_posix_locks(struct cifsFileInfo *cfile) 1624 { 1625 struct inode *inode = d_inode(cfile->dentry); 1626 struct cifs_tcon *tcon = tlink_tcon(cfile->tlink); 1627 struct file_lock *flock; 1628 struct file_lock_context *flctx = locks_inode_context(inode); 1629 unsigned int count = 0, i; 1630 int rc = 0, xid, type; 1631 struct list_head locks_to_send, *el; 1632 struct lock_to_push *lck, *tmp; 1633 __u64 length; 1634 1635 xid = get_xid(); 1636 1637 if (!flctx) 1638 goto out; 1639 1640 spin_lock(&flctx->flc_lock); 1641 list_for_each(el, &flctx->flc_posix) { 1642 count++; 1643 } 1644 spin_unlock(&flctx->flc_lock); 1645 1646 INIT_LIST_HEAD(&locks_to_send); 1647 1648 /* 1649 * Allocating count locks is enough because no FL_POSIX locks can be 1650 * added to the list while we are holding cinode->lock_sem that 1651 * protects locking operations of this inode. 1652 */ 1653 for (i = 0; i < count; i++) { 1654 lck = kmalloc(sizeof(struct lock_to_push), GFP_KERNEL); 1655 if (!lck) { 1656 rc = -ENOMEM; 1657 goto err_out; 1658 } 1659 list_add_tail(&lck->llist, &locks_to_send); 1660 } 1661 1662 el = locks_to_send.next; 1663 spin_lock(&flctx->flc_lock); 1664 list_for_each_entry(flock, &flctx->flc_posix, fl_list) { 1665 if (el == &locks_to_send) { 1666 /* 1667 * The list ended. We don't have enough allocated 1668 * structures - something is really wrong. 
1669 */ 1670 cifs_dbg(VFS, "Can't push all brlocks!\n"); 1671 break; 1672 } 1673 length = cifs_flock_len(flock); 1674 if (flock->fl_type == F_RDLCK || flock->fl_type == F_SHLCK) 1675 type = CIFS_RDLCK; 1676 else 1677 type = CIFS_WRLCK; 1678 lck = list_entry(el, struct lock_to_push, llist); 1679 lck->pid = hash_lockowner(flock->fl_owner); 1680 lck->netfid = cfile->fid.netfid; 1681 lck->length = length; 1682 lck->type = type; 1683 lck->offset = flock->fl_start; 1684 } 1685 spin_unlock(&flctx->flc_lock); 1686 1687 list_for_each_entry_safe(lck, tmp, &locks_to_send, llist) { 1688 int stored_rc; 1689 1690 stored_rc = CIFSSMBPosixLock(xid, tcon, lck->netfid, lck->pid, 1691 lck->offset, lck->length, NULL, 1692 lck->type, 0); 1693 if (stored_rc) 1694 rc = stored_rc; 1695 list_del(&lck->llist); 1696 kfree(lck); 1697 } 1698 1699 out: 1700 free_xid(xid); 1701 return rc; 1702 err_out: 1703 list_for_each_entry_safe(lck, tmp, &locks_to_send, llist) { 1704 list_del(&lck->llist); 1705 kfree(lck); 1706 } 1707 goto out; 1708 } 1709 #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */ 1710 1711 static int 1712 cifs_push_locks(struct cifsFileInfo *cfile) 1713 { 1714 struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry)); 1715 struct cifs_tcon *tcon = tlink_tcon(cfile->tlink); 1716 int rc = 0; 1717 #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY 1718 struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb); 1719 #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */ 1720 1721 /* we are going to update can_cache_brlcks here - need a write access */ 1722 cifs_down_write(&cinode->lock_sem); 1723 if (!cinode->can_cache_brlcks) { 1724 up_write(&cinode->lock_sem); 1725 return rc; 1726 } 1727 1728 #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY 1729 if (cap_unix(tcon->ses) && 1730 (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) && 1731 ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0)) 1732 rc = cifs_push_posix_locks(cfile); 1733 else 1734 #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */ 1735 rc = tcon->ses->server->ops->push_mand_locks(cfile); 1736 1737 cinode->can_cache_brlcks = false; 1738 up_write(&cinode->lock_sem); 1739 return rc; 1740 } 1741 1742 static void 1743 cifs_read_flock(struct file_lock *flock, __u32 *type, int *lock, int *unlock, 1744 bool *wait_flag, struct TCP_Server_Info *server) 1745 { 1746 if (flock->fl_flags & FL_POSIX) 1747 cifs_dbg(FYI, "Posix\n"); 1748 if (flock->fl_flags & FL_FLOCK) 1749 cifs_dbg(FYI, "Flock\n"); 1750 if (flock->fl_flags & FL_SLEEP) { 1751 cifs_dbg(FYI, "Blocking lock\n"); 1752 *wait_flag = true; 1753 } 1754 if (flock->fl_flags & FL_ACCESS) 1755 cifs_dbg(FYI, "Process suspended by mandatory locking - not implemented yet\n"); 1756 if (flock->fl_flags & FL_LEASE) 1757 cifs_dbg(FYI, "Lease on file - not implemented yet\n"); 1758 if (flock->fl_flags & 1759 (~(FL_POSIX | FL_FLOCK | FL_SLEEP | 1760 FL_ACCESS | FL_LEASE | FL_CLOSE | FL_OFDLCK))) 1761 cifs_dbg(FYI, "Unknown lock flags 0x%x\n", flock->fl_flags); 1762 1763 *type = server->vals->large_lock_type; 1764 if (flock->fl_type == F_WRLCK) { 1765 cifs_dbg(FYI, "F_WRLCK\n"); 1766 *type |= server->vals->exclusive_lock_type; 1767 *lock = 1; 1768 } else if (flock->fl_type == F_UNLCK) { 1769 cifs_dbg(FYI, "F_UNLCK\n"); 1770 *type |= server->vals->unlock_lock_type; 1771 *unlock = 1; 1772 /* Check if unlock includes more than one lock range */ 1773 } else if (flock->fl_type == F_RDLCK) { 1774 cifs_dbg(FYI, "F_RDLCK\n"); 1775 *type |= server->vals->shared_lock_type; 1776 *lock = 1; 1777 } else if (flock->fl_type == F_EXLCK) { 1778 
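/* treat legacy F_EXLCK/F_SHLCK requests as exclusive/shared locks, mirroring F_WRLCK/F_RDLCK above */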
cifs_dbg(FYI, "F_EXLCK\n"); 1779 *type |= server->vals->exclusive_lock_type; 1780 *lock = 1; 1781 } else if (flock->fl_type == F_SHLCK) { 1782 cifs_dbg(FYI, "F_SHLCK\n"); 1783 *type |= server->vals->shared_lock_type; 1784 *lock = 1; 1785 } else 1786 cifs_dbg(FYI, "Unknown type of lock\n"); 1787 } 1788 1789 static int 1790 cifs_getlk(struct file *file, struct file_lock *flock, __u32 type, 1791 bool wait_flag, bool posix_lck, unsigned int xid) 1792 { 1793 int rc = 0; 1794 __u64 length = cifs_flock_len(flock); 1795 struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data; 1796 struct cifs_tcon *tcon = tlink_tcon(cfile->tlink); 1797 struct TCP_Server_Info *server = tcon->ses->server; 1798 #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY 1799 __u16 netfid = cfile->fid.netfid; 1800 1801 if (posix_lck) { 1802 int posix_lock_type; 1803 1804 rc = cifs_posix_lock_test(file, flock); 1805 if (!rc) 1806 return rc; 1807 1808 if (type & server->vals->shared_lock_type) 1809 posix_lock_type = CIFS_RDLCK; 1810 else 1811 posix_lock_type = CIFS_WRLCK; 1812 rc = CIFSSMBPosixLock(xid, tcon, netfid, 1813 hash_lockowner(flock->fl_owner), 1814 flock->fl_start, length, flock, 1815 posix_lock_type, wait_flag); 1816 return rc; 1817 } 1818 #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */ 1819 1820 rc = cifs_lock_test(cfile, flock->fl_start, length, type, flock); 1821 if (!rc) 1822 return rc; 1823 1824 /* BB we could chain these into one lock request BB */ 1825 rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length, type, 1826 1, 0, false); 1827 if (rc == 0) { 1828 rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length, 1829 type, 0, 1, false); 1830 flock->fl_type = F_UNLCK; 1831 if (rc != 0) 1832 cifs_dbg(VFS, "Error unlocking previously locked range %d during test of lock\n", 1833 rc); 1834 return 0; 1835 } 1836 1837 if (type & server->vals->shared_lock_type) { 1838 flock->fl_type = F_WRLCK; 1839 return 0; 1840 } 1841 1842 type &= ~server->vals->exclusive_lock_type; 1843 1844 rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length, 1845 type | server->vals->shared_lock_type, 1846 1, 0, false); 1847 if (rc == 0) { 1848 rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length, 1849 type | server->vals->shared_lock_type, 0, 1, false); 1850 flock->fl_type = F_RDLCK; 1851 if (rc != 0) 1852 cifs_dbg(VFS, "Error unlocking previously locked range %d during test of lock\n", 1853 rc); 1854 } else 1855 flock->fl_type = F_WRLCK; 1856 1857 return 0; 1858 } 1859 1860 void 1861 cifs_move_llist(struct list_head *source, struct list_head *dest) 1862 { 1863 struct list_head *li, *tmp; 1864 list_for_each_safe(li, tmp, source) 1865 list_move(li, dest); 1866 } 1867 1868 void 1869 cifs_free_llist(struct list_head *llist) 1870 { 1871 struct cifsLockInfo *li, *tmp; 1872 list_for_each_entry_safe(li, tmp, llist, llist) { 1873 cifs_del_lock_waiters(li); 1874 list_del(&li->llist); 1875 kfree(li); 1876 } 1877 } 1878 1879 #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY 1880 int 1881 cifs_unlock_range(struct cifsFileInfo *cfile, struct file_lock *flock, 1882 unsigned int xid) 1883 { 1884 int rc = 0, stored_rc; 1885 static const int types[] = { 1886 LOCKING_ANDX_LARGE_FILES, 1887 LOCKING_ANDX_SHARED_LOCK | LOCKING_ANDX_LARGE_FILES 1888 }; 1889 unsigned int i; 1890 unsigned int max_num, num, max_buf; 1891 LOCKING_ANDX_RANGE *buf, *cur; 1892 struct cifs_tcon *tcon = tlink_tcon(cfile->tlink); 1893 struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry)); 1894 struct cifsLockInfo *li, *tmp; 1895 __u64 length = 
cifs_flock_len(flock); 1896 struct list_head tmp_llist; 1897 1898 INIT_LIST_HEAD(&tmp_llist); 1899 1900 /* 1901 * Accessing maxBuf is racy with cifs_reconnect - need to store value 1902 * and check it before using. 1903 */ 1904 max_buf = tcon->ses->server->maxBuf; 1905 if (max_buf < (sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE))) 1906 return -EINVAL; 1907 1908 BUILD_BUG_ON(sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE) > 1909 PAGE_SIZE); 1910 max_buf = min_t(unsigned int, max_buf - sizeof(struct smb_hdr), 1911 PAGE_SIZE); 1912 max_num = (max_buf - sizeof(struct smb_hdr)) / 1913 sizeof(LOCKING_ANDX_RANGE); 1914 buf = kcalloc(max_num, sizeof(LOCKING_ANDX_RANGE), GFP_KERNEL); 1915 if (!buf) 1916 return -ENOMEM; 1917 1918 cifs_down_write(&cinode->lock_sem); 1919 for (i = 0; i < 2; i++) { 1920 cur = buf; 1921 num = 0; 1922 list_for_each_entry_safe(li, tmp, &cfile->llist->locks, llist) { 1923 if (flock->fl_start > li->offset || 1924 (flock->fl_start + length) < 1925 (li->offset + li->length)) 1926 continue; 1927 if (current->tgid != li->pid) 1928 continue; 1929 if (types[i] != li->type) 1930 continue; 1931 if (cinode->can_cache_brlcks) { 1932 /* 1933 * We can cache brlock requests - simply remove 1934 * a lock from the file's list. 1935 */ 1936 list_del(&li->llist); 1937 cifs_del_lock_waiters(li); 1938 kfree(li); 1939 continue; 1940 } 1941 cur->Pid = cpu_to_le16(li->pid); 1942 cur->LengthLow = cpu_to_le32((u32)li->length); 1943 cur->LengthHigh = cpu_to_le32((u32)(li->length>>32)); 1944 cur->OffsetLow = cpu_to_le32((u32)li->offset); 1945 cur->OffsetHigh = cpu_to_le32((u32)(li->offset>>32)); 1946 /* 1947 * We need to save a lock here to let us add it again to 1948 * the file's list if the unlock range request fails on 1949 * the server. 1950 */ 1951 list_move(&li->llist, &tmp_llist); 1952 if (++num == max_num) { 1953 stored_rc = cifs_lockv(xid, tcon, 1954 cfile->fid.netfid, 1955 li->type, num, 0, buf); 1956 if (stored_rc) { 1957 /* 1958 * We failed on the unlock range 1959 * request - add all locks from the tmp 1960 * list to the head of the file's list. 1961 */ 1962 cifs_move_llist(&tmp_llist, 1963 &cfile->llist->locks); 1964 rc = stored_rc; 1965 } else 1966 /* 1967 * The unlock range request succeeded - 1968 * free the tmp list.
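* (the server has dropped these locks, so the saved copies are no longer needed).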
1969 */ 1970 cifs_free_llist(&tmp_llist); 1971 cur = buf; 1972 num = 0; 1973 } else 1974 cur++; 1975 } 1976 if (num) { 1977 stored_rc = cifs_lockv(xid, tcon, cfile->fid.netfid, 1978 types[i], num, 0, buf); 1979 if (stored_rc) { 1980 cifs_move_llist(&tmp_llist, 1981 &cfile->llist->locks); 1982 rc = stored_rc; 1983 } else 1984 cifs_free_llist(&tmp_llist); 1985 } 1986 } 1987 1988 up_write(&cinode->lock_sem); 1989 kfree(buf); 1990 return rc; 1991 } 1992 #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */ 1993 1994 static int 1995 cifs_setlk(struct file *file, struct file_lock *flock, __u32 type, 1996 bool wait_flag, bool posix_lck, int lock, int unlock, 1997 unsigned int xid) 1998 { 1999 int rc = 0; 2000 __u64 length = cifs_flock_len(flock); 2001 struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data; 2002 struct cifs_tcon *tcon = tlink_tcon(cfile->tlink); 2003 struct TCP_Server_Info *server = tcon->ses->server; 2004 struct inode *inode = d_inode(cfile->dentry); 2005 2006 #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY 2007 if (posix_lck) { 2008 int posix_lock_type; 2009 2010 rc = cifs_posix_lock_set(file, flock); 2011 if (rc <= FILE_LOCK_DEFERRED) 2012 return rc; 2013 2014 if (type & server->vals->shared_lock_type) 2015 posix_lock_type = CIFS_RDLCK; 2016 else 2017 posix_lock_type = CIFS_WRLCK; 2018 2019 if (unlock == 1) 2020 posix_lock_type = CIFS_UNLCK; 2021 2022 rc = CIFSSMBPosixLock(xid, tcon, cfile->fid.netfid, 2023 hash_lockowner(flock->fl_owner), 2024 flock->fl_start, length, 2025 NULL, posix_lock_type, wait_flag); 2026 goto out; 2027 } 2028 #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */ 2029 if (lock) { 2030 struct cifsLockInfo *lock; 2031 2032 lock = cifs_lock_init(flock->fl_start, length, type, 2033 flock->fl_flags); 2034 if (!lock) 2035 return -ENOMEM; 2036 2037 rc = cifs_lock_add_if(cfile, lock, wait_flag); 2038 if (rc < 0) { 2039 kfree(lock); 2040 return rc; 2041 } 2042 if (!rc) 2043 goto out; 2044 2045 /* 2046 * Windows 7 server can delay breaking lease from read to None 2047 * if we set a byte-range lock on a file - break it explicitly 2048 * before sending the lock to the server to be sure the next 2049 * read won't conflict with non-overlapping locks due to 2050 * page reading.
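* Zapping the mapping and clearing the oplock below forces subsequent reads to go to the server.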
2051 */ 2052 if (!CIFS_CACHE_WRITE(CIFS_I(inode)) && 2053 CIFS_CACHE_READ(CIFS_I(inode))) { 2054 cifs_zap_mapping(inode); 2055 cifs_dbg(FYI, "Set no oplock for inode=%p due to mand locks\n", 2056 inode); 2057 CIFS_I(inode)->oplock = 0; 2058 } 2059 2060 rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length, 2061 type, 1, 0, wait_flag); 2062 if (rc) { 2063 kfree(lock); 2064 return rc; 2065 } 2066 2067 cifs_lock_add(cfile, lock); 2068 } else if (unlock) 2069 rc = server->ops->mand_unlock_range(cfile, flock, xid); 2070 2071 out: 2072 if ((flock->fl_flags & FL_POSIX) || (flock->fl_flags & FL_FLOCK)) { 2073 /* 2074 * If this is a request to remove all locks because we 2075 * are closing the file, it doesn't matter if the 2076 * unlocking failed as both cifs.ko and the SMB server 2077 * remove the lock on file close 2078 */ 2079 if (rc) { 2080 cifs_dbg(VFS, "%s failed rc=%d\n", __func__, rc); 2081 if (!(flock->fl_flags & FL_CLOSE)) 2082 return rc; 2083 } 2084 rc = locks_lock_file_wait(file, flock); 2085 } 2086 return rc; 2087 } 2088 2089 int cifs_flock(struct file *file, int cmd, struct file_lock *fl) 2090 { 2091 int rc, xid; 2092 int lock = 0, unlock = 0; 2093 bool wait_flag = false; 2094 bool posix_lck = false; 2095 struct cifs_sb_info *cifs_sb; 2096 struct cifs_tcon *tcon; 2097 struct cifsFileInfo *cfile; 2098 __u32 type; 2099 2100 xid = get_xid(); 2101 2102 if (!(fl->fl_flags & FL_FLOCK)) { 2103 rc = -ENOLCK; 2104 free_xid(xid); 2105 return rc; 2106 } 2107 2108 cfile = (struct cifsFileInfo *)file->private_data; 2109 tcon = tlink_tcon(cfile->tlink); 2110 2111 cifs_read_flock(fl, &type, &lock, &unlock, &wait_flag, 2112 tcon->ses->server); 2113 cifs_sb = CIFS_FILE_SB(file); 2114 2115 if (cap_unix(tcon->ses) && 2116 (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) && 2117 ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0)) 2118 posix_lck = true; 2119 2120 if (!lock && !unlock) { 2121 /* 2122 * if no lock or unlock then nothing to do since we do not 2123 * know what it is 2124 */ 2125 rc = -EOPNOTSUPP; 2126 free_xid(xid); 2127 return rc; 2128 } 2129 2130 rc = cifs_setlk(file, fl, type, wait_flag, posix_lck, lock, unlock, 2131 xid); 2132 free_xid(xid); 2133 return rc; 2134 2135 2136 } 2137 2138 int cifs_lock(struct file *file, int cmd, struct file_lock *flock) 2139 { 2140 int rc, xid; 2141 int lock = 0, unlock = 0; 2142 bool wait_flag = false; 2143 bool posix_lck = false; 2144 struct cifs_sb_info *cifs_sb; 2145 struct cifs_tcon *tcon; 2146 struct cifsFileInfo *cfile; 2147 __u32 type; 2148 2149 rc = -EACCES; 2150 xid = get_xid(); 2151 2152 cifs_dbg(FYI, "%s: %pD2 cmd=0x%x type=0x%x flags=0x%x r=%lld:%lld\n", __func__, file, cmd, 2153 flock->fl_flags, flock->fl_type, (long long)flock->fl_start, 2154 (long long)flock->fl_end); 2155 2156 cfile = (struct cifsFileInfo *)file->private_data; 2157 tcon = tlink_tcon(cfile->tlink); 2158 2159 cifs_read_flock(flock, &type, &lock, &unlock, &wait_flag, 2160 tcon->ses->server); 2161 cifs_sb = CIFS_FILE_SB(file); 2162 set_bit(CIFS_INO_CLOSE_ON_LOCK, &CIFS_I(d_inode(cfile->dentry))->flags); 2163 2164 if (cap_unix(tcon->ses) && 2165 (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) && 2166 ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0)) 2167 posix_lck = true; 2168 /* 2169 * BB add code here to normalize offset and length to account for 2170 * negative length which we can not accept over the wire. 
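* (POSIX fcntl allows a negative l_len, which describes a range ending just before l_start.)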
2171 */ 2172 if (IS_GETLK(cmd)) { 2173 rc = cifs_getlk(file, flock, type, wait_flag, posix_lck, xid); 2174 free_xid(xid); 2175 return rc; 2176 } 2177 2178 if (!lock && !unlock) { 2179 /* 2180 * if no lock or unlock then nothing to do since we do not 2181 * know what it is 2182 */ 2183 free_xid(xid); 2184 return -EOPNOTSUPP; 2185 } 2186 2187 rc = cifs_setlk(file, flock, type, wait_flag, posix_lck, lock, unlock, 2188 xid); 2189 free_xid(xid); 2190 return rc; 2191 } 2192 2193 /* 2194 * update the file size (if needed) after a write. Should be called with 2195 * the inode->i_lock held 2196 */ 2197 void 2198 cifs_update_eof(struct cifsInodeInfo *cifsi, loff_t offset, 2199 unsigned int bytes_written) 2200 { 2201 loff_t end_of_write = offset + bytes_written; 2202 2203 if (end_of_write > cifsi->server_eof) 2204 cifsi->server_eof = end_of_write; 2205 } 2206 2207 static ssize_t 2208 cifs_write(struct cifsFileInfo *open_file, __u32 pid, const char *write_data, 2209 size_t write_size, loff_t *offset) 2210 { 2211 int rc = 0; 2212 unsigned int bytes_written = 0; 2213 unsigned int total_written; 2214 struct cifs_tcon *tcon; 2215 struct TCP_Server_Info *server; 2216 unsigned int xid; 2217 struct dentry *dentry = open_file->dentry; 2218 struct cifsInodeInfo *cifsi = CIFS_I(d_inode(dentry)); 2219 struct cifs_io_parms io_parms = {0}; 2220 2221 cifs_dbg(FYI, "write %zd bytes to offset %lld of %pd\n", 2222 write_size, *offset, dentry); 2223 2224 tcon = tlink_tcon(open_file->tlink); 2225 server = tcon->ses->server; 2226 2227 if (!server->ops->sync_write) 2228 return -ENOSYS; 2229 2230 xid = get_xid(); 2231 2232 for (total_written = 0; write_size > total_written; 2233 total_written += bytes_written) { 2234 rc = -EAGAIN; 2235 while (rc == -EAGAIN) { 2236 struct kvec iov[2]; 2237 unsigned int len; 2238 2239 if (open_file->invalidHandle) { 2240 /* we could deadlock if we called 2241 filemap_fdatawait from here so tell 2242 reopen_file not to flush data to 2243 server now */ 2244 rc = cifs_reopen_file(open_file, false); 2245 if (rc != 0) 2246 break; 2247 } 2248 2249 len = min(server->ops->wp_retry_size(d_inode(dentry)), 2250 (unsigned int)write_size - total_written); 2251 /* iov[0] is reserved for smb header */ 2252 iov[1].iov_base = (char *)write_data + total_written; 2253 iov[1].iov_len = len; 2254 io_parms.pid = pid; 2255 io_parms.tcon = tcon; 2256 io_parms.offset = *offset; 2257 io_parms.length = len; 2258 rc = server->ops->sync_write(xid, &open_file->fid, 2259 &io_parms, &bytes_written, iov, 1); 2260 } 2261 if (rc || (bytes_written == 0)) { 2262 if (total_written) 2263 break; 2264 else { 2265 free_xid(xid); 2266 return rc; 2267 } 2268 } else { 2269 spin_lock(&d_inode(dentry)->i_lock); 2270 cifs_update_eof(cifsi, *offset, bytes_written); 2271 spin_unlock(&d_inode(dentry)->i_lock); 2272 *offset += bytes_written; 2273 } 2274 } 2275 2276 cifs_stats_bytes_written(tcon, total_written); 2277 2278 if (total_written > 0) { 2279 spin_lock(&d_inode(dentry)->i_lock); 2280 if (*offset > d_inode(dentry)->i_size) { 2281 i_size_write(d_inode(dentry), *offset); 2282 d_inode(dentry)->i_blocks = (512 - 1 + *offset) >> 9; 2283 } 2284 spin_unlock(&d_inode(dentry)->i_lock); 2285 } 2286 mark_inode_dirty_sync(d_inode(dentry)); 2287 free_xid(xid); 2288 return total_written; 2289 } 2290 2291 struct cifsFileInfo *find_readable_file(struct cifsInodeInfo *cifs_inode, 2292 bool fsuid_only) 2293 { 2294 struct cifsFileInfo *open_file = NULL; 2295 struct cifs_sb_info *cifs_sb = CIFS_SB(cifs_inode->netfs.inode.i_sb); 2296 2297 /* only filter by 
fsuid on multiuser mounts */ 2298 if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER)) 2299 fsuid_only = false; 2300 2301 spin_lock(&cifs_inode->open_file_lock); 2302 /* we could simply get the first_list_entry since write-only entries 2303 are always at the end of the list but since the first entry might 2304 have a close pending, we go through the whole list */ 2305 list_for_each_entry(open_file, &cifs_inode->openFileList, flist) { 2306 if (fsuid_only && !uid_eq(open_file->uid, current_fsuid())) 2307 continue; 2308 if (OPEN_FMODE(open_file->f_flags) & FMODE_READ) { 2309 if ((!open_file->invalidHandle)) { 2310 /* found a good file */ 2311 /* lock it so it will not be closed on us */ 2312 cifsFileInfo_get(open_file); 2313 spin_unlock(&cifs_inode->open_file_lock); 2314 return open_file; 2315 } /* else might as well continue, and look for 2316 another, or simply have the caller reopen it 2317 again rather than trying to fix this handle */ 2318 } else /* write only file */ 2319 break; /* write only files are last so must be done */ 2320 } 2321 spin_unlock(&cifs_inode->open_file_lock); 2322 return NULL; 2323 } 2324 2325 /* Return -EBADF if no handle is found and general rc otherwise */ 2326 int 2327 cifs_get_writable_file(struct cifsInodeInfo *cifs_inode, int flags, 2328 struct cifsFileInfo **ret_file) 2329 { 2330 struct cifsFileInfo *open_file, *inv_file = NULL; 2331 struct cifs_sb_info *cifs_sb; 2332 bool any_available = false; 2333 int rc = -EBADF; 2334 unsigned int refind = 0; 2335 bool fsuid_only = flags & FIND_WR_FSUID_ONLY; 2336 bool with_delete = flags & FIND_WR_WITH_DELETE; 2337 *ret_file = NULL; 2338 2339 /* 2340 * Having a null inode here (because mapping->host was set to zero by 2341 * the VFS or MM) should not happen but we had reports of an oops (due 2342 * to it being zero) during stress testcases so we need to check for it 2343 */ 2344 2345 if (cifs_inode == NULL) { 2346 cifs_dbg(VFS, "Null inode passed to cifs_writeable_file\n"); 2347 dump_stack(); 2348 return rc; 2349 } 2350 2351 cifs_sb = CIFS_SB(cifs_inode->netfs.inode.i_sb); 2352 2353 /* only filter by fsuid on multiuser mounts */ 2354 if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER)) 2355 fsuid_only = false; 2356 2357 spin_lock(&cifs_inode->open_file_lock); 2358 refind_writable: 2359 if (refind > MAX_REOPEN_ATT) { 2360 spin_unlock(&cifs_inode->open_file_lock); 2361 return rc; 2362 } 2363 list_for_each_entry(open_file, &cifs_inode->openFileList, flist) { 2364 if (!any_available && open_file->pid != current->tgid) 2365 continue; 2366 if (fsuid_only && !uid_eq(open_file->uid, current_fsuid())) 2367 continue; 2368 if (with_delete && !(open_file->fid.access & DELETE)) 2369 continue; 2370 if (OPEN_FMODE(open_file->f_flags) & FMODE_WRITE) { 2371 if (!open_file->invalidHandle) { 2372 /* found a good writable file */ 2373 cifsFileInfo_get(open_file); 2374 spin_unlock(&cifs_inode->open_file_lock); 2375 *ret_file = open_file; 2376 return 0; 2377 } else { 2378 if (!inv_file) 2379 inv_file = open_file; 2380 } 2381 } 2382 } 2383 /* couldn't find usable FH with same pid, try any available */ 2384 if (!any_available) { 2385 any_available = true; 2386 goto refind_writable; 2387 } 2388 2389 if (inv_file) { 2390 any_available = false; 2391 cifsFileInfo_get(inv_file); 2392 } 2393 2394 spin_unlock(&cifs_inode->open_file_lock); 2395 2396 if (inv_file) { 2397 rc = cifs_reopen_file(inv_file, false); 2398 if (!rc) { 2399 *ret_file = inv_file; 2400 return 0; 2401 } 2402 2403 spin_lock(&cifs_inode->open_file_lock); 2404
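/* Reopen failed - move the stale handle to the back of the list and retry the search. */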
list_move_tail(&inv_file->flist, &cifs_inode->openFileList); 2405 spin_unlock(&cifs_inode->open_file_lock); 2406 cifsFileInfo_put(inv_file); 2407 ++refind; 2408 inv_file = NULL; 2409 spin_lock(&cifs_inode->open_file_lock); 2410 goto refind_writable; 2411 } 2412 2413 return rc; 2414 } 2415 2416 struct cifsFileInfo * 2417 find_writable_file(struct cifsInodeInfo *cifs_inode, int flags) 2418 { 2419 struct cifsFileInfo *cfile; 2420 int rc; 2421 2422 rc = cifs_get_writable_file(cifs_inode, flags, &cfile); 2423 if (rc) 2424 cifs_dbg(FYI, "Couldn't find writable handle rc=%d\n", rc); 2425 2426 return cfile; 2427 } 2428 2429 int 2430 cifs_get_writable_path(struct cifs_tcon *tcon, const char *name, 2431 int flags, 2432 struct cifsFileInfo **ret_file) 2433 { 2434 struct cifsFileInfo *cfile; 2435 void *page = alloc_dentry_path(); 2436 2437 *ret_file = NULL; 2438 2439 spin_lock(&tcon->open_file_lock); 2440 list_for_each_entry(cfile, &tcon->openFileList, tlist) { 2441 struct cifsInodeInfo *cinode; 2442 const char *full_path = build_path_from_dentry(cfile->dentry, page); 2443 if (IS_ERR(full_path)) { 2444 spin_unlock(&tcon->open_file_lock); 2445 free_dentry_path(page); 2446 return PTR_ERR(full_path); 2447 } 2448 if (strcmp(full_path, name)) 2449 continue; 2450 2451 cinode = CIFS_I(d_inode(cfile->dentry)); 2452 spin_unlock(&tcon->open_file_lock); 2453 free_dentry_path(page); 2454 return cifs_get_writable_file(cinode, flags, ret_file); 2455 } 2456 2457 spin_unlock(&tcon->open_file_lock); 2458 free_dentry_path(page); 2459 return -ENOENT; 2460 } 2461 2462 int 2463 cifs_get_readable_path(struct cifs_tcon *tcon, const char *name, 2464 struct cifsFileInfo **ret_file) 2465 { 2466 struct cifsFileInfo *cfile; 2467 void *page = alloc_dentry_path(); 2468 2469 *ret_file = NULL; 2470 2471 spin_lock(&tcon->open_file_lock); 2472 list_for_each_entry(cfile, &tcon->openFileList, tlist) { 2473 struct cifsInodeInfo *cinode; 2474 const char *full_path = build_path_from_dentry(cfile->dentry, page); 2475 if (IS_ERR(full_path)) { 2476 spin_unlock(&tcon->open_file_lock); 2477 free_dentry_path(page); 2478 return PTR_ERR(full_path); 2479 } 2480 if (strcmp(full_path, name)) 2481 continue; 2482 2483 cinode = CIFS_I(d_inode(cfile->dentry)); 2484 spin_unlock(&tcon->open_file_lock); 2485 free_dentry_path(page); 2486 *ret_file = find_readable_file(cinode, 0); 2487 return *ret_file ? 0 : -ENOENT; 2488 } 2489 2490 spin_unlock(&tcon->open_file_lock); 2491 free_dentry_path(page); 2492 return -ENOENT; 2493 } 2494 2495 void 2496 cifs_writedata_release(struct kref *refcount) 2497 { 2498 struct cifs_writedata *wdata = container_of(refcount, 2499 struct cifs_writedata, refcount); 2500 #ifdef CONFIG_CIFS_SMB_DIRECT 2501 if (wdata->mr) { 2502 smbd_deregister_mr(wdata->mr); 2503 wdata->mr = NULL; 2504 } 2505 #endif 2506 2507 if (wdata->cfile) 2508 cifsFileInfo_put(wdata->cfile); 2509 2510 kfree(wdata); 2511 } 2512 2513 /* 2514 * Write failed with a retryable error. Resend the write request. It's also 2515 * possible that the page was redirtied so re-clean the page. 
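* The remaining data is re-split into chunks no larger than wp_retry_size().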
2516 */ 2517 static void 2518 cifs_writev_requeue(struct cifs_writedata *wdata) 2519 { 2520 int rc = 0; 2521 struct inode *inode = d_inode(wdata->cfile->dentry); 2522 struct TCP_Server_Info *server; 2523 unsigned int rest_len = wdata->bytes; 2524 loff_t fpos = wdata->offset; 2525 2526 server = tlink_tcon(wdata->cfile->tlink)->ses->server; 2527 do { 2528 struct cifs_writedata *wdata2; 2529 unsigned int wsize, cur_len; 2530 2531 wsize = server->ops->wp_retry_size(inode); 2532 if (wsize < rest_len) { 2533 if (wsize < PAGE_SIZE) { 2534 rc = -EOPNOTSUPP; 2535 break; 2536 } 2537 cur_len = min(round_down(wsize, PAGE_SIZE), rest_len); 2538 } else { 2539 cur_len = rest_len; 2540 } 2541 2542 wdata2 = cifs_writedata_alloc(cifs_writev_complete); 2543 if (!wdata2) { 2544 rc = -ENOMEM; 2545 break; 2546 } 2547 2548 wdata2->sync_mode = wdata->sync_mode; 2549 wdata2->offset = fpos; 2550 wdata2->bytes = cur_len; 2551 wdata2->iter = wdata->iter; 2552 2553 iov_iter_advance(&wdata2->iter, fpos - wdata->offset); 2554 iov_iter_truncate(&wdata2->iter, wdata2->bytes); 2555 2556 if (iov_iter_is_xarray(&wdata2->iter)) 2557 /* Check for pages having been redirtied and clean 2558 * them. We can do this by walking the xarray. If 2559 * it's not an xarray, then it's a DIO and we shouldn't 2560 * be mucking around with the page bits. 2561 */ 2562 cifs_undirty_folios(inode, fpos, cur_len); 2563 2564 rc = cifs_get_writable_file(CIFS_I(inode), FIND_WR_ANY, 2565 &wdata2->cfile); 2566 if (!wdata2->cfile) { 2567 cifs_dbg(VFS, "No writable handle to retry writepages rc=%d\n", 2568 rc); 2569 if (!is_retryable_error(rc)) 2570 rc = -EBADF; 2571 } else { 2572 wdata2->pid = wdata2->cfile->pid; 2573 rc = server->ops->async_writev(wdata2, 2574 cifs_writedata_release); 2575 } 2576 2577 kref_put(&wdata2->refcount, cifs_writedata_release); 2578 if (rc) { 2579 if (is_retryable_error(rc)) 2580 continue; 2581 fpos += cur_len; 2582 rest_len -= cur_len; 2583 break; 2584 } 2585 2586 fpos += cur_len; 2587 rest_len -= cur_len; 2588 } while (rest_len > 0); 2589 2590 /* Clean up remaining pages from the original wdata */ 2591 if (iov_iter_is_xarray(&wdata->iter)) 2592 cifs_pages_write_failed(inode, fpos, rest_len); 2593 2594 if (rc != 0 && !is_retryable_error(rc)) 2595 mapping_set_error(inode->i_mapping, rc); 2596 kref_put(&wdata->refcount, cifs_writedata_release); 2597 } 2598 2599 void 2600 cifs_writev_complete(struct work_struct *work) 2601 { 2602 struct cifs_writedata *wdata = container_of(work, 2603 struct cifs_writedata, work); 2604 struct inode *inode = d_inode(wdata->cfile->dentry); 2605 2606 if (wdata->result == 0) { 2607 spin_lock(&inode->i_lock); 2608 cifs_update_eof(CIFS_I(inode), wdata->offset, wdata->bytes); 2609 spin_unlock(&inode->i_lock); 2610 cifs_stats_bytes_written(tlink_tcon(wdata->cfile->tlink), 2611 wdata->bytes); 2612 } else if (wdata->sync_mode == WB_SYNC_ALL && wdata->result == -EAGAIN) 2613 return cifs_writev_requeue(wdata); 2614 2615 if (wdata->result == -EAGAIN) 2616 cifs_pages_write_redirty(inode, wdata->offset, wdata->bytes); 2617 else if (wdata->result < 0) 2618 cifs_pages_write_failed(inode, wdata->offset, wdata->bytes); 2619 else 2620 cifs_pages_written_back(inode, wdata->offset, wdata->bytes); 2621 2622 if (wdata->result != -EAGAIN) 2623 mapping_set_error(inode->i_mapping, wdata->result); 2624 kref_put(&wdata->refcount, cifs_writedata_release); 2625 } 2626 2627 struct cifs_writedata *cifs_writedata_alloc(work_func_t complete) 2628 { 2629 struct cifs_writedata *wdata; 2630 2631 wdata = kzalloc(sizeof(*wdata), 
GFP_NOFS); 2632 if (wdata != NULL) { 2633 kref_init(&wdata->refcount); 2634 INIT_LIST_HEAD(&wdata->list); 2635 init_completion(&wdata->done); 2636 INIT_WORK(&wdata->work, complete); 2637 } 2638 return wdata; 2639 } 2640 2641 static int cifs_partialpagewrite(struct page *page, unsigned from, unsigned to) 2642 { 2643 struct address_space *mapping = page->mapping; 2644 loff_t offset = (loff_t)page->index << PAGE_SHIFT; 2645 char *write_data; 2646 int rc = -EFAULT; 2647 int bytes_written = 0; 2648 struct inode *inode; 2649 struct cifsFileInfo *open_file; 2650 2651 if (!mapping || !mapping->host) 2652 return -EFAULT; 2653 2654 inode = page->mapping->host; 2655 2656 offset += (loff_t)from; 2657 write_data = kmap(page); 2658 write_data += from; 2659 2660 if ((to > PAGE_SIZE) || (from > to)) { 2661 kunmap(page); 2662 return -EIO; 2663 } 2664 2665 /* racing with truncate? */ 2666 if (offset > mapping->host->i_size) { 2667 kunmap(page); 2668 return 0; /* don't care */ 2669 } 2670 2671 /* check to make sure that we are not extending the file */ 2672 if (mapping->host->i_size - offset < (loff_t)to) 2673 to = (unsigned)(mapping->host->i_size - offset); 2674 2675 rc = cifs_get_writable_file(CIFS_I(mapping->host), FIND_WR_ANY, 2676 &open_file); 2677 if (!rc) { 2678 bytes_written = cifs_write(open_file, open_file->pid, 2679 write_data, to - from, &offset); 2680 cifsFileInfo_put(open_file); 2681 /* Does mm or vfs already set times? */ 2682 simple_inode_init_ts(inode); 2683 if ((bytes_written > 0) && (offset)) 2684 rc = 0; 2685 else if (bytes_written < 0) 2686 rc = bytes_written; 2687 else 2688 rc = -EFAULT; 2689 } else { 2690 cifs_dbg(FYI, "No writable handle for write page rc=%d\n", rc); 2691 if (!is_retryable_error(rc)) 2692 rc = -EIO; 2693 } 2694 2695 kunmap(page); 2696 return rc; 2697 } 2698 2699 /* 2700 * Extend the region to be written back to include subsequent contiguously 2701 * dirty pages if possible, but don't sleep while doing so. 2702 */ 2703 static void cifs_extend_writeback(struct address_space *mapping, 2704 struct xa_state *xas, 2705 long *_count, 2706 loff_t start, 2707 int max_pages, 2708 loff_t max_len, 2709 size_t *_len) 2710 { 2711 struct folio_batch batch; 2712 struct folio *folio; 2713 unsigned int nr_pages; 2714 pgoff_t index = (start + *_len) / PAGE_SIZE; 2715 size_t len; 2716 bool stop = true; 2717 unsigned int i; 2718 2719 folio_batch_init(&batch); 2720 2721 do { 2722 /* Firstly, we gather up a batch of contiguous dirty pages 2723 * under the RCU read lock - but we can't clear the dirty flags 2724 * there if any of those pages are mapped. 2725 */ 2726 rcu_read_lock(); 2727 2728 xas_for_each(xas, folio, ULONG_MAX) { 2729 stop = true; 2730 if (xas_retry(xas, folio)) 2731 continue; 2732 if (xa_is_value(folio)) 2733 break; 2734 if (folio->index != index) { 2735 xas_reset(xas); 2736 break; 2737 } 2738 2739 if (!folio_try_get_rcu(folio)) { 2740 xas_reset(xas); 2741 continue; 2742 } 2743 nr_pages = folio_nr_pages(folio); 2744 if (nr_pages > max_pages) { 2745 xas_reset(xas); 2746 break; 2747 } 2748 2749 /* Has the page moved or been split? 
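* xas_reload() re-checks the entry now that we hold a reference on the folio.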
*/ 2750 if (unlikely(folio != xas_reload(xas))) { 2751 folio_put(folio); 2752 xas_reset(xas); 2753 break; 2754 } 2755 2756 if (!folio_trylock(folio)) { 2757 folio_put(folio); 2758 xas_reset(xas); 2759 break; 2760 } 2761 if (!folio_test_dirty(folio) || 2762 folio_test_writeback(folio)) { 2763 folio_unlock(folio); 2764 folio_put(folio); 2765 xas_reset(xas); 2766 break; 2767 } 2768 2769 max_pages -= nr_pages; 2770 len = folio_size(folio); 2771 stop = false; 2772 2773 index += nr_pages; 2774 *_count -= nr_pages; 2775 *_len += len; 2776 if (max_pages <= 0 || *_len >= max_len || *_count <= 0) 2777 stop = true; 2778 2779 if (!folio_batch_add(&batch, folio)) 2780 break; 2781 if (stop) 2782 break; 2783 } 2784 2785 xas_pause(xas); 2786 rcu_read_unlock(); 2787 2788 /* Now, if we obtained any pages, we can shift them to being 2789 * writable and mark them for caching. 2790 */ 2791 if (!folio_batch_count(&batch)) 2792 break; 2793 2794 for (i = 0; i < folio_batch_count(&batch); i++) { 2795 folio = batch.folios[i]; 2796 /* The folio should be locked, dirty and not undergoing 2797 * writeback from the loop above. 2798 */ 2799 if (!folio_clear_dirty_for_io(folio)) 2800 WARN_ON(1); 2801 folio_start_writeback(folio); 2802 folio_unlock(folio); 2803 } 2804 2805 folio_batch_release(&batch); 2806 cond_resched(); 2807 } while (!stop); 2808 } 2809 2810 /* 2811 * Write back the locked page and any subsequent non-locked dirty pages. 2812 */ 2813 static ssize_t cifs_write_back_from_locked_folio(struct address_space *mapping, 2814 struct writeback_control *wbc, 2815 struct xa_state *xas, 2816 struct folio *folio, 2817 unsigned long long start, 2818 unsigned long long end) 2819 { 2820 struct inode *inode = mapping->host; 2821 struct TCP_Server_Info *server; 2822 struct cifs_writedata *wdata; 2823 struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb); 2824 struct cifs_credits credits_on_stack; 2825 struct cifs_credits *credits = &credits_on_stack; 2826 struct cifsFileInfo *cfile = NULL; 2827 unsigned long long i_size = i_size_read(inode), max_len; 2828 unsigned int xid, wsize; 2829 size_t len = folio_size(folio); 2830 long count = wbc->nr_to_write; 2831 int rc; 2832 2833 /* The folio should be locked, dirty and not undergoing writeback. */ 2834 if (!folio_clear_dirty_for_io(folio)) 2835 WARN_ON_ONCE(1); 2836 folio_start_writeback(folio); 2837 2838 count -= folio_nr_pages(folio); 2839 2840 xid = get_xid(); 2841 server = cifs_pick_channel(cifs_sb_master_tcon(cifs_sb)->ses); 2842 2843 rc = cifs_get_writable_file(CIFS_I(inode), FIND_WR_ANY, &cfile); 2844 if (rc) { 2845 cifs_dbg(VFS, "No writable handle in writepages rc=%d\n", rc); 2846 goto err_xid; 2847 } 2848 2849 rc = server->ops->wait_mtu_credits(server, cifs_sb->ctx->wsize, 2850 &wsize, credits); 2851 if (rc != 0) 2852 goto err_close; 2853 2854 wdata = cifs_writedata_alloc(cifs_writev_complete); 2855 if (!wdata) { 2856 rc = -ENOMEM; 2857 goto err_uncredit; 2858 } 2859 2860 wdata->sync_mode = wbc->sync_mode; 2861 wdata->offset = folio_pos(folio); 2862 wdata->pid = cfile->pid; 2863 wdata->credits = credits_on_stack; 2864 wdata->cfile = cfile; 2865 wdata->server = server; 2866 cfile = NULL; 2867 2868 /* Find all consecutive lockable dirty pages that have contiguous 2869 * written regions, stopping when we find a page that is not 2870 * immediately lockable, is not dirty or is missing, or we reach the 2871 * end of the range. 2872 */ 2873 if (start < i_size) { 2874 /* Trim the write to the EOF; the extra data is ignored. 
Also 2875 * put an upper limit on the size of a single storedata op. 2876 */ 2877 max_len = wsize; 2878 max_len = min_t(unsigned long long, max_len, end - start + 1); 2879 max_len = min_t(unsigned long long, max_len, i_size - start); 2880 2881 if (len < max_len) { 2882 int max_pages = INT_MAX; 2883 2884 #ifdef CONFIG_CIFS_SMB_DIRECT 2885 if (server->smbd_conn) 2886 max_pages = server->smbd_conn->max_frmr_depth; 2887 #endif 2888 max_pages -= folio_nr_pages(folio); 2889 2890 if (max_pages > 0) 2891 cifs_extend_writeback(mapping, xas, &count, start, 2892 max_pages, max_len, &len); 2893 } 2894 } 2895 len = min_t(unsigned long long, len, i_size - start); 2896 2897 /* We now have a contiguous set of dirty pages, each with writeback 2898 * set; the first page is still locked at this point, but all the rest 2899 * have been unlocked. 2900 */ 2901 folio_unlock(folio); 2902 wdata->bytes = len; 2903 2904 if (start < i_size) { 2905 iov_iter_xarray(&wdata->iter, ITER_SOURCE, &mapping->i_pages, 2906 start, len); 2907 2908 rc = adjust_credits(wdata->server, &wdata->credits, wdata->bytes); 2909 if (rc) 2910 goto err_wdata; 2911 2912 if (wdata->cfile->invalidHandle) 2913 rc = -EAGAIN; 2914 else 2915 rc = wdata->server->ops->async_writev(wdata, 2916 cifs_writedata_release); 2917 if (rc >= 0) { 2918 kref_put(&wdata->refcount, cifs_writedata_release); 2919 goto err_close; 2920 } 2921 } else { 2922 /* The dirty region was entirely beyond the EOF. */ 2923 cifs_pages_written_back(inode, start, len); 2924 rc = 0; 2925 } 2926 2927 err_wdata: 2928 kref_put(&wdata->refcount, cifs_writedata_release); 2929 err_uncredit: 2930 add_credits_and_wake_if(server, credits, 0); 2931 err_close: 2932 if (cfile) 2933 cifsFileInfo_put(cfile); 2934 err_xid: 2935 free_xid(xid); 2936 if (rc == 0) { 2937 wbc->nr_to_write = count; 2938 rc = len; 2939 } else if (is_retryable_error(rc)) { 2940 cifs_pages_write_redirty(inode, start, len); 2941 } else { 2942 cifs_pages_write_failed(inode, start, len); 2943 mapping_set_error(mapping, rc); 2944 } 2945 /* Indication to update ctime and mtime as close is deferred */ 2946 set_bit(CIFS_INO_MODIFIED_ATTR, &CIFS_I(inode)->flags); 2947 return rc; 2948 } 2949 2950 /* 2951 * write a region of pages back to the server 2952 */ 2953 static ssize_t cifs_writepages_begin(struct address_space *mapping, 2954 struct writeback_control *wbc, 2955 struct xa_state *xas, 2956 unsigned long long *_start, 2957 unsigned long long end) 2958 { 2959 struct folio *folio; 2960 unsigned long long start = *_start; 2961 ssize_t ret; 2962 int skips = 0; 2963 2964 search_again: 2965 /* Find the first dirty page. 
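* (one tagged PAGECACHE_TAG_DIRTY) at or after the current xas index.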
*/ 2966 rcu_read_lock(); 2967 2968 for (;;) { 2969 folio = xas_find_marked(xas, end / PAGE_SIZE, PAGECACHE_TAG_DIRTY); 2970 if (xas_retry(xas, folio) || xa_is_value(folio)) 2971 continue; 2972 if (!folio) 2973 break; 2974 2975 if (!folio_try_get_rcu(folio)) { 2976 xas_reset(xas); 2977 continue; 2978 } 2979 2980 if (unlikely(folio != xas_reload(xas))) { 2981 folio_put(folio); 2982 xas_reset(xas); 2983 continue; 2984 } 2985 2986 xas_pause(xas); 2987 break; 2988 } 2989 rcu_read_unlock(); 2990 if (!folio) 2991 return 0; 2992 2993 start = folio_pos(folio); /* May regress with THPs */ 2994 2995 /* At this point we hold neither the i_pages lock nor the page lock: 2996 * the page may be truncated or invalidated (changing page->mapping to 2997 * NULL), or even swizzled back from swapper_space to tmpfs file 2998 * mapping 2999 */ 3000 lock_again: 3001 if (wbc->sync_mode != WB_SYNC_NONE) { 3002 ret = folio_lock_killable(folio); 3003 if (ret < 0) 3004 return ret; 3005 } else { 3006 if (!folio_trylock(folio)) 3007 goto search_again; 3008 } 3009 3010 if (folio->mapping != mapping || 3011 !folio_test_dirty(folio)) { 3012 start += folio_size(folio); 3013 folio_unlock(folio); 3014 goto search_again; 3015 } 3016 3017 if (folio_test_writeback(folio) || 3018 folio_test_fscache(folio)) { 3019 folio_unlock(folio); 3020 if (wbc->sync_mode != WB_SYNC_NONE) { 3021 folio_wait_writeback(folio); 3022 #ifdef CONFIG_CIFS_FSCACHE 3023 folio_wait_fscache(folio); 3024 #endif 3025 goto lock_again; 3026 } 3027 3028 start += folio_size(folio); 3029 if (wbc->sync_mode == WB_SYNC_NONE) { 3030 if (skips >= 5 || need_resched()) { 3031 ret = 0; 3032 goto out; 3033 } 3034 skips++; 3035 } 3036 goto search_again; 3037 } 3038 3039 ret = cifs_write_back_from_locked_folio(mapping, wbc, xas, folio, start, end); 3040 out: 3041 if (ret > 0) 3042 *_start = start + ret; 3043 return ret; 3044 } 3045 3046 /* 3047 * Write a region of pages back to the server 3048 */ 3049 static int cifs_writepages_region(struct address_space *mapping, 3050 struct writeback_control *wbc, 3051 unsigned long long *_start, 3052 unsigned long long end) 3053 { 3054 ssize_t ret; 3055 3056 XA_STATE(xas, &mapping->i_pages, *_start / PAGE_SIZE); 3057 3058 do { 3059 ret = cifs_writepages_begin(mapping, wbc, &xas, _start, end); 3060 if (ret > 0 && wbc->nr_to_write > 0) 3061 cond_resched(); 3062 } while (ret > 0 && wbc->nr_to_write > 0); 3063 3064 return ret > 0 ? 0 : ret; 3065 } 3066 3067 /* 3068 * Write some of the pending data back to the server 3069 */ 3070 static int cifs_writepages(struct address_space *mapping, 3071 struct writeback_control *wbc) 3072 { 3073 loff_t start, end; 3074 int ret; 3075 3076 /* We have to be careful as we can end up racing with setattr() 3077 * truncating the pagecache since the caller doesn't take a lock here 3078 * to prevent it. 
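* Each pass therefore re-checks folio->mapping after locking the folio.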
3079 */ 3080 3081 if (wbc->range_cyclic && mapping->writeback_index) { 3082 start = mapping->writeback_index * PAGE_SIZE; 3083 ret = cifs_writepages_region(mapping, wbc, &start, LLONG_MAX); 3084 if (ret < 0) 3085 goto out; 3086 3087 if (wbc->nr_to_write <= 0) { 3088 mapping->writeback_index = start / PAGE_SIZE; 3089 goto out; 3090 } 3091 3092 start = 0; 3093 end = mapping->writeback_index * PAGE_SIZE; 3094 mapping->writeback_index = 0; 3095 ret = cifs_writepages_region(mapping, wbc, &start, end); 3096 if (ret == 0) 3097 mapping->writeback_index = start / PAGE_SIZE; 3098 } else if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX) { 3099 start = 0; 3100 ret = cifs_writepages_region(mapping, wbc, &start, LLONG_MAX); 3101 if (wbc->nr_to_write > 0 && ret == 0) 3102 mapping->writeback_index = start / PAGE_SIZE; 3103 } else { 3104 start = wbc->range_start; 3105 ret = cifs_writepages_region(mapping, wbc, &start, wbc->range_end); 3106 } 3107 3108 out: 3109 return ret; 3110 } 3111 3112 static int 3113 cifs_writepage_locked(struct page *page, struct writeback_control *wbc) 3114 { 3115 int rc; 3116 unsigned int xid; 3117 3118 xid = get_xid(); 3119 /* BB add check for wbc flags */ 3120 get_page(page); 3121 if (!PageUptodate(page)) 3122 cifs_dbg(FYI, "ppw - page not up to date\n"); 3123 3124 /* 3125 * Set the "writeback" flag, and clear "dirty" in the radix tree. 3126 * 3127 * A writepage() implementation always needs to do either this, 3128 * or re-dirty the page with "redirty_page_for_writepage()" in 3129 * the case of a failure. 3130 * 3131 * Just unlocking the page will cause the radix tree tag-bits 3132 * to fail to update with the state of the page correctly. 3133 */ 3134 set_page_writeback(page); 3135 retry_write: 3136 rc = cifs_partialpagewrite(page, 0, PAGE_SIZE); 3137 if (is_retryable_error(rc)) { 3138 if (wbc->sync_mode == WB_SYNC_ALL && rc == -EAGAIN) 3139 goto retry_write; 3140 redirty_page_for_writepage(wbc, page); 3141 } else if (rc != 0) { 3142 SetPageError(page); 3143 mapping_set_error(page->mapping, rc); 3144 } else { 3145 SetPageUptodate(page); 3146 } 3147 end_page_writeback(page); 3148 put_page(page); 3149 free_xid(xid); 3150 return rc; 3151 } 3152 3153 static int cifs_write_end(struct file *file, struct address_space *mapping, 3154 loff_t pos, unsigned len, unsigned copied, 3155 struct page *page, void *fsdata) 3156 { 3157 int rc; 3158 struct inode *inode = mapping->host; 3159 struct cifsFileInfo *cfile = file->private_data; 3160 struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb); 3161 struct folio *folio = page_folio(page); 3162 __u32 pid; 3163 3164 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD) 3165 pid = cfile->pid; 3166 else 3167 pid = current->tgid; 3168 3169 cifs_dbg(FYI, "write_end for page %p from pos %lld with %d bytes\n", 3170 page, pos, copied); 3171 3172 if (folio_test_checked(folio)) { 3173 if (copied == len) 3174 folio_mark_uptodate(folio); 3175 folio_clear_checked(folio); 3176 } else if (!folio_test_uptodate(folio) && copied == PAGE_SIZE) 3177 folio_mark_uptodate(folio); 3178 3179 if (!folio_test_uptodate(folio)) { 3180 char *page_data; 3181 unsigned offset = pos & (PAGE_SIZE - 1); 3182 unsigned int xid; 3183 3184 xid = get_xid(); 3185 /* this is probably better than directly calling 3186 partialpage_write since in this function the file handle is 3187 known which we might as well leverage */ 3188 /* BB check if anything else missing out of ppw 3189 such as updating last write time */ 3190 page_data = kmap(page); 3191 rc = cifs_write(cfile, pid, 
page_data + offset, copied, &pos); 3192 /* if (rc < 0) should we set writebehind rc? */ 3193 kunmap(page); 3194 3195 free_xid(xid); 3196 } else { 3197 rc = copied; 3198 pos += copied; 3199 set_page_dirty(page); 3200 } 3201 3202 if (rc > 0) { 3203 spin_lock(&inode->i_lock); 3204 if (pos > inode->i_size) { 3205 i_size_write(inode, pos); 3206 inode->i_blocks = (512 - 1 + pos) >> 9; 3207 } 3208 spin_unlock(&inode->i_lock); 3209 } 3210 3211 unlock_page(page); 3212 put_page(page); 3213 /* Indication to update ctime and mtime as close is deferred */ 3214 set_bit(CIFS_INO_MODIFIED_ATTR, &CIFS_I(inode)->flags); 3215 3216 return rc; 3217 } 3218 3219 int cifs_strict_fsync(struct file *file, loff_t start, loff_t end, 3220 int datasync) 3221 { 3222 unsigned int xid; 3223 int rc = 0; 3224 struct cifs_tcon *tcon; 3225 struct TCP_Server_Info *server; 3226 struct cifsFileInfo *smbfile = file->private_data; 3227 struct inode *inode = file_inode(file); 3228 struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb); 3229 3230 rc = file_write_and_wait_range(file, start, end); 3231 if (rc) { 3232 trace_cifs_fsync_err(inode->i_ino, rc); 3233 return rc; 3234 } 3235 3236 xid = get_xid(); 3237 3238 cifs_dbg(FYI, "Sync file - name: %pD datasync: 0x%x\n", 3239 file, datasync); 3240 3241 if (!CIFS_CACHE_READ(CIFS_I(inode))) { 3242 rc = cifs_zap_mapping(inode); 3243 if (rc) { 3244 cifs_dbg(FYI, "rc: %d during invalidate phase\n", rc); 3245 rc = 0; /* don't care about it in fsync */ 3246 } 3247 } 3248 3249 tcon = tlink_tcon(smbfile->tlink); 3250 if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC)) { 3251 server = tcon->ses->server; 3252 if (server->ops->flush == NULL) { 3253 rc = -ENOSYS; 3254 goto strict_fsync_exit; 3255 } 3256 3257 if ((OPEN_FMODE(smbfile->f_flags) & FMODE_WRITE) == 0) { 3258 smbfile = find_writable_file(CIFS_I(inode), FIND_WR_ANY); 3259 if (smbfile) { 3260 rc = server->ops->flush(xid, tcon, &smbfile->fid); 3261 cifsFileInfo_put(smbfile); 3262 } else 3263 cifs_dbg(FYI, "ignore fsync for file not open for write\n"); 3264 } else 3265 rc = server->ops->flush(xid, tcon, &smbfile->fid); 3266 } 3267 3268 strict_fsync_exit: 3269 free_xid(xid); 3270 return rc; 3271 } 3272 3273 int cifs_fsync(struct file *file, loff_t start, loff_t end, int datasync) 3274 { 3275 unsigned int xid; 3276 int rc = 0; 3277 struct cifs_tcon *tcon; 3278 struct TCP_Server_Info *server; 3279 struct cifsFileInfo *smbfile = file->private_data; 3280 struct inode *inode = file_inode(file); 3281 struct cifs_sb_info *cifs_sb = CIFS_FILE_SB(file); 3282 3283 rc = file_write_and_wait_range(file, start, end); 3284 if (rc) { 3285 trace_cifs_fsync_err(file_inode(file)->i_ino, rc); 3286 return rc; 3287 } 3288 3289 xid = get_xid(); 3290 3291 cifs_dbg(FYI, "Sync file - name: %pD datasync: 0x%x\n", 3292 file, datasync); 3293 3294 tcon = tlink_tcon(smbfile->tlink); 3295 if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC)) { 3296 server = tcon->ses->server; 3297 if (server->ops->flush == NULL) { 3298 rc = -ENOSYS; 3299 goto fsync_exit; 3300 } 3301 3302 if ((OPEN_FMODE(smbfile->f_flags) & FMODE_WRITE) == 0) { 3303 smbfile = find_writable_file(CIFS_I(inode), FIND_WR_ANY); 3304 if (smbfile) { 3305 rc = server->ops->flush(xid, tcon, &smbfile->fid); 3306 cifsFileInfo_put(smbfile); 3307 } else 3308 cifs_dbg(FYI, "ignore fsync for file not open for write\n"); 3309 } else 3310 rc = server->ops->flush(xid, tcon, &smbfile->fid); 3311 } 3312 3313 fsync_exit: 3314 free_xid(xid); 3315 return rc; 3316 } 3317 3318 /* 3319 * As file closes, flush all cached write data for 
this inode checking 3320 * for write behind errors. 3321 */ 3322 int cifs_flush(struct file *file, fl_owner_t id) 3323 { 3324 struct inode *inode = file_inode(file); 3325 int rc = 0; 3326 3327 if (file->f_mode & FMODE_WRITE) 3328 rc = filemap_write_and_wait(inode->i_mapping); 3329 3330 cifs_dbg(FYI, "Flush inode %p file %p rc %d\n", inode, file, rc); 3331 if (rc) { 3332 /* get more nuanced writeback errors */ 3333 rc = filemap_check_wb_err(file->f_mapping, 0); 3334 trace_cifs_flush_err(inode->i_ino, rc); 3335 } 3336 return rc; 3337 } 3338 3339 static void 3340 cifs_uncached_writedata_release(struct kref *refcount) 3341 { 3342 struct cifs_writedata *wdata = container_of(refcount, 3343 struct cifs_writedata, refcount); 3344 3345 kref_put(&wdata->ctx->refcount, cifs_aio_ctx_release); 3346 cifs_writedata_release(refcount); 3347 } 3348 3349 static void collect_uncached_write_data(struct cifs_aio_ctx *ctx); 3350 3351 static void 3352 cifs_uncached_writev_complete(struct work_struct *work) 3353 { 3354 struct cifs_writedata *wdata = container_of(work, 3355 struct cifs_writedata, work); 3356 struct inode *inode = d_inode(wdata->cfile->dentry); 3357 struct cifsInodeInfo *cifsi = CIFS_I(inode); 3358 3359 spin_lock(&inode->i_lock); 3360 cifs_update_eof(cifsi, wdata->offset, wdata->bytes); 3361 if (cifsi->server_eof > inode->i_size) 3362 i_size_write(inode, cifsi->server_eof); 3363 spin_unlock(&inode->i_lock); 3364 3365 complete(&wdata->done); 3366 collect_uncached_write_data(wdata->ctx); 3367 /* the below call can possibly free the last ref to aio ctx */ 3368 kref_put(&wdata->refcount, cifs_uncached_writedata_release); 3369 } 3370 3371 static int 3372 cifs_resend_wdata(struct cifs_writedata *wdata, struct list_head *wdata_list, 3373 struct cifs_aio_ctx *ctx) 3374 { 3375 unsigned int wsize; 3376 struct cifs_credits credits; 3377 int rc; 3378 struct TCP_Server_Info *server = wdata->server; 3379 3380 do { 3381 if (wdata->cfile->invalidHandle) { 3382 rc = cifs_reopen_file(wdata->cfile, false); 3383 if (rc == -EAGAIN) 3384 continue; 3385 else if (rc) 3386 break; 3387 } 3388 3389 3390 /* 3391 * Wait for credits to resend this wdata. 3392 * Note: we are attempting to resend the whole wdata not in 3393 * segments 3394 */ 3395 do { 3396 rc = server->ops->wait_mtu_credits(server, wdata->bytes, 3397 &wsize, &credits); 3398 if (rc) 3399 goto fail; 3400 3401 if (wsize < wdata->bytes) { 3402 add_credits_and_wake_if(server, &credits, 0); 3403 msleep(1000); 3404 } 3405 } while (wsize < wdata->bytes); 3406 wdata->credits = credits; 3407 3408 rc = adjust_credits(server, &wdata->credits, wdata->bytes); 3409 3410 if (!rc) { 3411 if (wdata->cfile->invalidHandle) 3412 rc = -EAGAIN; 3413 else { 3414 #ifdef CONFIG_CIFS_SMB_DIRECT 3415 if (wdata->mr) { 3416 wdata->mr->need_invalidate = true; 3417 smbd_deregister_mr(wdata->mr); 3418 wdata->mr = NULL; 3419 } 3420 #endif 3421 rc = server->ops->async_writev(wdata, 3422 cifs_uncached_writedata_release); 3423 } 3424 } 3425 3426 /* If the write was successfully sent, we are done */ 3427 if (!rc) { 3428 list_add_tail(&wdata->list, wdata_list); 3429 return 0; 3430 } 3431 3432 /* Roll back credits and retry if needed */ 3433 add_credits_and_wake_if(server, &wdata->credits, 0); 3434 } while (rc == -EAGAIN); 3435 3436 fail: 3437 kref_put(&wdata->refcount, cifs_uncached_writedata_release); 3438 return rc; 3439 } 3440 3441 /* 3442 * Select span of a bvec iterator we're going to use. Limit it by both maximum 3443 * size and maximum number of segments. 
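* Returns the span length in bytes and the number of segments used via *_nsegs.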
3444 */ 3445 static size_t cifs_limit_bvec_subset(const struct iov_iter *iter, size_t max_size, 3446 size_t max_segs, unsigned int *_nsegs) 3447 { 3448 const struct bio_vec *bvecs = iter->bvec; 3449 unsigned int nbv = iter->nr_segs, ix = 0, nsegs = 0; 3450 size_t len, span = 0, n = iter->count; 3451 size_t skip = iter->iov_offset; 3452 3453 if (WARN_ON(!iov_iter_is_bvec(iter)) || n == 0) 3454 return 0; 3455 3456 while (n && ix < nbv && skip) { 3457 len = bvecs[ix].bv_len; 3458 if (skip < len) 3459 break; 3460 skip -= len; 3461 n -= len; 3462 ix++; 3463 } 3464 3465 while (n && ix < nbv) { 3466 len = min3(n, bvecs[ix].bv_len - skip, max_size); 3467 span += len; 3468 max_size -= len; 3469 nsegs++; 3470 ix++; 3471 if (max_size == 0 || nsegs >= max_segs) 3472 break; 3473 skip = 0; 3474 n -= len; 3475 } 3476 3477 *_nsegs = nsegs; 3478 return span; 3479 } 3480 3481 static int 3482 cifs_write_from_iter(loff_t fpos, size_t len, struct iov_iter *from, 3483 struct cifsFileInfo *open_file, 3484 struct cifs_sb_info *cifs_sb, struct list_head *wdata_list, 3485 struct cifs_aio_ctx *ctx) 3486 { 3487 int rc = 0; 3488 size_t cur_len, max_len; 3489 struct cifs_writedata *wdata; 3490 pid_t pid; 3491 struct TCP_Server_Info *server; 3492 unsigned int xid, max_segs = INT_MAX; 3493 3494 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD) 3495 pid = open_file->pid; 3496 else 3497 pid = current->tgid; 3498 3499 server = cifs_pick_channel(tlink_tcon(open_file->tlink)->ses); 3500 xid = get_xid(); 3501 3502 #ifdef CONFIG_CIFS_SMB_DIRECT 3503 if (server->smbd_conn) 3504 max_segs = server->smbd_conn->max_frmr_depth; 3505 #endif 3506 3507 do { 3508 struct cifs_credits credits_on_stack; 3509 struct cifs_credits *credits = &credits_on_stack; 3510 unsigned int wsize, nsegs = 0; 3511 3512 if (signal_pending(current)) { 3513 rc = -EINTR; 3514 break; 3515 } 3516 3517 if (open_file->invalidHandle) { 3518 rc = cifs_reopen_file(open_file, false); 3519 if (rc == -EAGAIN) 3520 continue; 3521 else if (rc) 3522 break; 3523 } 3524 3525 rc = server->ops->wait_mtu_credits(server, cifs_sb->ctx->wsize, 3526 &wsize, credits); 3527 if (rc) 3528 break; 3529 3530 max_len = min_t(const size_t, len, wsize); 3531 if (!max_len) { 3532 rc = -EAGAIN; 3533 add_credits_and_wake_if(server, credits, 0); 3534 break; 3535 } 3536 3537 cur_len = cifs_limit_bvec_subset(from, max_len, max_segs, &nsegs); 3538 cifs_dbg(FYI, "write_from_iter len=%zx/%zx nsegs=%u/%lu/%u\n", 3539 cur_len, max_len, nsegs, from->nr_segs, max_segs); 3540 if (cur_len == 0) { 3541 rc = -EIO; 3542 add_credits_and_wake_if(server, credits, 0); 3543 break; 3544 } 3545 3546 wdata = cifs_writedata_alloc(cifs_uncached_writev_complete); 3547 if (!wdata) { 3548 rc = -ENOMEM; 3549 add_credits_and_wake_if(server, credits, 0); 3550 break; 3551 } 3552 3553 wdata->sync_mode = WB_SYNC_ALL; 3554 wdata->offset = (__u64)fpos; 3555 wdata->cfile = cifsFileInfo_get(open_file); 3556 wdata->server = server; 3557 wdata->pid = pid; 3558 wdata->bytes = cur_len; 3559 wdata->credits = credits_on_stack; 3560 wdata->iter = *from; 3561 wdata->ctx = ctx; 3562 kref_get(&ctx->refcount); 3563 3564 iov_iter_truncate(&wdata->iter, cur_len); 3565 3566 rc = adjust_credits(server, &wdata->credits, wdata->bytes); 3567 3568 if (!rc) { 3569 if (wdata->cfile->invalidHandle) 3570 rc = -EAGAIN; 3571 else 3572 rc = server->ops->async_writev(wdata, 3573 cifs_uncached_writedata_release); 3574 } 3575 3576 if (rc) { 3577 add_credits_and_wake_if(server, &wdata->credits, 0); 3578 kref_put(&wdata->refcount, 3579 
cifs_uncached_writedata_release); 3580 if (rc == -EAGAIN) 3581 continue; 3582 break; 3583 } 3584 3585 list_add_tail(&wdata->list, wdata_list); 3586 iov_iter_advance(from, cur_len); 3587 fpos += cur_len; 3588 len -= cur_len; 3589 } while (len > 0); 3590 3591 free_xid(xid); 3592 return rc; 3593 } 3594 3595 static void collect_uncached_write_data(struct cifs_aio_ctx *ctx) 3596 { 3597 struct cifs_writedata *wdata, *tmp; 3598 struct cifs_tcon *tcon; 3599 struct cifs_sb_info *cifs_sb; 3600 struct dentry *dentry = ctx->cfile->dentry; 3601 ssize_t rc; 3602 3603 tcon = tlink_tcon(ctx->cfile->tlink); 3604 cifs_sb = CIFS_SB(dentry->d_sb); 3605 3606 mutex_lock(&ctx->aio_mutex); 3607 3608 if (list_empty(&ctx->list)) { 3609 mutex_unlock(&ctx->aio_mutex); 3610 return; 3611 } 3612 3613 rc = ctx->rc; 3614 /* 3615 * Wait for and collect replies for any successful sends in order of 3616 * increasing offset. Once an error is hit, then return without waiting 3617 * for any more replies. 3618 */ 3619 restart_loop: 3620 list_for_each_entry_safe(wdata, tmp, &ctx->list, list) { 3621 if (!rc) { 3622 if (!try_wait_for_completion(&wdata->done)) { 3623 mutex_unlock(&ctx->aio_mutex); 3624 return; 3625 } 3626 3627 if (wdata->result) 3628 rc = wdata->result; 3629 else 3630 ctx->total_len += wdata->bytes; 3631 3632 /* resend call if it's a retryable error */ 3633 if (rc == -EAGAIN) { 3634 struct list_head tmp_list; 3635 struct iov_iter tmp_from = ctx->iter; 3636 3637 INIT_LIST_HEAD(&tmp_list); 3638 list_del_init(&wdata->list); 3639 3640 if (ctx->direct_io) 3641 rc = cifs_resend_wdata( 3642 wdata, &tmp_list, ctx); 3643 else { 3644 iov_iter_advance(&tmp_from, 3645 wdata->offset - ctx->pos); 3646 3647 rc = cifs_write_from_iter(wdata->offset, 3648 wdata->bytes, &tmp_from, 3649 ctx->cfile, cifs_sb, &tmp_list, 3650 ctx); 3651 3652 kref_put(&wdata->refcount, 3653 cifs_uncached_writedata_release); 3654 } 3655 3656 list_splice(&tmp_list, &ctx->list); 3657 goto restart_loop; 3658 } 3659 } 3660 list_del_init(&wdata->list); 3661 kref_put(&wdata->refcount, cifs_uncached_writedata_release); 3662 } 3663 3664 cifs_stats_bytes_written(tcon, ctx->total_len); 3665 set_bit(CIFS_INO_INVALID_MAPPING, &CIFS_I(dentry->d_inode)->flags); 3666 3667 ctx->rc = (rc == 0) ? 
ctx->total_len : rc; 3668 3669 mutex_unlock(&ctx->aio_mutex); 3670 3671 if (ctx->iocb && ctx->iocb->ki_complete) 3672 ctx->iocb->ki_complete(ctx->iocb, ctx->rc); 3673 else 3674 complete(&ctx->done); 3675 } 3676 3677 static ssize_t __cifs_writev( 3678 struct kiocb *iocb, struct iov_iter *from, bool direct) 3679 { 3680 struct file *file = iocb->ki_filp; 3681 ssize_t total_written = 0; 3682 struct cifsFileInfo *cfile; 3683 struct cifs_tcon *tcon; 3684 struct cifs_sb_info *cifs_sb; 3685 struct cifs_aio_ctx *ctx; 3686 int rc; 3687 3688 rc = generic_write_checks(iocb, from); 3689 if (rc <= 0) 3690 return rc; 3691 3692 cifs_sb = CIFS_FILE_SB(file); 3693 cfile = file->private_data; 3694 tcon = tlink_tcon(cfile->tlink); 3695 3696 if (!tcon->ses->server->ops->async_writev) 3697 return -ENOSYS; 3698 3699 ctx = cifs_aio_ctx_alloc(); 3700 if (!ctx) 3701 return -ENOMEM; 3702 3703 ctx->cfile = cifsFileInfo_get(cfile); 3704 3705 if (!is_sync_kiocb(iocb)) 3706 ctx->iocb = iocb; 3707 3708 ctx->pos = iocb->ki_pos; 3709 ctx->direct_io = direct; 3710 ctx->nr_pinned_pages = 0; 3711 3712 if (user_backed_iter(from)) { 3713 /* 3714 * Extract IOVEC/UBUF-type iterators to a BVEC-type iterator as 3715 * they contain references to the calling process's virtual 3716 * memory layout which won't be available in an async worker 3717 * thread. This also takes a pin on every folio involved. 3718 */ 3719 rc = netfs_extract_user_iter(from, iov_iter_count(from), 3720 &ctx->iter, 0); 3721 if (rc < 0) { 3722 kref_put(&ctx->refcount, cifs_aio_ctx_release); 3723 return rc; 3724 } 3725 3726 ctx->nr_pinned_pages = rc; 3727 ctx->bv = (void *)ctx->iter.bvec; 3728 ctx->bv_need_unpin = iov_iter_extract_will_pin(from); 3729 } else if ((iov_iter_is_bvec(from) || iov_iter_is_kvec(from)) && 3730 !is_sync_kiocb(iocb)) { 3731 /* 3732 * If the op is asynchronous, we need to copy the list attached 3733 * to a BVEC/KVEC-type iterator, but we assume that the storage 3734 * will be pinned by the caller; in any case, we may or may not 3735 * be able to pin the pages, so we don't try. 3736 */ 3737 ctx->bv = (void *)dup_iter(&ctx->iter, from, GFP_KERNEL); 3738 if (!ctx->bv) { 3739 kref_put(&ctx->refcount, cifs_aio_ctx_release); 3740 return -ENOMEM; 3741 } 3742 } else { 3743 /* 3744 * Otherwise, we just pass the iterator down as-is and rely on 3745 * the caller to make sure the pages referred to by the 3746 * iterator don't evaporate. 3747 */ 3748 ctx->iter = *from; 3749 } 3750 3751 ctx->len = iov_iter_count(&ctx->iter); 3752 3753 /* grab a lock here due to read response handlers can access ctx */ 3754 mutex_lock(&ctx->aio_mutex); 3755 3756 rc = cifs_write_from_iter(iocb->ki_pos, ctx->len, &ctx->iter, 3757 cfile, cifs_sb, &ctx->list, ctx); 3758 3759 /* 3760 * If at least one write was successfully sent, then discard any rc 3761 * value from the later writes. If the other write succeeds, then 3762 * we'll end up returning whatever was written. If it fails, then 3763 * we'll get a new rc value from that. 
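* collect_uncached_write_data() gathers the per-request results later.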
3764 */ 3765 if (!list_empty(&ctx->list)) 3766 rc = 0; 3767 3768 mutex_unlock(&ctx->aio_mutex); 3769 3770 if (rc) { 3771 kref_put(&ctx->refcount, cifs_aio_ctx_release); 3772 return rc; 3773 } 3774 3775 if (!is_sync_kiocb(iocb)) { 3776 kref_put(&ctx->refcount, cifs_aio_ctx_release); 3777 return -EIOCBQUEUED; 3778 } 3779 3780 rc = wait_for_completion_killable(&ctx->done); 3781 if (rc) { 3782 mutex_lock(&ctx->aio_mutex); 3783 ctx->rc = rc = -EINTR; 3784 total_written = ctx->total_len; 3785 mutex_unlock(&ctx->aio_mutex); 3786 } else { 3787 rc = ctx->rc; 3788 total_written = ctx->total_len; 3789 } 3790 3791 kref_put(&ctx->refcount, cifs_aio_ctx_release); 3792 3793 if (unlikely(!total_written)) 3794 return rc; 3795 3796 iocb->ki_pos += total_written; 3797 return total_written; 3798 } 3799 3800 ssize_t cifs_direct_writev(struct kiocb *iocb, struct iov_iter *from) 3801 { 3802 struct file *file = iocb->ki_filp; 3803 3804 cifs_revalidate_mapping(file->f_inode); 3805 return __cifs_writev(iocb, from, true); 3806 } 3807 3808 ssize_t cifs_user_writev(struct kiocb *iocb, struct iov_iter *from) 3809 { 3810 return __cifs_writev(iocb, from, false); 3811 } 3812 3813 static ssize_t 3814 cifs_writev(struct kiocb *iocb, struct iov_iter *from) 3815 { 3816 struct file *file = iocb->ki_filp; 3817 struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data; 3818 struct inode *inode = file->f_mapping->host; 3819 struct cifsInodeInfo *cinode = CIFS_I(inode); 3820 struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server; 3821 ssize_t rc; 3822 3823 inode_lock(inode); 3824 /* 3825 * We need to hold the sem to be sure nobody modifies the lock list 3826 * with a brlock that prevents writing. 3827 */ 3828 down_read(&cinode->lock_sem); 3829 3830 rc = generic_write_checks(iocb, from); 3831 if (rc <= 0) 3832 goto out; 3833 3834 if (!cifs_find_lock_conflict(cfile, iocb->ki_pos, iov_iter_count(from), 3835 server->vals->exclusive_lock_type, 0, 3836 NULL, CIFS_WRITE_OP)) 3837 rc = __generic_file_write_iter(iocb, from); 3838 else 3839 rc = -EACCES; 3840 out: 3841 up_read(&cinode->lock_sem); 3842 inode_unlock(inode); 3843 3844 if (rc > 0) 3845 rc = generic_write_sync(iocb, rc); 3846 return rc; 3847 } 3848 3849 ssize_t 3850 cifs_strict_writev(struct kiocb *iocb, struct iov_iter *from) 3851 { 3852 struct inode *inode = file_inode(iocb->ki_filp); 3853 struct cifsInodeInfo *cinode = CIFS_I(inode); 3854 struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb); 3855 struct cifsFileInfo *cfile = (struct cifsFileInfo *) 3856 iocb->ki_filp->private_data; 3857 struct cifs_tcon *tcon = tlink_tcon(cfile->tlink); 3858 ssize_t written; 3859 3860 written = cifs_get_writer(cinode); 3861 if (written) 3862 return written; 3863 3864 if (CIFS_CACHE_WRITE(cinode)) { 3865 if (cap_unix(tcon->ses) && 3866 (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) 3867 && ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0)) { 3868 written = generic_file_write_iter(iocb, from); 3869 goto out; 3870 } 3871 written = cifs_writev(iocb, from); 3872 goto out; 3873 } 3874 /* 3875 * For non-oplocked files in strict cache mode we need to write the data 3876 * to the server exactly from pos to pos+len-1 rather than flush all 3877 * affected pages because it may cause an error with mandatory locks on 3878 * these pages but not on the region from pos to pos+len-1.
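* cifs_user_writev() below sends the data uncached rather than through the pagecache.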
3879 */ 3880 written = cifs_user_writev(iocb, from); 3881 if (CIFS_CACHE_READ(cinode)) { 3882 /* 3883 * We have read level caching and we have just sent a write 3884 * request to the server thus making data in the cache stale. 3885 * Zap the cache and set oplock/lease level to NONE to avoid 3886 * reading stale data from the cache. All subsequent read 3887 * operations will read new data from the server. 3888 */ 3889 cifs_zap_mapping(inode); 3890 cifs_dbg(FYI, "Set Oplock/Lease to NONE for inode=%p after write\n", 3891 inode); 3892 cinode->oplock = 0; 3893 } 3894 out: 3895 cifs_put_writer(cinode); 3896 return written; 3897 } 3898 3899 static struct cifs_readdata *cifs_readdata_alloc(work_func_t complete) 3900 { 3901 struct cifs_readdata *rdata; 3902 3903 rdata = kzalloc(sizeof(*rdata), GFP_KERNEL); 3904 if (rdata) { 3905 kref_init(&rdata->refcount); 3906 INIT_LIST_HEAD(&rdata->list); 3907 init_completion(&rdata->done); 3908 INIT_WORK(&rdata->work, complete); 3909 } 3910 3911 return rdata; 3912 } 3913 3914 void 3915 cifs_readdata_release(struct kref *refcount) 3916 { 3917 struct cifs_readdata *rdata = container_of(refcount, 3918 struct cifs_readdata, refcount); 3919 3920 if (rdata->ctx) 3921 kref_put(&rdata->ctx->refcount, cifs_aio_ctx_release); 3922 #ifdef CONFIG_CIFS_SMB_DIRECT 3923 if (rdata->mr) { 3924 smbd_deregister_mr(rdata->mr); 3925 rdata->mr = NULL; 3926 } 3927 #endif 3928 if (rdata->cfile) 3929 cifsFileInfo_put(rdata->cfile); 3930 3931 kfree(rdata); 3932 } 3933 3934 static void collect_uncached_read_data(struct cifs_aio_ctx *ctx); 3935 3936 static void 3937 cifs_uncached_readv_complete(struct work_struct *work) 3938 { 3939 struct cifs_readdata *rdata = container_of(work, 3940 struct cifs_readdata, work); 3941 3942 complete(&rdata->done); 3943 collect_uncached_read_data(rdata->ctx); 3944 /* the below call can possibly free the last ref to aio ctx */ 3945 kref_put(&rdata->refcount, cifs_readdata_release); 3946 } 3947 3948 static int cifs_resend_rdata(struct cifs_readdata *rdata, 3949 struct list_head *rdata_list, 3950 struct cifs_aio_ctx *ctx) 3951 { 3952 unsigned int rsize; 3953 struct cifs_credits credits; 3954 int rc; 3955 struct TCP_Server_Info *server; 3956 3957 /* XXX: should we pick a new channel here? */ 3958 server = rdata->server; 3959 3960 do { 3961 if (rdata->cfile->invalidHandle) { 3962 rc = cifs_reopen_file(rdata->cfile, true); 3963 if (rc == -EAGAIN) 3964 continue; 3965 else if (rc) 3966 break; 3967 } 3968 3969 /* 3970 * Wait for credits to resend this rdata. 
3971 * Note: we are attempting to resend the whole rdata not in 3972 * segments 3973 */ 3974 do { 3975 rc = server->ops->wait_mtu_credits(server, rdata->bytes, 3976 &rsize, &credits); 3977 3978 if (rc) 3979 goto fail; 3980 3981 if (rsize < rdata->bytes) { 3982 add_credits_and_wake_if(server, &credits, 0); 3983 msleep(1000); 3984 } 3985 } while (rsize < rdata->bytes); 3986 rdata->credits = credits; 3987 3988 rc = adjust_credits(server, &rdata->credits, rdata->bytes); 3989 if (!rc) { 3990 if (rdata->cfile->invalidHandle) 3991 rc = -EAGAIN; 3992 else { 3993 #ifdef CONFIG_CIFS_SMB_DIRECT 3994 if (rdata->mr) { 3995 rdata->mr->need_invalidate = true; 3996 smbd_deregister_mr(rdata->mr); 3997 rdata->mr = NULL; 3998 } 3999 #endif 4000 rc = server->ops->async_readv(rdata); 4001 } 4002 } 4003 4004 /* If the read was successfully sent, we are done */ 4005 if (!rc) { 4006 /* Add to aio pending list */ 4007 list_add_tail(&rdata->list, rdata_list); 4008 return 0; 4009 } 4010 4011 /* Roll back credits and retry if needed */ 4012 add_credits_and_wake_if(server, &rdata->credits, 0); 4013 } while (rc == -EAGAIN); 4014 4015 fail: 4016 kref_put(&rdata->refcount, cifs_readdata_release); 4017 return rc; 4018 } 4019 4020 static int 4021 cifs_send_async_read(loff_t fpos, size_t len, struct cifsFileInfo *open_file, 4022 struct cifs_sb_info *cifs_sb, struct list_head *rdata_list, 4023 struct cifs_aio_ctx *ctx) 4024 { 4025 struct cifs_readdata *rdata; 4026 unsigned int rsize, nsegs, max_segs = INT_MAX; 4027 struct cifs_credits credits_on_stack; 4028 struct cifs_credits *credits = &credits_on_stack; 4029 size_t cur_len, max_len; 4030 int rc; 4031 pid_t pid; 4032 struct TCP_Server_Info *server; 4033 4034 server = cifs_pick_channel(tlink_tcon(open_file->tlink)->ses); 4035 4036 #ifdef CONFIG_CIFS_SMB_DIRECT 4037 if (server->smbd_conn) 4038 max_segs = server->smbd_conn->max_frmr_depth; 4039 #endif 4040 4041 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD) 4042 pid = open_file->pid; 4043 else 4044 pid = current->tgid; 4045 4046 do { 4047 if (open_file->invalidHandle) { 4048 rc = cifs_reopen_file(open_file, true); 4049 if (rc == -EAGAIN) 4050 continue; 4051 else if (rc) 4052 break; 4053 } 4054 4055 if (cifs_sb->ctx->rsize == 0) 4056 cifs_sb->ctx->rsize = 4057 server->ops->negotiate_rsize(tlink_tcon(open_file->tlink), 4058 cifs_sb->ctx); 4059 4060 rc = server->ops->wait_mtu_credits(server, cifs_sb->ctx->rsize, 4061 &rsize, credits); 4062 if (rc) 4063 break; 4064 4065 max_len = min_t(size_t, len, rsize); 4066 4067 cur_len = cifs_limit_bvec_subset(&ctx->iter, max_len, 4068 max_segs, &nsegs); 4069 cifs_dbg(FYI, "read-to-iter len=%zx/%zx nsegs=%u/%lu/%u\n", 4070 cur_len, max_len, nsegs, ctx->iter.nr_segs, max_segs); 4071 if (cur_len == 0) { 4072 rc = -EIO; 4073 add_credits_and_wake_if(server, credits, 0); 4074 break; 4075 } 4076 4077 rdata = cifs_readdata_alloc(cifs_uncached_readv_complete); 4078 if (!rdata) { 4079 add_credits_and_wake_if(server, credits, 0); 4080 rc = -ENOMEM; 4081 break; 4082 } 4083 4084 rdata->server = server; 4085 rdata->cfile = cifsFileInfo_get(open_file); 4086 rdata->offset = fpos; 4087 rdata->bytes = cur_len; 4088 rdata->pid = pid; 4089 rdata->credits = credits_on_stack; 4090 rdata->ctx = ctx; 4091 kref_get(&ctx->refcount); 4092 4093 rdata->iter = ctx->iter; 4094 iov_iter_truncate(&rdata->iter, cur_len); 4095 4096 rc = adjust_credits(server, &rdata->credits, rdata->bytes); 4097 4098 if (!rc) { 4099 if (rdata->cfile->invalidHandle) 4100 rc = -EAGAIN; 4101 else 4102 rc = 
server->ops->async_readv(rdata); 4103 } 4104 4105 if (rc) { 4106 add_credits_and_wake_if(server, &rdata->credits, 0); 4107 kref_put(&rdata->refcount, cifs_readdata_release); 4108 if (rc == -EAGAIN) 4109 continue; 4110 break; 4111 } 4112 4113 list_add_tail(&rdata->list, rdata_list); 4114 iov_iter_advance(&ctx->iter, cur_len); 4115 fpos += cur_len; 4116 len -= cur_len; 4117 } while (len > 0); 4118 4119 return rc; 4120 } 4121 4122 static void 4123 collect_uncached_read_data(struct cifs_aio_ctx *ctx) 4124 { 4125 struct cifs_readdata *rdata, *tmp; 4126 struct cifs_sb_info *cifs_sb; 4127 int rc; 4128 4129 cifs_sb = CIFS_SB(ctx->cfile->dentry->d_sb); 4130 4131 mutex_lock(&ctx->aio_mutex); 4132 4133 if (list_empty(&ctx->list)) { 4134 mutex_unlock(&ctx->aio_mutex); 4135 return; 4136 } 4137 4138 rc = ctx->rc; 4139 /* the loop below should proceed in the order of increasing offsets */ 4140 again: 4141 list_for_each_entry_safe(rdata, tmp, &ctx->list, list) { 4142 if (!rc) { 4143 if (!try_wait_for_completion(&rdata->done)) { 4144 mutex_unlock(&ctx->aio_mutex); 4145 return; 4146 } 4147 4148 if (rdata->result == -EAGAIN) { 4149 /* resend call if it's a retryable error */ 4150 struct list_head tmp_list; 4151 unsigned int got_bytes = rdata->got_bytes; 4152 4153 list_del_init(&rdata->list); 4154 INIT_LIST_HEAD(&tmp_list); 4155 4156 if (ctx->direct_io) { 4157 /* 4158 * Re-use rdata as this is a 4159 * direct I/O 4160 */ 4161 rc = cifs_resend_rdata( 4162 rdata, 4163 &tmp_list, ctx); 4164 } else { 4165 rc = cifs_send_async_read( 4166 rdata->offset + got_bytes, 4167 rdata->bytes - got_bytes, 4168 rdata->cfile, cifs_sb, 4169 &tmp_list, ctx); 4170 4171 kref_put(&rdata->refcount, 4172 cifs_readdata_release); 4173 } 4174 4175 list_splice(&tmp_list, &ctx->list); 4176 4177 goto again; 4178 } else if (rdata->result) 4179 rc = rdata->result; 4180 4181 /* if there was a short read -- discard anything left */ 4182 if (rdata->got_bytes && rdata->got_bytes < rdata->bytes) 4183 rc = -ENODATA; 4184 4185 ctx->total_len += rdata->got_bytes; 4186 } 4187 list_del_init(&rdata->list); 4188 kref_put(&rdata->refcount, cifs_readdata_release); 4189 } 4190 4191 /* mask nodata case */ 4192 if (rc == -ENODATA) 4193 rc = 0; 4194 4195 ctx->rc = (rc == 0) ? 
(ssize_t)ctx->total_len : rc; 4196 4197 mutex_unlock(&ctx->aio_mutex); 4198 4199 if (ctx->iocb && ctx->iocb->ki_complete) 4200 ctx->iocb->ki_complete(ctx->iocb, ctx->rc); 4201 else 4202 complete(&ctx->done); 4203 } 4204 4205 static ssize_t __cifs_readv( 4206 struct kiocb *iocb, struct iov_iter *to, bool direct) 4207 { 4208 size_t len; 4209 struct file *file = iocb->ki_filp; 4210 struct cifs_sb_info *cifs_sb; 4211 struct cifsFileInfo *cfile; 4212 struct cifs_tcon *tcon; 4213 ssize_t rc, total_read = 0; 4214 loff_t offset = iocb->ki_pos; 4215 struct cifs_aio_ctx *ctx; 4216 4217 len = iov_iter_count(to); 4218 if (!len) 4219 return 0; 4220 4221 cifs_sb = CIFS_FILE_SB(file); 4222 cfile = file->private_data; 4223 tcon = tlink_tcon(cfile->tlink); 4224 4225 if (!tcon->ses->server->ops->async_readv) 4226 return -ENOSYS; 4227 4228 if ((file->f_flags & O_ACCMODE) == O_WRONLY) 4229 cifs_dbg(FYI, "attempting read on write only file instance\n"); 4230 4231 ctx = cifs_aio_ctx_alloc(); 4232 if (!ctx) 4233 return -ENOMEM; 4234 4235 ctx->pos = offset; 4236 ctx->direct_io = direct; 4237 ctx->len = len; 4238 ctx->cfile = cifsFileInfo_get(cfile); 4239 ctx->nr_pinned_pages = 0; 4240 4241 if (!is_sync_kiocb(iocb)) 4242 ctx->iocb = iocb; 4243 4244 if (user_backed_iter(to)) { 4245 /* 4246 * Extract IOVEC/UBUF-type iterators to a BVEC-type iterator as 4247 * they contain references to the calling process's virtual 4248 * memory layout which won't be available in an async worker 4249 * thread. This also takes a pin on every folio involved. 4250 */ 4251 rc = netfs_extract_user_iter(to, iov_iter_count(to), 4252 &ctx->iter, 0); 4253 if (rc < 0) { 4254 kref_put(&ctx->refcount, cifs_aio_ctx_release); 4255 return rc; 4256 } 4257 4258 ctx->nr_pinned_pages = rc; 4259 ctx->bv = (void *)ctx->iter.bvec; 4260 ctx->bv_need_unpin = iov_iter_extract_will_pin(to); 4261 ctx->should_dirty = true; 4262 } else if ((iov_iter_is_bvec(to) || iov_iter_is_kvec(to)) && 4263 !is_sync_kiocb(iocb)) { 4264 /* 4265 * If the op is asynchronous, we need to copy the list attached 4266 * to a BVEC/KVEC-type iterator, but we assume that the storage 4267 * will be retained by the caller; in any case, we may or may 4268 * not be able to pin the pages, so we don't try. 4269 */ 4270 ctx->bv = (void *)dup_iter(&ctx->iter, to, GFP_KERNEL); 4271 if (!ctx->bv) { 4272 kref_put(&ctx->refcount, cifs_aio_ctx_release); 4273 return -ENOMEM; 4274 } 4275 } else { 4276 /* 4277 * Otherwise, we just pass the iterator down as-is and rely on 4278 * the caller to make sure the pages referred to by the 4279 * iterator don't evaporate. 
4280 */ 4281 ctx->iter = *to; 4282 } 4283 4284 if (direct) { 4285 rc = filemap_write_and_wait_range(file->f_inode->i_mapping, 4286 offset, offset + len - 1); 4287 if (rc) { 4288 kref_put(&ctx->refcount, cifs_aio_ctx_release); 4289 return -EAGAIN; 4290 } 4291 } 4292 4293 /* grab a lock here because read response handlers can access ctx */ 4294 mutex_lock(&ctx->aio_mutex); 4295 4296 rc = cifs_send_async_read(offset, len, cfile, cifs_sb, &ctx->list, ctx); 4297 4298 /* if at least one read request was sent successfully, then reset rc */ 4299 if (!list_empty(&ctx->list)) 4300 rc = 0; 4301 4302 mutex_unlock(&ctx->aio_mutex); 4303 4304 if (rc) { 4305 kref_put(&ctx->refcount, cifs_aio_ctx_release); 4306 return rc; 4307 } 4308 4309 if (!is_sync_kiocb(iocb)) { 4310 kref_put(&ctx->refcount, cifs_aio_ctx_release); 4311 return -EIOCBQUEUED; 4312 } 4313 4314 rc = wait_for_completion_killable(&ctx->done); 4315 if (rc) { 4316 mutex_lock(&ctx->aio_mutex); 4317 ctx->rc = rc = -EINTR; 4318 total_read = ctx->total_len; 4319 mutex_unlock(&ctx->aio_mutex); 4320 } else { 4321 rc = ctx->rc; 4322 total_read = ctx->total_len; 4323 } 4324 4325 kref_put(&ctx->refcount, cifs_aio_ctx_release); 4326 4327 if (total_read) { 4328 iocb->ki_pos += total_read; 4329 return total_read; 4330 } 4331 return rc; 4332 } 4333 4334 ssize_t cifs_direct_readv(struct kiocb *iocb, struct iov_iter *to) 4335 { 4336 return __cifs_readv(iocb, to, true); 4337 } 4338 4339 ssize_t cifs_user_readv(struct kiocb *iocb, struct iov_iter *to) 4340 { 4341 return __cifs_readv(iocb, to, false); 4342 } 4343 4344 ssize_t 4345 cifs_strict_readv(struct kiocb *iocb, struct iov_iter *to) 4346 { 4347 struct inode *inode = file_inode(iocb->ki_filp); 4348 struct cifsInodeInfo *cinode = CIFS_I(inode); 4349 struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb); 4350 struct cifsFileInfo *cfile = (struct cifsFileInfo *) 4351 iocb->ki_filp->private_data; 4352 struct cifs_tcon *tcon = tlink_tcon(cfile->tlink); 4353 int rc = -EACCES; 4354 4355 /* 4356 * In strict cache mode we need to read from the server all the time 4357 * if we don't have level II oplock because the server can delay mtime 4358 * change - so we can't make a decision about inode invalidating. 4359 * And we can also fail when reading pages if there are mandatory locks 4360 * on pages affected by this read but not on the region from pos to 4361 * pos+len-1. 4362 */ 4363 if (!CIFS_CACHE_READ(cinode)) 4364 return cifs_user_readv(iocb, to); 4365 4366 if (cap_unix(tcon->ses) && 4367 (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) && 4368 ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0)) 4369 return generic_file_read_iter(iocb, to); 4370 4371 /* 4372 * We need to hold the sem to be sure nobody modifies lock list 4373 * with a brlock that prevents reading.
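* The conflict check below uses the server's shared lock type, so only a conflicting exclusive byte-range lock makes the cached read fail with -EACCES.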
4374 */ 4375 down_read(&cinode->lock_sem); 4376 if (!cifs_find_lock_conflict(cfile, iocb->ki_pos, iov_iter_count(to), 4377 tcon->ses->server->vals->shared_lock_type, 4378 0, NULL, CIFS_READ_OP)) 4379 rc = generic_file_read_iter(iocb, to); 4380 up_read(&cinode->lock_sem); 4381 return rc; 4382 } 4383 4384 static ssize_t 4385 cifs_read(struct file *file, char *read_data, size_t read_size, loff_t *offset) 4386 { 4387 int rc = -EACCES; 4388 unsigned int bytes_read = 0; 4389 unsigned int total_read; 4390 unsigned int current_read_size; 4391 unsigned int rsize; 4392 struct cifs_sb_info *cifs_sb; 4393 struct cifs_tcon *tcon; 4394 struct TCP_Server_Info *server; 4395 unsigned int xid; 4396 char *cur_offset; 4397 struct cifsFileInfo *open_file; 4398 struct cifs_io_parms io_parms = {0}; 4399 int buf_type = CIFS_NO_BUFFER; 4400 __u32 pid; 4401 4402 xid = get_xid(); 4403 cifs_sb = CIFS_FILE_SB(file); 4404 4405 /* FIXME: set up handlers for larger reads and/or convert to async */ 4406 rsize = min_t(unsigned int, cifs_sb->ctx->rsize, CIFSMaxBufSize); 4407 4408 if (file->private_data == NULL) { 4409 rc = -EBADF; 4410 free_xid(xid); 4411 return rc; 4412 } 4413 open_file = file->private_data; 4414 tcon = tlink_tcon(open_file->tlink); 4415 server = cifs_pick_channel(tcon->ses); 4416 4417 if (!server->ops->sync_read) { 4418 free_xid(xid); 4419 return -ENOSYS; 4420 } 4421 4422 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD) 4423 pid = open_file->pid; 4424 else 4425 pid = current->tgid; 4426 4427 if ((file->f_flags & O_ACCMODE) == O_WRONLY) 4428 cifs_dbg(FYI, "attempting read on write only file instance\n"); 4429 4430 for (total_read = 0, cur_offset = read_data; read_size > total_read; 4431 total_read += bytes_read, cur_offset += bytes_read) { 4432 do { 4433 current_read_size = min_t(uint, read_size - total_read, 4434 rsize); 4435 /* 4436 * For windows me and 9x we do not want to request more 4437 * than it negotiated since it will refuse the read 4438 * then. 4439 */ 4440 if (!(tcon->ses->capabilities & 4441 tcon->ses->server->vals->cap_large_files)) { 4442 current_read_size = min_t(uint, 4443 current_read_size, CIFSMaxBufSize); 4444 } 4445 if (open_file->invalidHandle) { 4446 rc = cifs_reopen_file(open_file, true); 4447 if (rc != 0) 4448 break; 4449 } 4450 io_parms.pid = pid; 4451 io_parms.tcon = tcon; 4452 io_parms.offset = *offset; 4453 io_parms.length = current_read_size; 4454 io_parms.server = server; 4455 rc = server->ops->sync_read(xid, &open_file->fid, &io_parms, 4456 &bytes_read, &cur_offset, 4457 &buf_type); 4458 } while (rc == -EAGAIN); 4459 4460 if (rc || (bytes_read == 0)) { 4461 if (total_read) { 4462 break; 4463 } else { 4464 free_xid(xid); 4465 return rc; 4466 } 4467 } else { 4468 cifs_stats_bytes_read(tcon, total_read); 4469 *offset += bytes_read; 4470 } 4471 } 4472 free_xid(xid); 4473 return total_read; 4474 } 4475 4476 /* 4477 * If the page is mmap'ed into a process' page tables, then we need to make 4478 * sure that it doesn't change while being written back. 4479 */ 4480 static vm_fault_t cifs_page_mkwrite(struct vm_fault *vmf) 4481 { 4482 struct folio *folio = page_folio(vmf->page); 4483 4484 /* Wait for the folio to be written to the cache before we allow it to 4485 * be modified. We then assume the entire folio will need writing back. 
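* On success the folio is returned locked (VM_FAULT_LOCKED); the core mm then marks it dirty and unlocks it once the PTE has been made writable.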
4486 */ 4487 #ifdef CONFIG_CIFS_FSCACHE 4488 if (folio_test_fscache(folio) && 4489 folio_wait_fscache_killable(folio) < 0) 4490 return VM_FAULT_RETRY; 4491 #endif 4492 4493 folio_wait_writeback(folio); 4494 4495 if (folio_lock_killable(folio) < 0) 4496 return VM_FAULT_RETRY; 4497 return VM_FAULT_LOCKED; 4498 } 4499 4500 static const struct vm_operations_struct cifs_file_vm_ops = { 4501 .fault = filemap_fault, 4502 .map_pages = filemap_map_pages, 4503 .page_mkwrite = cifs_page_mkwrite, 4504 }; 4505 4506 int cifs_file_strict_mmap(struct file *file, struct vm_area_struct *vma) 4507 { 4508 int xid, rc = 0; 4509 struct inode *inode = file_inode(file); 4510 4511 xid = get_xid(); 4512 4513 if (!CIFS_CACHE_READ(CIFS_I(inode))) 4514 rc = cifs_zap_mapping(inode); 4515 if (!rc) 4516 rc = generic_file_mmap(file, vma); 4517 if (!rc) 4518 vma->vm_ops = &cifs_file_vm_ops; 4519 4520 free_xid(xid); 4521 return rc; 4522 } 4523 4524 int cifs_file_mmap(struct file *file, struct vm_area_struct *vma) 4525 { 4526 int rc, xid; 4527 4528 xid = get_xid(); 4529 4530 rc = cifs_revalidate_file(file); 4531 if (rc) 4532 cifs_dbg(FYI, "Validation prior to mmap failed, error=%d\n", 4533 rc); 4534 if (!rc) 4535 rc = generic_file_mmap(file, vma); 4536 if (!rc) 4537 vma->vm_ops = &cifs_file_vm_ops; 4538 4539 free_xid(xid); 4540 return rc; 4541 } 4542 4543 /* 4544 * Unlock a bunch of folios in the pagecache. 4545 */ 4546 static void cifs_unlock_folios(struct address_space *mapping, pgoff_t first, pgoff_t last) 4547 { 4548 struct folio *folio; 4549 XA_STATE(xas, &mapping->i_pages, first); 4550 4551 rcu_read_lock(); 4552 xas_for_each(&xas, folio, last) { 4553 folio_unlock(folio); 4554 } 4555 rcu_read_unlock(); 4556 } 4557 4558 static void cifs_readahead_complete(struct work_struct *work) 4559 { 4560 struct cifs_readdata *rdata = container_of(work, 4561 struct cifs_readdata, work); 4562 struct folio *folio; 4563 pgoff_t last; 4564 bool good = rdata->result == 0 || (rdata->result == -EAGAIN && rdata->got_bytes); 4565 4566 XA_STATE(xas, &rdata->mapping->i_pages, rdata->offset / PAGE_SIZE); 4567 4568 if (good) 4569 cifs_readahead_to_fscache(rdata->mapping->host, 4570 rdata->offset, rdata->bytes); 4571 4572 if (iov_iter_count(&rdata->iter) > 0) 4573 iov_iter_zero(iov_iter_count(&rdata->iter), &rdata->iter); 4574 4575 last = (rdata->offset + rdata->bytes - 1) / PAGE_SIZE; 4576 4577 rcu_read_lock(); 4578 xas_for_each(&xas, folio, last) { 4579 if (good) { 4580 flush_dcache_folio(folio); 4581 folio_mark_uptodate(folio); 4582 } 4583 folio_unlock(folio); 4584 } 4585 rcu_read_unlock(); 4586 4587 kref_put(&rdata->refcount, cifs_readdata_release); 4588 } 4589 4590 static void cifs_readahead(struct readahead_control *ractl) 4591 { 4592 struct cifsFileInfo *open_file = ractl->file->private_data; 4593 struct cifs_sb_info *cifs_sb = CIFS_FILE_SB(ractl->file); 4594 struct TCP_Server_Info *server; 4595 unsigned int xid, nr_pages, cache_nr_pages = 0; 4596 unsigned int ra_pages; 4597 pgoff_t next_cached = ULONG_MAX, ra_index; 4598 bool caching = fscache_cookie_enabled(cifs_inode_cookie(ractl->mapping->host)) && 4599 cifs_inode_cookie(ractl->mapping->host)->cache_priv; 4600 bool check_cache = caching; 4601 pid_t pid; 4602 int rc = 0; 4603 4604 /* Note that readahead_count() lags behind our dequeuing of pages from 4605 * the ractl, wo we have to keep track for ourselves. 
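* ra_pages and ra_index are sampled once below and then advanced by hand as folios are pulled from the ractl.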
4606 */ 4607 ra_pages = readahead_count(ractl); 4608 ra_index = readahead_index(ractl); 4609 4610 xid = get_xid(); 4611 4612 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD) 4613 pid = open_file->pid; 4614 else 4615 pid = current->tgid; 4616 4617 server = cifs_pick_channel(tlink_tcon(open_file->tlink)->ses); 4618 4619 cifs_dbg(FYI, "%s: file=%p mapping=%p num_pages=%u\n", 4620 __func__, ractl->file, ractl->mapping, ra_pages); 4621 4622 /* 4623 * Chop the readahead request up into rsize-sized read requests. 4624 */ 4625 while ((nr_pages = ra_pages)) { 4626 unsigned int i, rsize; 4627 struct cifs_readdata *rdata; 4628 struct cifs_credits credits_on_stack; 4629 struct cifs_credits *credits = &credits_on_stack; 4630 struct folio *folio; 4631 pgoff_t fsize; 4632 4633 /* 4634 * Find out if we have anything cached in the range of 4635 * interest, and if so, where the next chunk of cached data is. 4636 */ 4637 if (caching) { 4638 if (check_cache) { 4639 rc = cifs_fscache_query_occupancy( 4640 ractl->mapping->host, ra_index, nr_pages, 4641 &next_cached, &cache_nr_pages); 4642 if (rc < 0) 4643 caching = false; 4644 check_cache = false; 4645 } 4646 4647 if (ra_index == next_cached) { 4648 /* 4649 * TODO: Send a whole batch of pages to be read 4650 * by the cache. 4651 */ 4652 folio = readahead_folio(ractl); 4653 fsize = folio_nr_pages(folio); 4654 ra_pages -= fsize; 4655 ra_index += fsize; 4656 if (cifs_readpage_from_fscache(ractl->mapping->host, 4657 &folio->page) < 0) { 4658 /* 4659 * TODO: Deal with cache read failure 4660 * here, but for the moment, delegate 4661 * that to readpage. 4662 */ 4663 caching = false; 4664 } 4665 folio_unlock(folio); 4666 next_cached += fsize; 4667 cache_nr_pages -= fsize; 4668 if (cache_nr_pages == 0) 4669 check_cache = true; 4670 continue; 4671 } 4672 } 4673 4674 if (open_file->invalidHandle) { 4675 rc = cifs_reopen_file(open_file, true); 4676 if (rc) { 4677 if (rc == -EAGAIN) 4678 continue; 4679 break; 4680 } 4681 } 4682 4683 if (cifs_sb->ctx->rsize == 0) 4684 cifs_sb->ctx->rsize = 4685 server->ops->negotiate_rsize(tlink_tcon(open_file->tlink), 4686 cifs_sb->ctx); 4687 4688 rc = server->ops->wait_mtu_credits(server, cifs_sb->ctx->rsize, 4689 &rsize, credits); 4690 if (rc) 4691 break; 4692 nr_pages = min_t(size_t, rsize / PAGE_SIZE, ra_pages); 4693 if (next_cached != ULONG_MAX) 4694 nr_pages = min_t(size_t, nr_pages, next_cached - ra_index); 4695 4696 /* 4697 * Give up immediately if rsize is too small to read an entire 4698 * page. The VFS will fall back to readpage. We should never 4699 * reach this point however since we set ra_pages to 0 when the 4700 * rsize is smaller than a cache page. 
4701 */ 4702 if (unlikely(!nr_pages)) { 4703 add_credits_and_wake_if(server, credits, 0); 4704 break; 4705 } 4706 4707 rdata = cifs_readdata_alloc(cifs_readahead_complete); 4708 if (!rdata) { 4709 /* best to give up if we're out of mem */ 4710 add_credits_and_wake_if(server, credits, 0); 4711 break; 4712 } 4713 4714 rdata->offset = ra_index * PAGE_SIZE; 4715 rdata->bytes = nr_pages * PAGE_SIZE; 4716 rdata->cfile = cifsFileInfo_get(open_file); 4717 rdata->server = server; 4718 rdata->mapping = ractl->mapping; 4719 rdata->pid = pid; 4720 rdata->credits = credits_on_stack; 4721 4722 for (i = 0; i < nr_pages; i++) { 4723 if (!readahead_folio(ractl)) 4724 WARN_ON(1); 4725 } 4726 ra_pages -= nr_pages; 4727 ra_index += nr_pages; 4728 4729 iov_iter_xarray(&rdata->iter, ITER_DEST, &rdata->mapping->i_pages, 4730 rdata->offset, rdata->bytes); 4731 4732 rc = adjust_credits(server, &rdata->credits, rdata->bytes); 4733 if (!rc) { 4734 if (rdata->cfile->invalidHandle) 4735 rc = -EAGAIN; 4736 else 4737 rc = server->ops->async_readv(rdata); 4738 } 4739 4740 if (rc) { 4741 add_credits_and_wake_if(server, &rdata->credits, 0); 4742 cifs_unlock_folios(rdata->mapping, 4743 rdata->offset / PAGE_SIZE, 4744 (rdata->offset + rdata->bytes - 1) / PAGE_SIZE); 4745 /* Fallback to the readpage in error/reconnect cases */ 4746 kref_put(&rdata->refcount, cifs_readdata_release); 4747 break; 4748 } 4749 4750 kref_put(&rdata->refcount, cifs_readdata_release); 4751 } 4752 4753 free_xid(xid); 4754 } 4755 4756 /* 4757 * cifs_readpage_worker must be called with the page pinned 4758 */ 4759 static int cifs_readpage_worker(struct file *file, struct page *page, 4760 loff_t *poffset) 4761 { 4762 struct inode *inode = file_inode(file); 4763 struct timespec64 atime, mtime; 4764 char *read_data; 4765 int rc; 4766 4767 /* Is the page cached? 
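 * If so, cifs_readpage_from_fscache() fills the page and the network read below is skipped entirely.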
*/ 4768 rc = cifs_readpage_from_fscache(inode, page); 4769 if (rc == 0) 4770 goto read_complete; 4771 4772 read_data = kmap(page); 4773 /* for reads over a certain size could initiate async read ahead */ 4774 4775 rc = cifs_read(file, read_data, PAGE_SIZE, poffset); 4776 4777 if (rc < 0) 4778 goto io_error; 4779 else 4780 cifs_dbg(FYI, "Bytes read %d\n", rc); 4781 4782 /* we do not want atime to be less than mtime, it broke some apps */ 4783 atime = inode_set_atime_to_ts(inode, current_time(inode)); 4784 mtime = inode_get_mtime(inode); 4785 if (timespec64_compare(&atime, &mtime) < 0) 4786 inode_set_atime_to_ts(inode, inode_get_mtime(inode)); 4787 4788 if (PAGE_SIZE > rc) 4789 memset(read_data + rc, 0, PAGE_SIZE - rc); 4790 4791 flush_dcache_page(page); 4792 SetPageUptodate(page); 4793 rc = 0; 4794 4795 io_error: 4796 kunmap(page); 4797 4798 read_complete: 4799 unlock_page(page); 4800 return rc; 4801 } 4802 4803 static int cifs_read_folio(struct file *file, struct folio *folio) 4804 { 4805 struct page *page = &folio->page; 4806 loff_t offset = page_file_offset(page); 4807 int rc = -EACCES; 4808 unsigned int xid; 4809 4810 xid = get_xid(); 4811 4812 if (file->private_data == NULL) { 4813 rc = -EBADF; 4814 free_xid(xid); 4815 return rc; 4816 } 4817 4818 cifs_dbg(FYI, "read_folio %p at offset %d 0x%x\n", 4819 page, (int)offset, (int)offset); 4820 4821 rc = cifs_readpage_worker(file, page, &offset); 4822 4823 free_xid(xid); 4824 return rc; 4825 } 4826 4827 static int is_inode_writable(struct cifsInodeInfo *cifs_inode) 4828 { 4829 struct cifsFileInfo *open_file; 4830 4831 spin_lock(&cifs_inode->open_file_lock); 4832 list_for_each_entry(open_file, &cifs_inode->openFileList, flist) { 4833 if (OPEN_FMODE(open_file->f_flags) & FMODE_WRITE) { 4834 spin_unlock(&cifs_inode->open_file_lock); 4835 return 1; 4836 } 4837 } 4838 spin_unlock(&cifs_inode->open_file_lock); 4839 return 0; 4840 } 4841 4842 /* We do not want to update the file size from server for inodes 4843 open for write - to avoid races with writepage extending 4844 the file - in the future we could consider allowing 4845 refreshing the inode only on increases in the file size 4846 but this is tricky to do without racing with writebehind 4847 page caching in the current Linux kernel design */ 4848 bool is_size_safe_to_change(struct cifsInodeInfo *cifsInode, __u64 end_of_file, 4849 bool from_readdir) 4850 { 4851 if (!cifsInode) 4852 return true; 4853 4854 if (is_inode_writable(cifsInode) || 4855 ((cifsInode->oplock & CIFS_CACHE_RW_FLG) != 0 && from_readdir)) { 4856 /* This inode is open for write at least once */ 4857 struct cifs_sb_info *cifs_sb; 4858 4859 cifs_sb = CIFS_SB(cifsInode->netfs.inode.i_sb); 4860 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DIRECT_IO) { 4861 /* since no page cache to corrupt on directio 4862 we can change size safely */ 4863 return true; 4864 } 4865 4866 if (i_size_read(&cifsInode->netfs.inode) < end_of_file) 4867 return true; 4868 4869 return false; 4870 } else 4871 return true; 4872 } 4873 4874 static int cifs_write_begin(struct file *file, struct address_space *mapping, 4875 loff_t pos, unsigned len, 4876 struct page **pagep, void **fsdata) 4877 { 4878 int oncethru = 0; 4879 pgoff_t index = pos >> PAGE_SHIFT; 4880 loff_t offset = pos & (PAGE_SIZE - 1); 4881 loff_t page_start = pos & PAGE_MASK; 4882 loff_t i_size; 4883 struct page *page; 4884 int rc = 0; 4885 4886 cifs_dbg(FYI, "write_begin from %lld len %d\n", (long long)pos, len); 4887 4888 start: 4889 page = grab_cache_page_write_begin(mapping, index); 4890 if 
(!page) { 4891 rc = -ENOMEM; 4892 goto out; 4893 } 4894 4895 if (PageUptodate(page)) 4896 goto out; 4897 4898 /* 4899 * If we write a full page it will be up to date, no need to read from 4900 * the server. If the write is short, we'll end up doing a sync write 4901 * instead. 4902 */ 4903 if (len == PAGE_SIZE) 4904 goto out; 4905 4906 /* 4907 * optimize away the read when we have an oplock, and we're not 4908 * expecting to use any of the data we'd be reading in. That 4909 * is, when the page lies beyond the EOF, or straddles the EOF 4910 * and the write will cover all of the existing data. 4911 */ 4912 if (CIFS_CACHE_READ(CIFS_I(mapping->host))) { 4913 i_size = i_size_read(mapping->host); 4914 if (page_start >= i_size || 4915 (offset == 0 && (pos + len) >= i_size)) { 4916 zero_user_segments(page, 0, offset, 4917 offset + len, 4918 PAGE_SIZE); 4919 /* 4920 * PageChecked means that the parts of the page 4921 * to which we're not writing are considered up 4922 * to date. Once the data is copied to the 4923 * page, it can be set uptodate. 4924 */ 4925 SetPageChecked(page); 4926 goto out; 4927 } 4928 } 4929 4930 if ((file->f_flags & O_ACCMODE) != O_WRONLY && !oncethru) { 4931 /* 4932 * might as well read a page, it is fast enough. If we get 4933 * an error, we don't need to return it. cifs_write_end will 4934 * do a sync write instead since PG_uptodate isn't set. 4935 */ 4936 cifs_readpage_worker(file, page, &page_start); 4937 put_page(page); 4938 oncethru = 1; 4939 goto start; 4940 } else { 4941 /* we could try using another file handle if there is one - 4942 but how would we lock it to prevent close of that handle 4943 racing with this read? In any case 4944 this will be written out by write_end so is fine */ 4945 } 4946 out: 4947 *pagep = page; 4948 return rc; 4949 } 4950 4951 static bool cifs_release_folio(struct folio *folio, gfp_t gfp) 4952 { 4953 if (folio_test_private(folio)) 4954 return 0; 4955 if (folio_test_fscache(folio)) { 4956 if (current_is_kswapd() || !(gfp & __GFP_FS)) 4957 return false; 4958 folio_wait_fscache(folio); 4959 } 4960 fscache_note_page_release(cifs_inode_cookie(folio->mapping->host)); 4961 return true; 4962 } 4963 4964 static void cifs_invalidate_folio(struct folio *folio, size_t offset, 4965 size_t length) 4966 { 4967 folio_wait_fscache(folio); 4968 } 4969 4970 static int cifs_launder_folio(struct folio *folio) 4971 { 4972 int rc = 0; 4973 loff_t range_start = folio_pos(folio); 4974 loff_t range_end = range_start + folio_size(folio); 4975 struct writeback_control wbc = { 4976 .sync_mode = WB_SYNC_ALL, 4977 .nr_to_write = 0, 4978 .range_start = range_start, 4979 .range_end = range_end, 4980 }; 4981 4982 cifs_dbg(FYI, "Launder page: %lu\n", folio->index); 4983 4984 if (folio_clear_dirty_for_io(folio)) 4985 rc = cifs_writepage_locked(&folio->page, &wbc); 4986 4987 folio_wait_fscache(folio); 4988 return rc; 4989 } 4990 4991 void cifs_oplock_break(struct work_struct *work) 4992 { 4993 struct cifsFileInfo *cfile = container_of(work, struct cifsFileInfo, 4994 oplock_break); 4995 struct inode *inode = d_inode(cfile->dentry); 4996 struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb); 4997 struct cifsInodeInfo *cinode = CIFS_I(inode); 4998 struct cifs_tcon *tcon; 4999 struct TCP_Server_Info *server; 5000 struct tcon_link *tlink; 5001 int rc = 0; 5002 bool purge_cache = false, oplock_break_cancelled; 5003 __u64 persistent_fid, volatile_fid; 5004 __u16 net_fid; 5005 5006 wait_on_bit(&cinode->flags, CIFS_INODE_PENDING_WRITERS, 5007 TASK_UNINTERRUPTIBLE); 5008 5009 tlink = 
cifs_sb_tlink(cifs_sb); 5010 if (IS_ERR(tlink)) 5011 goto out; 5012 tcon = tlink_tcon(tlink); 5013 server = tcon->ses->server; 5014 5015 server->ops->downgrade_oplock(server, cinode, cfile->oplock_level, 5016 cfile->oplock_epoch, &purge_cache); 5017 5018 if (!CIFS_CACHE_WRITE(cinode) && CIFS_CACHE_READ(cinode) && 5019 cifs_has_mand_locks(cinode)) { 5020 cifs_dbg(FYI, "Reset oplock to None for inode=%p due to mand locks\n", 5021 inode); 5022 cinode->oplock = 0; 5023 } 5024 5025 if (inode && S_ISREG(inode->i_mode)) { 5026 if (CIFS_CACHE_READ(cinode)) 5027 break_lease(inode, O_RDONLY); 5028 else 5029 break_lease(inode, O_WRONLY); 5030 rc = filemap_fdatawrite(inode->i_mapping); 5031 if (!CIFS_CACHE_READ(cinode) || purge_cache) { 5032 rc = filemap_fdatawait(inode->i_mapping); 5033 mapping_set_error(inode->i_mapping, rc); 5034 cifs_zap_mapping(inode); 5035 } 5036 cifs_dbg(FYI, "Oplock flush inode %p rc %d\n", inode, rc); 5037 if (CIFS_CACHE_WRITE(cinode)) 5038 goto oplock_break_ack; 5039 } 5040 5041 rc = cifs_push_locks(cfile); 5042 if (rc) 5043 cifs_dbg(VFS, "Push locks rc = %d\n", rc); 5044 5045 oplock_break_ack: 5046 /* 5047 * When an oplock break is received and there are no active 5048 * file handles, only cached ones, schedule the deferred close immediately 5049 * so that a new open will not use a cached handle. 5050 */ 5051 5052 if (!CIFS_CACHE_HANDLE(cinode) && !list_empty(&cinode->deferred_closes)) 5053 cifs_close_deferred_file(cinode); 5054 5055 persistent_fid = cfile->fid.persistent_fid; 5056 volatile_fid = cfile->fid.volatile_fid; 5057 net_fid = cfile->fid.netfid; 5058 oplock_break_cancelled = cfile->oplock_break_cancelled; 5059 5060 _cifsFileInfo_put(cfile, false /* do not wait for ourself */, false); 5061 /* 5062 * MS-SMB2 3.2.5.19.1 and 3.2.5.19.2 (and MS-CIFS 3.2.5.42) do not require 5063 * an acknowledgment to be sent when the file has already been closed. 5064 */ 5065 spin_lock(&cinode->open_file_lock); 5066 /* check list empty since can race with kill_sb calling tree disconnect */ 5067 if (!oplock_break_cancelled && !list_empty(&cinode->openFileList)) { 5068 spin_unlock(&cinode->open_file_lock); 5069 rc = server->ops->oplock_response(tcon, persistent_fid, 5070 volatile_fid, net_fid, cinode); 5071 cifs_dbg(FYI, "Oplock release rc = %d\n", rc); 5072 } else 5073 spin_unlock(&cinode->open_file_lock); 5074 5075 cifs_put_tlink(tlink); 5076 out: 5077 cifs_done_oplock_break(cinode); 5078 } 5079 5080 /* 5081 * The presence of cifs_direct_io() in the address space ops vector 5082 * allows open() with the O_DIRECT flag, which would have failed otherwise. 5083 * 5084 * In the non-cached mode (mount with cache=none), we shunt off direct read and write requests 5085 * so this method should never be called. 5086 * 5087 * Direct IO is not yet supported in the cached mode.
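* With cache=none, O_DIRECT reads and writes are routed through cifs_direct_readv() and cifs_direct_writev() above instead, so this stub only has to exist and simply returns -EINVAL.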
5088 */ 5089 static ssize_t 5090 cifs_direct_io(struct kiocb *iocb, struct iov_iter *iter) 5091 { 5092 /* 5093 * FIXME 5094 * Eventually need to support direct IO for non forcedirectio mounts 5095 */ 5096 return -EINVAL; 5097 } 5098 5099 static int cifs_swap_activate(struct swap_info_struct *sis, 5100 struct file *swap_file, sector_t *span) 5101 { 5102 struct cifsFileInfo *cfile = swap_file->private_data; 5103 struct inode *inode = swap_file->f_mapping->host; 5104 unsigned long blocks; 5105 long long isize; 5106 5107 cifs_dbg(FYI, "swap activate\n"); 5108 5109 if (!swap_file->f_mapping->a_ops->swap_rw) 5110 /* Cannot support swap */ 5111 return -EINVAL; 5112 5113 spin_lock(&inode->i_lock); 5114 blocks = inode->i_blocks; 5115 isize = inode->i_size; 5116 spin_unlock(&inode->i_lock); 5117 if (blocks*512 < isize) { 5118 pr_warn("swap activate: swapfile has holes\n"); 5119 return -EINVAL; 5120 } 5121 *span = sis->pages; 5122 5123 pr_warn_once("Swap support over SMB3 is experimental\n"); 5124 5125 /* 5126 * TODO: consider adding ACL (or documenting how) to prevent other 5127 * users (on this or other systems) from reading it 5128 */ 5129 5130 5131 /* TODO: add sk_set_memalloc(inet) or similar */ 5132 5133 if (cfile) 5134 cfile->swapfile = true; 5135 /* 5136 * TODO: Since file already open, we can't open with DENY_ALL here 5137 * but we could add call to grab a byte range lock to prevent others 5138 * from reading or writing the file 5139 */ 5140 5141 sis->flags |= SWP_FS_OPS; 5142 return add_swap_extent(sis, 0, sis->max, 0); 5143 } 5144 5145 static void cifs_swap_deactivate(struct file *file) 5146 { 5147 struct cifsFileInfo *cfile = file->private_data; 5148 5149 cifs_dbg(FYI, "swap deactivate\n"); 5150 5151 /* TODO: undo sk_set_memalloc(inet) will eventually be needed */ 5152 5153 if (cfile) 5154 cfile->swapfile = false; 5155 5156 /* do we need to unpin (or unlock) the file */ 5157 } 5158 5159 /* 5160 * Mark a page as having been made dirty and thus needing writeback. We also 5161 * need to pin the cache object to write back to. 5162 */ 5163 #ifdef CONFIG_CIFS_FSCACHE 5164 static bool cifs_dirty_folio(struct address_space *mapping, struct folio *folio) 5165 { 5166 return fscache_dirty_folio(mapping, folio, 5167 cifs_inode_cookie(mapping->host)); 5168 } 5169 #else 5170 #define cifs_dirty_folio filemap_dirty_folio 5171 #endif 5172 5173 const struct address_space_operations cifs_addr_ops = { 5174 .read_folio = cifs_read_folio, 5175 .readahead = cifs_readahead, 5176 .writepages = cifs_writepages, 5177 .write_begin = cifs_write_begin, 5178 .write_end = cifs_write_end, 5179 .dirty_folio = cifs_dirty_folio, 5180 .release_folio = cifs_release_folio, 5181 .direct_IO = cifs_direct_io, 5182 .invalidate_folio = cifs_invalidate_folio, 5183 .launder_folio = cifs_launder_folio, 5184 .migrate_folio = filemap_migrate_folio, 5185 /* 5186 * TODO: investigate and if useful we could add an is_dirty_writeback 5187 * helper if needed 5188 */ 5189 .swap_activate = cifs_swap_activate, 5190 .swap_deactivate = cifs_swap_deactivate, 5191 }; 5192 5193 /* 5194 * cifs_readahead requires the server to support a buffer large enough to 5195 * contain the header plus one complete page of data. Otherwise, we need 5196 * to leave cifs_readahead out of the address space operations. 
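* The cifs_addr_ops_smallbuf table below therefore mirrors cifs_addr_ops but omits .readahead (along with .direct_IO and the swap hooks).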
5197 */ 5198 const struct address_space_operations cifs_addr_ops_smallbuf = { 5199 .read_folio = cifs_read_folio, 5200 .writepages = cifs_writepages, 5201 .write_begin = cifs_write_begin, 5202 .write_end = cifs_write_end, 5203 .dirty_folio = cifs_dirty_folio, 5204 .release_folio = cifs_release_folio, 5205 .invalidate_folio = cifs_invalidate_folio, 5206 .launder_folio = cifs_launder_folio, 5207 .migrate_folio = filemap_migrate_folio, 5208 }; 5209