1 // SPDX-License-Identifier: LGPL-2.1 2 /* 3 * 4 * vfs operations that deal with files 5 * 6 * Copyright (C) International Business Machines Corp., 2002,2010 7 * Author(s): Steve French (sfrench@us.ibm.com) 8 * Jeremy Allison (jra@samba.org) 9 * 10 */ 11 #include <linux/fs.h> 12 #include <linux/filelock.h> 13 #include <linux/backing-dev.h> 14 #include <linux/stat.h> 15 #include <linux/fcntl.h> 16 #include <linux/pagemap.h> 17 #include <linux/pagevec.h> 18 #include <linux/writeback.h> 19 #include <linux/task_io_accounting_ops.h> 20 #include <linux/delay.h> 21 #include <linux/mount.h> 22 #include <linux/slab.h> 23 #include <linux/swap.h> 24 #include <linux/mm.h> 25 #include <asm/div64.h> 26 #include "cifsfs.h" 27 #include "cifspdu.h" 28 #include "cifsglob.h" 29 #include "cifsproto.h" 30 #include "smb2proto.h" 31 #include "cifs_unicode.h" 32 #include "cifs_debug.h" 33 #include "cifs_fs_sb.h" 34 #include "fscache.h" 35 #include "smbdirect.h" 36 #include "fs_context.h" 37 #include "cifs_ioctl.h" 38 #include "cached_dir.h" 39 40 /* 41 * Remove the dirty flags from a span of pages. 42 */ 43 static void cifs_undirty_folios(struct inode *inode, loff_t start, unsigned int len) 44 { 45 struct address_space *mapping = inode->i_mapping; 46 struct folio *folio; 47 pgoff_t end; 48 49 XA_STATE(xas, &mapping->i_pages, start / PAGE_SIZE); 50 51 rcu_read_lock(); 52 53 end = (start + len - 1) / PAGE_SIZE; 54 xas_for_each_marked(&xas, folio, end, PAGECACHE_TAG_DIRTY) { 55 if (xas_retry(&xas, folio)) 56 continue; 57 xas_pause(&xas); 58 rcu_read_unlock(); 59 folio_lock(folio); 60 folio_clear_dirty_for_io(folio); 61 folio_unlock(folio); 62 rcu_read_lock(); 63 } 64 65 rcu_read_unlock(); 66 } 67 68 /* 69 * Completion of write to server. 70 */ 71 void cifs_pages_written_back(struct inode *inode, loff_t start, unsigned int len) 72 { 73 struct address_space *mapping = inode->i_mapping; 74 struct folio *folio; 75 pgoff_t end; 76 77 XA_STATE(xas, &mapping->i_pages, start / PAGE_SIZE); 78 79 if (!len) 80 return; 81 82 rcu_read_lock(); 83 84 end = (start + len - 1) / PAGE_SIZE; 85 xas_for_each(&xas, folio, end) { 86 if (xas_retry(&xas, folio)) 87 continue; 88 if (!folio_test_writeback(folio)) { 89 WARN_ONCE(1, "bad %x @%llx page %lx %lx\n", 90 len, start, folio->index, end); 91 continue; 92 } 93 94 folio_detach_private(folio); 95 folio_end_writeback(folio); 96 } 97 98 rcu_read_unlock(); 99 } 100 101 /* 102 * Failure of write to server. 103 */ 104 void cifs_pages_write_failed(struct inode *inode, loff_t start, unsigned int len) 105 { 106 struct address_space *mapping = inode->i_mapping; 107 struct folio *folio; 108 pgoff_t end; 109 110 XA_STATE(xas, &mapping->i_pages, start / PAGE_SIZE); 111 112 if (!len) 113 return; 114 115 rcu_read_lock(); 116 117 end = (start + len - 1) / PAGE_SIZE; 118 xas_for_each(&xas, folio, end) { 119 if (xas_retry(&xas, folio)) 120 continue; 121 if (!folio_test_writeback(folio)) { 122 WARN_ONCE(1, "bad %x @%llx page %lx %lx\n", 123 len, start, folio->index, end); 124 continue; 125 } 126 127 folio_set_error(folio); 128 folio_end_writeback(folio); 129 } 130 131 rcu_read_unlock(); 132 } 133 134 /* 135 * Redirty pages after a temporary failure. 136 */ 137 void cifs_pages_write_redirty(struct inode *inode, loff_t start, unsigned int len) 138 { 139 struct address_space *mapping = inode->i_mapping; 140 struct folio *folio; 141 pgoff_t end; 142 143 XA_STATE(xas, &mapping->i_pages, start / PAGE_SIZE); 144 145 if (!len) 146 return; 147 148 rcu_read_lock(); 149 150 end = (start + len - 1) / PAGE_SIZE; 151 xas_for_each(&xas, folio, end) { 152 if (!folio_test_writeback(folio)) { 153 WARN_ONCE(1, "bad %x @%llx page %lx %lx\n", 154 len, start, folio->index, end); 155 continue; 156 } 157 158 filemap_dirty_folio(folio->mapping, folio); 159 folio_end_writeback(folio); 160 } 161 162 rcu_read_unlock(); 163 } 164 165 /* 166 * Mark as invalid, all open files on tree connections since they 167 * were closed when session to server was lost. 168 */ 169 void 170 cifs_mark_open_files_invalid(struct cifs_tcon *tcon) 171 { 172 struct cifsFileInfo *open_file = NULL; 173 struct list_head *tmp; 174 struct list_head *tmp1; 175 176 /* only send once per connect */ 177 spin_lock(&tcon->tc_lock); 178 if (tcon->need_reconnect) 179 tcon->status = TID_NEED_RECON; 180 181 if (tcon->status != TID_NEED_RECON) { 182 spin_unlock(&tcon->tc_lock); 183 return; 184 } 185 tcon->status = TID_IN_FILES_INVALIDATE; 186 spin_unlock(&tcon->tc_lock); 187 188 /* list all files open on tree connection and mark them invalid */ 189 spin_lock(&tcon->open_file_lock); 190 list_for_each_safe(tmp, tmp1, &tcon->openFileList) { 191 open_file = list_entry(tmp, struct cifsFileInfo, tlist); 192 open_file->invalidHandle = true; 193 open_file->oplock_break_cancelled = true; 194 } 195 spin_unlock(&tcon->open_file_lock); 196 197 invalidate_all_cached_dirs(tcon); 198 spin_lock(&tcon->tc_lock); 199 if (tcon->status == TID_IN_FILES_INVALIDATE) 200 tcon->status = TID_NEED_TCON; 201 spin_unlock(&tcon->tc_lock); 202 203 /* 204 * BB Add call to invalidate_inodes(sb) for all superblocks mounted 205 * to this tcon. 206 */ 207 } 208 209 static inline int cifs_convert_flags(unsigned int flags, int rdwr_for_fscache) 210 { 211 if ((flags & O_ACCMODE) == O_RDONLY) 212 return GENERIC_READ; 213 else if ((flags & O_ACCMODE) == O_WRONLY) 214 return rdwr_for_fscache == 1 ? (GENERIC_READ | GENERIC_WRITE) : GENERIC_WRITE; 215 else if ((flags & O_ACCMODE) == O_RDWR) { 216 /* GENERIC_ALL is too much permission to request 217 can cause unnecessary access denied on create */ 218 /* return GENERIC_ALL; */ 219 return (GENERIC_READ | GENERIC_WRITE); 220 } 221 222 return (READ_CONTROL | FILE_WRITE_ATTRIBUTES | FILE_READ_ATTRIBUTES | 223 FILE_WRITE_EA | FILE_APPEND_DATA | FILE_WRITE_DATA | 224 FILE_READ_DATA); 225 } 226 227 #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY 228 static u32 cifs_posix_convert_flags(unsigned int flags) 229 { 230 u32 posix_flags = 0; 231 232 if ((flags & O_ACCMODE) == O_RDONLY) 233 posix_flags = SMB_O_RDONLY; 234 else if ((flags & O_ACCMODE) == O_WRONLY) 235 posix_flags = SMB_O_WRONLY; 236 else if ((flags & O_ACCMODE) == O_RDWR) 237 posix_flags = SMB_O_RDWR; 238 239 if (flags & O_CREAT) { 240 posix_flags |= SMB_O_CREAT; 241 if (flags & O_EXCL) 242 posix_flags |= SMB_O_EXCL; 243 } else if (flags & O_EXCL) 244 cifs_dbg(FYI, "Application %s pid %d has incorrectly set O_EXCL flag but not O_CREAT on file open. Ignoring O_EXCL\n", 245 current->comm, current->tgid); 246 247 if (flags & O_TRUNC) 248 posix_flags |= SMB_O_TRUNC; 249 /* be safe and imply O_SYNC for O_DSYNC */ 250 if (flags & O_DSYNC) 251 posix_flags |= SMB_O_SYNC; 252 if (flags & O_DIRECTORY) 253 posix_flags |= SMB_O_DIRECTORY; 254 if (flags & O_NOFOLLOW) 255 posix_flags |= SMB_O_NOFOLLOW; 256 if (flags & O_DIRECT) 257 posix_flags |= SMB_O_DIRECT; 258 259 return posix_flags; 260 } 261 #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */ 262 263 static inline int cifs_get_disposition(unsigned int flags) 264 { 265 if ((flags & (O_CREAT | O_EXCL)) == (O_CREAT | O_EXCL)) 266 return FILE_CREATE; 267 else if ((flags & (O_CREAT | O_TRUNC)) == (O_CREAT | O_TRUNC)) 268 return FILE_OVERWRITE_IF; 269 else if ((flags & O_CREAT) == O_CREAT) 270 return FILE_OPEN_IF; 271 else if ((flags & O_TRUNC) == O_TRUNC) 272 return FILE_OVERWRITE; 273 else 274 return FILE_OPEN; 275 } 276 277 #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY 278 int cifs_posix_open(const char *full_path, struct inode **pinode, 279 struct super_block *sb, int mode, unsigned int f_flags, 280 __u32 *poplock, __u16 *pnetfid, unsigned int xid) 281 { 282 int rc; 283 FILE_UNIX_BASIC_INFO *presp_data; 284 __u32 posix_flags = 0; 285 struct cifs_sb_info *cifs_sb = CIFS_SB(sb); 286 struct cifs_fattr fattr; 287 struct tcon_link *tlink; 288 struct cifs_tcon *tcon; 289 290 cifs_dbg(FYI, "posix open %s\n", full_path); 291 292 presp_data = kzalloc(sizeof(FILE_UNIX_BASIC_INFO), GFP_KERNEL); 293 if (presp_data == NULL) 294 return -ENOMEM; 295 296 tlink = cifs_sb_tlink(cifs_sb); 297 if (IS_ERR(tlink)) { 298 rc = PTR_ERR(tlink); 299 goto posix_open_ret; 300 } 301 302 tcon = tlink_tcon(tlink); 303 mode &= ~current_umask(); 304 305 posix_flags = cifs_posix_convert_flags(f_flags); 306 rc = CIFSPOSIXCreate(xid, tcon, posix_flags, mode, pnetfid, presp_data, 307 poplock, full_path, cifs_sb->local_nls, 308 cifs_remap(cifs_sb)); 309 cifs_put_tlink(tlink); 310 311 if (rc) 312 goto posix_open_ret; 313 314 if (presp_data->Type == cpu_to_le32(-1)) 315 goto posix_open_ret; /* open ok, caller does qpathinfo */ 316 317 if (!pinode) 318 goto posix_open_ret; /* caller does not need info */ 319 320 cifs_unix_basic_to_fattr(&fattr, presp_data, cifs_sb); 321 322 /* get new inode and set it up */ 323 if (*pinode == NULL) { 324 cifs_fill_uniqueid(sb, &fattr); 325 *pinode = cifs_iget(sb, &fattr); 326 if (!*pinode) { 327 rc = -ENOMEM; 328 goto posix_open_ret; 329 } 330 } else { 331 cifs_revalidate_mapping(*pinode); 332 rc = cifs_fattr_to_inode(*pinode, &fattr, false); 333 } 334 335 posix_open_ret: 336 kfree(presp_data); 337 return rc; 338 } 339 #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */ 340 341 static int cifs_nt_open(const char *full_path, struct inode *inode, struct cifs_sb_info *cifs_sb, 342 struct cifs_tcon *tcon, unsigned int f_flags, __u32 *oplock, 343 struct cifs_fid *fid, unsigned int xid, struct cifs_open_info_data *buf) 344 { 345 int rc; 346 int desired_access; 347 int disposition; 348 int create_options = CREATE_NOT_DIR; 349 struct TCP_Server_Info *server = tcon->ses->server; 350 struct cifs_open_parms oparms; 351 int rdwr_for_fscache = 0; 352 353 if (!server->ops->open) 354 return -ENOSYS; 355 356 /* If we're caching, we need to be able to fill in around partial writes. */ 357 if (cifs_fscache_enabled(inode) && (f_flags & O_ACCMODE) == O_WRONLY) 358 rdwr_for_fscache = 1; 359 360 desired_access = cifs_convert_flags(f_flags, rdwr_for_fscache); 361 362 /********************************************************************* 363 * open flag mapping table: 364 * 365 * POSIX Flag CIFS Disposition 366 * ---------- ---------------- 367 * O_CREAT FILE_OPEN_IF 368 * O_CREAT | O_EXCL FILE_CREATE 369 * O_CREAT | O_TRUNC FILE_OVERWRITE_IF 370 * O_TRUNC FILE_OVERWRITE 371 * none of the above FILE_OPEN 372 * 373 * Note that there is not a direct match between disposition 374 * FILE_SUPERSEDE (ie create whether or not file exists although 375 * O_CREAT | O_TRUNC is similar but truncates the existing 376 * file rather than creating a new file as FILE_SUPERSEDE does 377 * (which uses the attributes / metadata passed in on open call) 378 *? 379 *? O_SYNC is a reasonable match to CIFS writethrough flag 380 *? and the read write flags match reasonably. O_LARGEFILE 381 *? is irrelevant because largefile support is always used 382 *? by this client. Flags O_APPEND, O_DIRECT, O_DIRECTORY, 383 * O_FASYNC, O_NOFOLLOW, O_NONBLOCK need further investigation 384 *********************************************************************/ 385 386 disposition = cifs_get_disposition(f_flags); 387 388 /* BB pass O_SYNC flag through on file attributes .. BB */ 389 390 /* O_SYNC also has bit for O_DSYNC so following check picks up either */ 391 if (f_flags & O_SYNC) 392 create_options |= CREATE_WRITE_THROUGH; 393 394 if (f_flags & O_DIRECT) 395 create_options |= CREATE_NO_BUFFER; 396 397 retry_open: 398 oparms = (struct cifs_open_parms) { 399 .tcon = tcon, 400 .cifs_sb = cifs_sb, 401 .desired_access = desired_access, 402 .create_options = cifs_create_options(cifs_sb, create_options), 403 .disposition = disposition, 404 .path = full_path, 405 .fid = fid, 406 }; 407 408 rc = server->ops->open(xid, &oparms, oplock, buf); 409 if (rc) { 410 if (rc == -EACCES && rdwr_for_fscache == 1) { 411 desired_access = cifs_convert_flags(f_flags, 0); 412 rdwr_for_fscache = 2; 413 goto retry_open; 414 } 415 return rc; 416 } 417 if (rdwr_for_fscache == 2) 418 cifs_invalidate_cache(inode, FSCACHE_INVAL_DIO_WRITE); 419 420 /* TODO: Add support for calling posix query info but with passing in fid */ 421 if (tcon->unix_ext) 422 rc = cifs_get_inode_info_unix(&inode, full_path, inode->i_sb, 423 xid); 424 else 425 rc = cifs_get_inode_info(&inode, full_path, buf, inode->i_sb, 426 xid, fid); 427 428 if (rc) { 429 server->ops->close(xid, tcon, fid); 430 if (rc == -ESTALE) 431 rc = -EOPENSTALE; 432 } 433 434 return rc; 435 } 436 437 static bool 438 cifs_has_mand_locks(struct cifsInodeInfo *cinode) 439 { 440 struct cifs_fid_locks *cur; 441 bool has_locks = false; 442 443 down_read(&cinode->lock_sem); 444 list_for_each_entry(cur, &cinode->llist, llist) { 445 if (!list_empty(&cur->locks)) { 446 has_locks = true; 447 break; 448 } 449 } 450 up_read(&cinode->lock_sem); 451 return has_locks; 452 } 453 454 void 455 cifs_down_write(struct rw_semaphore *sem) 456 { 457 while (!down_write_trylock(sem)) 458 msleep(10); 459 } 460 461 static void cifsFileInfo_put_work(struct work_struct *work); 462 void serverclose_work(struct work_struct *work); 463 464 struct cifsFileInfo *cifs_new_fileinfo(struct cifs_fid *fid, struct file *file, 465 struct tcon_link *tlink, __u32 oplock, 466 const char *symlink_target) 467 { 468 struct dentry *dentry = file_dentry(file); 469 struct inode *inode = d_inode(dentry); 470 struct cifsInodeInfo *cinode = CIFS_I(inode); 471 struct cifsFileInfo *cfile; 472 struct cifs_fid_locks *fdlocks; 473 struct cifs_tcon *tcon = tlink_tcon(tlink); 474 struct TCP_Server_Info *server = tcon->ses->server; 475 476 cfile = kzalloc(sizeof(struct cifsFileInfo), GFP_KERNEL); 477 if (cfile == NULL) 478 return cfile; 479 480 fdlocks = kzalloc(sizeof(struct cifs_fid_locks), GFP_KERNEL); 481 if (!fdlocks) { 482 kfree(cfile); 483 return NULL; 484 } 485 486 if (symlink_target) { 487 cfile->symlink_target = kstrdup(symlink_target, GFP_KERNEL); 488 if (!cfile->symlink_target) { 489 kfree(fdlocks); 490 kfree(cfile); 491 return NULL; 492 } 493 } 494 495 INIT_LIST_HEAD(&fdlocks->locks); 496 fdlocks->cfile = cfile; 497 cfile->llist = fdlocks; 498 499 cfile->count = 1; 500 cfile->pid = current->tgid; 501 cfile->uid = current_fsuid(); 502 cfile->dentry = dget(dentry); 503 cfile->f_flags = file->f_flags; 504 cfile->invalidHandle = false; 505 cfile->deferred_close_scheduled = false; 506 cfile->tlink = cifs_get_tlink(tlink); 507 INIT_WORK(&cfile->oplock_break, cifs_oplock_break); 508 INIT_WORK(&cfile->put, cifsFileInfo_put_work); 509 INIT_WORK(&cfile->serverclose, serverclose_work); 510 INIT_DELAYED_WORK(&cfile->deferred, smb2_deferred_work_close); 511 mutex_init(&cfile->fh_mutex); 512 spin_lock_init(&cfile->file_info_lock); 513 514 cifs_sb_active(inode->i_sb); 515 516 /* 517 * If the server returned a read oplock and we have mandatory brlocks, 518 * set oplock level to None. 519 */ 520 if (server->ops->is_read_op(oplock) && cifs_has_mand_locks(cinode)) { 521 cifs_dbg(FYI, "Reset oplock val from read to None due to mand locks\n"); 522 oplock = 0; 523 } 524 525 cifs_down_write(&cinode->lock_sem); 526 list_add(&fdlocks->llist, &cinode->llist); 527 up_write(&cinode->lock_sem); 528 529 spin_lock(&tcon->open_file_lock); 530 if (fid->pending_open->oplock != CIFS_OPLOCK_NO_CHANGE && oplock) 531 oplock = fid->pending_open->oplock; 532 list_del(&fid->pending_open->olist); 533 534 fid->purge_cache = false; 535 server->ops->set_fid(cfile, fid, oplock); 536 537 list_add(&cfile->tlist, &tcon->openFileList); 538 atomic_inc(&tcon->num_local_opens); 539 540 /* if readable file instance put first in list*/ 541 spin_lock(&cinode->open_file_lock); 542 if (file->f_mode & FMODE_READ) 543 list_add(&cfile->flist, &cinode->openFileList); 544 else 545 list_add_tail(&cfile->flist, &cinode->openFileList); 546 spin_unlock(&cinode->open_file_lock); 547 spin_unlock(&tcon->open_file_lock); 548 549 if (fid->purge_cache) 550 cifs_zap_mapping(inode); 551 552 file->private_data = cfile; 553 return cfile; 554 } 555 556 struct cifsFileInfo * 557 cifsFileInfo_get(struct cifsFileInfo *cifs_file) 558 { 559 spin_lock(&cifs_file->file_info_lock); 560 cifsFileInfo_get_locked(cifs_file); 561 spin_unlock(&cifs_file->file_info_lock); 562 return cifs_file; 563 } 564 565 static void cifsFileInfo_put_final(struct cifsFileInfo *cifs_file) 566 { 567 struct inode *inode = d_inode(cifs_file->dentry); 568 struct cifsInodeInfo *cifsi = CIFS_I(inode); 569 struct cifsLockInfo *li, *tmp; 570 struct super_block *sb = inode->i_sb; 571 572 /* 573 * Delete any outstanding lock records. We'll lose them when the file 574 * is closed anyway. 575 */ 576 cifs_down_write(&cifsi->lock_sem); 577 list_for_each_entry_safe(li, tmp, &cifs_file->llist->locks, llist) { 578 list_del(&li->llist); 579 cifs_del_lock_waiters(li); 580 kfree(li); 581 } 582 list_del(&cifs_file->llist->llist); 583 kfree(cifs_file->llist); 584 up_write(&cifsi->lock_sem); 585 586 cifs_put_tlink(cifs_file->tlink); 587 dput(cifs_file->dentry); 588 cifs_sb_deactive(sb); 589 kfree(cifs_file->symlink_target); 590 kfree(cifs_file); 591 } 592 593 static void cifsFileInfo_put_work(struct work_struct *work) 594 { 595 struct cifsFileInfo *cifs_file = container_of(work, 596 struct cifsFileInfo, put); 597 598 cifsFileInfo_put_final(cifs_file); 599 } 600 601 void serverclose_work(struct work_struct *work) 602 { 603 struct cifsFileInfo *cifs_file = container_of(work, 604 struct cifsFileInfo, serverclose); 605 606 struct cifs_tcon *tcon = tlink_tcon(cifs_file->tlink); 607 608 struct TCP_Server_Info *server = tcon->ses->server; 609 int rc = 0; 610 int retries = 0; 611 int MAX_RETRIES = 4; 612 613 do { 614 if (server->ops->close_getattr) 615 rc = server->ops->close_getattr(0, tcon, cifs_file); 616 else if (server->ops->close) 617 rc = server->ops->close(0, tcon, &cifs_file->fid); 618 619 if (rc == -EBUSY || rc == -EAGAIN) { 620 retries++; 621 msleep(250); 622 } 623 } while ((rc == -EBUSY || rc == -EAGAIN) && (retries < MAX_RETRIES) 624 ); 625 626 if (retries == MAX_RETRIES) 627 pr_warn("Serverclose failed %d times, giving up\n", MAX_RETRIES); 628 629 if (cifs_file->offload) 630 queue_work(fileinfo_put_wq, &cifs_file->put); 631 else 632 cifsFileInfo_put_final(cifs_file); 633 } 634 635 /** 636 * cifsFileInfo_put - release a reference of file priv data 637 * 638 * Always potentially wait for oplock handler. See _cifsFileInfo_put(). 639 * 640 * @cifs_file: cifs/smb3 specific info (eg refcounts) for an open file 641 */ 642 void cifsFileInfo_put(struct cifsFileInfo *cifs_file) 643 { 644 _cifsFileInfo_put(cifs_file, true, true); 645 } 646 647 /** 648 * _cifsFileInfo_put - release a reference of file priv data 649 * 650 * This may involve closing the filehandle @cifs_file out on the 651 * server. Must be called without holding tcon->open_file_lock, 652 * cinode->open_file_lock and cifs_file->file_info_lock. 653 * 654 * If @wait_for_oplock_handler is true and we are releasing the last 655 * reference, wait for any running oplock break handler of the file 656 * and cancel any pending one. 657 * 658 * @cifs_file: cifs/smb3 specific info (eg refcounts) for an open file 659 * @wait_oplock_handler: must be false if called from oplock_break_handler 660 * @offload: not offloaded on close and oplock breaks 661 * 662 */ 663 void _cifsFileInfo_put(struct cifsFileInfo *cifs_file, 664 bool wait_oplock_handler, bool offload) 665 { 666 struct inode *inode = d_inode(cifs_file->dentry); 667 struct cifs_tcon *tcon = tlink_tcon(cifs_file->tlink); 668 struct TCP_Server_Info *server = tcon->ses->server; 669 struct cifsInodeInfo *cifsi = CIFS_I(inode); 670 struct super_block *sb = inode->i_sb; 671 struct cifs_sb_info *cifs_sb = CIFS_SB(sb); 672 struct cifs_fid fid = {}; 673 struct cifs_pending_open open; 674 bool oplock_break_cancelled; 675 bool serverclose_offloaded = false; 676 677 spin_lock(&tcon->open_file_lock); 678 spin_lock(&cifsi->open_file_lock); 679 spin_lock(&cifs_file->file_info_lock); 680 681 cifs_file->offload = offload; 682 if (--cifs_file->count > 0) { 683 spin_unlock(&cifs_file->file_info_lock); 684 spin_unlock(&cifsi->open_file_lock); 685 spin_unlock(&tcon->open_file_lock); 686 return; 687 } 688 spin_unlock(&cifs_file->file_info_lock); 689 690 if (server->ops->get_lease_key) 691 server->ops->get_lease_key(inode, &fid); 692 693 /* store open in pending opens to make sure we don't miss lease break */ 694 cifs_add_pending_open_locked(&fid, cifs_file->tlink, &open); 695 696 /* remove it from the lists */ 697 list_del(&cifs_file->flist); 698 list_del(&cifs_file->tlist); 699 atomic_dec(&tcon->num_local_opens); 700 701 if (list_empty(&cifsi->openFileList)) { 702 cifs_dbg(FYI, "closing last open instance for inode %p\n", 703 d_inode(cifs_file->dentry)); 704 /* 705 * In strict cache mode we need invalidate mapping on the last 706 * close because it may cause a error when we open this file 707 * again and get at least level II oplock. 708 */ 709 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_STRICT_IO) 710 set_bit(CIFS_INO_INVALID_MAPPING, &cifsi->flags); 711 cifs_set_oplock_level(cifsi, 0); 712 } 713 714 spin_unlock(&cifsi->open_file_lock); 715 spin_unlock(&tcon->open_file_lock); 716 717 oplock_break_cancelled = wait_oplock_handler ? 718 cancel_work_sync(&cifs_file->oplock_break) : false; 719 720 if (!tcon->need_reconnect && !cifs_file->invalidHandle) { 721 struct TCP_Server_Info *server = tcon->ses->server; 722 unsigned int xid; 723 int rc = 0; 724 725 xid = get_xid(); 726 if (server->ops->close_getattr) 727 rc = server->ops->close_getattr(xid, tcon, cifs_file); 728 else if (server->ops->close) 729 rc = server->ops->close(xid, tcon, &cifs_file->fid); 730 _free_xid(xid); 731 732 if (rc == -EBUSY || rc == -EAGAIN) { 733 // Server close failed, hence offloading it as an async op 734 queue_work(serverclose_wq, &cifs_file->serverclose); 735 serverclose_offloaded = true; 736 } 737 } 738 739 if (oplock_break_cancelled) 740 cifs_done_oplock_break(cifsi); 741 742 cifs_del_pending_open(&open); 743 744 // if serverclose has been offloaded to wq (on failure), it will 745 // handle offloading put as well. If serverclose not offloaded, 746 // we need to handle offloading put here. 747 if (!serverclose_offloaded) { 748 if (offload) 749 queue_work(fileinfo_put_wq, &cifs_file->put); 750 else 751 cifsFileInfo_put_final(cifs_file); 752 } 753 } 754 755 int cifs_open(struct inode *inode, struct file *file) 756 757 { 758 int rc = -EACCES; 759 unsigned int xid; 760 __u32 oplock; 761 struct cifs_sb_info *cifs_sb; 762 struct TCP_Server_Info *server; 763 struct cifs_tcon *tcon; 764 struct tcon_link *tlink; 765 struct cifsFileInfo *cfile = NULL; 766 void *page; 767 const char *full_path; 768 bool posix_open_ok = false; 769 struct cifs_fid fid = {}; 770 struct cifs_pending_open open; 771 struct cifs_open_info_data data = {}; 772 773 xid = get_xid(); 774 775 cifs_sb = CIFS_SB(inode->i_sb); 776 if (unlikely(cifs_forced_shutdown(cifs_sb))) { 777 free_xid(xid); 778 return -EIO; 779 } 780 781 tlink = cifs_sb_tlink(cifs_sb); 782 if (IS_ERR(tlink)) { 783 free_xid(xid); 784 return PTR_ERR(tlink); 785 } 786 tcon = tlink_tcon(tlink); 787 server = tcon->ses->server; 788 789 page = alloc_dentry_path(); 790 full_path = build_path_from_dentry(file_dentry(file), page); 791 if (IS_ERR(full_path)) { 792 rc = PTR_ERR(full_path); 793 goto out; 794 } 795 796 cifs_dbg(FYI, "inode = 0x%p file flags are 0x%x for %s\n", 797 inode, file->f_flags, full_path); 798 799 if (file->f_flags & O_DIRECT && 800 cifs_sb->mnt_cifs_flags & CIFS_MOUNT_STRICT_IO) { 801 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_BRL) 802 file->f_op = &cifs_file_direct_nobrl_ops; 803 else 804 file->f_op = &cifs_file_direct_ops; 805 } 806 807 /* Get the cached handle as SMB2 close is deferred */ 808 rc = cifs_get_readable_path(tcon, full_path, &cfile); 809 if (rc == 0) { 810 if (file->f_flags == cfile->f_flags) { 811 file->private_data = cfile; 812 spin_lock(&CIFS_I(inode)->deferred_lock); 813 cifs_del_deferred_close(cfile); 814 spin_unlock(&CIFS_I(inode)->deferred_lock); 815 goto use_cache; 816 } else { 817 _cifsFileInfo_put(cfile, true, false); 818 } 819 } 820 821 if (server->oplocks) 822 oplock = REQ_OPLOCK; 823 else 824 oplock = 0; 825 826 #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY 827 if (!tcon->broken_posix_open && tcon->unix_ext && 828 cap_unix(tcon->ses) && (CIFS_UNIX_POSIX_PATH_OPS_CAP & 829 le64_to_cpu(tcon->fsUnixInfo.Capability))) { 830 /* can not refresh inode info since size could be stale */ 831 rc = cifs_posix_open(full_path, &inode, inode->i_sb, 832 cifs_sb->ctx->file_mode /* ignored */, 833 file->f_flags, &oplock, &fid.netfid, xid); 834 if (rc == 0) { 835 cifs_dbg(FYI, "posix open succeeded\n"); 836 posix_open_ok = true; 837 } else if ((rc == -EINVAL) || (rc == -EOPNOTSUPP)) { 838 if (tcon->ses->serverNOS) 839 cifs_dbg(VFS, "server %s of type %s returned unexpected error on SMB posix open, disabling posix open support. Check if server update available.\n", 840 tcon->ses->ip_addr, 841 tcon->ses->serverNOS); 842 tcon->broken_posix_open = true; 843 } else if ((rc != -EIO) && (rc != -EREMOTE) && 844 (rc != -EOPNOTSUPP)) /* path not found or net err */ 845 goto out; 846 /* 847 * Else fallthrough to retry open the old way on network i/o 848 * or DFS errors. 849 */ 850 } 851 #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */ 852 853 if (server->ops->get_lease_key) 854 server->ops->get_lease_key(inode, &fid); 855 856 cifs_add_pending_open(&fid, tlink, &open); 857 858 if (!posix_open_ok) { 859 if (server->ops->get_lease_key) 860 server->ops->get_lease_key(inode, &fid); 861 862 rc = cifs_nt_open(full_path, inode, cifs_sb, tcon, file->f_flags, &oplock, &fid, 863 xid, &data); 864 if (rc) { 865 cifs_del_pending_open(&open); 866 goto out; 867 } 868 } 869 870 cfile = cifs_new_fileinfo(&fid, file, tlink, oplock, data.symlink_target); 871 if (cfile == NULL) { 872 if (server->ops->close) 873 server->ops->close(xid, tcon, &fid); 874 cifs_del_pending_open(&open); 875 rc = -ENOMEM; 876 goto out; 877 } 878 879 #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY 880 if ((oplock & CIFS_CREATE_ACTION) && !posix_open_ok && tcon->unix_ext) { 881 /* 882 * Time to set mode which we can not set earlier due to 883 * problems creating new read-only files. 884 */ 885 struct cifs_unix_set_info_args args = { 886 .mode = inode->i_mode, 887 .uid = INVALID_UID, /* no change */ 888 .gid = INVALID_GID, /* no change */ 889 .ctime = NO_CHANGE_64, 890 .atime = NO_CHANGE_64, 891 .mtime = NO_CHANGE_64, 892 .device = 0, 893 }; 894 CIFSSMBUnixSetFileInfo(xid, tcon, &args, fid.netfid, 895 cfile->pid); 896 } 897 #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */ 898 899 use_cache: 900 fscache_use_cookie(cifs_inode_cookie(file_inode(file)), 901 file->f_mode & FMODE_WRITE); 902 if (!(file->f_flags & O_DIRECT)) 903 goto out; 904 if ((file->f_flags & (O_ACCMODE | O_APPEND)) == O_RDONLY) 905 goto out; 906 cifs_invalidate_cache(file_inode(file), FSCACHE_INVAL_DIO_WRITE); 907 908 out: 909 free_dentry_path(page); 910 free_xid(xid); 911 cifs_put_tlink(tlink); 912 cifs_free_open_info(&data); 913 return rc; 914 } 915 916 #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY 917 static int cifs_push_posix_locks(struct cifsFileInfo *cfile); 918 #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */ 919 920 /* 921 * Try to reacquire byte range locks that were released when session 922 * to server was lost. 923 */ 924 static int 925 cifs_relock_file(struct cifsFileInfo *cfile) 926 { 927 struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry)); 928 struct cifs_tcon *tcon = tlink_tcon(cfile->tlink); 929 int rc = 0; 930 #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY 931 struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb); 932 #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */ 933 934 down_read_nested(&cinode->lock_sem, SINGLE_DEPTH_NESTING); 935 if (cinode->can_cache_brlcks) { 936 /* can cache locks - no need to relock */ 937 up_read(&cinode->lock_sem); 938 return rc; 939 } 940 941 #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY 942 if (cap_unix(tcon->ses) && 943 (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) && 944 ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0)) 945 rc = cifs_push_posix_locks(cfile); 946 else 947 #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */ 948 rc = tcon->ses->server->ops->push_mand_locks(cfile); 949 950 up_read(&cinode->lock_sem); 951 return rc; 952 } 953 954 static int 955 cifs_reopen_file(struct cifsFileInfo *cfile, bool can_flush) 956 { 957 int rc = -EACCES; 958 unsigned int xid; 959 __u32 oplock; 960 struct cifs_sb_info *cifs_sb; 961 struct cifs_tcon *tcon; 962 struct TCP_Server_Info *server; 963 struct cifsInodeInfo *cinode; 964 struct inode *inode; 965 void *page; 966 const char *full_path; 967 int desired_access; 968 int disposition = FILE_OPEN; 969 int create_options = CREATE_NOT_DIR; 970 struct cifs_open_parms oparms; 971 int rdwr_for_fscache = 0; 972 973 xid = get_xid(); 974 mutex_lock(&cfile->fh_mutex); 975 if (!cfile->invalidHandle) { 976 mutex_unlock(&cfile->fh_mutex); 977 free_xid(xid); 978 return 0; 979 } 980 981 inode = d_inode(cfile->dentry); 982 cifs_sb = CIFS_SB(inode->i_sb); 983 tcon = tlink_tcon(cfile->tlink); 984 server = tcon->ses->server; 985 986 /* 987 * Can not grab rename sem here because various ops, including those 988 * that already have the rename sem can end up causing writepage to get 989 * called and if the server was down that means we end up here, and we 990 * can never tell if the caller already has the rename_sem. 991 */ 992 page = alloc_dentry_path(); 993 full_path = build_path_from_dentry(cfile->dentry, page); 994 if (IS_ERR(full_path)) { 995 mutex_unlock(&cfile->fh_mutex); 996 free_dentry_path(page); 997 free_xid(xid); 998 return PTR_ERR(full_path); 999 } 1000 1001 cifs_dbg(FYI, "inode = 0x%p file flags 0x%x for %s\n", 1002 inode, cfile->f_flags, full_path); 1003 1004 if (tcon->ses->server->oplocks) 1005 oplock = REQ_OPLOCK; 1006 else 1007 oplock = 0; 1008 1009 #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY 1010 if (tcon->unix_ext && cap_unix(tcon->ses) && 1011 (CIFS_UNIX_POSIX_PATH_OPS_CAP & 1012 le64_to_cpu(tcon->fsUnixInfo.Capability))) { 1013 /* 1014 * O_CREAT, O_EXCL and O_TRUNC already had their effect on the 1015 * original open. Must mask them off for a reopen. 1016 */ 1017 unsigned int oflags = cfile->f_flags & 1018 ~(O_CREAT | O_EXCL | O_TRUNC); 1019 1020 rc = cifs_posix_open(full_path, NULL, inode->i_sb, 1021 cifs_sb->ctx->file_mode /* ignored */, 1022 oflags, &oplock, &cfile->fid.netfid, xid); 1023 if (rc == 0) { 1024 cifs_dbg(FYI, "posix reopen succeeded\n"); 1025 oparms.reconnect = true; 1026 goto reopen_success; 1027 } 1028 /* 1029 * fallthrough to retry open the old way on errors, especially 1030 * in the reconnect path it is important to retry hard 1031 */ 1032 } 1033 #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */ 1034 1035 /* If we're caching, we need to be able to fill in around partial writes. */ 1036 if (cifs_fscache_enabled(inode) && (cfile->f_flags & O_ACCMODE) == O_WRONLY) 1037 rdwr_for_fscache = 1; 1038 1039 desired_access = cifs_convert_flags(cfile->f_flags, rdwr_for_fscache); 1040 1041 /* O_SYNC also has bit for O_DSYNC so following check picks up either */ 1042 if (cfile->f_flags & O_SYNC) 1043 create_options |= CREATE_WRITE_THROUGH; 1044 1045 if (cfile->f_flags & O_DIRECT) 1046 create_options |= CREATE_NO_BUFFER; 1047 1048 if (server->ops->get_lease_key) 1049 server->ops->get_lease_key(inode, &cfile->fid); 1050 1051 retry_open: 1052 oparms = (struct cifs_open_parms) { 1053 .tcon = tcon, 1054 .cifs_sb = cifs_sb, 1055 .desired_access = desired_access, 1056 .create_options = cifs_create_options(cifs_sb, create_options), 1057 .disposition = disposition, 1058 .path = full_path, 1059 .fid = &cfile->fid, 1060 .reconnect = true, 1061 }; 1062 1063 /* 1064 * Can not refresh inode by passing in file_info buf to be returned by 1065 * ops->open and then calling get_inode_info with returned buf since 1066 * file might have write behind data that needs to be flushed and server 1067 * version of file size can be stale. If we knew for sure that inode was 1068 * not dirty locally we could do this. 1069 */ 1070 rc = server->ops->open(xid, &oparms, &oplock, NULL); 1071 if (rc == -ENOENT && oparms.reconnect == false) { 1072 /* durable handle timeout is expired - open the file again */ 1073 rc = server->ops->open(xid, &oparms, &oplock, NULL); 1074 /* indicate that we need to relock the file */ 1075 oparms.reconnect = true; 1076 } 1077 if (rc == -EACCES && rdwr_for_fscache == 1) { 1078 desired_access = cifs_convert_flags(cfile->f_flags, 0); 1079 rdwr_for_fscache = 2; 1080 goto retry_open; 1081 } 1082 1083 if (rc) { 1084 mutex_unlock(&cfile->fh_mutex); 1085 cifs_dbg(FYI, "cifs_reopen returned 0x%x\n", rc); 1086 cifs_dbg(FYI, "oplock: %d\n", oplock); 1087 goto reopen_error_exit; 1088 } 1089 1090 if (rdwr_for_fscache == 2) 1091 cifs_invalidate_cache(inode, FSCACHE_INVAL_DIO_WRITE); 1092 1093 #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY 1094 reopen_success: 1095 #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */ 1096 cfile->invalidHandle = false; 1097 mutex_unlock(&cfile->fh_mutex); 1098 cinode = CIFS_I(inode); 1099 1100 if (can_flush) { 1101 rc = filemap_write_and_wait(inode->i_mapping); 1102 if (!is_interrupt_error(rc)) 1103 mapping_set_error(inode->i_mapping, rc); 1104 1105 if (tcon->posix_extensions) { 1106 rc = smb311_posix_get_inode_info(&inode, full_path, 1107 NULL, inode->i_sb, xid); 1108 } else if (tcon->unix_ext) { 1109 rc = cifs_get_inode_info_unix(&inode, full_path, 1110 inode->i_sb, xid); 1111 } else { 1112 rc = cifs_get_inode_info(&inode, full_path, NULL, 1113 inode->i_sb, xid, NULL); 1114 } 1115 } 1116 /* 1117 * Else we are writing out data to server already and could deadlock if 1118 * we tried to flush data, and since we do not know if we have data that 1119 * would invalidate the current end of file on the server we can not go 1120 * to the server to get the new inode info. 1121 */ 1122 1123 /* 1124 * If the server returned a read oplock and we have mandatory brlocks, 1125 * set oplock level to None. 1126 */ 1127 if (server->ops->is_read_op(oplock) && cifs_has_mand_locks(cinode)) { 1128 cifs_dbg(FYI, "Reset oplock val from read to None due to mand locks\n"); 1129 oplock = 0; 1130 } 1131 1132 server->ops->set_fid(cfile, &cfile->fid, oplock); 1133 if (oparms.reconnect) 1134 cifs_relock_file(cfile); 1135 1136 reopen_error_exit: 1137 free_dentry_path(page); 1138 free_xid(xid); 1139 return rc; 1140 } 1141 1142 void smb2_deferred_work_close(struct work_struct *work) 1143 { 1144 struct cifsFileInfo *cfile = container_of(work, 1145 struct cifsFileInfo, deferred.work); 1146 1147 spin_lock(&CIFS_I(d_inode(cfile->dentry))->deferred_lock); 1148 cifs_del_deferred_close(cfile); 1149 cfile->deferred_close_scheduled = false; 1150 spin_unlock(&CIFS_I(d_inode(cfile->dentry))->deferred_lock); 1151 _cifsFileInfo_put(cfile, true, false); 1152 } 1153 1154 int cifs_close(struct inode *inode, struct file *file) 1155 { 1156 struct cifsFileInfo *cfile; 1157 struct cifsInodeInfo *cinode = CIFS_I(inode); 1158 struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb); 1159 struct cifs_deferred_close *dclose; 1160 1161 cifs_fscache_unuse_inode_cookie(inode, file->f_mode & FMODE_WRITE); 1162 1163 if (file->private_data != NULL) { 1164 cfile = file->private_data; 1165 file->private_data = NULL; 1166 dclose = kmalloc(sizeof(struct cifs_deferred_close), GFP_KERNEL); 1167 if ((cifs_sb->ctx->closetimeo && cinode->oplock == CIFS_CACHE_RHW_FLG) 1168 && cinode->lease_granted && 1169 !test_bit(CIFS_INO_CLOSE_ON_LOCK, &cinode->flags) && 1170 dclose) { 1171 if (test_and_clear_bit(CIFS_INO_MODIFIED_ATTR, &cinode->flags)) { 1172 inode_set_mtime_to_ts(inode, 1173 inode_set_ctime_current(inode)); 1174 } 1175 spin_lock(&cinode->deferred_lock); 1176 cifs_add_deferred_close(cfile, dclose); 1177 if (cfile->deferred_close_scheduled && 1178 delayed_work_pending(&cfile->deferred)) { 1179 /* 1180 * If there is no pending work, mod_delayed_work queues new work. 1181 * So, Increase the ref count to avoid use-after-free. 1182 */ 1183 if (!mod_delayed_work(deferredclose_wq, 1184 &cfile->deferred, cifs_sb->ctx->closetimeo)) 1185 cifsFileInfo_get(cfile); 1186 } else { 1187 /* Deferred close for files */ 1188 queue_delayed_work(deferredclose_wq, 1189 &cfile->deferred, cifs_sb->ctx->closetimeo); 1190 cfile->deferred_close_scheduled = true; 1191 spin_unlock(&cinode->deferred_lock); 1192 return 0; 1193 } 1194 spin_unlock(&cinode->deferred_lock); 1195 _cifsFileInfo_put(cfile, true, false); 1196 } else { 1197 _cifsFileInfo_put(cfile, true, false); 1198 kfree(dclose); 1199 } 1200 } 1201 1202 /* return code from the ->release op is always ignored */ 1203 return 0; 1204 } 1205 1206 void 1207 cifs_reopen_persistent_handles(struct cifs_tcon *tcon) 1208 { 1209 struct cifsFileInfo *open_file, *tmp; 1210 struct list_head tmp_list; 1211 1212 if (!tcon->use_persistent || !tcon->need_reopen_files) 1213 return; 1214 1215 tcon->need_reopen_files = false; 1216 1217 cifs_dbg(FYI, "Reopen persistent handles\n"); 1218 INIT_LIST_HEAD(&tmp_list); 1219 1220 /* list all files open on tree connection, reopen resilient handles */ 1221 spin_lock(&tcon->open_file_lock); 1222 list_for_each_entry(open_file, &tcon->openFileList, tlist) { 1223 if (!open_file->invalidHandle) 1224 continue; 1225 cifsFileInfo_get(open_file); 1226 list_add_tail(&open_file->rlist, &tmp_list); 1227 } 1228 spin_unlock(&tcon->open_file_lock); 1229 1230 list_for_each_entry_safe(open_file, tmp, &tmp_list, rlist) { 1231 if (cifs_reopen_file(open_file, false /* do not flush */)) 1232 tcon->need_reopen_files = true; 1233 list_del_init(&open_file->rlist); 1234 cifsFileInfo_put(open_file); 1235 } 1236 } 1237 1238 int cifs_closedir(struct inode *inode, struct file *file) 1239 { 1240 int rc = 0; 1241 unsigned int xid; 1242 struct cifsFileInfo *cfile = file->private_data; 1243 struct cifs_tcon *tcon; 1244 struct TCP_Server_Info *server; 1245 char *buf; 1246 1247 cifs_dbg(FYI, "Closedir inode = 0x%p\n", inode); 1248 1249 if (cfile == NULL) 1250 return rc; 1251 1252 xid = get_xid(); 1253 tcon = tlink_tcon(cfile->tlink); 1254 server = tcon->ses->server; 1255 1256 cifs_dbg(FYI, "Freeing private data in close dir\n"); 1257 spin_lock(&cfile->file_info_lock); 1258 if (server->ops->dir_needs_close(cfile)) { 1259 cfile->invalidHandle = true; 1260 spin_unlock(&cfile->file_info_lock); 1261 if (server->ops->close_dir) 1262 rc = server->ops->close_dir(xid, tcon, &cfile->fid); 1263 else 1264 rc = -ENOSYS; 1265 cifs_dbg(FYI, "Closing uncompleted readdir with rc %d\n", rc); 1266 /* not much we can do if it fails anyway, ignore rc */ 1267 rc = 0; 1268 } else 1269 spin_unlock(&cfile->file_info_lock); 1270 1271 buf = cfile->srch_inf.ntwrk_buf_start; 1272 if (buf) { 1273 cifs_dbg(FYI, "closedir free smb buf in srch struct\n"); 1274 cfile->srch_inf.ntwrk_buf_start = NULL; 1275 if (cfile->srch_inf.smallBuf) 1276 cifs_small_buf_release(buf); 1277 else 1278 cifs_buf_release(buf); 1279 } 1280 1281 cifs_put_tlink(cfile->tlink); 1282 kfree(file->private_data); 1283 file->private_data = NULL; 1284 /* BB can we lock the filestruct while this is going on? */ 1285 free_xid(xid); 1286 return rc; 1287 } 1288 1289 static struct cifsLockInfo * 1290 cifs_lock_init(__u64 offset, __u64 length, __u8 type, __u16 flags) 1291 { 1292 struct cifsLockInfo *lock = 1293 kmalloc(sizeof(struct cifsLockInfo), GFP_KERNEL); 1294 if (!lock) 1295 return lock; 1296 lock->offset = offset; 1297 lock->length = length; 1298 lock->type = type; 1299 lock->pid = current->tgid; 1300 lock->flags = flags; 1301 INIT_LIST_HEAD(&lock->blist); 1302 init_waitqueue_head(&lock->block_q); 1303 return lock; 1304 } 1305 1306 void 1307 cifs_del_lock_waiters(struct cifsLockInfo *lock) 1308 { 1309 struct cifsLockInfo *li, *tmp; 1310 list_for_each_entry_safe(li, tmp, &lock->blist, blist) { 1311 list_del_init(&li->blist); 1312 wake_up(&li->block_q); 1313 } 1314 } 1315 1316 #define CIFS_LOCK_OP 0 1317 #define CIFS_READ_OP 1 1318 #define CIFS_WRITE_OP 2 1319 1320 /* @rw_check : 0 - no op, 1 - read, 2 - write */ 1321 static bool 1322 cifs_find_fid_lock_conflict(struct cifs_fid_locks *fdlocks, __u64 offset, 1323 __u64 length, __u8 type, __u16 flags, 1324 struct cifsFileInfo *cfile, 1325 struct cifsLockInfo **conf_lock, int rw_check) 1326 { 1327 struct cifsLockInfo *li; 1328 struct cifsFileInfo *cur_cfile = fdlocks->cfile; 1329 struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server; 1330 1331 list_for_each_entry(li, &fdlocks->locks, llist) { 1332 if (offset + length <= li->offset || 1333 offset >= li->offset + li->length) 1334 continue; 1335 if (rw_check != CIFS_LOCK_OP && current->tgid == li->pid && 1336 server->ops->compare_fids(cfile, cur_cfile)) { 1337 /* shared lock prevents write op through the same fid */ 1338 if (!(li->type & server->vals->shared_lock_type) || 1339 rw_check != CIFS_WRITE_OP) 1340 continue; 1341 } 1342 if ((type & server->vals->shared_lock_type) && 1343 ((server->ops->compare_fids(cfile, cur_cfile) && 1344 current->tgid == li->pid) || type == li->type)) 1345 continue; 1346 if (rw_check == CIFS_LOCK_OP && 1347 (flags & FL_OFDLCK) && (li->flags & FL_OFDLCK) && 1348 server->ops->compare_fids(cfile, cur_cfile)) 1349 continue; 1350 if (conf_lock) 1351 *conf_lock = li; 1352 return true; 1353 } 1354 return false; 1355 } 1356 1357 bool 1358 cifs_find_lock_conflict(struct cifsFileInfo *cfile, __u64 offset, __u64 length, 1359 __u8 type, __u16 flags, 1360 struct cifsLockInfo **conf_lock, int rw_check) 1361 { 1362 bool rc = false; 1363 struct cifs_fid_locks *cur; 1364 struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry)); 1365 1366 list_for_each_entry(cur, &cinode->llist, llist) { 1367 rc = cifs_find_fid_lock_conflict(cur, offset, length, type, 1368 flags, cfile, conf_lock, 1369 rw_check); 1370 if (rc) 1371 break; 1372 } 1373 1374 return rc; 1375 } 1376 1377 /* 1378 * Check if there is another lock that prevents us to set the lock (mandatory 1379 * style). If such a lock exists, update the flock structure with its 1380 * properties. Otherwise, set the flock type to F_UNLCK if we can cache brlocks 1381 * or leave it the same if we can't. Returns 0 if we don't need to request to 1382 * the server or 1 otherwise. 1383 */ 1384 static int 1385 cifs_lock_test(struct cifsFileInfo *cfile, __u64 offset, __u64 length, 1386 __u8 type, struct file_lock *flock) 1387 { 1388 int rc = 0; 1389 struct cifsLockInfo *conf_lock; 1390 struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry)); 1391 struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server; 1392 bool exist; 1393 1394 down_read(&cinode->lock_sem); 1395 1396 exist = cifs_find_lock_conflict(cfile, offset, length, type, 1397 flock->fl_flags, &conf_lock, 1398 CIFS_LOCK_OP); 1399 if (exist) { 1400 flock->fl_start = conf_lock->offset; 1401 flock->fl_end = conf_lock->offset + conf_lock->length - 1; 1402 flock->fl_pid = conf_lock->pid; 1403 if (conf_lock->type & server->vals->shared_lock_type) 1404 flock->fl_type = F_RDLCK; 1405 else 1406 flock->fl_type = F_WRLCK; 1407 } else if (!cinode->can_cache_brlcks) 1408 rc = 1; 1409 else 1410 flock->fl_type = F_UNLCK; 1411 1412 up_read(&cinode->lock_sem); 1413 return rc; 1414 } 1415 1416 static void 1417 cifs_lock_add(struct cifsFileInfo *cfile, struct cifsLockInfo *lock) 1418 { 1419 struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry)); 1420 cifs_down_write(&cinode->lock_sem); 1421 list_add_tail(&lock->llist, &cfile->llist->locks); 1422 up_write(&cinode->lock_sem); 1423 } 1424 1425 /* 1426 * Set the byte-range lock (mandatory style). Returns: 1427 * 1) 0, if we set the lock and don't need to request to the server; 1428 * 2) 1, if no locks prevent us but we need to request to the server; 1429 * 3) -EACCES, if there is a lock that prevents us and wait is false. 1430 */ 1431 static int 1432 cifs_lock_add_if(struct cifsFileInfo *cfile, struct cifsLockInfo *lock, 1433 bool wait) 1434 { 1435 struct cifsLockInfo *conf_lock; 1436 struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry)); 1437 bool exist; 1438 int rc = 0; 1439 1440 try_again: 1441 exist = false; 1442 cifs_down_write(&cinode->lock_sem); 1443 1444 exist = cifs_find_lock_conflict(cfile, lock->offset, lock->length, 1445 lock->type, lock->flags, &conf_lock, 1446 CIFS_LOCK_OP); 1447 if (!exist && cinode->can_cache_brlcks) { 1448 list_add_tail(&lock->llist, &cfile->llist->locks); 1449 up_write(&cinode->lock_sem); 1450 return rc; 1451 } 1452 1453 if (!exist) 1454 rc = 1; 1455 else if (!wait) 1456 rc = -EACCES; 1457 else { 1458 list_add_tail(&lock->blist, &conf_lock->blist); 1459 up_write(&cinode->lock_sem); 1460 rc = wait_event_interruptible(lock->block_q, 1461 (lock->blist.prev == &lock->blist) && 1462 (lock->blist.next == &lock->blist)); 1463 if (!rc) 1464 goto try_again; 1465 cifs_down_write(&cinode->lock_sem); 1466 list_del_init(&lock->blist); 1467 } 1468 1469 up_write(&cinode->lock_sem); 1470 return rc; 1471 } 1472 1473 #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY 1474 /* 1475 * Check if there is another lock that prevents us to set the lock (posix 1476 * style). If such a lock exists, update the flock structure with its 1477 * properties. Otherwise, set the flock type to F_UNLCK if we can cache brlocks 1478 * or leave it the same if we can't. Returns 0 if we don't need to request to 1479 * the server or 1 otherwise. 1480 */ 1481 static int 1482 cifs_posix_lock_test(struct file *file, struct file_lock *flock) 1483 { 1484 int rc = 0; 1485 struct cifsInodeInfo *cinode = CIFS_I(file_inode(file)); 1486 unsigned char saved_type = flock->fl_type; 1487 1488 if ((flock->fl_flags & FL_POSIX) == 0) 1489 return 1; 1490 1491 down_read(&cinode->lock_sem); 1492 posix_test_lock(file, flock); 1493 1494 if (flock->fl_type == F_UNLCK && !cinode->can_cache_brlcks) { 1495 flock->fl_type = saved_type; 1496 rc = 1; 1497 } 1498 1499 up_read(&cinode->lock_sem); 1500 return rc; 1501 } 1502 1503 /* 1504 * Set the byte-range lock (posix style). Returns: 1505 * 1) <0, if the error occurs while setting the lock; 1506 * 2) 0, if we set the lock and don't need to request to the server; 1507 * 3) FILE_LOCK_DEFERRED, if we will wait for some other file_lock; 1508 * 4) FILE_LOCK_DEFERRED + 1, if we need to request to the server. 1509 */ 1510 static int 1511 cifs_posix_lock_set(struct file *file, struct file_lock *flock) 1512 { 1513 struct cifsInodeInfo *cinode = CIFS_I(file_inode(file)); 1514 int rc = FILE_LOCK_DEFERRED + 1; 1515 1516 if ((flock->fl_flags & FL_POSIX) == 0) 1517 return rc; 1518 1519 cifs_down_write(&cinode->lock_sem); 1520 if (!cinode->can_cache_brlcks) { 1521 up_write(&cinode->lock_sem); 1522 return rc; 1523 } 1524 1525 rc = posix_lock_file(file, flock, NULL); 1526 up_write(&cinode->lock_sem); 1527 return rc; 1528 } 1529 1530 int 1531 cifs_push_mandatory_locks(struct cifsFileInfo *cfile) 1532 { 1533 unsigned int xid; 1534 int rc = 0, stored_rc; 1535 struct cifsLockInfo *li, *tmp; 1536 struct cifs_tcon *tcon; 1537 unsigned int num, max_num, max_buf; 1538 LOCKING_ANDX_RANGE *buf, *cur; 1539 static const int types[] = { 1540 LOCKING_ANDX_LARGE_FILES, 1541 LOCKING_ANDX_SHARED_LOCK | LOCKING_ANDX_LARGE_FILES 1542 }; 1543 int i; 1544 1545 xid = get_xid(); 1546 tcon = tlink_tcon(cfile->tlink); 1547 1548 /* 1549 * Accessing maxBuf is racy with cifs_reconnect - need to store value 1550 * and check it before using. 1551 */ 1552 max_buf = tcon->ses->server->maxBuf; 1553 if (max_buf < (sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE))) { 1554 free_xid(xid); 1555 return -EINVAL; 1556 } 1557 1558 BUILD_BUG_ON(sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE) > 1559 PAGE_SIZE); 1560 max_buf = min_t(unsigned int, max_buf - sizeof(struct smb_hdr), 1561 PAGE_SIZE); 1562 max_num = (max_buf - sizeof(struct smb_hdr)) / 1563 sizeof(LOCKING_ANDX_RANGE); 1564 buf = kcalloc(max_num, sizeof(LOCKING_ANDX_RANGE), GFP_KERNEL); 1565 if (!buf) { 1566 free_xid(xid); 1567 return -ENOMEM; 1568 } 1569 1570 for (i = 0; i < 2; i++) { 1571 cur = buf; 1572 num = 0; 1573 list_for_each_entry_safe(li, tmp, &cfile->llist->locks, llist) { 1574 if (li->type != types[i]) 1575 continue; 1576 cur->Pid = cpu_to_le16(li->pid); 1577 cur->LengthLow = cpu_to_le32((u32)li->length); 1578 cur->LengthHigh = cpu_to_le32((u32)(li->length>>32)); 1579 cur->OffsetLow = cpu_to_le32((u32)li->offset); 1580 cur->OffsetHigh = cpu_to_le32((u32)(li->offset>>32)); 1581 if (++num == max_num) { 1582 stored_rc = cifs_lockv(xid, tcon, 1583 cfile->fid.netfid, 1584 (__u8)li->type, 0, num, 1585 buf); 1586 if (stored_rc) 1587 rc = stored_rc; 1588 cur = buf; 1589 num = 0; 1590 } else 1591 cur++; 1592 } 1593 1594 if (num) { 1595 stored_rc = cifs_lockv(xid, tcon, cfile->fid.netfid, 1596 (__u8)types[i], 0, num, buf); 1597 if (stored_rc) 1598 rc = stored_rc; 1599 } 1600 } 1601 1602 kfree(buf); 1603 free_xid(xid); 1604 return rc; 1605 } 1606 1607 static __u32 1608 hash_lockowner(fl_owner_t owner) 1609 { 1610 return cifs_lock_secret ^ hash32_ptr((const void *)owner); 1611 } 1612 #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */ 1613 1614 struct lock_to_push { 1615 struct list_head llist; 1616 __u64 offset; 1617 __u64 length; 1618 __u32 pid; 1619 __u16 netfid; 1620 __u8 type; 1621 }; 1622 1623 #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY 1624 static int 1625 cifs_push_posix_locks(struct cifsFileInfo *cfile) 1626 { 1627 struct inode *inode = d_inode(cfile->dentry); 1628 struct cifs_tcon *tcon = tlink_tcon(cfile->tlink); 1629 struct file_lock *flock; 1630 struct file_lock_context *flctx = locks_inode_context(inode); 1631 unsigned int count = 0, i; 1632 int rc = 0, xid, type; 1633 struct list_head locks_to_send, *el; 1634 struct lock_to_push *lck, *tmp; 1635 __u64 length; 1636 1637 xid = get_xid(); 1638 1639 if (!flctx) 1640 goto out; 1641 1642 spin_lock(&flctx->flc_lock); 1643 list_for_each(el, &flctx->flc_posix) { 1644 count++; 1645 } 1646 spin_unlock(&flctx->flc_lock); 1647 1648 INIT_LIST_HEAD(&locks_to_send); 1649 1650 /* 1651 * Allocating count locks is enough because no FL_POSIX locks can be 1652 * added to the list while we are holding cinode->lock_sem that 1653 * protects locking operations of this inode. 1654 */ 1655 for (i = 0; i < count; i++) { 1656 lck = kmalloc(sizeof(struct lock_to_push), GFP_KERNEL); 1657 if (!lck) { 1658 rc = -ENOMEM; 1659 goto err_out; 1660 } 1661 list_add_tail(&lck->llist, &locks_to_send); 1662 } 1663 1664 el = locks_to_send.next; 1665 spin_lock(&flctx->flc_lock); 1666 list_for_each_entry(flock, &flctx->flc_posix, fl_list) { 1667 if (el == &locks_to_send) { 1668 /* 1669 * The list ended. We don't have enough allocated 1670 * structures - something is really wrong. 1671 */ 1672 cifs_dbg(VFS, "Can't push all brlocks!\n"); 1673 break; 1674 } 1675 length = cifs_flock_len(flock); 1676 if (flock->fl_type == F_RDLCK || flock->fl_type == F_SHLCK) 1677 type = CIFS_RDLCK; 1678 else 1679 type = CIFS_WRLCK; 1680 lck = list_entry(el, struct lock_to_push, llist); 1681 lck->pid = hash_lockowner(flock->fl_owner); 1682 lck->netfid = cfile->fid.netfid; 1683 lck->length = length; 1684 lck->type = type; 1685 lck->offset = flock->fl_start; 1686 } 1687 spin_unlock(&flctx->flc_lock); 1688 1689 list_for_each_entry_safe(lck, tmp, &locks_to_send, llist) { 1690 int stored_rc; 1691 1692 stored_rc = CIFSSMBPosixLock(xid, tcon, lck->netfid, lck->pid, 1693 lck->offset, lck->length, NULL, 1694 lck->type, 0); 1695 if (stored_rc) 1696 rc = stored_rc; 1697 list_del(&lck->llist); 1698 kfree(lck); 1699 } 1700 1701 out: 1702 free_xid(xid); 1703 return rc; 1704 err_out: 1705 list_for_each_entry_safe(lck, tmp, &locks_to_send, llist) { 1706 list_del(&lck->llist); 1707 kfree(lck); 1708 } 1709 goto out; 1710 } 1711 #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */ 1712 1713 static int 1714 cifs_push_locks(struct cifsFileInfo *cfile) 1715 { 1716 struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry)); 1717 struct cifs_tcon *tcon = tlink_tcon(cfile->tlink); 1718 int rc = 0; 1719 #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY 1720 struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb); 1721 #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */ 1722 1723 /* we are going to update can_cache_brlcks here - need a write access */ 1724 cifs_down_write(&cinode->lock_sem); 1725 if (!cinode->can_cache_brlcks) { 1726 up_write(&cinode->lock_sem); 1727 return rc; 1728 } 1729 1730 #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY 1731 if (cap_unix(tcon->ses) && 1732 (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) && 1733 ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0)) 1734 rc = cifs_push_posix_locks(cfile); 1735 else 1736 #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */ 1737 rc = tcon->ses->server->ops->push_mand_locks(cfile); 1738 1739 cinode->can_cache_brlcks = false; 1740 up_write(&cinode->lock_sem); 1741 return rc; 1742 } 1743 1744 static void 1745 cifs_read_flock(struct file_lock *flock, __u32 *type, int *lock, int *unlock, 1746 bool *wait_flag, struct TCP_Server_Info *server) 1747 { 1748 if (flock->fl_flags & FL_POSIX) 1749 cifs_dbg(FYI, "Posix\n"); 1750 if (flock->fl_flags & FL_FLOCK) 1751 cifs_dbg(FYI, "Flock\n"); 1752 if (flock->fl_flags & FL_SLEEP) { 1753 cifs_dbg(FYI, "Blocking lock\n"); 1754 *wait_flag = true; 1755 } 1756 if (flock->fl_flags & FL_ACCESS) 1757 cifs_dbg(FYI, "Process suspended by mandatory locking - not implemented yet\n"); 1758 if (flock->fl_flags & FL_LEASE) 1759 cifs_dbg(FYI, "Lease on file - not implemented yet\n"); 1760 if (flock->fl_flags & 1761 (~(FL_POSIX | FL_FLOCK | FL_SLEEP | 1762 FL_ACCESS | FL_LEASE | FL_CLOSE | FL_OFDLCK))) 1763 cifs_dbg(FYI, "Unknown lock flags 0x%x\n", flock->fl_flags); 1764 1765 *type = server->vals->large_lock_type; 1766 if (flock->fl_type == F_WRLCK) { 1767 cifs_dbg(FYI, "F_WRLCK\n"); 1768 *type |= server->vals->exclusive_lock_type; 1769 *lock = 1; 1770 } else if (flock->fl_type == F_UNLCK) { 1771 cifs_dbg(FYI, "F_UNLCK\n"); 1772 *type |= server->vals->unlock_lock_type; 1773 *unlock = 1; 1774 /* Check if unlock includes more than one lock range */ 1775 } else if (flock->fl_type == F_RDLCK) { 1776 cifs_dbg(FYI, "F_RDLCK\n"); 1777 *type |= server->vals->shared_lock_type; 1778 *lock = 1; 1779 } else if (flock->fl_type == F_EXLCK) { 1780 cifs_dbg(FYI, "F_EXLCK\n"); 1781 *type |= server->vals->exclusive_lock_type; 1782 *lock = 1; 1783 } else if (flock->fl_type == F_SHLCK) { 1784 cifs_dbg(FYI, "F_SHLCK\n"); 1785 *type |= server->vals->shared_lock_type; 1786 *lock = 1; 1787 } else 1788 cifs_dbg(FYI, "Unknown type of lock\n"); 1789 } 1790 1791 static int 1792 cifs_getlk(struct file *file, struct file_lock *flock, __u32 type, 1793 bool wait_flag, bool posix_lck, unsigned int xid) 1794 { 1795 int rc = 0; 1796 __u64 length = cifs_flock_len(flock); 1797 struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data; 1798 struct cifs_tcon *tcon = tlink_tcon(cfile->tlink); 1799 struct TCP_Server_Info *server = tcon->ses->server; 1800 #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY 1801 __u16 netfid = cfile->fid.netfid; 1802 1803 if (posix_lck) { 1804 int posix_lock_type; 1805 1806 rc = cifs_posix_lock_test(file, flock); 1807 if (!rc) 1808 return rc; 1809 1810 if (type & server->vals->shared_lock_type) 1811 posix_lock_type = CIFS_RDLCK; 1812 else 1813 posix_lock_type = CIFS_WRLCK; 1814 rc = CIFSSMBPosixLock(xid, tcon, netfid, 1815 hash_lockowner(flock->fl_owner), 1816 flock->fl_start, length, flock, 1817 posix_lock_type, wait_flag); 1818 return rc; 1819 } 1820 #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */ 1821 1822 rc = cifs_lock_test(cfile, flock->fl_start, length, type, flock); 1823 if (!rc) 1824 return rc; 1825 1826 /* BB we could chain these into one lock request BB */ 1827 rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length, type, 1828 1, 0, false); 1829 if (rc == 0) { 1830 rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length, 1831 type, 0, 1, false); 1832 flock->fl_type = F_UNLCK; 1833 if (rc != 0) 1834 cifs_dbg(VFS, "Error unlocking previously locked range %d during test of lock\n", 1835 rc); 1836 return 0; 1837 } 1838 1839 if (type & server->vals->shared_lock_type) { 1840 flock->fl_type = F_WRLCK; 1841 return 0; 1842 } 1843 1844 type &= ~server->vals->exclusive_lock_type; 1845 1846 rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length, 1847 type | server->vals->shared_lock_type, 1848 1, 0, false); 1849 if (rc == 0) { 1850 rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length, 1851 type | server->vals->shared_lock_type, 0, 1, false); 1852 flock->fl_type = F_RDLCK; 1853 if (rc != 0) 1854 cifs_dbg(VFS, "Error unlocking previously locked range %d during test of lock\n", 1855 rc); 1856 } else 1857 flock->fl_type = F_WRLCK; 1858 1859 return 0; 1860 } 1861 1862 void 1863 cifs_move_llist(struct list_head *source, struct list_head *dest) 1864 { 1865 struct list_head *li, *tmp; 1866 list_for_each_safe(li, tmp, source) 1867 list_move(li, dest); 1868 } 1869 1870 void 1871 cifs_free_llist(struct list_head *llist) 1872 { 1873 struct cifsLockInfo *li, *tmp; 1874 list_for_each_entry_safe(li, tmp, llist, llist) { 1875 cifs_del_lock_waiters(li); 1876 list_del(&li->llist); 1877 kfree(li); 1878 } 1879 } 1880 1881 #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY 1882 int 1883 cifs_unlock_range(struct cifsFileInfo *cfile, struct file_lock *flock, 1884 unsigned int xid) 1885 { 1886 int rc = 0, stored_rc; 1887 static const int types[] = { 1888 LOCKING_ANDX_LARGE_FILES, 1889 LOCKING_ANDX_SHARED_LOCK | LOCKING_ANDX_LARGE_FILES 1890 }; 1891 unsigned int i; 1892 unsigned int max_num, num, max_buf; 1893 LOCKING_ANDX_RANGE *buf, *cur; 1894 struct cifs_tcon *tcon = tlink_tcon(cfile->tlink); 1895 struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry)); 1896 struct cifsLockInfo *li, *tmp; 1897 __u64 length = cifs_flock_len(flock); 1898 struct list_head tmp_llist; 1899 1900 INIT_LIST_HEAD(&tmp_llist); 1901 1902 /* 1903 * Accessing maxBuf is racy with cifs_reconnect - need to store value 1904 * and check it before using. 1905 */ 1906 max_buf = tcon->ses->server->maxBuf; 1907 if (max_buf < (sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE))) 1908 return -EINVAL; 1909 1910 BUILD_BUG_ON(sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE) > 1911 PAGE_SIZE); 1912 max_buf = min_t(unsigned int, max_buf - sizeof(struct smb_hdr), 1913 PAGE_SIZE); 1914 max_num = (max_buf - sizeof(struct smb_hdr)) / 1915 sizeof(LOCKING_ANDX_RANGE); 1916 buf = kcalloc(max_num, sizeof(LOCKING_ANDX_RANGE), GFP_KERNEL); 1917 if (!buf) 1918 return -ENOMEM; 1919 1920 cifs_down_write(&cinode->lock_sem); 1921 for (i = 0; i < 2; i++) { 1922 cur = buf; 1923 num = 0; 1924 list_for_each_entry_safe(li, tmp, &cfile->llist->locks, llist) { 1925 if (flock->fl_start > li->offset || 1926 (flock->fl_start + length) < 1927 (li->offset + li->length)) 1928 continue; 1929 if (current->tgid != li->pid) 1930 continue; 1931 if (types[i] != li->type) 1932 continue; 1933 if (cinode->can_cache_brlcks) { 1934 /* 1935 * We can cache brlock requests - simply remove 1936 * a lock from the file's list. 1937 */ 1938 list_del(&li->llist); 1939 cifs_del_lock_waiters(li); 1940 kfree(li); 1941 continue; 1942 } 1943 cur->Pid = cpu_to_le16(li->pid); 1944 cur->LengthLow = cpu_to_le32((u32)li->length); 1945 cur->LengthHigh = cpu_to_le32((u32)(li->length>>32)); 1946 cur->OffsetLow = cpu_to_le32((u32)li->offset); 1947 cur->OffsetHigh = cpu_to_le32((u32)(li->offset>>32)); 1948 /* 1949 * We need to save a lock here to let us add it again to 1950 * the file's list if the unlock range request fails on 1951 * the server. 1952 */ 1953 list_move(&li->llist, &tmp_llist); 1954 if (++num == max_num) { 1955 stored_rc = cifs_lockv(xid, tcon, 1956 cfile->fid.netfid, 1957 li->type, num, 0, buf); 1958 if (stored_rc) { 1959 /* 1960 * We failed on the unlock range 1961 * request - add all locks from the tmp 1962 * list to the head of the file's list. 1963 */ 1964 cifs_move_llist(&tmp_llist, 1965 &cfile->llist->locks); 1966 rc = stored_rc; 1967 } else 1968 /* 1969 * The unlock range request succeed - 1970 * free the tmp list. 1971 */ 1972 cifs_free_llist(&tmp_llist); 1973 cur = buf; 1974 num = 0; 1975 } else 1976 cur++; 1977 } 1978 if (num) { 1979 stored_rc = cifs_lockv(xid, tcon, cfile->fid.netfid, 1980 types[i], num, 0, buf); 1981 if (stored_rc) { 1982 cifs_move_llist(&tmp_llist, 1983 &cfile->llist->locks); 1984 rc = stored_rc; 1985 } else 1986 cifs_free_llist(&tmp_llist); 1987 } 1988 } 1989 1990 up_write(&cinode->lock_sem); 1991 kfree(buf); 1992 return rc; 1993 } 1994 #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */ 1995 1996 static int 1997 cifs_setlk(struct file *file, struct file_lock *flock, __u32 type, 1998 bool wait_flag, bool posix_lck, int lock, int unlock, 1999 unsigned int xid) 2000 { 2001 int rc = 0; 2002 __u64 length = cifs_flock_len(flock); 2003 struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data; 2004 struct cifs_tcon *tcon = tlink_tcon(cfile->tlink); 2005 struct TCP_Server_Info *server = tcon->ses->server; 2006 struct inode *inode = d_inode(cfile->dentry); 2007 2008 #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY 2009 if (posix_lck) { 2010 int posix_lock_type; 2011 2012 rc = cifs_posix_lock_set(file, flock); 2013 if (rc <= FILE_LOCK_DEFERRED) 2014 return rc; 2015 2016 if (type & server->vals->shared_lock_type) 2017 posix_lock_type = CIFS_RDLCK; 2018 else 2019 posix_lock_type = CIFS_WRLCK; 2020 2021 if (unlock == 1) 2022 posix_lock_type = CIFS_UNLCK; 2023 2024 rc = CIFSSMBPosixLock(xid, tcon, cfile->fid.netfid, 2025 hash_lockowner(flock->fl_owner), 2026 flock->fl_start, length, 2027 NULL, posix_lock_type, wait_flag); 2028 goto out; 2029 } 2030 #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */ 2031 if (lock) { 2032 struct cifsLockInfo *lock; 2033 2034 lock = cifs_lock_init(flock->fl_start, length, type, 2035 flock->fl_flags); 2036 if (!lock) 2037 return -ENOMEM; 2038 2039 rc = cifs_lock_add_if(cfile, lock, wait_flag); 2040 if (rc < 0) { 2041 kfree(lock); 2042 return rc; 2043 } 2044 if (!rc) 2045 goto out; 2046 2047 /* 2048 * Windows 7 server can delay breaking lease from read to None 2049 * if we set a byte-range lock on a file - break it explicitly 2050 * before sending the lock to the server to be sure the next 2051 * read won't conflict with non-overlapted locks due to 2052 * pagereading. 2053 */ 2054 if (!CIFS_CACHE_WRITE(CIFS_I(inode)) && 2055 CIFS_CACHE_READ(CIFS_I(inode))) { 2056 cifs_zap_mapping(inode); 2057 cifs_dbg(FYI, "Set no oplock for inode=%p due to mand locks\n", 2058 inode); 2059 CIFS_I(inode)->oplock = 0; 2060 } 2061 2062 rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length, 2063 type, 1, 0, wait_flag); 2064 if (rc) { 2065 kfree(lock); 2066 return rc; 2067 } 2068 2069 cifs_lock_add(cfile, lock); 2070 } else if (unlock) 2071 rc = server->ops->mand_unlock_range(cfile, flock, xid); 2072 2073 out: 2074 if ((flock->fl_flags & FL_POSIX) || (flock->fl_flags & FL_FLOCK)) { 2075 /* 2076 * If this is a request to remove all locks because we 2077 * are closing the file, it doesn't matter if the 2078 * unlocking failed as both cifs.ko and the SMB server 2079 * remove the lock on file close 2080 */ 2081 if (rc) { 2082 cifs_dbg(VFS, "%s failed rc=%d\n", __func__, rc); 2083 if (!(flock->fl_flags & FL_CLOSE)) 2084 return rc; 2085 } 2086 rc = locks_lock_file_wait(file, flock); 2087 } 2088 return rc; 2089 } 2090 2091 int cifs_flock(struct file *file, int cmd, struct file_lock *fl) 2092 { 2093 int rc, xid; 2094 int lock = 0, unlock = 0; 2095 bool wait_flag = false; 2096 bool posix_lck = false; 2097 struct cifs_sb_info *cifs_sb; 2098 struct cifs_tcon *tcon; 2099 struct cifsFileInfo *cfile; 2100 __u32 type; 2101 2102 xid = get_xid(); 2103 2104 if (!(fl->fl_flags & FL_FLOCK)) { 2105 rc = -ENOLCK; 2106 free_xid(xid); 2107 return rc; 2108 } 2109 2110 cfile = (struct cifsFileInfo *)file->private_data; 2111 tcon = tlink_tcon(cfile->tlink); 2112 2113 cifs_read_flock(fl, &type, &lock, &unlock, &wait_flag, 2114 tcon->ses->server); 2115 cifs_sb = CIFS_FILE_SB(file); 2116 2117 if (cap_unix(tcon->ses) && 2118 (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) && 2119 ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0)) 2120 posix_lck = true; 2121 2122 if (!lock && !unlock) { 2123 /* 2124 * if no lock or unlock then nothing to do since we do not 2125 * know what it is 2126 */ 2127 rc = -EOPNOTSUPP; 2128 free_xid(xid); 2129 return rc; 2130 } 2131 2132 rc = cifs_setlk(file, fl, type, wait_flag, posix_lck, lock, unlock, 2133 xid); 2134 free_xid(xid); 2135 return rc; 2136 2137 2138 } 2139 2140 int cifs_lock(struct file *file, int cmd, struct file_lock *flock) 2141 { 2142 int rc, xid; 2143 int lock = 0, unlock = 0; 2144 bool wait_flag = false; 2145 bool posix_lck = false; 2146 struct cifs_sb_info *cifs_sb; 2147 struct cifs_tcon *tcon; 2148 struct cifsFileInfo *cfile; 2149 __u32 type; 2150 2151 rc = -EACCES; 2152 xid = get_xid(); 2153 2154 cifs_dbg(FYI, "%s: %pD2 cmd=0x%x type=0x%x flags=0x%x r=%lld:%lld\n", __func__, file, cmd, 2155 flock->fl_flags, flock->fl_type, (long long)flock->fl_start, 2156 (long long)flock->fl_end); 2157 2158 cfile = (struct cifsFileInfo *)file->private_data; 2159 tcon = tlink_tcon(cfile->tlink); 2160 2161 cifs_read_flock(flock, &type, &lock, &unlock, &wait_flag, 2162 tcon->ses->server); 2163 cifs_sb = CIFS_FILE_SB(file); 2164 set_bit(CIFS_INO_CLOSE_ON_LOCK, &CIFS_I(d_inode(cfile->dentry))->flags); 2165 2166 if (cap_unix(tcon->ses) && 2167 (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) && 2168 ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0)) 2169 posix_lck = true; 2170 /* 2171 * BB add code here to normalize offset and length to account for 2172 * negative length which we can not accept over the wire. 2173 */ 2174 if (IS_GETLK(cmd)) { 2175 rc = cifs_getlk(file, flock, type, wait_flag, posix_lck, xid); 2176 free_xid(xid); 2177 return rc; 2178 } 2179 2180 if (!lock && !unlock) { 2181 /* 2182 * if no lock or unlock then nothing to do since we do not 2183 * know what it is 2184 */ 2185 free_xid(xid); 2186 return -EOPNOTSUPP; 2187 } 2188 2189 rc = cifs_setlk(file, flock, type, wait_flag, posix_lck, lock, unlock, 2190 xid); 2191 free_xid(xid); 2192 return rc; 2193 } 2194 2195 /* 2196 * update the file size (if needed) after a write. Should be called with 2197 * the inode->i_lock held 2198 */ 2199 void 2200 cifs_update_eof(struct cifsInodeInfo *cifsi, loff_t offset, 2201 unsigned int bytes_written) 2202 { 2203 loff_t end_of_write = offset + bytes_written; 2204 2205 if (end_of_write > cifsi->server_eof) 2206 cifsi->server_eof = end_of_write; 2207 } 2208 2209 static ssize_t 2210 cifs_write(struct cifsFileInfo *open_file, __u32 pid, const char *write_data, 2211 size_t write_size, loff_t *offset) 2212 { 2213 int rc = 0; 2214 unsigned int bytes_written = 0; 2215 unsigned int total_written; 2216 struct cifs_tcon *tcon; 2217 struct TCP_Server_Info *server; 2218 unsigned int xid; 2219 struct dentry *dentry = open_file->dentry; 2220 struct cifsInodeInfo *cifsi = CIFS_I(d_inode(dentry)); 2221 struct cifs_io_parms io_parms = {0}; 2222 2223 cifs_dbg(FYI, "write %zd bytes to offset %lld of %pd\n", 2224 write_size, *offset, dentry); 2225 2226 tcon = tlink_tcon(open_file->tlink); 2227 server = tcon->ses->server; 2228 2229 if (!server->ops->sync_write) 2230 return -ENOSYS; 2231 2232 xid = get_xid(); 2233 2234 for (total_written = 0; write_size > total_written; 2235 total_written += bytes_written) { 2236 rc = -EAGAIN; 2237 while (rc == -EAGAIN) { 2238 struct kvec iov[2]; 2239 unsigned int len; 2240 2241 if (open_file->invalidHandle) { 2242 /* we could deadlock if we called 2243 filemap_fdatawait from here so tell 2244 reopen_file not to flush data to 2245 server now */ 2246 rc = cifs_reopen_file(open_file, false); 2247 if (rc != 0) 2248 break; 2249 } 2250 2251 len = min(server->ops->wp_retry_size(d_inode(dentry)), 2252 (unsigned int)write_size - total_written); 2253 /* iov[0] is reserved for smb header */ 2254 iov[1].iov_base = (char *)write_data + total_written; 2255 iov[1].iov_len = len; 2256 io_parms.pid = pid; 2257 io_parms.tcon = tcon; 2258 io_parms.offset = *offset; 2259 io_parms.length = len; 2260 rc = server->ops->sync_write(xid, &open_file->fid, 2261 &io_parms, &bytes_written, iov, 1); 2262 } 2263 if (rc || (bytes_written == 0)) { 2264 if (total_written) 2265 break; 2266 else { 2267 free_xid(xid); 2268 return rc; 2269 } 2270 } else { 2271 spin_lock(&d_inode(dentry)->i_lock); 2272 cifs_update_eof(cifsi, *offset, bytes_written); 2273 spin_unlock(&d_inode(dentry)->i_lock); 2274 *offset += bytes_written; 2275 } 2276 } 2277 2278 cifs_stats_bytes_written(tcon, total_written); 2279 2280 if (total_written > 0) { 2281 spin_lock(&d_inode(dentry)->i_lock); 2282 if (*offset > d_inode(dentry)->i_size) { 2283 i_size_write(d_inode(dentry), *offset); 2284 d_inode(dentry)->i_blocks = (512 - 1 + *offset) >> 9; 2285 } 2286 spin_unlock(&d_inode(dentry)->i_lock); 2287 } 2288 mark_inode_dirty_sync(d_inode(dentry)); 2289 free_xid(xid); 2290 return total_written; 2291 } 2292 2293 struct cifsFileInfo *find_readable_file(struct cifsInodeInfo *cifs_inode, 2294 bool fsuid_only) 2295 { 2296 struct cifsFileInfo *open_file = NULL; 2297 struct cifs_sb_info *cifs_sb = CIFS_SB(cifs_inode->netfs.inode.i_sb); 2298 2299 /* only filter by fsuid on multiuser mounts */ 2300 if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER)) 2301 fsuid_only = false; 2302 2303 spin_lock(&cifs_inode->open_file_lock); 2304 /* we could simply get the first_list_entry since write-only entries 2305 are always at the end of the list but since the first entry might 2306 have a close pending, we go through the whole list */ 2307 list_for_each_entry(open_file, &cifs_inode->openFileList, flist) { 2308 if (fsuid_only && !uid_eq(open_file->uid, current_fsuid())) 2309 continue; 2310 if (OPEN_FMODE(open_file->f_flags) & FMODE_READ) { 2311 if ((!open_file->invalidHandle)) { 2312 /* found a good file */ 2313 /* lock it so it will not be closed on us */ 2314 cifsFileInfo_get(open_file); 2315 spin_unlock(&cifs_inode->open_file_lock); 2316 return open_file; 2317 } /* else might as well continue, and look for 2318 another, or simply have the caller reopen it 2319 again rather than trying to fix this handle */ 2320 } else /* write only file */ 2321 break; /* write only files are last so must be done */ 2322 } 2323 spin_unlock(&cifs_inode->open_file_lock); 2324 return NULL; 2325 } 2326 2327 /* Return -EBADF if no handle is found and general rc otherwise */ 2328 int 2329 cifs_get_writable_file(struct cifsInodeInfo *cifs_inode, int flags, 2330 struct cifsFileInfo **ret_file) 2331 { 2332 struct cifsFileInfo *open_file, *inv_file = NULL; 2333 struct cifs_sb_info *cifs_sb; 2334 bool any_available = false; 2335 int rc = -EBADF; 2336 unsigned int refind = 0; 2337 bool fsuid_only = flags & FIND_WR_FSUID_ONLY; 2338 bool with_delete = flags & FIND_WR_WITH_DELETE; 2339 *ret_file = NULL; 2340 2341 /* 2342 * Having a null inode here (because mapping->host was set to zero by 2343 * the VFS or MM) should not happen but we had reports of on oops (due 2344 * to it being zero) during stress testcases so we need to check for it 2345 */ 2346 2347 if (cifs_inode == NULL) { 2348 cifs_dbg(VFS, "Null inode passed to cifs_writeable_file\n"); 2349 dump_stack(); 2350 return rc; 2351 } 2352 2353 cifs_sb = CIFS_SB(cifs_inode->netfs.inode.i_sb); 2354 2355 /* only filter by fsuid on multiuser mounts */ 2356 if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER)) 2357 fsuid_only = false; 2358 2359 spin_lock(&cifs_inode->open_file_lock); 2360 refind_writable: 2361 if (refind > MAX_REOPEN_ATT) { 2362 spin_unlock(&cifs_inode->open_file_lock); 2363 return rc; 2364 } 2365 list_for_each_entry(open_file, &cifs_inode->openFileList, flist) { 2366 if (!any_available && open_file->pid != current->tgid) 2367 continue; 2368 if (fsuid_only && !uid_eq(open_file->uid, current_fsuid())) 2369 continue; 2370 if (with_delete && !(open_file->fid.access & DELETE)) 2371 continue; 2372 if (OPEN_FMODE(open_file->f_flags) & FMODE_WRITE) { 2373 if (!open_file->invalidHandle) { 2374 /* found a good writable file */ 2375 cifsFileInfo_get(open_file); 2376 spin_unlock(&cifs_inode->open_file_lock); 2377 *ret_file = open_file; 2378 return 0; 2379 } else { 2380 if (!inv_file) 2381 inv_file = open_file; 2382 } 2383 } 2384 } 2385 /* couldn't find useable FH with same pid, try any available */ 2386 if (!any_available) { 2387 any_available = true; 2388 goto refind_writable; 2389 } 2390 2391 if (inv_file) { 2392 any_available = false; 2393 cifsFileInfo_get(inv_file); 2394 } 2395 2396 spin_unlock(&cifs_inode->open_file_lock); 2397 2398 if (inv_file) { 2399 rc = cifs_reopen_file(inv_file, false); 2400 if (!rc) { 2401 *ret_file = inv_file; 2402 return 0; 2403 } 2404 2405 spin_lock(&cifs_inode->open_file_lock); 2406 list_move_tail(&inv_file->flist, &cifs_inode->openFileList); 2407 spin_unlock(&cifs_inode->open_file_lock); 2408 cifsFileInfo_put(inv_file); 2409 ++refind; 2410 inv_file = NULL; 2411 spin_lock(&cifs_inode->open_file_lock); 2412 goto refind_writable; 2413 } 2414 2415 return rc; 2416 } 2417 2418 struct cifsFileInfo * 2419 find_writable_file(struct cifsInodeInfo *cifs_inode, int flags) 2420 { 2421 struct cifsFileInfo *cfile; 2422 int rc; 2423 2424 rc = cifs_get_writable_file(cifs_inode, flags, &cfile); 2425 if (rc) 2426 cifs_dbg(FYI, "Couldn't find writable handle rc=%d\n", rc); 2427 2428 return cfile; 2429 } 2430 2431 int 2432 cifs_get_writable_path(struct cifs_tcon *tcon, const char *name, 2433 int flags, 2434 struct cifsFileInfo **ret_file) 2435 { 2436 struct cifsFileInfo *cfile; 2437 void *page = alloc_dentry_path(); 2438 2439 *ret_file = NULL; 2440 2441 spin_lock(&tcon->open_file_lock); 2442 list_for_each_entry(cfile, &tcon->openFileList, tlist) { 2443 struct cifsInodeInfo *cinode; 2444 const char *full_path = build_path_from_dentry(cfile->dentry, page); 2445 if (IS_ERR(full_path)) { 2446 spin_unlock(&tcon->open_file_lock); 2447 free_dentry_path(page); 2448 return PTR_ERR(full_path); 2449 } 2450 if (strcmp(full_path, name)) 2451 continue; 2452 2453 cinode = CIFS_I(d_inode(cfile->dentry)); 2454 spin_unlock(&tcon->open_file_lock); 2455 free_dentry_path(page); 2456 return cifs_get_writable_file(cinode, flags, ret_file); 2457 } 2458 2459 spin_unlock(&tcon->open_file_lock); 2460 free_dentry_path(page); 2461 return -ENOENT; 2462 } 2463 2464 int 2465 cifs_get_readable_path(struct cifs_tcon *tcon, const char *name, 2466 struct cifsFileInfo **ret_file) 2467 { 2468 struct cifsFileInfo *cfile; 2469 void *page = alloc_dentry_path(); 2470 2471 *ret_file = NULL; 2472 2473 spin_lock(&tcon->open_file_lock); 2474 list_for_each_entry(cfile, &tcon->openFileList, tlist) { 2475 struct cifsInodeInfo *cinode; 2476 const char *full_path = build_path_from_dentry(cfile->dentry, page); 2477 if (IS_ERR(full_path)) { 2478 spin_unlock(&tcon->open_file_lock); 2479 free_dentry_path(page); 2480 return PTR_ERR(full_path); 2481 } 2482 if (strcmp(full_path, name)) 2483 continue; 2484 2485 cinode = CIFS_I(d_inode(cfile->dentry)); 2486 spin_unlock(&tcon->open_file_lock); 2487 free_dentry_path(page); 2488 *ret_file = find_readable_file(cinode, 0); 2489 return *ret_file ? 0 : -ENOENT; 2490 } 2491 2492 spin_unlock(&tcon->open_file_lock); 2493 free_dentry_path(page); 2494 return -ENOENT; 2495 } 2496 2497 void 2498 cifs_writedata_release(struct kref *refcount) 2499 { 2500 struct cifs_writedata *wdata = container_of(refcount, 2501 struct cifs_writedata, refcount); 2502 #ifdef CONFIG_CIFS_SMB_DIRECT 2503 if (wdata->mr) { 2504 smbd_deregister_mr(wdata->mr); 2505 wdata->mr = NULL; 2506 } 2507 #endif 2508 2509 if (wdata->cfile) 2510 cifsFileInfo_put(wdata->cfile); 2511 2512 kfree(wdata); 2513 } 2514 2515 /* 2516 * Write failed with a retryable error. Resend the write request. It's also 2517 * possible that the page was redirtied so re-clean the page. 2518 */ 2519 static void 2520 cifs_writev_requeue(struct cifs_writedata *wdata) 2521 { 2522 int rc = 0; 2523 struct inode *inode = d_inode(wdata->cfile->dentry); 2524 struct TCP_Server_Info *server; 2525 unsigned int rest_len = wdata->bytes; 2526 loff_t fpos = wdata->offset; 2527 2528 server = tlink_tcon(wdata->cfile->tlink)->ses->server; 2529 do { 2530 struct cifs_writedata *wdata2; 2531 unsigned int wsize, cur_len; 2532 2533 wsize = server->ops->wp_retry_size(inode); 2534 if (wsize < rest_len) { 2535 if (wsize < PAGE_SIZE) { 2536 rc = -EOPNOTSUPP; 2537 break; 2538 } 2539 cur_len = min(round_down(wsize, PAGE_SIZE), rest_len); 2540 } else { 2541 cur_len = rest_len; 2542 } 2543 2544 wdata2 = cifs_writedata_alloc(cifs_writev_complete); 2545 if (!wdata2) { 2546 rc = -ENOMEM; 2547 break; 2548 } 2549 2550 wdata2->sync_mode = wdata->sync_mode; 2551 wdata2->offset = fpos; 2552 wdata2->bytes = cur_len; 2553 wdata2->iter = wdata->iter; 2554 2555 iov_iter_advance(&wdata2->iter, fpos - wdata->offset); 2556 iov_iter_truncate(&wdata2->iter, wdata2->bytes); 2557 2558 if (iov_iter_is_xarray(&wdata2->iter)) 2559 /* Check for pages having been redirtied and clean 2560 * them. We can do this by walking the xarray. If 2561 * it's not an xarray, then it's a DIO and we shouldn't 2562 * be mucking around with the page bits. 2563 */ 2564 cifs_undirty_folios(inode, fpos, cur_len); 2565 2566 rc = cifs_get_writable_file(CIFS_I(inode), FIND_WR_ANY, 2567 &wdata2->cfile); 2568 if (!wdata2->cfile) { 2569 cifs_dbg(VFS, "No writable handle to retry writepages rc=%d\n", 2570 rc); 2571 if (!is_retryable_error(rc)) 2572 rc = -EBADF; 2573 } else { 2574 wdata2->pid = wdata2->cfile->pid; 2575 rc = server->ops->async_writev(wdata2, 2576 cifs_writedata_release); 2577 } 2578 2579 kref_put(&wdata2->refcount, cifs_writedata_release); 2580 if (rc) { 2581 if (is_retryable_error(rc)) 2582 continue; 2583 fpos += cur_len; 2584 rest_len -= cur_len; 2585 break; 2586 } 2587 2588 fpos += cur_len; 2589 rest_len -= cur_len; 2590 } while (rest_len > 0); 2591 2592 /* Clean up remaining pages from the original wdata */ 2593 if (iov_iter_is_xarray(&wdata->iter)) 2594 cifs_pages_write_failed(inode, fpos, rest_len); 2595 2596 if (rc != 0 && !is_retryable_error(rc)) 2597 mapping_set_error(inode->i_mapping, rc); 2598 kref_put(&wdata->refcount, cifs_writedata_release); 2599 } 2600 2601 void 2602 cifs_writev_complete(struct work_struct *work) 2603 { 2604 struct cifs_writedata *wdata = container_of(work, 2605 struct cifs_writedata, work); 2606 struct inode *inode = d_inode(wdata->cfile->dentry); 2607 2608 if (wdata->result == 0) { 2609 spin_lock(&inode->i_lock); 2610 cifs_update_eof(CIFS_I(inode), wdata->offset, wdata->bytes); 2611 spin_unlock(&inode->i_lock); 2612 cifs_stats_bytes_written(tlink_tcon(wdata->cfile->tlink), 2613 wdata->bytes); 2614 } else if (wdata->sync_mode == WB_SYNC_ALL && wdata->result == -EAGAIN) 2615 return cifs_writev_requeue(wdata); 2616 2617 if (wdata->result == -EAGAIN) 2618 cifs_pages_write_redirty(inode, wdata->offset, wdata->bytes); 2619 else if (wdata->result < 0) 2620 cifs_pages_write_failed(inode, wdata->offset, wdata->bytes); 2621 else 2622 cifs_pages_written_back(inode, wdata->offset, wdata->bytes); 2623 2624 if (wdata->result != -EAGAIN) 2625 mapping_set_error(inode->i_mapping, wdata->result); 2626 kref_put(&wdata->refcount, cifs_writedata_release); 2627 } 2628 2629 struct cifs_writedata *cifs_writedata_alloc(work_func_t complete) 2630 { 2631 struct cifs_writedata *wdata; 2632 2633 wdata = kzalloc(sizeof(*wdata), GFP_NOFS); 2634 if (wdata != NULL) { 2635 kref_init(&wdata->refcount); 2636 INIT_LIST_HEAD(&wdata->list); 2637 init_completion(&wdata->done); 2638 INIT_WORK(&wdata->work, complete); 2639 } 2640 return wdata; 2641 } 2642 2643 static int cifs_partialpagewrite(struct page *page, unsigned from, unsigned to) 2644 { 2645 struct address_space *mapping = page->mapping; 2646 loff_t offset = (loff_t)page->index << PAGE_SHIFT; 2647 char *write_data; 2648 int rc = -EFAULT; 2649 int bytes_written = 0; 2650 struct inode *inode; 2651 struct cifsFileInfo *open_file; 2652 2653 if (!mapping || !mapping->host) 2654 return -EFAULT; 2655 2656 inode = page->mapping->host; 2657 2658 offset += (loff_t)from; 2659 write_data = kmap(page); 2660 write_data += from; 2661 2662 if ((to > PAGE_SIZE) || (from > to)) { 2663 kunmap(page); 2664 return -EIO; 2665 } 2666 2667 /* racing with truncate? */ 2668 if (offset > mapping->host->i_size) { 2669 kunmap(page); 2670 return 0; /* don't care */ 2671 } 2672 2673 /* check to make sure that we are not extending the file */ 2674 if (mapping->host->i_size - offset < (loff_t)to) 2675 to = (unsigned)(mapping->host->i_size - offset); 2676 2677 rc = cifs_get_writable_file(CIFS_I(mapping->host), FIND_WR_ANY, 2678 &open_file); 2679 if (!rc) { 2680 bytes_written = cifs_write(open_file, open_file->pid, 2681 write_data, to - from, &offset); 2682 cifsFileInfo_put(open_file); 2683 /* Does mm or vfs already set times? */ 2684 simple_inode_init_ts(inode); 2685 if ((bytes_written > 0) && (offset)) 2686 rc = 0; 2687 else if (bytes_written < 0) 2688 rc = bytes_written; 2689 else 2690 rc = -EFAULT; 2691 } else { 2692 cifs_dbg(FYI, "No writable handle for write page rc=%d\n", rc); 2693 if (!is_retryable_error(rc)) 2694 rc = -EIO; 2695 } 2696 2697 kunmap(page); 2698 return rc; 2699 } 2700 2701 /* 2702 * Extend the region to be written back to include subsequent contiguously 2703 * dirty pages if possible, but don't sleep while doing so. 2704 */ 2705 static void cifs_extend_writeback(struct address_space *mapping, 2706 struct xa_state *xas, 2707 long *_count, 2708 loff_t start, 2709 int max_pages, 2710 loff_t max_len, 2711 size_t *_len) 2712 { 2713 struct folio_batch batch; 2714 struct folio *folio; 2715 unsigned int nr_pages; 2716 pgoff_t index = (start + *_len) / PAGE_SIZE; 2717 size_t len; 2718 bool stop = true; 2719 unsigned int i; 2720 2721 folio_batch_init(&batch); 2722 2723 do { 2724 /* Firstly, we gather up a batch of contiguous dirty pages 2725 * under the RCU read lock - but we can't clear the dirty flags 2726 * there if any of those pages are mapped. 2727 */ 2728 rcu_read_lock(); 2729 2730 xas_for_each(xas, folio, ULONG_MAX) { 2731 stop = true; 2732 if (xas_retry(xas, folio)) 2733 continue; 2734 if (xa_is_value(folio)) 2735 break; 2736 if (folio->index != index) { 2737 xas_reset(xas); 2738 break; 2739 } 2740 2741 if (!folio_try_get_rcu(folio)) { 2742 xas_reset(xas); 2743 continue; 2744 } 2745 nr_pages = folio_nr_pages(folio); 2746 if (nr_pages > max_pages) { 2747 xas_reset(xas); 2748 break; 2749 } 2750 2751 /* Has the page moved or been split? */ 2752 if (unlikely(folio != xas_reload(xas))) { 2753 folio_put(folio); 2754 xas_reset(xas); 2755 break; 2756 } 2757 2758 if (!folio_trylock(folio)) { 2759 folio_put(folio); 2760 xas_reset(xas); 2761 break; 2762 } 2763 if (!folio_test_dirty(folio) || 2764 folio_test_writeback(folio)) { 2765 folio_unlock(folio); 2766 folio_put(folio); 2767 xas_reset(xas); 2768 break; 2769 } 2770 2771 max_pages -= nr_pages; 2772 len = folio_size(folio); 2773 stop = false; 2774 2775 index += nr_pages; 2776 *_count -= nr_pages; 2777 *_len += len; 2778 if (max_pages <= 0 || *_len >= max_len || *_count <= 0) 2779 stop = true; 2780 2781 if (!folio_batch_add(&batch, folio)) 2782 break; 2783 if (stop) 2784 break; 2785 } 2786 2787 xas_pause(xas); 2788 rcu_read_unlock(); 2789 2790 /* Now, if we obtained any pages, we can shift them to being 2791 * writable and mark them for caching. 2792 */ 2793 if (!folio_batch_count(&batch)) 2794 break; 2795 2796 for (i = 0; i < folio_batch_count(&batch); i++) { 2797 folio = batch.folios[i]; 2798 /* The folio should be locked, dirty and not undergoing 2799 * writeback from the loop above. 2800 */ 2801 if (!folio_clear_dirty_for_io(folio)) 2802 WARN_ON(1); 2803 folio_start_writeback(folio); 2804 folio_unlock(folio); 2805 } 2806 2807 folio_batch_release(&batch); 2808 cond_resched(); 2809 } while (!stop); 2810 } 2811 2812 /* 2813 * Write back the locked page and any subsequent non-locked dirty pages. 2814 */ 2815 static ssize_t cifs_write_back_from_locked_folio(struct address_space *mapping, 2816 struct writeback_control *wbc, 2817 struct xa_state *xas, 2818 struct folio *folio, 2819 unsigned long long start, 2820 unsigned long long end) 2821 { 2822 struct inode *inode = mapping->host; 2823 struct TCP_Server_Info *server; 2824 struct cifs_writedata *wdata; 2825 struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb); 2826 struct cifs_credits credits_on_stack; 2827 struct cifs_credits *credits = &credits_on_stack; 2828 struct cifsFileInfo *cfile = NULL; 2829 unsigned long long i_size = i_size_read(inode), max_len; 2830 unsigned int xid, wsize; 2831 size_t len = folio_size(folio); 2832 long count = wbc->nr_to_write; 2833 int rc; 2834 2835 /* The folio should be locked, dirty and not undergoing writeback. */ 2836 if (!folio_clear_dirty_for_io(folio)) 2837 WARN_ON_ONCE(1); 2838 folio_start_writeback(folio); 2839 2840 count -= folio_nr_pages(folio); 2841 2842 xid = get_xid(); 2843 server = cifs_pick_channel(cifs_sb_master_tcon(cifs_sb)->ses); 2844 2845 rc = cifs_get_writable_file(CIFS_I(inode), FIND_WR_ANY, &cfile); 2846 if (rc) { 2847 cifs_dbg(VFS, "No writable handle in writepages rc=%d\n", rc); 2848 goto err_xid; 2849 } 2850 2851 rc = server->ops->wait_mtu_credits(server, cifs_sb->ctx->wsize, 2852 &wsize, credits); 2853 if (rc != 0) 2854 goto err_close; 2855 2856 wdata = cifs_writedata_alloc(cifs_writev_complete); 2857 if (!wdata) { 2858 rc = -ENOMEM; 2859 goto err_uncredit; 2860 } 2861 2862 wdata->sync_mode = wbc->sync_mode; 2863 wdata->offset = folio_pos(folio); 2864 wdata->pid = cfile->pid; 2865 wdata->credits = credits_on_stack; 2866 wdata->cfile = cfile; 2867 wdata->server = server; 2868 cfile = NULL; 2869 2870 /* Find all consecutive lockable dirty pages that have contiguous 2871 * written regions, stopping when we find a page that is not 2872 * immediately lockable, is not dirty or is missing, or we reach the 2873 * end of the range. 2874 */ 2875 if (start < i_size) { 2876 /* Trim the write to the EOF; the extra data is ignored. Also 2877 * put an upper limit on the size of a single storedata op. 2878 */ 2879 max_len = wsize; 2880 max_len = min_t(unsigned long long, max_len, end - start + 1); 2881 max_len = min_t(unsigned long long, max_len, i_size - start); 2882 2883 if (len < max_len) { 2884 int max_pages = INT_MAX; 2885 2886 #ifdef CONFIG_CIFS_SMB_DIRECT 2887 if (server->smbd_conn) 2888 max_pages = server->smbd_conn->max_frmr_depth; 2889 #endif 2890 max_pages -= folio_nr_pages(folio); 2891 2892 if (max_pages > 0) 2893 cifs_extend_writeback(mapping, xas, &count, start, 2894 max_pages, max_len, &len); 2895 } 2896 } 2897 len = min_t(unsigned long long, len, i_size - start); 2898 2899 /* We now have a contiguous set of dirty pages, each with writeback 2900 * set; the first page is still locked at this point, but all the rest 2901 * have been unlocked. 2902 */ 2903 folio_unlock(folio); 2904 wdata->bytes = len; 2905 2906 if (start < i_size) { 2907 iov_iter_xarray(&wdata->iter, ITER_SOURCE, &mapping->i_pages, 2908 start, len); 2909 2910 rc = adjust_credits(wdata->server, &wdata->credits, wdata->bytes); 2911 if (rc) 2912 goto err_wdata; 2913 2914 if (wdata->cfile->invalidHandle) 2915 rc = -EAGAIN; 2916 else 2917 rc = wdata->server->ops->async_writev(wdata, 2918 cifs_writedata_release); 2919 if (rc >= 0) { 2920 kref_put(&wdata->refcount, cifs_writedata_release); 2921 goto err_close; 2922 } 2923 } else { 2924 /* The dirty region was entirely beyond the EOF. */ 2925 cifs_pages_written_back(inode, start, len); 2926 rc = 0; 2927 } 2928 2929 err_wdata: 2930 kref_put(&wdata->refcount, cifs_writedata_release); 2931 err_uncredit: 2932 add_credits_and_wake_if(server, credits, 0); 2933 err_close: 2934 if (cfile) 2935 cifsFileInfo_put(cfile); 2936 err_xid: 2937 free_xid(xid); 2938 if (rc == 0) { 2939 wbc->nr_to_write = count; 2940 rc = len; 2941 } else if (is_retryable_error(rc)) { 2942 cifs_pages_write_redirty(inode, start, len); 2943 } else { 2944 cifs_pages_write_failed(inode, start, len); 2945 mapping_set_error(mapping, rc); 2946 } 2947 /* Indication to update ctime and mtime as close is deferred */ 2948 set_bit(CIFS_INO_MODIFIED_ATTR, &CIFS_I(inode)->flags); 2949 return rc; 2950 } 2951 2952 /* 2953 * write a region of pages back to the server 2954 */ 2955 static ssize_t cifs_writepages_begin(struct address_space *mapping, 2956 struct writeback_control *wbc, 2957 struct xa_state *xas, 2958 unsigned long long *_start, 2959 unsigned long long end) 2960 { 2961 struct folio *folio; 2962 unsigned long long start = *_start; 2963 ssize_t ret; 2964 int skips = 0; 2965 2966 search_again: 2967 /* Find the first dirty page. */ 2968 rcu_read_lock(); 2969 2970 for (;;) { 2971 folio = xas_find_marked(xas, end / PAGE_SIZE, PAGECACHE_TAG_DIRTY); 2972 if (xas_retry(xas, folio) || xa_is_value(folio)) 2973 continue; 2974 if (!folio) 2975 break; 2976 2977 if (!folio_try_get_rcu(folio)) { 2978 xas_reset(xas); 2979 continue; 2980 } 2981 2982 if (unlikely(folio != xas_reload(xas))) { 2983 folio_put(folio); 2984 xas_reset(xas); 2985 continue; 2986 } 2987 2988 xas_pause(xas); 2989 break; 2990 } 2991 rcu_read_unlock(); 2992 if (!folio) 2993 return 0; 2994 2995 start = folio_pos(folio); /* May regress with THPs */ 2996 2997 /* At this point we hold neither the i_pages lock nor the page lock: 2998 * the page may be truncated or invalidated (changing page->mapping to 2999 * NULL), or even swizzled back from swapper_space to tmpfs file 3000 * mapping 3001 */ 3002 lock_again: 3003 if (wbc->sync_mode != WB_SYNC_NONE) { 3004 ret = folio_lock_killable(folio); 3005 if (ret < 0) 3006 return ret; 3007 } else { 3008 if (!folio_trylock(folio)) 3009 goto search_again; 3010 } 3011 3012 if (folio->mapping != mapping || 3013 !folio_test_dirty(folio)) { 3014 start += folio_size(folio); 3015 folio_unlock(folio); 3016 goto search_again; 3017 } 3018 3019 if (folio_test_writeback(folio) || 3020 folio_test_fscache(folio)) { 3021 folio_unlock(folio); 3022 if (wbc->sync_mode != WB_SYNC_NONE) { 3023 folio_wait_writeback(folio); 3024 #ifdef CONFIG_CIFS_FSCACHE 3025 folio_wait_fscache(folio); 3026 #endif 3027 goto lock_again; 3028 } 3029 3030 start += folio_size(folio); 3031 if (wbc->sync_mode == WB_SYNC_NONE) { 3032 if (skips >= 5 || need_resched()) { 3033 ret = 0; 3034 goto out; 3035 } 3036 skips++; 3037 } 3038 goto search_again; 3039 } 3040 3041 ret = cifs_write_back_from_locked_folio(mapping, wbc, xas, folio, start, end); 3042 out: 3043 if (ret > 0) 3044 *_start = start + ret; 3045 return ret; 3046 } 3047 3048 /* 3049 * Write a region of pages back to the server 3050 */ 3051 static int cifs_writepages_region(struct address_space *mapping, 3052 struct writeback_control *wbc, 3053 unsigned long long *_start, 3054 unsigned long long end) 3055 { 3056 ssize_t ret; 3057 3058 XA_STATE(xas, &mapping->i_pages, *_start / PAGE_SIZE); 3059 3060 do { 3061 ret = cifs_writepages_begin(mapping, wbc, &xas, _start, end); 3062 if (ret > 0 && wbc->nr_to_write > 0) 3063 cond_resched(); 3064 } while (ret > 0 && wbc->nr_to_write > 0); 3065 3066 return ret > 0 ? 0 : ret; 3067 } 3068 3069 /* 3070 * Write some of the pending data back to the server 3071 */ 3072 static int cifs_writepages(struct address_space *mapping, 3073 struct writeback_control *wbc) 3074 { 3075 loff_t start, end; 3076 int ret; 3077 3078 /* We have to be careful as we can end up racing with setattr() 3079 * truncating the pagecache since the caller doesn't take a lock here 3080 * to prevent it. 3081 */ 3082 3083 if (wbc->range_cyclic && mapping->writeback_index) { 3084 start = mapping->writeback_index * PAGE_SIZE; 3085 ret = cifs_writepages_region(mapping, wbc, &start, LLONG_MAX); 3086 if (ret < 0) 3087 goto out; 3088 3089 if (wbc->nr_to_write <= 0) { 3090 mapping->writeback_index = start / PAGE_SIZE; 3091 goto out; 3092 } 3093 3094 start = 0; 3095 end = mapping->writeback_index * PAGE_SIZE; 3096 mapping->writeback_index = 0; 3097 ret = cifs_writepages_region(mapping, wbc, &start, end); 3098 if (ret == 0) 3099 mapping->writeback_index = start / PAGE_SIZE; 3100 } else if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX) { 3101 start = 0; 3102 ret = cifs_writepages_region(mapping, wbc, &start, LLONG_MAX); 3103 if (wbc->nr_to_write > 0 && ret == 0) 3104 mapping->writeback_index = start / PAGE_SIZE; 3105 } else { 3106 start = wbc->range_start; 3107 ret = cifs_writepages_region(mapping, wbc, &start, wbc->range_end); 3108 } 3109 3110 out: 3111 return ret; 3112 } 3113 3114 static int 3115 cifs_writepage_locked(struct page *page, struct writeback_control *wbc) 3116 { 3117 int rc; 3118 unsigned int xid; 3119 3120 xid = get_xid(); 3121 /* BB add check for wbc flags */ 3122 get_page(page); 3123 if (!PageUptodate(page)) 3124 cifs_dbg(FYI, "ppw - page not up to date\n"); 3125 3126 /* 3127 * Set the "writeback" flag, and clear "dirty" in the radix tree. 3128 * 3129 * A writepage() implementation always needs to do either this, 3130 * or re-dirty the page with "redirty_page_for_writepage()" in 3131 * the case of a failure. 3132 * 3133 * Just unlocking the page will cause the radix tree tag-bits 3134 * to fail to update with the state of the page correctly. 3135 */ 3136 set_page_writeback(page); 3137 retry_write: 3138 rc = cifs_partialpagewrite(page, 0, PAGE_SIZE); 3139 if (is_retryable_error(rc)) { 3140 if (wbc->sync_mode == WB_SYNC_ALL && rc == -EAGAIN) 3141 goto retry_write; 3142 redirty_page_for_writepage(wbc, page); 3143 } else if (rc != 0) { 3144 SetPageError(page); 3145 mapping_set_error(page->mapping, rc); 3146 } else { 3147 SetPageUptodate(page); 3148 } 3149 end_page_writeback(page); 3150 put_page(page); 3151 free_xid(xid); 3152 return rc; 3153 } 3154 3155 static int cifs_write_end(struct file *file, struct address_space *mapping, 3156 loff_t pos, unsigned len, unsigned copied, 3157 struct page *page, void *fsdata) 3158 { 3159 int rc; 3160 struct inode *inode = mapping->host; 3161 struct cifsFileInfo *cfile = file->private_data; 3162 struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb); 3163 struct folio *folio = page_folio(page); 3164 __u32 pid; 3165 3166 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD) 3167 pid = cfile->pid; 3168 else 3169 pid = current->tgid; 3170 3171 cifs_dbg(FYI, "write_end for page %p from pos %lld with %d bytes\n", 3172 page, pos, copied); 3173 3174 if (folio_test_checked(folio)) { 3175 if (copied == len) 3176 folio_mark_uptodate(folio); 3177 folio_clear_checked(folio); 3178 } else if (!folio_test_uptodate(folio) && copied == PAGE_SIZE) 3179 folio_mark_uptodate(folio); 3180 3181 if (!folio_test_uptodate(folio)) { 3182 char *page_data; 3183 unsigned offset = pos & (PAGE_SIZE - 1); 3184 unsigned int xid; 3185 3186 xid = get_xid(); 3187 /* this is probably better than directly calling 3188 partialpage_write since in this function the file handle is 3189 known which we might as well leverage */ 3190 /* BB check if anything else missing out of ppw 3191 such as updating last write time */ 3192 page_data = kmap(page); 3193 rc = cifs_write(cfile, pid, page_data + offset, copied, &pos); 3194 /* if (rc < 0) should we set writebehind rc? */ 3195 kunmap(page); 3196 3197 free_xid(xid); 3198 } else { 3199 rc = copied; 3200 pos += copied; 3201 set_page_dirty(page); 3202 } 3203 3204 if (rc > 0) { 3205 spin_lock(&inode->i_lock); 3206 if (pos > inode->i_size) { 3207 i_size_write(inode, pos); 3208 inode->i_blocks = (512 - 1 + pos) >> 9; 3209 } 3210 spin_unlock(&inode->i_lock); 3211 } 3212 3213 unlock_page(page); 3214 put_page(page); 3215 /* Indication to update ctime and mtime as close is deferred */ 3216 set_bit(CIFS_INO_MODIFIED_ATTR, &CIFS_I(inode)->flags); 3217 3218 return rc; 3219 } 3220 3221 int cifs_strict_fsync(struct file *file, loff_t start, loff_t end, 3222 int datasync) 3223 { 3224 unsigned int xid; 3225 int rc = 0; 3226 struct cifs_tcon *tcon; 3227 struct TCP_Server_Info *server; 3228 struct cifsFileInfo *smbfile = file->private_data; 3229 struct inode *inode = file_inode(file); 3230 struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb); 3231 3232 rc = file_write_and_wait_range(file, start, end); 3233 if (rc) { 3234 trace_cifs_fsync_err(inode->i_ino, rc); 3235 return rc; 3236 } 3237 3238 xid = get_xid(); 3239 3240 cifs_dbg(FYI, "Sync file - name: %pD datasync: 0x%x\n", 3241 file, datasync); 3242 3243 if (!CIFS_CACHE_READ(CIFS_I(inode))) { 3244 rc = cifs_zap_mapping(inode); 3245 if (rc) { 3246 cifs_dbg(FYI, "rc: %d during invalidate phase\n", rc); 3247 rc = 0; /* don't care about it in fsync */ 3248 } 3249 } 3250 3251 tcon = tlink_tcon(smbfile->tlink); 3252 if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC)) { 3253 server = tcon->ses->server; 3254 if (server->ops->flush == NULL) { 3255 rc = -ENOSYS; 3256 goto strict_fsync_exit; 3257 } 3258 3259 if ((OPEN_FMODE(smbfile->f_flags) & FMODE_WRITE) == 0) { 3260 smbfile = find_writable_file(CIFS_I(inode), FIND_WR_ANY); 3261 if (smbfile) { 3262 rc = server->ops->flush(xid, tcon, &smbfile->fid); 3263 cifsFileInfo_put(smbfile); 3264 } else 3265 cifs_dbg(FYI, "ignore fsync for file not open for write\n"); 3266 } else 3267 rc = server->ops->flush(xid, tcon, &smbfile->fid); 3268 } 3269 3270 strict_fsync_exit: 3271 free_xid(xid); 3272 return rc; 3273 } 3274 3275 int cifs_fsync(struct file *file, loff_t start, loff_t end, int datasync) 3276 { 3277 unsigned int xid; 3278 int rc = 0; 3279 struct cifs_tcon *tcon; 3280 struct TCP_Server_Info *server; 3281 struct cifsFileInfo *smbfile = file->private_data; 3282 struct inode *inode = file_inode(file); 3283 struct cifs_sb_info *cifs_sb = CIFS_FILE_SB(file); 3284 3285 rc = file_write_and_wait_range(file, start, end); 3286 if (rc) { 3287 trace_cifs_fsync_err(file_inode(file)->i_ino, rc); 3288 return rc; 3289 } 3290 3291 xid = get_xid(); 3292 3293 cifs_dbg(FYI, "Sync file - name: %pD datasync: 0x%x\n", 3294 file, datasync); 3295 3296 tcon = tlink_tcon(smbfile->tlink); 3297 if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC)) { 3298 server = tcon->ses->server; 3299 if (server->ops->flush == NULL) { 3300 rc = -ENOSYS; 3301 goto fsync_exit; 3302 } 3303 3304 if ((OPEN_FMODE(smbfile->f_flags) & FMODE_WRITE) == 0) { 3305 smbfile = find_writable_file(CIFS_I(inode), FIND_WR_ANY); 3306 if (smbfile) { 3307 rc = server->ops->flush(xid, tcon, &smbfile->fid); 3308 cifsFileInfo_put(smbfile); 3309 } else 3310 cifs_dbg(FYI, "ignore fsync for file not open for write\n"); 3311 } else 3312 rc = server->ops->flush(xid, tcon, &smbfile->fid); 3313 } 3314 3315 fsync_exit: 3316 free_xid(xid); 3317 return rc; 3318 } 3319 3320 /* 3321 * As file closes, flush all cached write data for this inode checking 3322 * for write behind errors. 3323 */ 3324 int cifs_flush(struct file *file, fl_owner_t id) 3325 { 3326 struct inode *inode = file_inode(file); 3327 int rc = 0; 3328 3329 if (file->f_mode & FMODE_WRITE) 3330 rc = filemap_write_and_wait(inode->i_mapping); 3331 3332 cifs_dbg(FYI, "Flush inode %p file %p rc %d\n", inode, file, rc); 3333 if (rc) { 3334 /* get more nuanced writeback errors */ 3335 rc = filemap_check_wb_err(file->f_mapping, 0); 3336 trace_cifs_flush_err(inode->i_ino, rc); 3337 } 3338 return rc; 3339 } 3340 3341 static void 3342 cifs_uncached_writedata_release(struct kref *refcount) 3343 { 3344 struct cifs_writedata *wdata = container_of(refcount, 3345 struct cifs_writedata, refcount); 3346 3347 kref_put(&wdata->ctx->refcount, cifs_aio_ctx_release); 3348 cifs_writedata_release(refcount); 3349 } 3350 3351 static void collect_uncached_write_data(struct cifs_aio_ctx *ctx); 3352 3353 static void 3354 cifs_uncached_writev_complete(struct work_struct *work) 3355 { 3356 struct cifs_writedata *wdata = container_of(work, 3357 struct cifs_writedata, work); 3358 struct inode *inode = d_inode(wdata->cfile->dentry); 3359 struct cifsInodeInfo *cifsi = CIFS_I(inode); 3360 3361 spin_lock(&inode->i_lock); 3362 cifs_update_eof(cifsi, wdata->offset, wdata->bytes); 3363 if (cifsi->server_eof > inode->i_size) 3364 i_size_write(inode, cifsi->server_eof); 3365 spin_unlock(&inode->i_lock); 3366 3367 complete(&wdata->done); 3368 collect_uncached_write_data(wdata->ctx); 3369 /* the below call can possibly free the last ref to aio ctx */ 3370 kref_put(&wdata->refcount, cifs_uncached_writedata_release); 3371 } 3372 3373 static int 3374 cifs_resend_wdata(struct cifs_writedata *wdata, struct list_head *wdata_list, 3375 struct cifs_aio_ctx *ctx) 3376 { 3377 unsigned int wsize; 3378 struct cifs_credits credits; 3379 int rc; 3380 struct TCP_Server_Info *server = wdata->server; 3381 3382 do { 3383 if (wdata->cfile->invalidHandle) { 3384 rc = cifs_reopen_file(wdata->cfile, false); 3385 if (rc == -EAGAIN) 3386 continue; 3387 else if (rc) 3388 break; 3389 } 3390 3391 3392 /* 3393 * Wait for credits to resend this wdata. 3394 * Note: we are attempting to resend the whole wdata not in 3395 * segments 3396 */ 3397 do { 3398 rc = server->ops->wait_mtu_credits(server, wdata->bytes, 3399 &wsize, &credits); 3400 if (rc) 3401 goto fail; 3402 3403 if (wsize < wdata->bytes) { 3404 add_credits_and_wake_if(server, &credits, 0); 3405 msleep(1000); 3406 } 3407 } while (wsize < wdata->bytes); 3408 wdata->credits = credits; 3409 3410 rc = adjust_credits(server, &wdata->credits, wdata->bytes); 3411 3412 if (!rc) { 3413 if (wdata->cfile->invalidHandle) 3414 rc = -EAGAIN; 3415 else { 3416 wdata->replay = true; 3417 #ifdef CONFIG_CIFS_SMB_DIRECT 3418 if (wdata->mr) { 3419 wdata->mr->need_invalidate = true; 3420 smbd_deregister_mr(wdata->mr); 3421 wdata->mr = NULL; 3422 } 3423 #endif 3424 rc = server->ops->async_writev(wdata, 3425 cifs_uncached_writedata_release); 3426 } 3427 } 3428 3429 /* If the write was successfully sent, we are done */ 3430 if (!rc) { 3431 list_add_tail(&wdata->list, wdata_list); 3432 return 0; 3433 } 3434 3435 /* Roll back credits and retry if needed */ 3436 add_credits_and_wake_if(server, &wdata->credits, 0); 3437 } while (rc == -EAGAIN); 3438 3439 fail: 3440 kref_put(&wdata->refcount, cifs_uncached_writedata_release); 3441 return rc; 3442 } 3443 3444 /* 3445 * Select span of a bvec iterator we're going to use. Limit it by both maximum 3446 * size and maximum number of segments. 3447 */ 3448 static size_t cifs_limit_bvec_subset(const struct iov_iter *iter, size_t max_size, 3449 size_t max_segs, unsigned int *_nsegs) 3450 { 3451 const struct bio_vec *bvecs = iter->bvec; 3452 unsigned int nbv = iter->nr_segs, ix = 0, nsegs = 0; 3453 size_t len, span = 0, n = iter->count; 3454 size_t skip = iter->iov_offset; 3455 3456 if (WARN_ON(!iov_iter_is_bvec(iter)) || n == 0) 3457 return 0; 3458 3459 while (n && ix < nbv && skip) { 3460 len = bvecs[ix].bv_len; 3461 if (skip < len) 3462 break; 3463 skip -= len; 3464 n -= len; 3465 ix++; 3466 } 3467 3468 while (n && ix < nbv) { 3469 len = min3(n, bvecs[ix].bv_len - skip, max_size); 3470 span += len; 3471 max_size -= len; 3472 nsegs++; 3473 ix++; 3474 if (max_size == 0 || nsegs >= max_segs) 3475 break; 3476 skip = 0; 3477 n -= len; 3478 } 3479 3480 *_nsegs = nsegs; 3481 return span; 3482 } 3483 3484 static int 3485 cifs_write_from_iter(loff_t fpos, size_t len, struct iov_iter *from, 3486 struct cifsFileInfo *open_file, 3487 struct cifs_sb_info *cifs_sb, struct list_head *wdata_list, 3488 struct cifs_aio_ctx *ctx) 3489 { 3490 int rc = 0; 3491 size_t cur_len, max_len; 3492 struct cifs_writedata *wdata; 3493 pid_t pid; 3494 struct TCP_Server_Info *server; 3495 unsigned int xid, max_segs = INT_MAX; 3496 3497 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD) 3498 pid = open_file->pid; 3499 else 3500 pid = current->tgid; 3501 3502 server = cifs_pick_channel(tlink_tcon(open_file->tlink)->ses); 3503 xid = get_xid(); 3504 3505 #ifdef CONFIG_CIFS_SMB_DIRECT 3506 if (server->smbd_conn) 3507 max_segs = server->smbd_conn->max_frmr_depth; 3508 #endif 3509 3510 do { 3511 struct cifs_credits credits_on_stack; 3512 struct cifs_credits *credits = &credits_on_stack; 3513 unsigned int wsize, nsegs = 0; 3514 3515 if (signal_pending(current)) { 3516 rc = -EINTR; 3517 break; 3518 } 3519 3520 if (open_file->invalidHandle) { 3521 rc = cifs_reopen_file(open_file, false); 3522 if (rc == -EAGAIN) 3523 continue; 3524 else if (rc) 3525 break; 3526 } 3527 3528 rc = server->ops->wait_mtu_credits(server, cifs_sb->ctx->wsize, 3529 &wsize, credits); 3530 if (rc) 3531 break; 3532 3533 max_len = min_t(const size_t, len, wsize); 3534 if (!max_len) { 3535 rc = -EAGAIN; 3536 add_credits_and_wake_if(server, credits, 0); 3537 break; 3538 } 3539 3540 cur_len = cifs_limit_bvec_subset(from, max_len, max_segs, &nsegs); 3541 cifs_dbg(FYI, "write_from_iter len=%zx/%zx nsegs=%u/%lu/%u\n", 3542 cur_len, max_len, nsegs, from->nr_segs, max_segs); 3543 if (cur_len == 0) { 3544 rc = -EIO; 3545 add_credits_and_wake_if(server, credits, 0); 3546 break; 3547 } 3548 3549 wdata = cifs_writedata_alloc(cifs_uncached_writev_complete); 3550 if (!wdata) { 3551 rc = -ENOMEM; 3552 add_credits_and_wake_if(server, credits, 0); 3553 break; 3554 } 3555 3556 wdata->sync_mode = WB_SYNC_ALL; 3557 wdata->offset = (__u64)fpos; 3558 wdata->cfile = cifsFileInfo_get(open_file); 3559 wdata->server = server; 3560 wdata->pid = pid; 3561 wdata->bytes = cur_len; 3562 wdata->credits = credits_on_stack; 3563 wdata->iter = *from; 3564 wdata->ctx = ctx; 3565 kref_get(&ctx->refcount); 3566 3567 iov_iter_truncate(&wdata->iter, cur_len); 3568 3569 rc = adjust_credits(server, &wdata->credits, wdata->bytes); 3570 3571 if (!rc) { 3572 if (wdata->cfile->invalidHandle) 3573 rc = -EAGAIN; 3574 else 3575 rc = server->ops->async_writev(wdata, 3576 cifs_uncached_writedata_release); 3577 } 3578 3579 if (rc) { 3580 add_credits_and_wake_if(server, &wdata->credits, 0); 3581 kref_put(&wdata->refcount, 3582 cifs_uncached_writedata_release); 3583 if (rc == -EAGAIN) 3584 continue; 3585 break; 3586 } 3587 3588 list_add_tail(&wdata->list, wdata_list); 3589 iov_iter_advance(from, cur_len); 3590 fpos += cur_len; 3591 len -= cur_len; 3592 } while (len > 0); 3593 3594 free_xid(xid); 3595 return rc; 3596 } 3597 3598 static void collect_uncached_write_data(struct cifs_aio_ctx *ctx) 3599 { 3600 struct cifs_writedata *wdata, *tmp; 3601 struct cifs_tcon *tcon; 3602 struct cifs_sb_info *cifs_sb; 3603 struct dentry *dentry = ctx->cfile->dentry; 3604 ssize_t rc; 3605 3606 tcon = tlink_tcon(ctx->cfile->tlink); 3607 cifs_sb = CIFS_SB(dentry->d_sb); 3608 3609 mutex_lock(&ctx->aio_mutex); 3610 3611 if (list_empty(&ctx->list)) { 3612 mutex_unlock(&ctx->aio_mutex); 3613 return; 3614 } 3615 3616 rc = ctx->rc; 3617 /* 3618 * Wait for and collect replies for any successful sends in order of 3619 * increasing offset. Once an error is hit, then return without waiting 3620 * for any more replies. 3621 */ 3622 restart_loop: 3623 list_for_each_entry_safe(wdata, tmp, &ctx->list, list) { 3624 if (!rc) { 3625 if (!try_wait_for_completion(&wdata->done)) { 3626 mutex_unlock(&ctx->aio_mutex); 3627 return; 3628 } 3629 3630 if (wdata->result) 3631 rc = wdata->result; 3632 else 3633 ctx->total_len += wdata->bytes; 3634 3635 /* resend call if it's a retryable error */ 3636 if (rc == -EAGAIN) { 3637 struct list_head tmp_list; 3638 struct iov_iter tmp_from = ctx->iter; 3639 3640 INIT_LIST_HEAD(&tmp_list); 3641 list_del_init(&wdata->list); 3642 3643 if (ctx->direct_io) 3644 rc = cifs_resend_wdata( 3645 wdata, &tmp_list, ctx); 3646 else { 3647 iov_iter_advance(&tmp_from, 3648 wdata->offset - ctx->pos); 3649 3650 rc = cifs_write_from_iter(wdata->offset, 3651 wdata->bytes, &tmp_from, 3652 ctx->cfile, cifs_sb, &tmp_list, 3653 ctx); 3654 3655 kref_put(&wdata->refcount, 3656 cifs_uncached_writedata_release); 3657 } 3658 3659 list_splice(&tmp_list, &ctx->list); 3660 goto restart_loop; 3661 } 3662 } 3663 list_del_init(&wdata->list); 3664 kref_put(&wdata->refcount, cifs_uncached_writedata_release); 3665 } 3666 3667 cifs_stats_bytes_written(tcon, ctx->total_len); 3668 set_bit(CIFS_INO_INVALID_MAPPING, &CIFS_I(dentry->d_inode)->flags); 3669 3670 ctx->rc = (rc == 0) ? ctx->total_len : rc; 3671 3672 mutex_unlock(&ctx->aio_mutex); 3673 3674 if (ctx->iocb && ctx->iocb->ki_complete) 3675 ctx->iocb->ki_complete(ctx->iocb, ctx->rc); 3676 else 3677 complete(&ctx->done); 3678 } 3679 3680 static ssize_t __cifs_writev( 3681 struct kiocb *iocb, struct iov_iter *from, bool direct) 3682 { 3683 struct file *file = iocb->ki_filp; 3684 ssize_t total_written = 0; 3685 struct cifsFileInfo *cfile; 3686 struct cifs_tcon *tcon; 3687 struct cifs_sb_info *cifs_sb; 3688 struct cifs_aio_ctx *ctx; 3689 int rc; 3690 3691 rc = generic_write_checks(iocb, from); 3692 if (rc <= 0) 3693 return rc; 3694 3695 cifs_sb = CIFS_FILE_SB(file); 3696 cfile = file->private_data; 3697 tcon = tlink_tcon(cfile->tlink); 3698 3699 if (!tcon->ses->server->ops->async_writev) 3700 return -ENOSYS; 3701 3702 ctx = cifs_aio_ctx_alloc(); 3703 if (!ctx) 3704 return -ENOMEM; 3705 3706 ctx->cfile = cifsFileInfo_get(cfile); 3707 3708 if (!is_sync_kiocb(iocb)) 3709 ctx->iocb = iocb; 3710 3711 ctx->pos = iocb->ki_pos; 3712 ctx->direct_io = direct; 3713 ctx->nr_pinned_pages = 0; 3714 3715 if (user_backed_iter(from)) { 3716 /* 3717 * Extract IOVEC/UBUF-type iterators to a BVEC-type iterator as 3718 * they contain references to the calling process's virtual 3719 * memory layout which won't be available in an async worker 3720 * thread. This also takes a pin on every folio involved. 3721 */ 3722 rc = netfs_extract_user_iter(from, iov_iter_count(from), 3723 &ctx->iter, 0); 3724 if (rc < 0) { 3725 kref_put(&ctx->refcount, cifs_aio_ctx_release); 3726 return rc; 3727 } 3728 3729 ctx->nr_pinned_pages = rc; 3730 ctx->bv = (void *)ctx->iter.bvec; 3731 ctx->bv_need_unpin = iov_iter_extract_will_pin(from); 3732 } else if ((iov_iter_is_bvec(from) || iov_iter_is_kvec(from)) && 3733 !is_sync_kiocb(iocb)) { 3734 /* 3735 * If the op is asynchronous, we need to copy the list attached 3736 * to a BVEC/KVEC-type iterator, but we assume that the storage 3737 * will be pinned by the caller; in any case, we may or may not 3738 * be able to pin the pages, so we don't try. 3739 */ 3740 ctx->bv = (void *)dup_iter(&ctx->iter, from, GFP_KERNEL); 3741 if (!ctx->bv) { 3742 kref_put(&ctx->refcount, cifs_aio_ctx_release); 3743 return -ENOMEM; 3744 } 3745 } else { 3746 /* 3747 * Otherwise, we just pass the iterator down as-is and rely on 3748 * the caller to make sure the pages referred to by the 3749 * iterator don't evaporate. 3750 */ 3751 ctx->iter = *from; 3752 } 3753 3754 ctx->len = iov_iter_count(&ctx->iter); 3755 3756 /* grab a lock here due to read response handlers can access ctx */ 3757 mutex_lock(&ctx->aio_mutex); 3758 3759 rc = cifs_write_from_iter(iocb->ki_pos, ctx->len, &ctx->iter, 3760 cfile, cifs_sb, &ctx->list, ctx); 3761 3762 /* 3763 * If at least one write was successfully sent, then discard any rc 3764 * value from the later writes. If the other write succeeds, then 3765 * we'll end up returning whatever was written. If it fails, then 3766 * we'll get a new rc value from that. 3767 */ 3768 if (!list_empty(&ctx->list)) 3769 rc = 0; 3770 3771 mutex_unlock(&ctx->aio_mutex); 3772 3773 if (rc) { 3774 kref_put(&ctx->refcount, cifs_aio_ctx_release); 3775 return rc; 3776 } 3777 3778 if (!is_sync_kiocb(iocb)) { 3779 kref_put(&ctx->refcount, cifs_aio_ctx_release); 3780 return -EIOCBQUEUED; 3781 } 3782 3783 rc = wait_for_completion_killable(&ctx->done); 3784 if (rc) { 3785 mutex_lock(&ctx->aio_mutex); 3786 ctx->rc = rc = -EINTR; 3787 total_written = ctx->total_len; 3788 mutex_unlock(&ctx->aio_mutex); 3789 } else { 3790 rc = ctx->rc; 3791 total_written = ctx->total_len; 3792 } 3793 3794 kref_put(&ctx->refcount, cifs_aio_ctx_release); 3795 3796 if (unlikely(!total_written)) 3797 return rc; 3798 3799 iocb->ki_pos += total_written; 3800 return total_written; 3801 } 3802 3803 ssize_t cifs_direct_writev(struct kiocb *iocb, struct iov_iter *from) 3804 { 3805 struct file *file = iocb->ki_filp; 3806 3807 cifs_revalidate_mapping(file->f_inode); 3808 return __cifs_writev(iocb, from, true); 3809 } 3810 3811 ssize_t cifs_user_writev(struct kiocb *iocb, struct iov_iter *from) 3812 { 3813 return __cifs_writev(iocb, from, false); 3814 } 3815 3816 static ssize_t 3817 cifs_writev(struct kiocb *iocb, struct iov_iter *from) 3818 { 3819 struct file *file = iocb->ki_filp; 3820 struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data; 3821 struct inode *inode = file->f_mapping->host; 3822 struct cifsInodeInfo *cinode = CIFS_I(inode); 3823 struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server; 3824 ssize_t rc; 3825 3826 inode_lock(inode); 3827 /* 3828 * We need to hold the sem to be sure nobody modifies lock list 3829 * with a brlock that prevents writing. 3830 */ 3831 down_read(&cinode->lock_sem); 3832 3833 rc = generic_write_checks(iocb, from); 3834 if (rc <= 0) 3835 goto out; 3836 3837 if (!cifs_find_lock_conflict(cfile, iocb->ki_pos, iov_iter_count(from), 3838 server->vals->exclusive_lock_type, 0, 3839 NULL, CIFS_WRITE_OP)) 3840 rc = __generic_file_write_iter(iocb, from); 3841 else 3842 rc = -EACCES; 3843 out: 3844 up_read(&cinode->lock_sem); 3845 inode_unlock(inode); 3846 3847 if (rc > 0) 3848 rc = generic_write_sync(iocb, rc); 3849 return rc; 3850 } 3851 3852 ssize_t 3853 cifs_strict_writev(struct kiocb *iocb, struct iov_iter *from) 3854 { 3855 struct inode *inode = file_inode(iocb->ki_filp); 3856 struct cifsInodeInfo *cinode = CIFS_I(inode); 3857 struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb); 3858 struct cifsFileInfo *cfile = (struct cifsFileInfo *) 3859 iocb->ki_filp->private_data; 3860 struct cifs_tcon *tcon = tlink_tcon(cfile->tlink); 3861 ssize_t written; 3862 3863 written = cifs_get_writer(cinode); 3864 if (written) 3865 return written; 3866 3867 if (CIFS_CACHE_WRITE(cinode)) { 3868 if (cap_unix(tcon->ses) && 3869 (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) 3870 && ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0)) { 3871 written = generic_file_write_iter(iocb, from); 3872 goto out; 3873 } 3874 written = cifs_writev(iocb, from); 3875 goto out; 3876 } 3877 /* 3878 * For non-oplocked files in strict cache mode we need to write the data 3879 * to the server exactly from the pos to pos+len-1 rather than flush all 3880 * affected pages because it may cause a error with mandatory locks on 3881 * these pages but not on the region from pos to ppos+len-1. 3882 */ 3883 written = cifs_user_writev(iocb, from); 3884 if (CIFS_CACHE_READ(cinode)) { 3885 /* 3886 * We have read level caching and we have just sent a write 3887 * request to the server thus making data in the cache stale. 3888 * Zap the cache and set oplock/lease level to NONE to avoid 3889 * reading stale data from the cache. All subsequent read 3890 * operations will read new data from the server. 3891 */ 3892 cifs_zap_mapping(inode); 3893 cifs_dbg(FYI, "Set Oplock/Lease to NONE for inode=%p after write\n", 3894 inode); 3895 cinode->oplock = 0; 3896 } 3897 out: 3898 cifs_put_writer(cinode); 3899 return written; 3900 } 3901 3902 static struct cifs_readdata *cifs_readdata_alloc(work_func_t complete) 3903 { 3904 struct cifs_readdata *rdata; 3905 3906 rdata = kzalloc(sizeof(*rdata), GFP_KERNEL); 3907 if (rdata) { 3908 kref_init(&rdata->refcount); 3909 INIT_LIST_HEAD(&rdata->list); 3910 init_completion(&rdata->done); 3911 INIT_WORK(&rdata->work, complete); 3912 } 3913 3914 return rdata; 3915 } 3916 3917 void 3918 cifs_readdata_release(struct kref *refcount) 3919 { 3920 struct cifs_readdata *rdata = container_of(refcount, 3921 struct cifs_readdata, refcount); 3922 3923 if (rdata->ctx) 3924 kref_put(&rdata->ctx->refcount, cifs_aio_ctx_release); 3925 #ifdef CONFIG_CIFS_SMB_DIRECT 3926 if (rdata->mr) { 3927 smbd_deregister_mr(rdata->mr); 3928 rdata->mr = NULL; 3929 } 3930 #endif 3931 if (rdata->cfile) 3932 cifsFileInfo_put(rdata->cfile); 3933 3934 kfree(rdata); 3935 } 3936 3937 static void collect_uncached_read_data(struct cifs_aio_ctx *ctx); 3938 3939 static void 3940 cifs_uncached_readv_complete(struct work_struct *work) 3941 { 3942 struct cifs_readdata *rdata = container_of(work, 3943 struct cifs_readdata, work); 3944 3945 complete(&rdata->done); 3946 collect_uncached_read_data(rdata->ctx); 3947 /* the below call can possibly free the last ref to aio ctx */ 3948 kref_put(&rdata->refcount, cifs_readdata_release); 3949 } 3950 3951 static int cifs_resend_rdata(struct cifs_readdata *rdata, 3952 struct list_head *rdata_list, 3953 struct cifs_aio_ctx *ctx) 3954 { 3955 unsigned int rsize; 3956 struct cifs_credits credits; 3957 int rc; 3958 struct TCP_Server_Info *server; 3959 3960 /* XXX: should we pick a new channel here? */ 3961 server = rdata->server; 3962 3963 do { 3964 if (rdata->cfile->invalidHandle) { 3965 rc = cifs_reopen_file(rdata->cfile, true); 3966 if (rc == -EAGAIN) 3967 continue; 3968 else if (rc) 3969 break; 3970 } 3971 3972 /* 3973 * Wait for credits to resend this rdata. 3974 * Note: we are attempting to resend the whole rdata not in 3975 * segments 3976 */ 3977 do { 3978 rc = server->ops->wait_mtu_credits(server, rdata->bytes, 3979 &rsize, &credits); 3980 3981 if (rc) 3982 goto fail; 3983 3984 if (rsize < rdata->bytes) { 3985 add_credits_and_wake_if(server, &credits, 0); 3986 msleep(1000); 3987 } 3988 } while (rsize < rdata->bytes); 3989 rdata->credits = credits; 3990 3991 rc = adjust_credits(server, &rdata->credits, rdata->bytes); 3992 if (!rc) { 3993 if (rdata->cfile->invalidHandle) 3994 rc = -EAGAIN; 3995 else { 3996 #ifdef CONFIG_CIFS_SMB_DIRECT 3997 if (rdata->mr) { 3998 rdata->mr->need_invalidate = true; 3999 smbd_deregister_mr(rdata->mr); 4000 rdata->mr = NULL; 4001 } 4002 #endif 4003 rc = server->ops->async_readv(rdata); 4004 } 4005 } 4006 4007 /* If the read was successfully sent, we are done */ 4008 if (!rc) { 4009 /* Add to aio pending list */ 4010 list_add_tail(&rdata->list, rdata_list); 4011 return 0; 4012 } 4013 4014 /* Roll back credits and retry if needed */ 4015 add_credits_and_wake_if(server, &rdata->credits, 0); 4016 } while (rc == -EAGAIN); 4017 4018 fail: 4019 kref_put(&rdata->refcount, cifs_readdata_release); 4020 return rc; 4021 } 4022 4023 static int 4024 cifs_send_async_read(loff_t fpos, size_t len, struct cifsFileInfo *open_file, 4025 struct cifs_sb_info *cifs_sb, struct list_head *rdata_list, 4026 struct cifs_aio_ctx *ctx) 4027 { 4028 struct cifs_readdata *rdata; 4029 unsigned int rsize, nsegs, max_segs = INT_MAX; 4030 struct cifs_credits credits_on_stack; 4031 struct cifs_credits *credits = &credits_on_stack; 4032 size_t cur_len, max_len; 4033 int rc; 4034 pid_t pid; 4035 struct TCP_Server_Info *server; 4036 4037 server = cifs_pick_channel(tlink_tcon(open_file->tlink)->ses); 4038 4039 #ifdef CONFIG_CIFS_SMB_DIRECT 4040 if (server->smbd_conn) 4041 max_segs = server->smbd_conn->max_frmr_depth; 4042 #endif 4043 4044 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD) 4045 pid = open_file->pid; 4046 else 4047 pid = current->tgid; 4048 4049 do { 4050 if (open_file->invalidHandle) { 4051 rc = cifs_reopen_file(open_file, true); 4052 if (rc == -EAGAIN) 4053 continue; 4054 else if (rc) 4055 break; 4056 } 4057 4058 if (cifs_sb->ctx->rsize == 0) 4059 cifs_sb->ctx->rsize = 4060 server->ops->negotiate_rsize(tlink_tcon(open_file->tlink), 4061 cifs_sb->ctx); 4062 4063 rc = server->ops->wait_mtu_credits(server, cifs_sb->ctx->rsize, 4064 &rsize, credits); 4065 if (rc) 4066 break; 4067 4068 max_len = min_t(size_t, len, rsize); 4069 4070 cur_len = cifs_limit_bvec_subset(&ctx->iter, max_len, 4071 max_segs, &nsegs); 4072 cifs_dbg(FYI, "read-to-iter len=%zx/%zx nsegs=%u/%lu/%u\n", 4073 cur_len, max_len, nsegs, ctx->iter.nr_segs, max_segs); 4074 if (cur_len == 0) { 4075 rc = -EIO; 4076 add_credits_and_wake_if(server, credits, 0); 4077 break; 4078 } 4079 4080 rdata = cifs_readdata_alloc(cifs_uncached_readv_complete); 4081 if (!rdata) { 4082 add_credits_and_wake_if(server, credits, 0); 4083 rc = -ENOMEM; 4084 break; 4085 } 4086 4087 rdata->server = server; 4088 rdata->cfile = cifsFileInfo_get(open_file); 4089 rdata->offset = fpos; 4090 rdata->bytes = cur_len; 4091 rdata->pid = pid; 4092 rdata->credits = credits_on_stack; 4093 rdata->ctx = ctx; 4094 kref_get(&ctx->refcount); 4095 4096 rdata->iter = ctx->iter; 4097 iov_iter_truncate(&rdata->iter, cur_len); 4098 4099 rc = adjust_credits(server, &rdata->credits, rdata->bytes); 4100 4101 if (!rc) { 4102 if (rdata->cfile->invalidHandle) 4103 rc = -EAGAIN; 4104 else 4105 rc = server->ops->async_readv(rdata); 4106 } 4107 4108 if (rc) { 4109 add_credits_and_wake_if(server, &rdata->credits, 0); 4110 kref_put(&rdata->refcount, cifs_readdata_release); 4111 if (rc == -EAGAIN) 4112 continue; 4113 break; 4114 } 4115 4116 list_add_tail(&rdata->list, rdata_list); 4117 iov_iter_advance(&ctx->iter, cur_len); 4118 fpos += cur_len; 4119 len -= cur_len; 4120 } while (len > 0); 4121 4122 return rc; 4123 } 4124 4125 static void 4126 collect_uncached_read_data(struct cifs_aio_ctx *ctx) 4127 { 4128 struct cifs_readdata *rdata, *tmp; 4129 struct cifs_sb_info *cifs_sb; 4130 int rc; 4131 4132 cifs_sb = CIFS_SB(ctx->cfile->dentry->d_sb); 4133 4134 mutex_lock(&ctx->aio_mutex); 4135 4136 if (list_empty(&ctx->list)) { 4137 mutex_unlock(&ctx->aio_mutex); 4138 return; 4139 } 4140 4141 rc = ctx->rc; 4142 /* the loop below should proceed in the order of increasing offsets */ 4143 again: 4144 list_for_each_entry_safe(rdata, tmp, &ctx->list, list) { 4145 if (!rc) { 4146 if (!try_wait_for_completion(&rdata->done)) { 4147 mutex_unlock(&ctx->aio_mutex); 4148 return; 4149 } 4150 4151 if (rdata->result == -EAGAIN) { 4152 /* resend call if it's a retryable error */ 4153 struct list_head tmp_list; 4154 unsigned int got_bytes = rdata->got_bytes; 4155 4156 list_del_init(&rdata->list); 4157 INIT_LIST_HEAD(&tmp_list); 4158 4159 if (ctx->direct_io) { 4160 /* 4161 * Re-use rdata as this is a 4162 * direct I/O 4163 */ 4164 rc = cifs_resend_rdata( 4165 rdata, 4166 &tmp_list, ctx); 4167 } else { 4168 rc = cifs_send_async_read( 4169 rdata->offset + got_bytes, 4170 rdata->bytes - got_bytes, 4171 rdata->cfile, cifs_sb, 4172 &tmp_list, ctx); 4173 4174 kref_put(&rdata->refcount, 4175 cifs_readdata_release); 4176 } 4177 4178 list_splice(&tmp_list, &ctx->list); 4179 4180 goto again; 4181 } else if (rdata->result) 4182 rc = rdata->result; 4183 4184 /* if there was a short read -- discard anything left */ 4185 if (rdata->got_bytes && rdata->got_bytes < rdata->bytes) 4186 rc = -ENODATA; 4187 4188 ctx->total_len += rdata->got_bytes; 4189 } 4190 list_del_init(&rdata->list); 4191 kref_put(&rdata->refcount, cifs_readdata_release); 4192 } 4193 4194 /* mask nodata case */ 4195 if (rc == -ENODATA) 4196 rc = 0; 4197 4198 ctx->rc = (rc == 0) ? (ssize_t)ctx->total_len : rc; 4199 4200 mutex_unlock(&ctx->aio_mutex); 4201 4202 if (ctx->iocb && ctx->iocb->ki_complete) 4203 ctx->iocb->ki_complete(ctx->iocb, ctx->rc); 4204 else 4205 complete(&ctx->done); 4206 } 4207 4208 static ssize_t __cifs_readv( 4209 struct kiocb *iocb, struct iov_iter *to, bool direct) 4210 { 4211 size_t len; 4212 struct file *file = iocb->ki_filp; 4213 struct cifs_sb_info *cifs_sb; 4214 struct cifsFileInfo *cfile; 4215 struct cifs_tcon *tcon; 4216 ssize_t rc, total_read = 0; 4217 loff_t offset = iocb->ki_pos; 4218 struct cifs_aio_ctx *ctx; 4219 4220 len = iov_iter_count(to); 4221 if (!len) 4222 return 0; 4223 4224 cifs_sb = CIFS_FILE_SB(file); 4225 cfile = file->private_data; 4226 tcon = tlink_tcon(cfile->tlink); 4227 4228 if (!tcon->ses->server->ops->async_readv) 4229 return -ENOSYS; 4230 4231 if ((file->f_flags & O_ACCMODE) == O_WRONLY) 4232 cifs_dbg(FYI, "attempting read on write only file instance\n"); 4233 4234 ctx = cifs_aio_ctx_alloc(); 4235 if (!ctx) 4236 return -ENOMEM; 4237 4238 ctx->pos = offset; 4239 ctx->direct_io = direct; 4240 ctx->len = len; 4241 ctx->cfile = cifsFileInfo_get(cfile); 4242 ctx->nr_pinned_pages = 0; 4243 4244 if (!is_sync_kiocb(iocb)) 4245 ctx->iocb = iocb; 4246 4247 if (user_backed_iter(to)) { 4248 /* 4249 * Extract IOVEC/UBUF-type iterators to a BVEC-type iterator as 4250 * they contain references to the calling process's virtual 4251 * memory layout which won't be available in an async worker 4252 * thread. This also takes a pin on every folio involved. 4253 */ 4254 rc = netfs_extract_user_iter(to, iov_iter_count(to), 4255 &ctx->iter, 0); 4256 if (rc < 0) { 4257 kref_put(&ctx->refcount, cifs_aio_ctx_release); 4258 return rc; 4259 } 4260 4261 ctx->nr_pinned_pages = rc; 4262 ctx->bv = (void *)ctx->iter.bvec; 4263 ctx->bv_need_unpin = iov_iter_extract_will_pin(to); 4264 ctx->should_dirty = true; 4265 } else if ((iov_iter_is_bvec(to) || iov_iter_is_kvec(to)) && 4266 !is_sync_kiocb(iocb)) { 4267 /* 4268 * If the op is asynchronous, we need to copy the list attached 4269 * to a BVEC/KVEC-type iterator, but we assume that the storage 4270 * will be retained by the caller; in any case, we may or may 4271 * not be able to pin the pages, so we don't try. 4272 */ 4273 ctx->bv = (void *)dup_iter(&ctx->iter, to, GFP_KERNEL); 4274 if (!ctx->bv) { 4275 kref_put(&ctx->refcount, cifs_aio_ctx_release); 4276 return -ENOMEM; 4277 } 4278 } else { 4279 /* 4280 * Otherwise, we just pass the iterator down as-is and rely on 4281 * the caller to make sure the pages referred to by the 4282 * iterator don't evaporate. 4283 */ 4284 ctx->iter = *to; 4285 } 4286 4287 if (direct) { 4288 rc = filemap_write_and_wait_range(file->f_inode->i_mapping, 4289 offset, offset + len - 1); 4290 if (rc) { 4291 kref_put(&ctx->refcount, cifs_aio_ctx_release); 4292 return -EAGAIN; 4293 } 4294 } 4295 4296 /* grab a lock here due to read response handlers can access ctx */ 4297 mutex_lock(&ctx->aio_mutex); 4298 4299 rc = cifs_send_async_read(offset, len, cfile, cifs_sb, &ctx->list, ctx); 4300 4301 /* if at least one read request send succeeded, then reset rc */ 4302 if (!list_empty(&ctx->list)) 4303 rc = 0; 4304 4305 mutex_unlock(&ctx->aio_mutex); 4306 4307 if (rc) { 4308 kref_put(&ctx->refcount, cifs_aio_ctx_release); 4309 return rc; 4310 } 4311 4312 if (!is_sync_kiocb(iocb)) { 4313 kref_put(&ctx->refcount, cifs_aio_ctx_release); 4314 return -EIOCBQUEUED; 4315 } 4316 4317 rc = wait_for_completion_killable(&ctx->done); 4318 if (rc) { 4319 mutex_lock(&ctx->aio_mutex); 4320 ctx->rc = rc = -EINTR; 4321 total_read = ctx->total_len; 4322 mutex_unlock(&ctx->aio_mutex); 4323 } else { 4324 rc = ctx->rc; 4325 total_read = ctx->total_len; 4326 } 4327 4328 kref_put(&ctx->refcount, cifs_aio_ctx_release); 4329 4330 if (total_read) { 4331 iocb->ki_pos += total_read; 4332 return total_read; 4333 } 4334 return rc; 4335 } 4336 4337 ssize_t cifs_direct_readv(struct kiocb *iocb, struct iov_iter *to) 4338 { 4339 return __cifs_readv(iocb, to, true); 4340 } 4341 4342 ssize_t cifs_user_readv(struct kiocb *iocb, struct iov_iter *to) 4343 { 4344 return __cifs_readv(iocb, to, false); 4345 } 4346 4347 ssize_t 4348 cifs_strict_readv(struct kiocb *iocb, struct iov_iter *to) 4349 { 4350 struct inode *inode = file_inode(iocb->ki_filp); 4351 struct cifsInodeInfo *cinode = CIFS_I(inode); 4352 struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb); 4353 struct cifsFileInfo *cfile = (struct cifsFileInfo *) 4354 iocb->ki_filp->private_data; 4355 struct cifs_tcon *tcon = tlink_tcon(cfile->tlink); 4356 int rc = -EACCES; 4357 4358 /* 4359 * In strict cache mode we need to read from the server all the time 4360 * if we don't have level II oplock because the server can delay mtime 4361 * change - so we can't make a decision about inode invalidating. 4362 * And we can also fail with pagereading if there are mandatory locks 4363 * on pages affected by this read but not on the region from pos to 4364 * pos+len-1. 4365 */ 4366 if (!CIFS_CACHE_READ(cinode)) 4367 return cifs_user_readv(iocb, to); 4368 4369 if (cap_unix(tcon->ses) && 4370 (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) && 4371 ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0)) 4372 return generic_file_read_iter(iocb, to); 4373 4374 /* 4375 * We need to hold the sem to be sure nobody modifies lock list 4376 * with a brlock that prevents reading. 4377 */ 4378 down_read(&cinode->lock_sem); 4379 if (!cifs_find_lock_conflict(cfile, iocb->ki_pos, iov_iter_count(to), 4380 tcon->ses->server->vals->shared_lock_type, 4381 0, NULL, CIFS_READ_OP)) 4382 rc = generic_file_read_iter(iocb, to); 4383 up_read(&cinode->lock_sem); 4384 return rc; 4385 } 4386 4387 static ssize_t 4388 cifs_read(struct file *file, char *read_data, size_t read_size, loff_t *offset) 4389 { 4390 int rc = -EACCES; 4391 unsigned int bytes_read = 0; 4392 unsigned int total_read; 4393 unsigned int current_read_size; 4394 unsigned int rsize; 4395 struct cifs_sb_info *cifs_sb; 4396 struct cifs_tcon *tcon; 4397 struct TCP_Server_Info *server; 4398 unsigned int xid; 4399 char *cur_offset; 4400 struct cifsFileInfo *open_file; 4401 struct cifs_io_parms io_parms = {0}; 4402 int buf_type = CIFS_NO_BUFFER; 4403 __u32 pid; 4404 4405 xid = get_xid(); 4406 cifs_sb = CIFS_FILE_SB(file); 4407 4408 /* FIXME: set up handlers for larger reads and/or convert to async */ 4409 rsize = min_t(unsigned int, cifs_sb->ctx->rsize, CIFSMaxBufSize); 4410 4411 if (file->private_data == NULL) { 4412 rc = -EBADF; 4413 free_xid(xid); 4414 return rc; 4415 } 4416 open_file = file->private_data; 4417 tcon = tlink_tcon(open_file->tlink); 4418 server = cifs_pick_channel(tcon->ses); 4419 4420 if (!server->ops->sync_read) { 4421 free_xid(xid); 4422 return -ENOSYS; 4423 } 4424 4425 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD) 4426 pid = open_file->pid; 4427 else 4428 pid = current->tgid; 4429 4430 if ((file->f_flags & O_ACCMODE) == O_WRONLY) 4431 cifs_dbg(FYI, "attempting read on write only file instance\n"); 4432 4433 for (total_read = 0, cur_offset = read_data; read_size > total_read; 4434 total_read += bytes_read, cur_offset += bytes_read) { 4435 do { 4436 current_read_size = min_t(uint, read_size - total_read, 4437 rsize); 4438 /* 4439 * For windows me and 9x we do not want to request more 4440 * than it negotiated since it will refuse the read 4441 * then. 4442 */ 4443 if (!(tcon->ses->capabilities & 4444 tcon->ses->server->vals->cap_large_files)) { 4445 current_read_size = min_t(uint, 4446 current_read_size, CIFSMaxBufSize); 4447 } 4448 if (open_file->invalidHandle) { 4449 rc = cifs_reopen_file(open_file, true); 4450 if (rc != 0) 4451 break; 4452 } 4453 io_parms.pid = pid; 4454 io_parms.tcon = tcon; 4455 io_parms.offset = *offset; 4456 io_parms.length = current_read_size; 4457 io_parms.server = server; 4458 rc = server->ops->sync_read(xid, &open_file->fid, &io_parms, 4459 &bytes_read, &cur_offset, 4460 &buf_type); 4461 } while (rc == -EAGAIN); 4462 4463 if (rc || (bytes_read == 0)) { 4464 if (total_read) { 4465 break; 4466 } else { 4467 free_xid(xid); 4468 return rc; 4469 } 4470 } else { 4471 cifs_stats_bytes_read(tcon, total_read); 4472 *offset += bytes_read; 4473 } 4474 } 4475 free_xid(xid); 4476 return total_read; 4477 } 4478 4479 /* 4480 * If the page is mmap'ed into a process' page tables, then we need to make 4481 * sure that it doesn't change while being written back. 4482 */ 4483 static vm_fault_t cifs_page_mkwrite(struct vm_fault *vmf) 4484 { 4485 struct folio *folio = page_folio(vmf->page); 4486 4487 /* Wait for the folio to be written to the cache before we allow it to 4488 * be modified. We then assume the entire folio will need writing back. 4489 */ 4490 #ifdef CONFIG_CIFS_FSCACHE 4491 if (folio_test_fscache(folio) && 4492 folio_wait_fscache_killable(folio) < 0) 4493 return VM_FAULT_RETRY; 4494 #endif 4495 4496 folio_wait_writeback(folio); 4497 4498 if (folio_lock_killable(folio) < 0) 4499 return VM_FAULT_RETRY; 4500 return VM_FAULT_LOCKED; 4501 } 4502 4503 static const struct vm_operations_struct cifs_file_vm_ops = { 4504 .fault = filemap_fault, 4505 .map_pages = filemap_map_pages, 4506 .page_mkwrite = cifs_page_mkwrite, 4507 }; 4508 4509 int cifs_file_strict_mmap(struct file *file, struct vm_area_struct *vma) 4510 { 4511 int xid, rc = 0; 4512 struct inode *inode = file_inode(file); 4513 4514 xid = get_xid(); 4515 4516 if (!CIFS_CACHE_READ(CIFS_I(inode))) 4517 rc = cifs_zap_mapping(inode); 4518 if (!rc) 4519 rc = generic_file_mmap(file, vma); 4520 if (!rc) 4521 vma->vm_ops = &cifs_file_vm_ops; 4522 4523 free_xid(xid); 4524 return rc; 4525 } 4526 4527 int cifs_file_mmap(struct file *file, struct vm_area_struct *vma) 4528 { 4529 int rc, xid; 4530 4531 xid = get_xid(); 4532 4533 rc = cifs_revalidate_file(file); 4534 if (rc) 4535 cifs_dbg(FYI, "Validation prior to mmap failed, error=%d\n", 4536 rc); 4537 if (!rc) 4538 rc = generic_file_mmap(file, vma); 4539 if (!rc) 4540 vma->vm_ops = &cifs_file_vm_ops; 4541 4542 free_xid(xid); 4543 return rc; 4544 } 4545 4546 /* 4547 * Unlock a bunch of folios in the pagecache. 4548 */ 4549 static void cifs_unlock_folios(struct address_space *mapping, pgoff_t first, pgoff_t last) 4550 { 4551 struct folio *folio; 4552 XA_STATE(xas, &mapping->i_pages, first); 4553 4554 rcu_read_lock(); 4555 xas_for_each(&xas, folio, last) { 4556 folio_unlock(folio); 4557 } 4558 rcu_read_unlock(); 4559 } 4560 4561 static void cifs_readahead_complete(struct work_struct *work) 4562 { 4563 struct cifs_readdata *rdata = container_of(work, 4564 struct cifs_readdata, work); 4565 struct folio *folio; 4566 pgoff_t last; 4567 bool good = rdata->result == 0 || (rdata->result == -EAGAIN && rdata->got_bytes); 4568 4569 XA_STATE(xas, &rdata->mapping->i_pages, rdata->offset / PAGE_SIZE); 4570 4571 if (good) 4572 cifs_readahead_to_fscache(rdata->mapping->host, 4573 rdata->offset, rdata->bytes); 4574 4575 if (iov_iter_count(&rdata->iter) > 0) 4576 iov_iter_zero(iov_iter_count(&rdata->iter), &rdata->iter); 4577 4578 last = (rdata->offset + rdata->bytes - 1) / PAGE_SIZE; 4579 4580 rcu_read_lock(); 4581 xas_for_each(&xas, folio, last) { 4582 if (good) { 4583 flush_dcache_folio(folio); 4584 folio_mark_uptodate(folio); 4585 } 4586 folio_unlock(folio); 4587 } 4588 rcu_read_unlock(); 4589 4590 kref_put(&rdata->refcount, cifs_readdata_release); 4591 } 4592 4593 static void cifs_readahead(struct readahead_control *ractl) 4594 { 4595 struct cifsFileInfo *open_file = ractl->file->private_data; 4596 struct cifs_sb_info *cifs_sb = CIFS_FILE_SB(ractl->file); 4597 struct TCP_Server_Info *server; 4598 unsigned int xid, nr_pages, cache_nr_pages = 0; 4599 unsigned int ra_pages; 4600 pgoff_t next_cached = ULONG_MAX, ra_index; 4601 bool caching = fscache_cookie_enabled(cifs_inode_cookie(ractl->mapping->host)) && 4602 cifs_inode_cookie(ractl->mapping->host)->cache_priv; 4603 bool check_cache = caching; 4604 pid_t pid; 4605 int rc = 0; 4606 4607 /* Note that readahead_count() lags behind our dequeuing of pages from 4608 * the ractl, wo we have to keep track for ourselves. 4609 */ 4610 ra_pages = readahead_count(ractl); 4611 ra_index = readahead_index(ractl); 4612 4613 xid = get_xid(); 4614 4615 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD) 4616 pid = open_file->pid; 4617 else 4618 pid = current->tgid; 4619 4620 server = cifs_pick_channel(tlink_tcon(open_file->tlink)->ses); 4621 4622 cifs_dbg(FYI, "%s: file=%p mapping=%p num_pages=%u\n", 4623 __func__, ractl->file, ractl->mapping, ra_pages); 4624 4625 /* 4626 * Chop the readahead request up into rsize-sized read requests. 4627 */ 4628 while ((nr_pages = ra_pages)) { 4629 unsigned int i, rsize; 4630 struct cifs_readdata *rdata; 4631 struct cifs_credits credits_on_stack; 4632 struct cifs_credits *credits = &credits_on_stack; 4633 struct folio *folio; 4634 pgoff_t fsize; 4635 4636 /* 4637 * Find out if we have anything cached in the range of 4638 * interest, and if so, where the next chunk of cached data is. 4639 */ 4640 if (caching) { 4641 if (check_cache) { 4642 rc = cifs_fscache_query_occupancy( 4643 ractl->mapping->host, ra_index, nr_pages, 4644 &next_cached, &cache_nr_pages); 4645 if (rc < 0) 4646 caching = false; 4647 check_cache = false; 4648 } 4649 4650 if (ra_index == next_cached) { 4651 /* 4652 * TODO: Send a whole batch of pages to be read 4653 * by the cache. 4654 */ 4655 folio = readahead_folio(ractl); 4656 fsize = folio_nr_pages(folio); 4657 ra_pages -= fsize; 4658 ra_index += fsize; 4659 if (cifs_readpage_from_fscache(ractl->mapping->host, 4660 &folio->page) < 0) { 4661 /* 4662 * TODO: Deal with cache read failure 4663 * here, but for the moment, delegate 4664 * that to readpage. 4665 */ 4666 caching = false; 4667 } 4668 folio_unlock(folio); 4669 next_cached += fsize; 4670 cache_nr_pages -= fsize; 4671 if (cache_nr_pages == 0) 4672 check_cache = true; 4673 continue; 4674 } 4675 } 4676 4677 if (open_file->invalidHandle) { 4678 rc = cifs_reopen_file(open_file, true); 4679 if (rc) { 4680 if (rc == -EAGAIN) 4681 continue; 4682 break; 4683 } 4684 } 4685 4686 if (cifs_sb->ctx->rsize == 0) 4687 cifs_sb->ctx->rsize = 4688 server->ops->negotiate_rsize(tlink_tcon(open_file->tlink), 4689 cifs_sb->ctx); 4690 4691 rc = server->ops->wait_mtu_credits(server, cifs_sb->ctx->rsize, 4692 &rsize, credits); 4693 if (rc) 4694 break; 4695 nr_pages = min_t(size_t, rsize / PAGE_SIZE, ra_pages); 4696 if (next_cached != ULONG_MAX) 4697 nr_pages = min_t(size_t, nr_pages, next_cached - ra_index); 4698 4699 /* 4700 * Give up immediately if rsize is too small to read an entire 4701 * page. The VFS will fall back to readpage. We should never 4702 * reach this point however since we set ra_pages to 0 when the 4703 * rsize is smaller than a cache page. 4704 */ 4705 if (unlikely(!nr_pages)) { 4706 add_credits_and_wake_if(server, credits, 0); 4707 break; 4708 } 4709 4710 rdata = cifs_readdata_alloc(cifs_readahead_complete); 4711 if (!rdata) { 4712 /* best to give up if we're out of mem */ 4713 add_credits_and_wake_if(server, credits, 0); 4714 break; 4715 } 4716 4717 rdata->offset = ra_index * PAGE_SIZE; 4718 rdata->bytes = nr_pages * PAGE_SIZE; 4719 rdata->cfile = cifsFileInfo_get(open_file); 4720 rdata->server = server; 4721 rdata->mapping = ractl->mapping; 4722 rdata->pid = pid; 4723 rdata->credits = credits_on_stack; 4724 4725 for (i = 0; i < nr_pages; i++) { 4726 if (!readahead_folio(ractl)) 4727 WARN_ON(1); 4728 } 4729 ra_pages -= nr_pages; 4730 ra_index += nr_pages; 4731 4732 iov_iter_xarray(&rdata->iter, ITER_DEST, &rdata->mapping->i_pages, 4733 rdata->offset, rdata->bytes); 4734 4735 rc = adjust_credits(server, &rdata->credits, rdata->bytes); 4736 if (!rc) { 4737 if (rdata->cfile->invalidHandle) 4738 rc = -EAGAIN; 4739 else 4740 rc = server->ops->async_readv(rdata); 4741 } 4742 4743 if (rc) { 4744 add_credits_and_wake_if(server, &rdata->credits, 0); 4745 cifs_unlock_folios(rdata->mapping, 4746 rdata->offset / PAGE_SIZE, 4747 (rdata->offset + rdata->bytes - 1) / PAGE_SIZE); 4748 /* Fallback to the readpage in error/reconnect cases */ 4749 kref_put(&rdata->refcount, cifs_readdata_release); 4750 break; 4751 } 4752 4753 kref_put(&rdata->refcount, cifs_readdata_release); 4754 } 4755 4756 free_xid(xid); 4757 } 4758 4759 /* 4760 * cifs_readpage_worker must be called with the page pinned 4761 */ 4762 static int cifs_readpage_worker(struct file *file, struct page *page, 4763 loff_t *poffset) 4764 { 4765 struct inode *inode = file_inode(file); 4766 struct timespec64 atime, mtime; 4767 char *read_data; 4768 int rc; 4769 4770 /* Is the page cached? */ 4771 rc = cifs_readpage_from_fscache(inode, page); 4772 if (rc == 0) 4773 goto read_complete; 4774 4775 read_data = kmap(page); 4776 /* for reads over a certain size could initiate async read ahead */ 4777 4778 rc = cifs_read(file, read_data, PAGE_SIZE, poffset); 4779 4780 if (rc < 0) 4781 goto io_error; 4782 else 4783 cifs_dbg(FYI, "Bytes read %d\n", rc); 4784 4785 /* we do not want atime to be less than mtime, it broke some apps */ 4786 atime = inode_set_atime_to_ts(inode, current_time(inode)); 4787 mtime = inode_get_mtime(inode); 4788 if (timespec64_compare(&atime, &mtime) < 0) 4789 inode_set_atime_to_ts(inode, inode_get_mtime(inode)); 4790 4791 if (PAGE_SIZE > rc) 4792 memset(read_data + rc, 0, PAGE_SIZE - rc); 4793 4794 flush_dcache_page(page); 4795 SetPageUptodate(page); 4796 rc = 0; 4797 4798 io_error: 4799 kunmap(page); 4800 4801 read_complete: 4802 unlock_page(page); 4803 return rc; 4804 } 4805 4806 static int cifs_read_folio(struct file *file, struct folio *folio) 4807 { 4808 struct page *page = &folio->page; 4809 loff_t offset = page_file_offset(page); 4810 int rc = -EACCES; 4811 unsigned int xid; 4812 4813 xid = get_xid(); 4814 4815 if (file->private_data == NULL) { 4816 rc = -EBADF; 4817 free_xid(xid); 4818 return rc; 4819 } 4820 4821 cifs_dbg(FYI, "read_folio %p at offset %d 0x%x\n", 4822 page, (int)offset, (int)offset); 4823 4824 rc = cifs_readpage_worker(file, page, &offset); 4825 4826 free_xid(xid); 4827 return rc; 4828 } 4829 4830 static int is_inode_writable(struct cifsInodeInfo *cifs_inode) 4831 { 4832 struct cifsFileInfo *open_file; 4833 4834 spin_lock(&cifs_inode->open_file_lock); 4835 list_for_each_entry(open_file, &cifs_inode->openFileList, flist) { 4836 if (OPEN_FMODE(open_file->f_flags) & FMODE_WRITE) { 4837 spin_unlock(&cifs_inode->open_file_lock); 4838 return 1; 4839 } 4840 } 4841 spin_unlock(&cifs_inode->open_file_lock); 4842 return 0; 4843 } 4844 4845 /* We do not want to update the file size from server for inodes 4846 open for write - to avoid races with writepage extending 4847 the file - in the future we could consider allowing 4848 refreshing the inode only on increases in the file size 4849 but this is tricky to do without racing with writebehind 4850 page caching in the current Linux kernel design */ 4851 bool is_size_safe_to_change(struct cifsInodeInfo *cifsInode, __u64 end_of_file, 4852 bool from_readdir) 4853 { 4854 if (!cifsInode) 4855 return true; 4856 4857 if (is_inode_writable(cifsInode) || 4858 ((cifsInode->oplock & CIFS_CACHE_RW_FLG) != 0 && from_readdir)) { 4859 /* This inode is open for write at least once */ 4860 struct cifs_sb_info *cifs_sb; 4861 4862 cifs_sb = CIFS_SB(cifsInode->netfs.inode.i_sb); 4863 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DIRECT_IO) { 4864 /* since no page cache to corrupt on directio 4865 we can change size safely */ 4866 return true; 4867 } 4868 4869 if (i_size_read(&cifsInode->netfs.inode) < end_of_file) 4870 return true; 4871 4872 return false; 4873 } else 4874 return true; 4875 } 4876 4877 static int cifs_write_begin(struct file *file, struct address_space *mapping, 4878 loff_t pos, unsigned len, 4879 struct page **pagep, void **fsdata) 4880 { 4881 int oncethru = 0; 4882 pgoff_t index = pos >> PAGE_SHIFT; 4883 loff_t offset = pos & (PAGE_SIZE - 1); 4884 loff_t page_start = pos & PAGE_MASK; 4885 loff_t i_size; 4886 struct page *page; 4887 int rc = 0; 4888 4889 cifs_dbg(FYI, "write_begin from %lld len %d\n", (long long)pos, len); 4890 4891 start: 4892 page = grab_cache_page_write_begin(mapping, index); 4893 if (!page) { 4894 rc = -ENOMEM; 4895 goto out; 4896 } 4897 4898 if (PageUptodate(page)) 4899 goto out; 4900 4901 /* 4902 * If we write a full page it will be up to date, no need to read from 4903 * the server. If the write is short, we'll end up doing a sync write 4904 * instead. 4905 */ 4906 if (len == PAGE_SIZE) 4907 goto out; 4908 4909 /* 4910 * optimize away the read when we have an oplock, and we're not 4911 * expecting to use any of the data we'd be reading in. That 4912 * is, when the page lies beyond the EOF, or straddles the EOF 4913 * and the write will cover all of the existing data. 4914 */ 4915 if (CIFS_CACHE_READ(CIFS_I(mapping->host))) { 4916 i_size = i_size_read(mapping->host); 4917 if (page_start >= i_size || 4918 (offset == 0 && (pos + len) >= i_size)) { 4919 zero_user_segments(page, 0, offset, 4920 offset + len, 4921 PAGE_SIZE); 4922 /* 4923 * PageChecked means that the parts of the page 4924 * to which we're not writing are considered up 4925 * to date. Once the data is copied to the 4926 * page, it can be set uptodate. 4927 */ 4928 SetPageChecked(page); 4929 goto out; 4930 } 4931 } 4932 4933 if ((file->f_flags & O_ACCMODE) != O_WRONLY && !oncethru) { 4934 /* 4935 * might as well read a page, it is fast enough. If we get 4936 * an error, we don't need to return it. cifs_write_end will 4937 * do a sync write instead since PG_uptodate isn't set. 4938 */ 4939 cifs_readpage_worker(file, page, &page_start); 4940 put_page(page); 4941 oncethru = 1; 4942 goto start; 4943 } else { 4944 /* we could try using another file handle if there is one - 4945 but how would we lock it to prevent close of that handle 4946 racing with this read? In any case 4947 this will be written out by write_end so is fine */ 4948 } 4949 out: 4950 *pagep = page; 4951 return rc; 4952 } 4953 4954 static bool cifs_release_folio(struct folio *folio, gfp_t gfp) 4955 { 4956 if (folio_test_private(folio)) 4957 return 0; 4958 if (folio_test_fscache(folio)) { 4959 if (current_is_kswapd() || !(gfp & __GFP_FS)) 4960 return false; 4961 folio_wait_fscache(folio); 4962 } 4963 fscache_note_page_release(cifs_inode_cookie(folio->mapping->host)); 4964 return true; 4965 } 4966 4967 static void cifs_invalidate_folio(struct folio *folio, size_t offset, 4968 size_t length) 4969 { 4970 folio_wait_fscache(folio); 4971 } 4972 4973 static int cifs_launder_folio(struct folio *folio) 4974 { 4975 int rc = 0; 4976 loff_t range_start = folio_pos(folio); 4977 loff_t range_end = range_start + folio_size(folio); 4978 struct writeback_control wbc = { 4979 .sync_mode = WB_SYNC_ALL, 4980 .nr_to_write = 0, 4981 .range_start = range_start, 4982 .range_end = range_end, 4983 }; 4984 4985 cifs_dbg(FYI, "Launder page: %lu\n", folio->index); 4986 4987 if (folio_clear_dirty_for_io(folio)) 4988 rc = cifs_writepage_locked(&folio->page, &wbc); 4989 4990 folio_wait_fscache(folio); 4991 return rc; 4992 } 4993 4994 void cifs_oplock_break(struct work_struct *work) 4995 { 4996 struct cifsFileInfo *cfile = container_of(work, struct cifsFileInfo, 4997 oplock_break); 4998 struct inode *inode = d_inode(cfile->dentry); 4999 struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb); 5000 struct cifsInodeInfo *cinode = CIFS_I(inode); 5001 struct cifs_tcon *tcon; 5002 struct TCP_Server_Info *server; 5003 struct tcon_link *tlink; 5004 int rc = 0; 5005 bool purge_cache = false, oplock_break_cancelled; 5006 __u64 persistent_fid, volatile_fid; 5007 __u16 net_fid; 5008 5009 wait_on_bit(&cinode->flags, CIFS_INODE_PENDING_WRITERS, 5010 TASK_UNINTERRUPTIBLE); 5011 5012 tlink = cifs_sb_tlink(cifs_sb); 5013 if (IS_ERR(tlink)) 5014 goto out; 5015 tcon = tlink_tcon(tlink); 5016 server = tcon->ses->server; 5017 5018 server->ops->downgrade_oplock(server, cinode, cfile->oplock_level, 5019 cfile->oplock_epoch, &purge_cache); 5020 5021 if (!CIFS_CACHE_WRITE(cinode) && CIFS_CACHE_READ(cinode) && 5022 cifs_has_mand_locks(cinode)) { 5023 cifs_dbg(FYI, "Reset oplock to None for inode=%p due to mand locks\n", 5024 inode); 5025 cinode->oplock = 0; 5026 } 5027 5028 if (inode && S_ISREG(inode->i_mode)) { 5029 if (CIFS_CACHE_READ(cinode)) 5030 break_lease(inode, O_RDONLY); 5031 else 5032 break_lease(inode, O_WRONLY); 5033 rc = filemap_fdatawrite(inode->i_mapping); 5034 if (!CIFS_CACHE_READ(cinode) || purge_cache) { 5035 rc = filemap_fdatawait(inode->i_mapping); 5036 mapping_set_error(inode->i_mapping, rc); 5037 cifs_zap_mapping(inode); 5038 } 5039 cifs_dbg(FYI, "Oplock flush inode %p rc %d\n", inode, rc); 5040 if (CIFS_CACHE_WRITE(cinode)) 5041 goto oplock_break_ack; 5042 } 5043 5044 rc = cifs_push_locks(cfile); 5045 if (rc) 5046 cifs_dbg(VFS, "Push locks rc = %d\n", rc); 5047 5048 oplock_break_ack: 5049 /* 5050 * When oplock break is received and there are no active 5051 * file handles but cached, then schedule deferred close immediately. 5052 * So, new open will not use cached handle. 5053 */ 5054 5055 if (!CIFS_CACHE_HANDLE(cinode) && !list_empty(&cinode->deferred_closes)) 5056 cifs_close_deferred_file(cinode); 5057 5058 persistent_fid = cfile->fid.persistent_fid; 5059 volatile_fid = cfile->fid.volatile_fid; 5060 net_fid = cfile->fid.netfid; 5061 oplock_break_cancelled = cfile->oplock_break_cancelled; 5062 5063 _cifsFileInfo_put(cfile, false /* do not wait for ourself */, false); 5064 /* 5065 * MS-SMB2 3.2.5.19.1 and 3.2.5.19.2 (and MS-CIFS 3.2.5.42) do not require 5066 * an acknowledgment to be sent when the file has already been closed. 5067 */ 5068 spin_lock(&cinode->open_file_lock); 5069 /* check list empty since can race with kill_sb calling tree disconnect */ 5070 if (!oplock_break_cancelled && !list_empty(&cinode->openFileList)) { 5071 spin_unlock(&cinode->open_file_lock); 5072 rc = server->ops->oplock_response(tcon, persistent_fid, 5073 volatile_fid, net_fid, cinode); 5074 cifs_dbg(FYI, "Oplock release rc = %d\n", rc); 5075 } else 5076 spin_unlock(&cinode->open_file_lock); 5077 5078 cifs_put_tlink(tlink); 5079 out: 5080 cifs_done_oplock_break(cinode); 5081 } 5082 5083 /* 5084 * The presence of cifs_direct_io() in the address space ops vector 5085 * allowes open() O_DIRECT flags which would have failed otherwise. 5086 * 5087 * In the non-cached mode (mount with cache=none), we shunt off direct read and write requests 5088 * so this method should never be called. 5089 * 5090 * Direct IO is not yet supported in the cached mode. 5091 */ 5092 static ssize_t 5093 cifs_direct_io(struct kiocb *iocb, struct iov_iter *iter) 5094 { 5095 /* 5096 * FIXME 5097 * Eventually need to support direct IO for non forcedirectio mounts 5098 */ 5099 return -EINVAL; 5100 } 5101 5102 static int cifs_swap_activate(struct swap_info_struct *sis, 5103 struct file *swap_file, sector_t *span) 5104 { 5105 struct cifsFileInfo *cfile = swap_file->private_data; 5106 struct inode *inode = swap_file->f_mapping->host; 5107 unsigned long blocks; 5108 long long isize; 5109 5110 cifs_dbg(FYI, "swap activate\n"); 5111 5112 if (!swap_file->f_mapping->a_ops->swap_rw) 5113 /* Cannot support swap */ 5114 return -EINVAL; 5115 5116 spin_lock(&inode->i_lock); 5117 blocks = inode->i_blocks; 5118 isize = inode->i_size; 5119 spin_unlock(&inode->i_lock); 5120 if (blocks*512 < isize) { 5121 pr_warn("swap activate: swapfile has holes\n"); 5122 return -EINVAL; 5123 } 5124 *span = sis->pages; 5125 5126 pr_warn_once("Swap support over SMB3 is experimental\n"); 5127 5128 /* 5129 * TODO: consider adding ACL (or documenting how) to prevent other 5130 * users (on this or other systems) from reading it 5131 */ 5132 5133 5134 /* TODO: add sk_set_memalloc(inet) or similar */ 5135 5136 if (cfile) 5137 cfile->swapfile = true; 5138 /* 5139 * TODO: Since file already open, we can't open with DENY_ALL here 5140 * but we could add call to grab a byte range lock to prevent others 5141 * from reading or writing the file 5142 */ 5143 5144 sis->flags |= SWP_FS_OPS; 5145 return add_swap_extent(sis, 0, sis->max, 0); 5146 } 5147 5148 static void cifs_swap_deactivate(struct file *file) 5149 { 5150 struct cifsFileInfo *cfile = file->private_data; 5151 5152 cifs_dbg(FYI, "swap deactivate\n"); 5153 5154 /* TODO: undo sk_set_memalloc(inet) will eventually be needed */ 5155 5156 if (cfile) 5157 cfile->swapfile = false; 5158 5159 /* do we need to unpin (or unlock) the file */ 5160 } 5161 5162 /* 5163 * Mark a page as having been made dirty and thus needing writeback. We also 5164 * need to pin the cache object to write back to. 5165 */ 5166 #ifdef CONFIG_CIFS_FSCACHE 5167 static bool cifs_dirty_folio(struct address_space *mapping, struct folio *folio) 5168 { 5169 return fscache_dirty_folio(mapping, folio, 5170 cifs_inode_cookie(mapping->host)); 5171 } 5172 #else 5173 #define cifs_dirty_folio filemap_dirty_folio 5174 #endif 5175 5176 const struct address_space_operations cifs_addr_ops = { 5177 .read_folio = cifs_read_folio, 5178 .readahead = cifs_readahead, 5179 .writepages = cifs_writepages, 5180 .write_begin = cifs_write_begin, 5181 .write_end = cifs_write_end, 5182 .dirty_folio = cifs_dirty_folio, 5183 .release_folio = cifs_release_folio, 5184 .direct_IO = cifs_direct_io, 5185 .invalidate_folio = cifs_invalidate_folio, 5186 .launder_folio = cifs_launder_folio, 5187 .migrate_folio = filemap_migrate_folio, 5188 /* 5189 * TODO: investigate and if useful we could add an is_dirty_writeback 5190 * helper if needed 5191 */ 5192 .swap_activate = cifs_swap_activate, 5193 .swap_deactivate = cifs_swap_deactivate, 5194 }; 5195 5196 /* 5197 * cifs_readahead requires the server to support a buffer large enough to 5198 * contain the header plus one complete page of data. Otherwise, we need 5199 * to leave cifs_readahead out of the address space operations. 5200 */ 5201 const struct address_space_operations cifs_addr_ops_smallbuf = { 5202 .read_folio = cifs_read_folio, 5203 .writepages = cifs_writepages, 5204 .write_begin = cifs_write_begin, 5205 .write_end = cifs_write_end, 5206 .dirty_folio = cifs_dirty_folio, 5207 .release_folio = cifs_release_folio, 5208 .invalidate_folio = cifs_invalidate_folio, 5209 .launder_folio = cifs_launder_folio, 5210 .migrate_folio = filemap_migrate_folio, 5211 }; 5212