xref: /openbmc/linux/fs/smb/client/file.c (revision b6e27f7f)
1 // SPDX-License-Identifier: LGPL-2.1
2 /*
3  *
4  *   vfs operations that deal with files
5  *
6  *   Copyright (C) International Business Machines  Corp., 2002,2010
7  *   Author(s): Steve French (sfrench@us.ibm.com)
8  *              Jeremy Allison (jra@samba.org)
9  *
10  */
11 #include <linux/fs.h>
12 #include <linux/filelock.h>
13 #include <linux/backing-dev.h>
14 #include <linux/stat.h>
15 #include <linux/fcntl.h>
16 #include <linux/pagemap.h>
17 #include <linux/pagevec.h>
18 #include <linux/writeback.h>
19 #include <linux/task_io_accounting_ops.h>
20 #include <linux/delay.h>
21 #include <linux/mount.h>
22 #include <linux/slab.h>
23 #include <linux/swap.h>
24 #include <linux/mm.h>
25 #include <asm/div64.h>
26 #include "cifsfs.h"
27 #include "cifspdu.h"
28 #include "cifsglob.h"
29 #include "cifsproto.h"
30 #include "smb2proto.h"
31 #include "cifs_unicode.h"
32 #include "cifs_debug.h"
33 #include "cifs_fs_sb.h"
34 #include "fscache.h"
35 #include "smbdirect.h"
36 #include "fs_context.h"
37 #include "cifs_ioctl.h"
38 #include "cached_dir.h"
39 
40 /*
41  * Remove the dirty flags from a span of pages.
42  */
43 static void cifs_undirty_folios(struct inode *inode, loff_t start, unsigned int len)
44 {
45 	struct address_space *mapping = inode->i_mapping;
46 	struct folio *folio;
47 	pgoff_t end;
48 
49 	XA_STATE(xas, &mapping->i_pages, start / PAGE_SIZE);
50 
51 	rcu_read_lock();
52 
53 	end = (start + len - 1) / PAGE_SIZE;
54 	xas_for_each_marked(&xas, folio, end, PAGECACHE_TAG_DIRTY) {
55 		if (xas_retry(&xas, folio))
56 			continue;
57 		xas_pause(&xas);
58 		rcu_read_unlock();
59 		folio_lock(folio);
60 		folio_clear_dirty_for_io(folio);
61 		folio_unlock(folio);
62 		rcu_read_lock();
63 	}
64 
65 	rcu_read_unlock();
66 }
67 
68 /*
69  * Completion of write to server.
70  */
71 void cifs_pages_written_back(struct inode *inode, loff_t start, unsigned int len)
72 {
73 	struct address_space *mapping = inode->i_mapping;
74 	struct folio *folio;
75 	pgoff_t end;
76 
77 	XA_STATE(xas, &mapping->i_pages, start / PAGE_SIZE);
78 
79 	if (!len)
80 		return;
81 
82 	rcu_read_lock();
83 
84 	end = (start + len - 1) / PAGE_SIZE;
85 	xas_for_each(&xas, folio, end) {
86 		if (xas_retry(&xas, folio))
87 			continue;
88 		if (!folio_test_writeback(folio)) {
89 			WARN_ONCE(1, "bad %x @%llx page %lx %lx\n",
90 				  len, start, folio->index, end);
91 			continue;
92 		}
93 
94 		folio_detach_private(folio);
95 		folio_end_writeback(folio);
96 	}
97 
98 	rcu_read_unlock();
99 }
100 
101 /*
102  * Failure of write to server.
103  */
104 void cifs_pages_write_failed(struct inode *inode, loff_t start, unsigned int len)
105 {
106 	struct address_space *mapping = inode->i_mapping;
107 	struct folio *folio;
108 	pgoff_t end;
109 
110 	XA_STATE(xas, &mapping->i_pages, start / PAGE_SIZE);
111 
112 	if (!len)
113 		return;
114 
115 	rcu_read_lock();
116 
117 	end = (start + len - 1) / PAGE_SIZE;
118 	xas_for_each(&xas, folio, end) {
119 		if (xas_retry(&xas, folio))
120 			continue;
121 		if (!folio_test_writeback(folio)) {
122 			WARN_ONCE(1, "bad %x @%llx page %lx %lx\n",
123 				  len, start, folio->index, end);
124 			continue;
125 		}
126 
127 		folio_set_error(folio);
128 		folio_end_writeback(folio);
129 	}
130 
131 	rcu_read_unlock();
132 }
133 
134 /*
135  * Redirty pages after a temporary failure.
136  */
137 void cifs_pages_write_redirty(struct inode *inode, loff_t start, unsigned int len)
138 {
139 	struct address_space *mapping = inode->i_mapping;
140 	struct folio *folio;
141 	pgoff_t end;
142 
143 	XA_STATE(xas, &mapping->i_pages, start / PAGE_SIZE);
144 
145 	if (!len)
146 		return;
147 
148 	rcu_read_lock();
149 
150 	end = (start + len - 1) / PAGE_SIZE;
151 	xas_for_each(&xas, folio, end) {
152 		if (!folio_test_writeback(folio)) {
153 			WARN_ONCE(1, "bad %x @%llx page %lx %lx\n",
154 				  len, start, folio->index, end);
155 			continue;
156 		}
157 
158 		filemap_dirty_folio(folio->mapping, folio);
159 		folio_end_writeback(folio);
160 	}
161 
162 	rcu_read_unlock();
163 }
164 
165 /*
166  * Mark all open files on the tree connection as invalid, since they
167  * were closed when the session to the server was lost.
168  */
169 void
170 cifs_mark_open_files_invalid(struct cifs_tcon *tcon)
171 {
172 	struct cifsFileInfo *open_file = NULL;
173 	struct list_head *tmp;
174 	struct list_head *tmp1;
175 
176 	/* only send once per connect */
177 	spin_lock(&tcon->tc_lock);
178 	if (tcon->need_reconnect)
179 		tcon->status = TID_NEED_RECON;
180 
181 	if (tcon->status != TID_NEED_RECON) {
182 		spin_unlock(&tcon->tc_lock);
183 		return;
184 	}
185 	tcon->status = TID_IN_FILES_INVALIDATE;
186 	spin_unlock(&tcon->tc_lock);
187 
188 	/* list all files open on tree connection and mark them invalid */
189 	spin_lock(&tcon->open_file_lock);
190 	list_for_each_safe(tmp, tmp1, &tcon->openFileList) {
191 		open_file = list_entry(tmp, struct cifsFileInfo, tlist);
192 		open_file->invalidHandle = true;
193 		open_file->oplock_break_cancelled = true;
194 	}
195 	spin_unlock(&tcon->open_file_lock);
196 
197 	invalidate_all_cached_dirs(tcon);
198 	spin_lock(&tcon->tc_lock);
199 	if (tcon->status == TID_IN_FILES_INVALIDATE)
200 		tcon->status = TID_NEED_TCON;
201 	spin_unlock(&tcon->tc_lock);
202 
203 	/*
204 	 * BB Add call to invalidate_inodes(sb) for all superblocks mounted
205 	 * to this tcon.
206 	 */
207 }
208 
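/*
 * Map POSIX open flags to the NT desired-access bits sent on the wire.
 * When rdwr_for_fscache is 1, a write-only open is widened to read/write
 * so that the local cache can fill in around partial writes.
 */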
209 static inline int cifs_convert_flags(unsigned int flags, int rdwr_for_fscache)
210 {
211 	if ((flags & O_ACCMODE) == O_RDONLY)
212 		return GENERIC_READ;
213 	else if ((flags & O_ACCMODE) == O_WRONLY)
214 		return rdwr_for_fscache == 1 ? (GENERIC_READ | GENERIC_WRITE) : GENERIC_WRITE;
215 	else if ((flags & O_ACCMODE) == O_RDWR) {
216 		/* GENERIC_ALL is too much permission to request; it
217 		   can cause unnecessary access-denied errors on create */
218 		/* return GENERIC_ALL; */
219 		return (GENERIC_READ | GENERIC_WRITE);
220 	}
221 
222 	return (READ_CONTROL | FILE_WRITE_ATTRIBUTES | FILE_READ_ATTRIBUTES |
223 		FILE_WRITE_EA | FILE_APPEND_DATA | FILE_WRITE_DATA |
224 		FILE_READ_DATA);
225 }
226 
227 #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
228 static u32 cifs_posix_convert_flags(unsigned int flags)
229 {
230 	u32 posix_flags = 0;
231 
232 	if ((flags & O_ACCMODE) == O_RDONLY)
233 		posix_flags = SMB_O_RDONLY;
234 	else if ((flags & O_ACCMODE) == O_WRONLY)
235 		posix_flags = SMB_O_WRONLY;
236 	else if ((flags & O_ACCMODE) == O_RDWR)
237 		posix_flags = SMB_O_RDWR;
238 
239 	if (flags & O_CREAT) {
240 		posix_flags |= SMB_O_CREAT;
241 		if (flags & O_EXCL)
242 			posix_flags |= SMB_O_EXCL;
243 	} else if (flags & O_EXCL)
244 		cifs_dbg(FYI, "Application %s pid %d has incorrectly set O_EXCL flag but not O_CREAT on file open. Ignoring O_EXCL\n",
245 			 current->comm, current->tgid);
246 
247 	if (flags & O_TRUNC)
248 		posix_flags |= SMB_O_TRUNC;
249 	/* be safe and imply O_SYNC for O_DSYNC */
250 	if (flags & O_DSYNC)
251 		posix_flags |= SMB_O_SYNC;
252 	if (flags & O_DIRECTORY)
253 		posix_flags |= SMB_O_DIRECTORY;
254 	if (flags & O_NOFOLLOW)
255 		posix_flags |= SMB_O_NOFOLLOW;
256 	if (flags & O_DIRECT)
257 		posix_flags |= SMB_O_DIRECT;
258 
259 	return posix_flags;
260 }
261 #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
262 
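/*
 * Map the O_CREAT/O_EXCL/O_TRUNC combination to the SMB create disposition
 * (see the open flag mapping table in cifs_nt_open() below).
 */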
263 static inline int cifs_get_disposition(unsigned int flags)
264 {
265 	if ((flags & (O_CREAT | O_EXCL)) == (O_CREAT | O_EXCL))
266 		return FILE_CREATE;
267 	else if ((flags & (O_CREAT | O_TRUNC)) == (O_CREAT | O_TRUNC))
268 		return FILE_OVERWRITE_IF;
269 	else if ((flags & O_CREAT) == O_CREAT)
270 		return FILE_OPEN_IF;
271 	else if ((flags & O_TRUNC) == O_TRUNC)
272 		return FILE_OVERWRITE;
273 	else
274 		return FILE_OPEN;
275 }
276 
277 #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
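/*
 * Open full_path using the SMB1 UNIX extensions (POSIX create).  On success
 * the netfid and oplock are returned to the caller and, if pinode is
 * supplied, the inode is instantiated or refreshed from the
 * FILE_UNIX_BASIC_INFO the server sent back.
 */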
278 int cifs_posix_open(const char *full_path, struct inode **pinode,
279 			struct super_block *sb, int mode, unsigned int f_flags,
280 			__u32 *poplock, __u16 *pnetfid, unsigned int xid)
281 {
282 	int rc;
283 	FILE_UNIX_BASIC_INFO *presp_data;
284 	__u32 posix_flags = 0;
285 	struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
286 	struct cifs_fattr fattr;
287 	struct tcon_link *tlink;
288 	struct cifs_tcon *tcon;
289 
290 	cifs_dbg(FYI, "posix open %s\n", full_path);
291 
292 	presp_data = kzalloc(sizeof(FILE_UNIX_BASIC_INFO), GFP_KERNEL);
293 	if (presp_data == NULL)
294 		return -ENOMEM;
295 
296 	tlink = cifs_sb_tlink(cifs_sb);
297 	if (IS_ERR(tlink)) {
298 		rc = PTR_ERR(tlink);
299 		goto posix_open_ret;
300 	}
301 
302 	tcon = tlink_tcon(tlink);
303 	mode &= ~current_umask();
304 
305 	posix_flags = cifs_posix_convert_flags(f_flags);
306 	rc = CIFSPOSIXCreate(xid, tcon, posix_flags, mode, pnetfid, presp_data,
307 			     poplock, full_path, cifs_sb->local_nls,
308 			     cifs_remap(cifs_sb));
309 	cifs_put_tlink(tlink);
310 
311 	if (rc)
312 		goto posix_open_ret;
313 
314 	if (presp_data->Type == cpu_to_le32(-1))
315 		goto posix_open_ret; /* open ok, caller does qpathinfo */
316 
317 	if (!pinode)
318 		goto posix_open_ret; /* caller does not need info */
319 
320 	cifs_unix_basic_to_fattr(&fattr, presp_data, cifs_sb);
321 
322 	/* get new inode and set it up */
323 	if (*pinode == NULL) {
324 		cifs_fill_uniqueid(sb, &fattr);
325 		*pinode = cifs_iget(sb, &fattr);
326 		if (!*pinode) {
327 			rc = -ENOMEM;
328 			goto posix_open_ret;
329 		}
330 	} else {
331 		cifs_revalidate_mapping(*pinode);
332 		rc = cifs_fattr_to_inode(*pinode, &fattr, false);
333 	}
334 
335 posix_open_ret:
336 	kfree(presp_data);
337 	return rc;
338 }
339 #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
340 
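/*
 * Open full_path with a regular NT-style create.  On success the caller's
 * fid and oplock are filled in and the inode metadata is refreshed from
 * the server.
 */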
341 static int cifs_nt_open(const char *full_path, struct inode *inode, struct cifs_sb_info *cifs_sb,
342 			struct cifs_tcon *tcon, unsigned int f_flags, __u32 *oplock,
343 			struct cifs_fid *fid, unsigned int xid, struct cifs_open_info_data *buf)
344 {
345 	int rc;
346 	int desired_access;
347 	int disposition;
348 	int create_options = CREATE_NOT_DIR;
349 	struct TCP_Server_Info *server = tcon->ses->server;
350 	struct cifs_open_parms oparms;
351 	int rdwr_for_fscache = 0;
352 
353 	if (!server->ops->open)
354 		return -ENOSYS;
355 
356 	/* If we're caching, we need to be able to fill in around partial writes. */
357 	if (cifs_fscache_enabled(inode) && (f_flags & O_ACCMODE) == O_WRONLY)
358 		rdwr_for_fscache = 1;
359 
360 	desired_access = cifs_convert_flags(f_flags, rdwr_for_fscache);
361 
362 /*********************************************************************
363  *  open flag mapping table:
364  *
365  *	POSIX Flag            CIFS Disposition
366  *	----------            ----------------
367  *	O_CREAT               FILE_OPEN_IF
368  *	O_CREAT | O_EXCL      FILE_CREATE
369  *	O_CREAT | O_TRUNC     FILE_OVERWRITE_IF
370  *	O_TRUNC               FILE_OVERWRITE
371  *	none of the above     FILE_OPEN
372  *
373  *	Note that no POSIX open flag maps directly to the disposition
374  *	FILE_SUPERSEDE (ie create whether or not the file exists);
375  *	O_CREAT | O_TRUNC is similar, but it truncates the existing
376  *	file rather than replacing it as FILE_SUPERSEDE does
377  *	(which uses the attributes / metadata passed in on the open call).
378  *
379  *	O_SYNC is a reasonable match to the CIFS writethrough flag
380  *	and the read/write flags match reasonably.  O_LARGEFILE is
381  *	irrelevant because largefile support is always used by this
382  *	client.  The flags O_APPEND, O_DIRECT, O_DIRECTORY, O_FASYNC,
383  *	O_NOFOLLOW and O_NONBLOCK need further investigation.
384  *********************************************************************/
385 
386 	disposition = cifs_get_disposition(f_flags);
387 
388 	/* BB pass O_SYNC flag through on file attributes .. BB */
389 
390 	/* O_SYNC also has bit for O_DSYNC so following check picks up either */
391 	if (f_flags & O_SYNC)
392 		create_options |= CREATE_WRITE_THROUGH;
393 
394 	if (f_flags & O_DIRECT)
395 		create_options |= CREATE_NO_BUFFER;
396 
397 retry_open:
398 	oparms = (struct cifs_open_parms) {
399 		.tcon = tcon,
400 		.cifs_sb = cifs_sb,
401 		.desired_access = desired_access,
402 		.create_options = cifs_create_options(cifs_sb, create_options),
403 		.disposition = disposition,
404 		.path = full_path,
405 		.fid = fid,
406 	};
407 
408 	rc = server->ops->open(xid, &oparms, oplock, buf);
409 	if (rc) {
410 		if (rc == -EACCES && rdwr_for_fscache == 1) {
411 			desired_access = cifs_convert_flags(f_flags, 0);
412 			rdwr_for_fscache = 2;
413 			goto retry_open;
414 		}
415 		return rc;
416 	}
417 	if (rdwr_for_fscache == 2)
418 		cifs_invalidate_cache(inode, FSCACHE_INVAL_DIO_WRITE);
419 
420 	/* TODO: Add support for calling posix query info, passing in the fid */
421 	if (tcon->unix_ext)
422 		rc = cifs_get_inode_info_unix(&inode, full_path, inode->i_sb,
423 					      xid);
424 	else
425 		rc = cifs_get_inode_info(&inode, full_path, buf, inode->i_sb,
426 					 xid, fid);
427 
428 	if (rc) {
429 		server->ops->close(xid, tcon, fid);
430 		if (rc == -ESTALE)
431 			rc = -EOPENSTALE;
432 	}
433 
434 	return rc;
435 }
436 
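/*
 * Return true if any open instance of this inode currently holds cached
 * byte-range (mandatory-style) locks.
 */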
437 static bool
438 cifs_has_mand_locks(struct cifsInodeInfo *cinode)
439 {
440 	struct cifs_fid_locks *cur;
441 	bool has_locks = false;
442 
443 	down_read(&cinode->lock_sem);
444 	list_for_each_entry(cur, &cinode->llist, llist) {
445 		if (!list_empty(&cur->locks)) {
446 			has_locks = true;
447 			break;
448 		}
449 	}
450 	up_read(&cinode->lock_sem);
451 	return has_locks;
452 }
453 
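/*
 * Take an rw_semaphore for write by polling down_write_trylock() with a
 * short sleep instead of blocking in down_write().
 */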
454 void
455 cifs_down_write(struct rw_semaphore *sem)
456 {
457 	while (!down_write_trylock(sem))
458 		msleep(10);
459 }
460 
461 static void cifsFileInfo_put_work(struct work_struct *work);
462 void serverclose_work(struct work_struct *work);
463 
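/*
 * Allocate and initialise the per-open cifsFileInfo for @file, link it into
 * the inode and tcon open-file lists, and hand the server-assigned fid and
 * oplock state to the protocol-specific set_fid() handler.
 */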
464 struct cifsFileInfo *cifs_new_fileinfo(struct cifs_fid *fid, struct file *file,
465 				       struct tcon_link *tlink, __u32 oplock,
466 				       const char *symlink_target)
467 {
468 	struct dentry *dentry = file_dentry(file);
469 	struct inode *inode = d_inode(dentry);
470 	struct cifsInodeInfo *cinode = CIFS_I(inode);
471 	struct cifsFileInfo *cfile;
472 	struct cifs_fid_locks *fdlocks;
473 	struct cifs_tcon *tcon = tlink_tcon(tlink);
474 	struct TCP_Server_Info *server = tcon->ses->server;
475 
476 	cfile = kzalloc(sizeof(struct cifsFileInfo), GFP_KERNEL);
477 	if (cfile == NULL)
478 		return cfile;
479 
480 	fdlocks = kzalloc(sizeof(struct cifs_fid_locks), GFP_KERNEL);
481 	if (!fdlocks) {
482 		kfree(cfile);
483 		return NULL;
484 	}
485 
486 	if (symlink_target) {
487 		cfile->symlink_target = kstrdup(symlink_target, GFP_KERNEL);
488 		if (!cfile->symlink_target) {
489 			kfree(fdlocks);
490 			kfree(cfile);
491 			return NULL;
492 		}
493 	}
494 
495 	INIT_LIST_HEAD(&fdlocks->locks);
496 	fdlocks->cfile = cfile;
497 	cfile->llist = fdlocks;
498 
499 	cfile->count = 1;
500 	cfile->pid = current->tgid;
501 	cfile->uid = current_fsuid();
502 	cfile->dentry = dget(dentry);
503 	cfile->f_flags = file->f_flags;
504 	cfile->status_file_deleted = false;
505 	cfile->invalidHandle = false;
506 	cfile->deferred_close_scheduled = false;
507 	cfile->tlink = cifs_get_tlink(tlink);
508 	INIT_WORK(&cfile->oplock_break, cifs_oplock_break);
509 	INIT_WORK(&cfile->put, cifsFileInfo_put_work);
510 	INIT_WORK(&cfile->serverclose, serverclose_work);
511 	INIT_DELAYED_WORK(&cfile->deferred, smb2_deferred_work_close);
512 	mutex_init(&cfile->fh_mutex);
513 	spin_lock_init(&cfile->file_info_lock);
514 
515 	cifs_sb_active(inode->i_sb);
516 
517 	/*
518 	 * If the server returned a read oplock and we have mandatory brlocks,
519 	 * set oplock level to None.
520 	 */
521 	if (server->ops->is_read_op(oplock) && cifs_has_mand_locks(cinode)) {
522 		cifs_dbg(FYI, "Reset oplock val from read to None due to mand locks\n");
523 		oplock = 0;
524 	}
525 
526 	cifs_down_write(&cinode->lock_sem);
527 	list_add(&fdlocks->llist, &cinode->llist);
528 	up_write(&cinode->lock_sem);
529 
530 	spin_lock(&tcon->open_file_lock);
531 	if (fid->pending_open->oplock != CIFS_OPLOCK_NO_CHANGE && oplock)
532 		oplock = fid->pending_open->oplock;
533 	list_del(&fid->pending_open->olist);
534 
535 	fid->purge_cache = false;
536 	server->ops->set_fid(cfile, fid, oplock);
537 
538 	list_add(&cfile->tlist, &tcon->openFileList);
539 	atomic_inc(&tcon->num_local_opens);
540 
541 	/* if this is a readable file instance, put it first in the list */
542 	spin_lock(&cinode->open_file_lock);
543 	if (file->f_mode & FMODE_READ)
544 		list_add(&cfile->flist, &cinode->openFileList);
545 	else
546 		list_add_tail(&cfile->flist, &cinode->openFileList);
547 	spin_unlock(&cinode->open_file_lock);
548 	spin_unlock(&tcon->open_file_lock);
549 
550 	if (fid->purge_cache)
551 		cifs_zap_mapping(inode);
552 
553 	file->private_data = cfile;
554 	return cfile;
555 }
556 
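/* Take an extra reference on an already-referenced cifsFileInfo. */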
557 struct cifsFileInfo *
558 cifsFileInfo_get(struct cifsFileInfo *cifs_file)
559 {
560 	spin_lock(&cifs_file->file_info_lock);
561 	cifsFileInfo_get_locked(cifs_file);
562 	spin_unlock(&cifs_file->file_info_lock);
563 	return cifs_file;
564 }
565 
566 static void cifsFileInfo_put_final(struct cifsFileInfo *cifs_file)
567 {
568 	struct inode *inode = d_inode(cifs_file->dentry);
569 	struct cifsInodeInfo *cifsi = CIFS_I(inode);
570 	struct cifsLockInfo *li, *tmp;
571 	struct super_block *sb = inode->i_sb;
572 
573 	/*
574 	 * Delete any outstanding lock records. We'll lose them when the file
575 	 * is closed anyway.
576 	 */
577 	cifs_down_write(&cifsi->lock_sem);
578 	list_for_each_entry_safe(li, tmp, &cifs_file->llist->locks, llist) {
579 		list_del(&li->llist);
580 		cifs_del_lock_waiters(li);
581 		kfree(li);
582 	}
583 	list_del(&cifs_file->llist->llist);
584 	kfree(cifs_file->llist);
585 	up_write(&cifsi->lock_sem);
586 
587 	cifs_put_tlink(cifs_file->tlink);
588 	dput(cifs_file->dentry);
589 	cifs_sb_deactive(sb);
590 	kfree(cifs_file->symlink_target);
591 	kfree(cifs_file);
592 }
593 
594 static void cifsFileInfo_put_work(struct work_struct *work)
595 {
596 	struct cifsFileInfo *cifs_file = container_of(work,
597 			struct cifsFileInfo, put);
598 
599 	cifsFileInfo_put_final(cifs_file);
600 }
601 
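/*
 * Worker that retries a server-side close which failed with -EBUSY or
 * -EAGAIN, giving up after a few attempts, and then performs the final
 * release of the cifsFileInfo.
 */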
602 void serverclose_work(struct work_struct *work)
603 {
604 	struct cifsFileInfo *cifs_file = container_of(work,
605 			struct cifsFileInfo, serverclose);
606 
607 	struct cifs_tcon *tcon = tlink_tcon(cifs_file->tlink);
608 
609 	struct TCP_Server_Info *server = tcon->ses->server;
610 	int rc = 0;
611 	int retries = 0;
612 	int MAX_RETRIES = 4;
613 
614 	do {
615 		if (server->ops->close_getattr)
616 			rc = server->ops->close_getattr(0, tcon, cifs_file);
617 		else if (server->ops->close)
618 			rc = server->ops->close(0, tcon, &cifs_file->fid);
619 
620 		if (rc == -EBUSY || rc == -EAGAIN) {
621 			retries++;
622 			msleep(250);
623 		}
624 	} while ((rc == -EBUSY || rc == -EAGAIN) && (retries < MAX_RETRIES)
625 	);
626 
627 	if (retries == MAX_RETRIES)
628 		pr_warn("Serverclose failed %d times, giving up\n", MAX_RETRIES);
629 
630 	if (cifs_file->offload)
631 		queue_work(fileinfo_put_wq, &cifs_file->put);
632 	else
633 		cifsFileInfo_put_final(cifs_file);
634 }
635 
636 /**
637  * cifsFileInfo_put - release a reference to file private data
638  *
639  * Always potentially wait for oplock handler. See _cifsFileInfo_put().
640  *
641  * @cifs_file:	cifs/smb3 specific info (eg refcounts) for an open file
642  */
643 void cifsFileInfo_put(struct cifsFileInfo *cifs_file)
644 {
645 	_cifsFileInfo_put(cifs_file, true, true);
646 }
647 
648 /**
649  * _cifsFileInfo_put - release a reference to file private data
650  *
651  * This may involve closing the filehandle @cifs_file out on the
652  * server. Must be called without holding tcon->open_file_lock,
653  * cinode->open_file_lock and cifs_file->file_info_lock.
654  *
655  * If @wait_oplock_handler is true and we are releasing the last
656  * reference, wait for any running oplock break handler of the file
657  * and cancel any pending one.
658  *
659  * @cifs_file:	cifs/smb3 specific info (eg refcounts) for an open file
660  * @wait_oplock_handler: must be false if called from oplock_break_handler
661  * @offload:	if true, offload the final put to a workqueue (close and oplock break paths pass false)
662  *
663  */
664 void _cifsFileInfo_put(struct cifsFileInfo *cifs_file,
665 		       bool wait_oplock_handler, bool offload)
666 {
667 	struct inode *inode = d_inode(cifs_file->dentry);
668 	struct cifs_tcon *tcon = tlink_tcon(cifs_file->tlink);
669 	struct TCP_Server_Info *server = tcon->ses->server;
670 	struct cifsInodeInfo *cifsi = CIFS_I(inode);
671 	struct super_block *sb = inode->i_sb;
672 	struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
673 	struct cifs_fid fid = {};
674 	struct cifs_pending_open open;
675 	bool oplock_break_cancelled;
676 	bool serverclose_offloaded = false;
677 
678 	spin_lock(&tcon->open_file_lock);
679 	spin_lock(&cifsi->open_file_lock);
680 	spin_lock(&cifs_file->file_info_lock);
681 
682 	cifs_file->offload = offload;
683 	if (--cifs_file->count > 0) {
684 		spin_unlock(&cifs_file->file_info_lock);
685 		spin_unlock(&cifsi->open_file_lock);
686 		spin_unlock(&tcon->open_file_lock);
687 		return;
688 	}
689 	spin_unlock(&cifs_file->file_info_lock);
690 
691 	if (server->ops->get_lease_key)
692 		server->ops->get_lease_key(inode, &fid);
693 
694 	/* store open in pending opens to make sure we don't miss lease break */
695 	cifs_add_pending_open_locked(&fid, cifs_file->tlink, &open);
696 
697 	/* remove it from the lists */
698 	list_del(&cifs_file->flist);
699 	list_del(&cifs_file->tlist);
700 	atomic_dec(&tcon->num_local_opens);
701 
702 	if (list_empty(&cifsi->openFileList)) {
703 		cifs_dbg(FYI, "closing last open instance for inode %p\n",
704 			 d_inode(cifs_file->dentry));
705 		/*
706 		 * In strict cache mode we need to invalidate the mapping on the
707 		 * last close because it may cause an error when we open this
708 		 * file again and get at least a level II oplock.
709 		 */
710 		if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_STRICT_IO)
711 			set_bit(CIFS_INO_INVALID_MAPPING, &cifsi->flags);
712 		cifs_set_oplock_level(cifsi, 0);
713 	}
714 
715 	spin_unlock(&cifsi->open_file_lock);
716 	spin_unlock(&tcon->open_file_lock);
717 
718 	oplock_break_cancelled = wait_oplock_handler ?
719 		cancel_work_sync(&cifs_file->oplock_break) : false;
720 
721 	if (!tcon->need_reconnect && !cifs_file->invalidHandle) {
722 		struct TCP_Server_Info *server = tcon->ses->server;
723 		unsigned int xid;
724 		int rc = 0;
725 
726 		xid = get_xid();
727 		if (server->ops->close_getattr)
728 			rc = server->ops->close_getattr(xid, tcon, cifs_file);
729 		else if (server->ops->close)
730 			rc = server->ops->close(xid, tcon, &cifs_file->fid);
731 		_free_xid(xid);
732 
733 		if (rc == -EBUSY || rc == -EAGAIN) {
734 			// Server close failed, hence offloading it as an async op
735 			queue_work(serverclose_wq, &cifs_file->serverclose);
736 			serverclose_offloaded = true;
737 		}
738 	}
739 
740 	if (oplock_break_cancelled)
741 		cifs_done_oplock_break(cifsi);
742 
743 	cifs_del_pending_open(&open);
744 
745 	// If serverclose has been offloaded to the workqueue (on failure), it
746 	// will handle offloading the put as well. If serverclose was not
747 	// offloaded, we need to handle offloading the put here.
748 	if (!serverclose_offloaded) {
749 		if (offload)
750 			queue_work(fileinfo_put_wq, &cifs_file->put);
751 		else
752 			cifsFileInfo_put_final(cifs_file);
753 	}
754 }
755 
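/*
 * ->open() for regular files: reuse a matching deferred-close handle if one
 * exists, otherwise try a POSIX open (where the SMB1 UNIX extensions are
 * available) and fall back to a regular NT open, then attach the new
 * cifsFileInfo to the struct file.
 */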
756 int cifs_open(struct inode *inode, struct file *file)
757 
758 {
759 	int rc = -EACCES;
760 	unsigned int xid;
761 	__u32 oplock;
762 	struct cifs_sb_info *cifs_sb;
763 	struct TCP_Server_Info *server;
764 	struct cifs_tcon *tcon;
765 	struct tcon_link *tlink;
766 	struct cifsFileInfo *cfile = NULL;
767 	void *page;
768 	const char *full_path;
769 	bool posix_open_ok = false;
770 	struct cifs_fid fid = {};
771 	struct cifs_pending_open open;
772 	struct cifs_open_info_data data = {};
773 
774 	xid = get_xid();
775 
776 	cifs_sb = CIFS_SB(inode->i_sb);
777 	if (unlikely(cifs_forced_shutdown(cifs_sb))) {
778 		free_xid(xid);
779 		return -EIO;
780 	}
781 
782 	tlink = cifs_sb_tlink(cifs_sb);
783 	if (IS_ERR(tlink)) {
784 		free_xid(xid);
785 		return PTR_ERR(tlink);
786 	}
787 	tcon = tlink_tcon(tlink);
788 	server = tcon->ses->server;
789 
790 	page = alloc_dentry_path();
791 	full_path = build_path_from_dentry(file_dentry(file), page);
792 	if (IS_ERR(full_path)) {
793 		rc = PTR_ERR(full_path);
794 		goto out;
795 	}
796 
797 	cifs_dbg(FYI, "inode = 0x%p file flags are 0x%x for %s\n",
798 		 inode, file->f_flags, full_path);
799 
800 	if (file->f_flags & O_DIRECT &&
801 	    cifs_sb->mnt_cifs_flags & CIFS_MOUNT_STRICT_IO) {
802 		if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_BRL)
803 			file->f_op = &cifs_file_direct_nobrl_ops;
804 		else
805 			file->f_op = &cifs_file_direct_ops;
806 	}
807 
808 	/* Get the cached handle as SMB2 close is deferred */
809 	rc = cifs_get_readable_path(tcon, full_path, &cfile);
810 	if (rc == 0) {
811 		if (file->f_flags == cfile->f_flags) {
812 			file->private_data = cfile;
813 			spin_lock(&CIFS_I(inode)->deferred_lock);
814 			cifs_del_deferred_close(cfile);
815 			spin_unlock(&CIFS_I(inode)->deferred_lock);
816 			goto use_cache;
817 		} else {
818 			_cifsFileInfo_put(cfile, true, false);
819 		}
820 	}
821 
822 	if (server->oplocks)
823 		oplock = REQ_OPLOCK;
824 	else
825 		oplock = 0;
826 
827 #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
828 	if (!tcon->broken_posix_open && tcon->unix_ext &&
829 	    cap_unix(tcon->ses) && (CIFS_UNIX_POSIX_PATH_OPS_CAP &
830 				le64_to_cpu(tcon->fsUnixInfo.Capability))) {
831 		/* can not refresh inode info since size could be stale */
832 		rc = cifs_posix_open(full_path, &inode, inode->i_sb,
833 				cifs_sb->ctx->file_mode /* ignored */,
834 				file->f_flags, &oplock, &fid.netfid, xid);
835 		if (rc == 0) {
836 			cifs_dbg(FYI, "posix open succeeded\n");
837 			posix_open_ok = true;
838 		} else if ((rc == -EINVAL) || (rc == -EOPNOTSUPP)) {
839 			if (tcon->ses->serverNOS)
840 				cifs_dbg(VFS, "server %s of type %s returned unexpected error on SMB posix open, disabling posix open support. Check if server update available.\n",
841 					 tcon->ses->ip_addr,
842 					 tcon->ses->serverNOS);
843 			tcon->broken_posix_open = true;
844 		} else if ((rc != -EIO) && (rc != -EREMOTE) &&
845 			 (rc != -EOPNOTSUPP)) /* path not found or net err */
846 			goto out;
847 		/*
848 		 * Else fall through and retry the open the old way on network
849 		 * I/O or DFS errors.
850 		 */
851 	}
852 #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
853 
854 	if (server->ops->get_lease_key)
855 		server->ops->get_lease_key(inode, &fid);
856 
857 	cifs_add_pending_open(&fid, tlink, &open);
858 
859 	if (!posix_open_ok) {
860 		if (server->ops->get_lease_key)
861 			server->ops->get_lease_key(inode, &fid);
862 
863 		rc = cifs_nt_open(full_path, inode, cifs_sb, tcon, file->f_flags, &oplock, &fid,
864 				  xid, &data);
865 		if (rc) {
866 			cifs_del_pending_open(&open);
867 			goto out;
868 		}
869 	}
870 
871 	cfile = cifs_new_fileinfo(&fid, file, tlink, oplock, data.symlink_target);
872 	if (cfile == NULL) {
873 		if (server->ops->close)
874 			server->ops->close(xid, tcon, &fid);
875 		cifs_del_pending_open(&open);
876 		rc = -ENOMEM;
877 		goto out;
878 	}
879 
880 #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
881 	if ((oplock & CIFS_CREATE_ACTION) && !posix_open_ok && tcon->unix_ext) {
882 		/*
883 		 * Time to set the mode, which we could not set earlier due to
884 		 * problems creating new read-only files.
885 		 */
886 		struct cifs_unix_set_info_args args = {
887 			.mode	= inode->i_mode,
888 			.uid	= INVALID_UID, /* no change */
889 			.gid	= INVALID_GID, /* no change */
890 			.ctime	= NO_CHANGE_64,
891 			.atime	= NO_CHANGE_64,
892 			.mtime	= NO_CHANGE_64,
893 			.device	= 0,
894 		};
895 		CIFSSMBUnixSetFileInfo(xid, tcon, &args, fid.netfid,
896 				       cfile->pid);
897 	}
898 #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
899 
900 use_cache:
901 	fscache_use_cookie(cifs_inode_cookie(file_inode(file)),
902 			   file->f_mode & FMODE_WRITE);
903 	if (!(file->f_flags & O_DIRECT))
904 		goto out;
905 	if ((file->f_flags & (O_ACCMODE | O_APPEND)) == O_RDONLY)
906 		goto out;
907 	cifs_invalidate_cache(file_inode(file), FSCACHE_INVAL_DIO_WRITE);
908 
909 out:
910 	free_dentry_path(page);
911 	free_xid(xid);
912 	cifs_put_tlink(tlink);
913 	cifs_free_open_info(&data);
914 	return rc;
915 }
916 
917 #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
918 static int cifs_push_posix_locks(struct cifsFileInfo *cfile);
919 #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
920 
921 /*
922  * Try to reacquire byte-range locks that were released when the session
923  * to the server was lost.
924  */
925 static int
926 cifs_relock_file(struct cifsFileInfo *cfile)
927 {
928 	struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
929 	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
930 	int rc = 0;
931 #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
932 	struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
933 #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
934 
935 	down_read_nested(&cinode->lock_sem, SINGLE_DEPTH_NESTING);
936 	if (cinode->can_cache_brlcks) {
937 		/* can cache locks - no need to relock */
938 		up_read(&cinode->lock_sem);
939 		return rc;
940 	}
941 
942 #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
943 	if (cap_unix(tcon->ses) &&
944 	    (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
945 	    ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
946 		rc = cifs_push_posix_locks(cfile);
947 	else
948 #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
949 		rc = tcon->ses->server->ops->push_mand_locks(cfile);
950 
951 	up_read(&cinode->lock_sem);
952 	return rc;
953 }
954 
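/*
 * Reopen a file whose handle has been invalidated (e.g. after reconnect).
 * If can_flush is true, dirty data is written back and the inode metadata
 * is refreshed from the server before any byte-range locks are reacquired.
 */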
955 static int
956 cifs_reopen_file(struct cifsFileInfo *cfile, bool can_flush)
957 {
958 	int rc = -EACCES;
959 	unsigned int xid;
960 	__u32 oplock;
961 	struct cifs_sb_info *cifs_sb;
962 	struct cifs_tcon *tcon;
963 	struct TCP_Server_Info *server;
964 	struct cifsInodeInfo *cinode;
965 	struct inode *inode;
966 	void *page;
967 	const char *full_path;
968 	int desired_access;
969 	int disposition = FILE_OPEN;
970 	int create_options = CREATE_NOT_DIR;
971 	struct cifs_open_parms oparms;
972 	int rdwr_for_fscache = 0;
973 
974 	xid = get_xid();
975 	mutex_lock(&cfile->fh_mutex);
976 	if (!cfile->invalidHandle) {
977 		mutex_unlock(&cfile->fh_mutex);
978 		free_xid(xid);
979 		return 0;
980 	}
981 
982 	inode = d_inode(cfile->dentry);
983 	cifs_sb = CIFS_SB(inode->i_sb);
984 	tcon = tlink_tcon(cfile->tlink);
985 	server = tcon->ses->server;
986 
987 	/*
988 	 * Cannot grab the rename sem here because various ops, including those
989 	 * that already hold the rename sem, can end up causing writepage to be
990 	 * called; if the server was down, that means we end up here and can
991 	 * never tell whether the caller already holds the rename_sem.
992 	 */
993 	page = alloc_dentry_path();
994 	full_path = build_path_from_dentry(cfile->dentry, page);
995 	if (IS_ERR(full_path)) {
996 		mutex_unlock(&cfile->fh_mutex);
997 		free_dentry_path(page);
998 		free_xid(xid);
999 		return PTR_ERR(full_path);
1000 	}
1001 
1002 	cifs_dbg(FYI, "inode = 0x%p file flags 0x%x for %s\n",
1003 		 inode, cfile->f_flags, full_path);
1004 
1005 	if (tcon->ses->server->oplocks)
1006 		oplock = REQ_OPLOCK;
1007 	else
1008 		oplock = 0;
1009 
1010 #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
1011 	if (tcon->unix_ext && cap_unix(tcon->ses) &&
1012 	    (CIFS_UNIX_POSIX_PATH_OPS_CAP &
1013 				le64_to_cpu(tcon->fsUnixInfo.Capability))) {
1014 		/*
1015 		 * O_CREAT, O_EXCL and O_TRUNC already had their effect on the
1016 		 * original open. Must mask them off for a reopen.
1017 		 */
1018 		unsigned int oflags = cfile->f_flags &
1019 						~(O_CREAT | O_EXCL | O_TRUNC);
1020 
1021 		rc = cifs_posix_open(full_path, NULL, inode->i_sb,
1022 				     cifs_sb->ctx->file_mode /* ignored */,
1023 				     oflags, &oplock, &cfile->fid.netfid, xid);
1024 		if (rc == 0) {
1025 			cifs_dbg(FYI, "posix reopen succeeded\n");
1026 			oparms.reconnect = true;
1027 			goto reopen_success;
1028 		}
1029 		/*
1030 		 * Fall through and retry the open the old way on errors;
1031 		 * in the reconnect path especially, it is important to retry hard.
1032 		 */
1033 	}
1034 #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
1035 
1036 	/* If we're caching, we need to be able to fill in around partial writes. */
1037 	if (cifs_fscache_enabled(inode) && (cfile->f_flags & O_ACCMODE) == O_WRONLY)
1038 		rdwr_for_fscache = 1;
1039 
1040 	desired_access = cifs_convert_flags(cfile->f_flags, rdwr_for_fscache);
1041 
1042 	/* O_SYNC also has bit for O_DSYNC so following check picks up either */
1043 	if (cfile->f_flags & O_SYNC)
1044 		create_options |= CREATE_WRITE_THROUGH;
1045 
1046 	if (cfile->f_flags & O_DIRECT)
1047 		create_options |= CREATE_NO_BUFFER;
1048 
1049 	if (server->ops->get_lease_key)
1050 		server->ops->get_lease_key(inode, &cfile->fid);
1051 
1052 retry_open:
1053 	oparms = (struct cifs_open_parms) {
1054 		.tcon = tcon,
1055 		.cifs_sb = cifs_sb,
1056 		.desired_access = desired_access,
1057 		.create_options = cifs_create_options(cifs_sb, create_options),
1058 		.disposition = disposition,
1059 		.path = full_path,
1060 		.fid = &cfile->fid,
1061 		.reconnect = true,
1062 	};
1063 
1064 	/*
1065 	 * Cannot refresh the inode by passing in a file_info buf to be returned
1066 	 * by ops->open and then calling get_inode_info with the returned buf,
1067 	 * since the file might have write-behind data that needs to be flushed
1068 	 * and the server's version of the file size can be stale. If we knew for
1069 	 * sure that the inode was not dirty locally, we could do this.
1070 	 */
1071 	rc = server->ops->open(xid, &oparms, &oplock, NULL);
1072 	if (rc == -ENOENT && oparms.reconnect == false) {
1073 		/* durable handle timeout is expired - open the file again */
1074 		rc = server->ops->open(xid, &oparms, &oplock, NULL);
1075 		/* indicate that we need to relock the file */
1076 		oparms.reconnect = true;
1077 	}
1078 	if (rc == -EACCES && rdwr_for_fscache == 1) {
1079 		desired_access = cifs_convert_flags(cfile->f_flags, 0);
1080 		rdwr_for_fscache = 2;
1081 		goto retry_open;
1082 	}
1083 
1084 	if (rc) {
1085 		mutex_unlock(&cfile->fh_mutex);
1086 		cifs_dbg(FYI, "cifs_reopen returned 0x%x\n", rc);
1087 		cifs_dbg(FYI, "oplock: %d\n", oplock);
1088 		goto reopen_error_exit;
1089 	}
1090 
1091 	if (rdwr_for_fscache == 2)
1092 		cifs_invalidate_cache(inode, FSCACHE_INVAL_DIO_WRITE);
1093 
1094 #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
1095 reopen_success:
1096 #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
1097 	cfile->invalidHandle = false;
1098 	mutex_unlock(&cfile->fh_mutex);
1099 	cinode = CIFS_I(inode);
1100 
1101 	if (can_flush) {
1102 		rc = filemap_write_and_wait(inode->i_mapping);
1103 		if (!is_interrupt_error(rc))
1104 			mapping_set_error(inode->i_mapping, rc);
1105 
1106 		if (tcon->posix_extensions) {
1107 			rc = smb311_posix_get_inode_info(&inode, full_path,
1108 							 NULL, inode->i_sb, xid);
1109 		} else if (tcon->unix_ext) {
1110 			rc = cifs_get_inode_info_unix(&inode, full_path,
1111 						      inode->i_sb, xid);
1112 		} else {
1113 			rc = cifs_get_inode_info(&inode, full_path, NULL,
1114 						 inode->i_sb, xid, NULL);
1115 		}
1116 	}
1117 	/*
1118 	 * Else we are already writing out data to the server and could deadlock
1119 	 * if we tried to flush it; and since we do not know whether we have data
1120 	 * that would invalidate the current end of file on the server, we cannot
1121 	 * go to the server to get the new inode info.
1122 	 */
1123 
1124 	/*
1125 	 * If the server returned a read oplock and we have mandatory brlocks,
1126 	 * set oplock level to None.
1127 	 */
1128 	if (server->ops->is_read_op(oplock) && cifs_has_mand_locks(cinode)) {
1129 		cifs_dbg(FYI, "Reset oplock val from read to None due to mand locks\n");
1130 		oplock = 0;
1131 	}
1132 
1133 	server->ops->set_fid(cfile, &cfile->fid, oplock);
1134 	if (oparms.reconnect)
1135 		cifs_relock_file(cfile);
1136 
1137 reopen_error_exit:
1138 	free_dentry_path(page);
1139 	free_xid(xid);
1140 	return rc;
1141 }
1142 
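/* Delayed work that performs a close previously deferred by cifs_close(). */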
1143 void smb2_deferred_work_close(struct work_struct *work)
1144 {
1145 	struct cifsFileInfo *cfile = container_of(work,
1146 			struct cifsFileInfo, deferred.work);
1147 
1148 	spin_lock(&CIFS_I(d_inode(cfile->dentry))->deferred_lock);
1149 	cifs_del_deferred_close(cfile);
1150 	cfile->deferred_close_scheduled = false;
1151 	spin_unlock(&CIFS_I(d_inode(cfile->dentry))->deferred_lock);
1152 	_cifsFileInfo_put(cfile, true, false);
1153 }
1154 
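/*
 * ->release() for regular files.  If the handle holds a cached read/write/
 * handle lease, the actual close may be deferred for the closetimeo
 * interval so that a quick reopen can reuse it; otherwise the reference is
 * dropped immediately.
 */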
1155 int cifs_close(struct inode *inode, struct file *file)
1156 {
1157 	struct cifsFileInfo *cfile;
1158 	struct cifsInodeInfo *cinode = CIFS_I(inode);
1159 	struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
1160 	struct cifs_deferred_close *dclose;
1161 
1162 	cifs_fscache_unuse_inode_cookie(inode, file->f_mode & FMODE_WRITE);
1163 
1164 	if (file->private_data != NULL) {
1165 		cfile = file->private_data;
1166 		file->private_data = NULL;
1167 		dclose = kmalloc(sizeof(struct cifs_deferred_close), GFP_KERNEL);
1168 		if ((cifs_sb->ctx->closetimeo && cinode->oplock == CIFS_CACHE_RHW_FLG)
1169 		    && cinode->lease_granted &&
1170 		    !test_bit(CIFS_INO_CLOSE_ON_LOCK, &cinode->flags) &&
1171 		    dclose && !(cfile->status_file_deleted)) {
1172 			if (test_and_clear_bit(CIFS_INO_MODIFIED_ATTR, &cinode->flags)) {
1173 				inode_set_mtime_to_ts(inode,
1174 						      inode_set_ctime_current(inode));
1175 			}
1176 			spin_lock(&cinode->deferred_lock);
1177 			cifs_add_deferred_close(cfile, dclose);
1178 			if (cfile->deferred_close_scheduled &&
1179 			    delayed_work_pending(&cfile->deferred)) {
1180 				/*
1181 				 * If there is no pending work, mod_delayed_work queues new work.
1182 				 * So, Increase the ref count to avoid use-after-free.
1183 				 * So, increase the ref count to avoid use-after-free.
1184 				if (!mod_delayed_work(deferredclose_wq,
1185 						&cfile->deferred, cifs_sb->ctx->closetimeo))
1186 					cifsFileInfo_get(cfile);
1187 			} else {
1188 				/* Deferred close for files */
1189 				queue_delayed_work(deferredclose_wq,
1190 						&cfile->deferred, cifs_sb->ctx->closetimeo);
1191 				cfile->deferred_close_scheduled = true;
1192 				spin_unlock(&cinode->deferred_lock);
1193 				return 0;
1194 			}
1195 			spin_unlock(&cinode->deferred_lock);
1196 			_cifsFileInfo_put(cfile, true, false);
1197 		} else {
1198 			_cifsFileInfo_put(cfile, true, false);
1199 			kfree(dclose);
1200 		}
1201 	}
1202 
1203 	/* return code from the ->release op is always ignored */
1204 	return 0;
1205 }
1206 
1207 void
1208 cifs_reopen_persistent_handles(struct cifs_tcon *tcon)
1209 {
1210 	struct cifsFileInfo *open_file, *tmp;
1211 	struct list_head tmp_list;
1212 
1213 	if (!tcon->use_persistent || !tcon->need_reopen_files)
1214 		return;
1215 
1216 	tcon->need_reopen_files = false;
1217 
1218 	cifs_dbg(FYI, "Reopen persistent handles\n");
1219 	INIT_LIST_HEAD(&tmp_list);
1220 
1221 	/* list all files open on the tree connection and reopen persistent handles */
1222 	spin_lock(&tcon->open_file_lock);
1223 	list_for_each_entry(open_file, &tcon->openFileList, tlist) {
1224 		if (!open_file->invalidHandle)
1225 			continue;
1226 		cifsFileInfo_get(open_file);
1227 		list_add_tail(&open_file->rlist, &tmp_list);
1228 	}
1229 	spin_unlock(&tcon->open_file_lock);
1230 
1231 	list_for_each_entry_safe(open_file, tmp, &tmp_list, rlist) {
1232 		if (cifs_reopen_file(open_file, false /* do not flush */))
1233 			tcon->need_reopen_files = true;
1234 		list_del_init(&open_file->rlist);
1235 		cifsFileInfo_put(open_file);
1236 	}
1237 }
1238 
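/* ->release() for directories: close the search handle and free the readdir buffer. */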
1239 int cifs_closedir(struct inode *inode, struct file *file)
1240 {
1241 	int rc = 0;
1242 	unsigned int xid;
1243 	struct cifsFileInfo *cfile = file->private_data;
1244 	struct cifs_tcon *tcon;
1245 	struct TCP_Server_Info *server;
1246 	char *buf;
1247 
1248 	cifs_dbg(FYI, "Closedir inode = 0x%p\n", inode);
1249 
1250 	if (cfile == NULL)
1251 		return rc;
1252 
1253 	xid = get_xid();
1254 	tcon = tlink_tcon(cfile->tlink);
1255 	server = tcon->ses->server;
1256 
1257 	cifs_dbg(FYI, "Freeing private data in close dir\n");
1258 	spin_lock(&cfile->file_info_lock);
1259 	if (server->ops->dir_needs_close(cfile)) {
1260 		cfile->invalidHandle = true;
1261 		spin_unlock(&cfile->file_info_lock);
1262 		if (server->ops->close_dir)
1263 			rc = server->ops->close_dir(xid, tcon, &cfile->fid);
1264 		else
1265 			rc = -ENOSYS;
1266 		cifs_dbg(FYI, "Closing uncompleted readdir with rc %d\n", rc);
1267 		/* not much we can do if it fails anyway, ignore rc */
1268 		rc = 0;
1269 	} else
1270 		spin_unlock(&cfile->file_info_lock);
1271 
1272 	buf = cfile->srch_inf.ntwrk_buf_start;
1273 	if (buf) {
1274 		cifs_dbg(FYI, "closedir free smb buf in srch struct\n");
1275 		cfile->srch_inf.ntwrk_buf_start = NULL;
1276 		if (cfile->srch_inf.smallBuf)
1277 			cifs_small_buf_release(buf);
1278 		else
1279 			cifs_buf_release(buf);
1280 	}
1281 
1282 	cifs_put_tlink(cfile->tlink);
1283 	kfree(file->private_data);
1284 	file->private_data = NULL;
1285 	/* BB can we lock the filestruct while this is going on? */
1286 	free_xid(xid);
1287 	return rc;
1288 }
1289 
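/* Allocate and initialise a cifsLockInfo record for the given byte range. */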
1290 static struct cifsLockInfo *
1291 cifs_lock_init(__u64 offset, __u64 length, __u8 type, __u16 flags)
1292 {
1293 	struct cifsLockInfo *lock =
1294 		kmalloc(sizeof(struct cifsLockInfo), GFP_KERNEL);
1295 	if (!lock)
1296 		return lock;
1297 	lock->offset = offset;
1298 	lock->length = length;
1299 	lock->type = type;
1300 	lock->pid = current->tgid;
1301 	lock->flags = flags;
1302 	INIT_LIST_HEAD(&lock->blist);
1303 	init_waitqueue_head(&lock->block_q);
1304 	return lock;
1305 }
1306 
1307 void
1308 cifs_del_lock_waiters(struct cifsLockInfo *lock)
1309 {
1310 	struct cifsLockInfo *li, *tmp;
1311 	list_for_each_entry_safe(li, tmp, &lock->blist, blist) {
1312 		list_del_init(&li->blist);
1313 		wake_up(&li->block_q);
1314 	}
1315 }
1316 
1317 #define CIFS_LOCK_OP	0
1318 #define CIFS_READ_OP	1
1319 #define CIFS_WRITE_OP	2
1320 
1321 /* @rw_check : 0 - no op, 1 - read, 2 - write */
1322 static bool
1323 cifs_find_fid_lock_conflict(struct cifs_fid_locks *fdlocks, __u64 offset,
1324 			    __u64 length, __u8 type, __u16 flags,
1325 			    struct cifsFileInfo *cfile,
1326 			    struct cifsLockInfo **conf_lock, int rw_check)
1327 {
1328 	struct cifsLockInfo *li;
1329 	struct cifsFileInfo *cur_cfile = fdlocks->cfile;
1330 	struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;
1331 
1332 	list_for_each_entry(li, &fdlocks->locks, llist) {
1333 		if (offset + length <= li->offset ||
1334 		    offset >= li->offset + li->length)
1335 			continue;
1336 		if (rw_check != CIFS_LOCK_OP && current->tgid == li->pid &&
1337 		    server->ops->compare_fids(cfile, cur_cfile)) {
1338 			/* shared lock prevents write op through the same fid */
1339 			if (!(li->type & server->vals->shared_lock_type) ||
1340 			    rw_check != CIFS_WRITE_OP)
1341 				continue;
1342 		}
1343 		if ((type & server->vals->shared_lock_type) &&
1344 		    ((server->ops->compare_fids(cfile, cur_cfile) &&
1345 		     current->tgid == li->pid) || type == li->type))
1346 			continue;
1347 		if (rw_check == CIFS_LOCK_OP &&
1348 		    (flags & FL_OFDLCK) && (li->flags & FL_OFDLCK) &&
1349 		    server->ops->compare_fids(cfile, cur_cfile))
1350 			continue;
1351 		if (conf_lock)
1352 			*conf_lock = li;
1353 		return true;
1354 	}
1355 	return false;
1356 }
1357 
1358 bool
1359 cifs_find_lock_conflict(struct cifsFileInfo *cfile, __u64 offset, __u64 length,
1360 			__u8 type, __u16 flags,
1361 			struct cifsLockInfo **conf_lock, int rw_check)
1362 {
1363 	bool rc = false;
1364 	struct cifs_fid_locks *cur;
1365 	struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
1366 
1367 	list_for_each_entry(cur, &cinode->llist, llist) {
1368 		rc = cifs_find_fid_lock_conflict(cur, offset, length, type,
1369 						 flags, cfile, conf_lock,
1370 						 rw_check);
1371 		if (rc)
1372 			break;
1373 	}
1374 
1375 	return rc;
1376 }
1377 
1378 /*
1379  * Check if there is another lock that prevents us from setting the lock
1380  * (mandatory style). If such a lock exists, update the flock structure with
1381  * its properties. Otherwise, set the flock type to F_UNLCK if we can cache
1382  * brlocks, or leave it the same if we can't. Returns 0 if we don't need to
1383  * ask the server, or 1 otherwise.
1384  */
1385 static int
1386 cifs_lock_test(struct cifsFileInfo *cfile, __u64 offset, __u64 length,
1387 	       __u8 type, struct file_lock *flock)
1388 {
1389 	int rc = 0;
1390 	struct cifsLockInfo *conf_lock;
1391 	struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
1392 	struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;
1393 	bool exist;
1394 
1395 	down_read(&cinode->lock_sem);
1396 
1397 	exist = cifs_find_lock_conflict(cfile, offset, length, type,
1398 					flock->fl_flags, &conf_lock,
1399 					CIFS_LOCK_OP);
1400 	if (exist) {
1401 		flock->fl_start = conf_lock->offset;
1402 		flock->fl_end = conf_lock->offset + conf_lock->length - 1;
1403 		flock->fl_pid = conf_lock->pid;
1404 		if (conf_lock->type & server->vals->shared_lock_type)
1405 			flock->fl_type = F_RDLCK;
1406 		else
1407 			flock->fl_type = F_WRLCK;
1408 	} else if (!cinode->can_cache_brlcks)
1409 		rc = 1;
1410 	else
1411 		flock->fl_type = F_UNLCK;
1412 
1413 	up_read(&cinode->lock_sem);
1414 	return rc;
1415 }
1416 
1417 static void
1418 cifs_lock_add(struct cifsFileInfo *cfile, struct cifsLockInfo *lock)
1419 {
1420 	struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
1421 	cifs_down_write(&cinode->lock_sem);
1422 	list_add_tail(&lock->llist, &cfile->llist->locks);
1423 	up_write(&cinode->lock_sem);
1424 }
1425 
1426 /*
1427  * Set the byte-range lock (mandatory style). Returns:
1428  * 1) 0, if we set the lock and don't need to ask the server;
1429  * 2) 1, if no locks prevent us but we need to ask the server;
1430  * 3) -EACCES, if there is a lock that prevents us and wait is false.
1431  */
1432 static int
1433 cifs_lock_add_if(struct cifsFileInfo *cfile, struct cifsLockInfo *lock,
1434 		 bool wait)
1435 {
1436 	struct cifsLockInfo *conf_lock;
1437 	struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
1438 	bool exist;
1439 	int rc = 0;
1440 
1441 try_again:
1442 	exist = false;
1443 	cifs_down_write(&cinode->lock_sem);
1444 
1445 	exist = cifs_find_lock_conflict(cfile, lock->offset, lock->length,
1446 					lock->type, lock->flags, &conf_lock,
1447 					CIFS_LOCK_OP);
1448 	if (!exist && cinode->can_cache_brlcks) {
1449 		list_add_tail(&lock->llist, &cfile->llist->locks);
1450 		up_write(&cinode->lock_sem);
1451 		return rc;
1452 	}
1453 
1454 	if (!exist)
1455 		rc = 1;
1456 	else if (!wait)
1457 		rc = -EACCES;
1458 	else {
1459 		list_add_tail(&lock->blist, &conf_lock->blist);
1460 		up_write(&cinode->lock_sem);
1461 		rc = wait_event_interruptible(lock->block_q,
1462 					(lock->blist.prev == &lock->blist) &&
1463 					(lock->blist.next == &lock->blist));
1464 		if (!rc)
1465 			goto try_again;
1466 		cifs_down_write(&cinode->lock_sem);
1467 		list_del_init(&lock->blist);
1468 	}
1469 
1470 	up_write(&cinode->lock_sem);
1471 	return rc;
1472 }
1473 
1474 #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
1475 /*
1476  * Check if there is another lock that prevents us from setting the lock
1477  * (posix style). If such a lock exists, update the flock structure with
1478  * its properties. Otherwise, set the flock type to F_UNLCK if we can cache
1479  * brlocks, or leave it the same if we can't. Returns 0 if we don't need to
1480  * ask the server, or 1 otherwise.
1481  */
1482 static int
1483 cifs_posix_lock_test(struct file *file, struct file_lock *flock)
1484 {
1485 	int rc = 0;
1486 	struct cifsInodeInfo *cinode = CIFS_I(file_inode(file));
1487 	unsigned char saved_type = flock->fl_type;
1488 
1489 	if ((flock->fl_flags & FL_POSIX) == 0)
1490 		return 1;
1491 
1492 	down_read(&cinode->lock_sem);
1493 	posix_test_lock(file, flock);
1494 
1495 	if (flock->fl_type == F_UNLCK && !cinode->can_cache_brlcks) {
1496 		flock->fl_type = saved_type;
1497 		rc = 1;
1498 	}
1499 
1500 	up_read(&cinode->lock_sem);
1501 	return rc;
1502 }
1503 
1504 /*
1505  * Set the byte-range lock (posix style). Returns:
1506  * 1) <0, if an error occurs while setting the lock;
1507  * 2) 0, if we set the lock and don't need to ask the server;
1508  * 3) FILE_LOCK_DEFERRED, if we will wait for some other file_lock;
1509  * 4) FILE_LOCK_DEFERRED + 1, if we need to ask the server.
1510  */
1511 static int
1512 cifs_posix_lock_set(struct file *file, struct file_lock *flock)
1513 {
1514 	struct cifsInodeInfo *cinode = CIFS_I(file_inode(file));
1515 	int rc = FILE_LOCK_DEFERRED + 1;
1516 
1517 	if ((flock->fl_flags & FL_POSIX) == 0)
1518 		return rc;
1519 
1520 	cifs_down_write(&cinode->lock_sem);
1521 	if (!cinode->can_cache_brlcks) {
1522 		up_write(&cinode->lock_sem);
1523 		return rc;
1524 	}
1525 
1526 	rc = posix_lock_file(file, flock, NULL);
1527 	up_write(&cinode->lock_sem);
1528 	return rc;
1529 }
1530 
1531 int
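/*
 * Send all locally cached byte-range locks on this handle to the server
 * using SMB1 LOCKING_ANDX requests, batching as many ranges per request as
 * the negotiated buffer size allows.
 */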
1532 cifs_push_mandatory_locks(struct cifsFileInfo *cfile)
1533 {
1534 	unsigned int xid;
1535 	int rc = 0, stored_rc;
1536 	struct cifsLockInfo *li, *tmp;
1537 	struct cifs_tcon *tcon;
1538 	unsigned int num, max_num, max_buf;
1539 	LOCKING_ANDX_RANGE *buf, *cur;
1540 	static const int types[] = {
1541 		LOCKING_ANDX_LARGE_FILES,
1542 		LOCKING_ANDX_SHARED_LOCK | LOCKING_ANDX_LARGE_FILES
1543 	};
1544 	int i;
1545 
1546 	xid = get_xid();
1547 	tcon = tlink_tcon(cfile->tlink);
1548 
1549 	/*
1550 	 * Accessing maxBuf is racy with cifs_reconnect - need to store value
1551 	 * and check it before using.
1552 	 */
1553 	max_buf = tcon->ses->server->maxBuf;
1554 	if (max_buf < (sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE))) {
1555 		free_xid(xid);
1556 		return -EINVAL;
1557 	}
1558 
1559 	BUILD_BUG_ON(sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE) >
1560 		     PAGE_SIZE);
1561 	max_buf = min_t(unsigned int, max_buf - sizeof(struct smb_hdr),
1562 			PAGE_SIZE);
1563 	max_num = (max_buf - sizeof(struct smb_hdr)) /
1564 						sizeof(LOCKING_ANDX_RANGE);
1565 	buf = kcalloc(max_num, sizeof(LOCKING_ANDX_RANGE), GFP_KERNEL);
1566 	if (!buf) {
1567 		free_xid(xid);
1568 		return -ENOMEM;
1569 	}
1570 
1571 	for (i = 0; i < 2; i++) {
1572 		cur = buf;
1573 		num = 0;
1574 		list_for_each_entry_safe(li, tmp, &cfile->llist->locks, llist) {
1575 			if (li->type != types[i])
1576 				continue;
1577 			cur->Pid = cpu_to_le16(li->pid);
1578 			cur->LengthLow = cpu_to_le32((u32)li->length);
1579 			cur->LengthHigh = cpu_to_le32((u32)(li->length>>32));
1580 			cur->OffsetLow = cpu_to_le32((u32)li->offset);
1581 			cur->OffsetHigh = cpu_to_le32((u32)(li->offset>>32));
1582 			if (++num == max_num) {
1583 				stored_rc = cifs_lockv(xid, tcon,
1584 						       cfile->fid.netfid,
1585 						       (__u8)li->type, 0, num,
1586 						       buf);
1587 				if (stored_rc)
1588 					rc = stored_rc;
1589 				cur = buf;
1590 				num = 0;
1591 			} else
1592 				cur++;
1593 		}
1594 
1595 		if (num) {
1596 			stored_rc = cifs_lockv(xid, tcon, cfile->fid.netfid,
1597 					       (__u8)types[i], 0, num, buf);
1598 			if (stored_rc)
1599 				rc = stored_rc;
1600 		}
1601 	}
1602 
1603 	kfree(buf);
1604 	free_xid(xid);
1605 	return rc;
1606 }
1607 
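/*
 * Derive the lock owner id used in SMB1 POSIX lock requests by mixing the
 * fl_owner pointer with the cifs_lock_secret value.
 */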
1608 static __u32
1609 hash_lockowner(fl_owner_t owner)
1610 {
1611 	return cifs_lock_secret ^ hash32_ptr((const void *)owner);
1612 }
1613 #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
1614 
1615 struct lock_to_push {
1616 	struct list_head llist;
1617 	__u64 offset;
1618 	__u64 length;
1619 	__u32 pid;
1620 	__u16 netfid;
1621 	__u8 type;
1622 };
1623 
1624 #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
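/*
 * Push all cached POSIX (fcntl) locks on this inode to the server via the
 * SMB1 UNIX extensions.  The lock list is snapshotted under flc_lock into
 * preallocated lock_to_push entries and the requests are sent afterwards.
 */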
1625 static int
1626 cifs_push_posix_locks(struct cifsFileInfo *cfile)
1627 {
1628 	struct inode *inode = d_inode(cfile->dentry);
1629 	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1630 	struct file_lock *flock;
1631 	struct file_lock_context *flctx = locks_inode_context(inode);
1632 	unsigned int count = 0, i;
1633 	int rc = 0, xid, type;
1634 	struct list_head locks_to_send, *el;
1635 	struct lock_to_push *lck, *tmp;
1636 	__u64 length;
1637 
1638 	xid = get_xid();
1639 
1640 	if (!flctx)
1641 		goto out;
1642 
1643 	spin_lock(&flctx->flc_lock);
1644 	list_for_each(el, &flctx->flc_posix) {
1645 		count++;
1646 	}
1647 	spin_unlock(&flctx->flc_lock);
1648 
1649 	INIT_LIST_HEAD(&locks_to_send);
1650 
1651 	/*
1652 	 * Allocating count locks is enough because no FL_POSIX locks can be
1653 	 * added to the list while we are holding cinode->lock_sem, which
1654 	 * protects the locking operations of this inode.
1655 	 */
1656 	for (i = 0; i < count; i++) {
1657 		lck = kmalloc(sizeof(struct lock_to_push), GFP_KERNEL);
1658 		if (!lck) {
1659 			rc = -ENOMEM;
1660 			goto err_out;
1661 		}
1662 		list_add_tail(&lck->llist, &locks_to_send);
1663 	}
1664 
1665 	el = locks_to_send.next;
1666 	spin_lock(&flctx->flc_lock);
1667 	list_for_each_entry(flock, &flctx->flc_posix, fl_list) {
1668 		if (el == &locks_to_send) {
1669 			/*
1670 			 * The list ended. We don't have enough allocated
1671 			 * structures - something is really wrong.
1672 			 */
1673 			cifs_dbg(VFS, "Can't push all brlocks!\n");
1674 			break;
1675 		}
1676 		length = cifs_flock_len(flock);
1677 		if (flock->fl_type == F_RDLCK || flock->fl_type == F_SHLCK)
1678 			type = CIFS_RDLCK;
1679 		else
1680 			type = CIFS_WRLCK;
1681 		lck = list_entry(el, struct lock_to_push, llist);
1682 		lck->pid = hash_lockowner(flock->fl_owner);
1683 		lck->netfid = cfile->fid.netfid;
1684 		lck->length = length;
1685 		lck->type = type;
1686 		lck->offset = flock->fl_start;
1687 	}
1688 	spin_unlock(&flctx->flc_lock);
1689 
1690 	list_for_each_entry_safe(lck, tmp, &locks_to_send, llist) {
1691 		int stored_rc;
1692 
1693 		stored_rc = CIFSSMBPosixLock(xid, tcon, lck->netfid, lck->pid,
1694 					     lck->offset, lck->length, NULL,
1695 					     lck->type, 0);
1696 		if (stored_rc)
1697 			rc = stored_rc;
1698 		list_del(&lck->llist);
1699 		kfree(lck);
1700 	}
1701 
1702 out:
1703 	free_xid(xid);
1704 	return rc;
1705 err_out:
1706 	list_for_each_entry_safe(lck, tmp, &locks_to_send, llist) {
1707 		list_del(&lck->llist);
1708 		kfree(lck);
1709 	}
1710 	goto out;
1711 }
1712 #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
1713 
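/*
 * Flush all locally cached byte-range locks on this handle to the server,
 * choosing POSIX or mandatory style depending on the mount options and
 * server capabilities, and clear can_cache_brlcks.
 */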
1714 static int
1715 cifs_push_locks(struct cifsFileInfo *cfile)
1716 {
1717 	struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
1718 	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1719 	int rc = 0;
1720 #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
1721 	struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
1722 #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
1723 
1724 	/* we are going to update can_cache_brlcks here - need write access */
1725 	cifs_down_write(&cinode->lock_sem);
1726 	if (!cinode->can_cache_brlcks) {
1727 		up_write(&cinode->lock_sem);
1728 		return rc;
1729 	}
1730 
1731 #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
1732 	if (cap_unix(tcon->ses) &&
1733 	    (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
1734 	    ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
1735 		rc = cifs_push_posix_locks(cfile);
1736 	else
1737 #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
1738 		rc = tcon->ses->server->ops->push_mand_locks(cfile);
1739 
1740 	cinode->can_cache_brlcks = false;
1741 	up_write(&cinode->lock_sem);
1742 	return rc;
1743 }
1744 
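/*
 * Decode a VFS file_lock into the server lock type and whether this is a
 * lock or an unlock request, and whether the caller is willing to wait.
 */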
1745 static void
1746 cifs_read_flock(struct file_lock *flock, __u32 *type, int *lock, int *unlock,
1747 		bool *wait_flag, struct TCP_Server_Info *server)
1748 {
1749 	if (flock->fl_flags & FL_POSIX)
1750 		cifs_dbg(FYI, "Posix\n");
1751 	if (flock->fl_flags & FL_FLOCK)
1752 		cifs_dbg(FYI, "Flock\n");
1753 	if (flock->fl_flags & FL_SLEEP) {
1754 		cifs_dbg(FYI, "Blocking lock\n");
1755 		*wait_flag = true;
1756 	}
1757 	if (flock->fl_flags & FL_ACCESS)
1758 		cifs_dbg(FYI, "Process suspended by mandatory locking - not implemented yet\n");
1759 	if (flock->fl_flags & FL_LEASE)
1760 		cifs_dbg(FYI, "Lease on file - not implemented yet\n");
1761 	if (flock->fl_flags &
1762 	    (~(FL_POSIX | FL_FLOCK | FL_SLEEP |
1763 	       FL_ACCESS | FL_LEASE | FL_CLOSE | FL_OFDLCK)))
1764 		cifs_dbg(FYI, "Unknown lock flags 0x%x\n", flock->fl_flags);
1765 
1766 	*type = server->vals->large_lock_type;
1767 	if (flock->fl_type == F_WRLCK) {
1768 		cifs_dbg(FYI, "F_WRLCK\n");
1769 		*type |= server->vals->exclusive_lock_type;
1770 		*lock = 1;
1771 	} else if (flock->fl_type == F_UNLCK) {
1772 		cifs_dbg(FYI, "F_UNLCK\n");
1773 		*type |= server->vals->unlock_lock_type;
1774 		*unlock = 1;
1775 		/* Check if unlock includes more than one lock range */
1776 	} else if (flock->fl_type == F_RDLCK) {
1777 		cifs_dbg(FYI, "F_RDLCK\n");
1778 		*type |= server->vals->shared_lock_type;
1779 		*lock = 1;
1780 	} else if (flock->fl_type == F_EXLCK) {
1781 		cifs_dbg(FYI, "F_EXLCK\n");
1782 		*type |= server->vals->exclusive_lock_type;
1783 		*lock = 1;
1784 	} else if (flock->fl_type == F_SHLCK) {
1785 		cifs_dbg(FYI, "F_SHLCK\n");
1786 		*type |= server->vals->shared_lock_type;
1787 		*lock = 1;
1788 	} else
1789 		cifs_dbg(FYI, "Unknown type of lock\n");
1790 }
1791 
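/*
 * Handle a lock test (F_GETLK): check the locally cached locks first and,
 * if nothing conflicts there, probe the server by temporarily setting a
 * lock over the range.  The result is reported back through flock
 * (F_UNLCK if the range could be locked, otherwise the type of the
 * conflicting lock).
 */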
1792 static int
1793 cifs_getlk(struct file *file, struct file_lock *flock, __u32 type,
1794 	   bool wait_flag, bool posix_lck, unsigned int xid)
1795 {
1796 	int rc = 0;
1797 	__u64 length = cifs_flock_len(flock);
1798 	struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
1799 	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1800 	struct TCP_Server_Info *server = tcon->ses->server;
1801 #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
1802 	__u16 netfid = cfile->fid.netfid;
1803 
1804 	if (posix_lck) {
1805 		int posix_lock_type;
1806 
1807 		rc = cifs_posix_lock_test(file, flock);
1808 		if (!rc)
1809 			return rc;
1810 
1811 		if (type & server->vals->shared_lock_type)
1812 			posix_lock_type = CIFS_RDLCK;
1813 		else
1814 			posix_lock_type = CIFS_WRLCK;
1815 		rc = CIFSSMBPosixLock(xid, tcon, netfid,
1816 				      hash_lockowner(flock->fl_owner),
1817 				      flock->fl_start, length, flock,
1818 				      posix_lock_type, wait_flag);
1819 		return rc;
1820 	}
1821 #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
1822 
1823 	rc = cifs_lock_test(cfile, flock->fl_start, length, type, flock);
1824 	if (!rc)
1825 		return rc;
1826 
1827 	/* BB we could chain these into one lock request BB */
1828 	rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length, type,
1829 				    1, 0, false);
1830 	if (rc == 0) {
1831 		rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
1832 					    type, 0, 1, false);
1833 		flock->fl_type = F_UNLCK;
1834 		if (rc != 0)
1835 			cifs_dbg(VFS, "Error unlocking previously locked range %d during test of lock\n",
1836 				 rc);
1837 		return 0;
1838 	}
1839 
1840 	if (type & server->vals->shared_lock_type) {
1841 		flock->fl_type = F_WRLCK;
1842 		return 0;
1843 	}
1844 
1845 	type &= ~server->vals->exclusive_lock_type;
1846 
1847 	rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
1848 				    type | server->vals->shared_lock_type,
1849 				    1, 0, false);
1850 	if (rc == 0) {
1851 		rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
1852 			type | server->vals->shared_lock_type, 0, 1, false);
1853 		flock->fl_type = F_RDLCK;
1854 		if (rc != 0)
1855 			cifs_dbg(VFS, "Error unlocking previously locked range %d during test of lock\n",
1856 				 rc);
1857 	} else
1858 		flock->fl_type = F_WRLCK;
1859 
1860 	return 0;
1861 }
1862 
1863 void
1864 cifs_move_llist(struct list_head *source, struct list_head *dest)
1865 {
1866 	struct list_head *li, *tmp;
1867 	list_for_each_safe(li, tmp, source)
1868 		list_move(li, dest);
1869 }
1870 
1871 void
1872 cifs_free_llist(struct list_head *llist)
1873 {
1874 	struct cifsLockInfo *li, *tmp;
1875 	list_for_each_entry_safe(li, tmp, llist, llist) {
1876 		cifs_del_lock_waiters(li);
1877 		list_del(&li->llist);
1878 		kfree(li);
1879 	}
1880 }
1881 
1882 #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
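/*
 * Remove cached locks that fall within an unlock request.  Matching locks
 * are batched into LOCKING_ANDX_RANGE arrays (exclusive and shared ranges
 * in separate passes) and sent to the server; if a server unlock fails,
 * the affected locks are moved back onto the file's list.
 */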
1883 int
1884 cifs_unlock_range(struct cifsFileInfo *cfile, struct file_lock *flock,
1885 		  unsigned int xid)
1886 {
1887 	int rc = 0, stored_rc;
1888 	static const int types[] = {
1889 		LOCKING_ANDX_LARGE_FILES,
1890 		LOCKING_ANDX_SHARED_LOCK | LOCKING_ANDX_LARGE_FILES
1891 	};
1892 	unsigned int i;
1893 	unsigned int max_num, num, max_buf;
1894 	LOCKING_ANDX_RANGE *buf, *cur;
1895 	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1896 	struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
1897 	struct cifsLockInfo *li, *tmp;
1898 	__u64 length = cifs_flock_len(flock);
1899 	struct list_head tmp_llist;
1900 
1901 	INIT_LIST_HEAD(&tmp_llist);
1902 
1903 	/*
1904 	 * Accessing maxBuf is racy with cifs_reconnect - need to store value
1905 	 * and check it before using.
1906 	 */
1907 	max_buf = tcon->ses->server->maxBuf;
1908 	if (max_buf < (sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE)))
1909 		return -EINVAL;
1910 
1911 	BUILD_BUG_ON(sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE) >
1912 		     PAGE_SIZE);
1913 	max_buf = min_t(unsigned int, max_buf - sizeof(struct smb_hdr),
1914 			PAGE_SIZE);
1915 	max_num = (max_buf - sizeof(struct smb_hdr)) /
1916 						sizeof(LOCKING_ANDX_RANGE);
1917 	buf = kcalloc(max_num, sizeof(LOCKING_ANDX_RANGE), GFP_KERNEL);
1918 	if (!buf)
1919 		return -ENOMEM;
1920 
1921 	cifs_down_write(&cinode->lock_sem);
1922 	for (i = 0; i < 2; i++) {
1923 		cur = buf;
1924 		num = 0;
1925 		list_for_each_entry_safe(li, tmp, &cfile->llist->locks, llist) {
1926 			if (flock->fl_start > li->offset ||
1927 			    (flock->fl_start + length) <
1928 			    (li->offset + li->length))
1929 				continue;
1930 			if (current->tgid != li->pid)
1931 				continue;
1932 			if (types[i] != li->type)
1933 				continue;
1934 			if (cinode->can_cache_brlcks) {
1935 				/*
1936 				 * We can cache brlock requests - simply remove
1937 				 * a lock from the file's list.
1938 				 */
1939 				list_del(&li->llist);
1940 				cifs_del_lock_waiters(li);
1941 				kfree(li);
1942 				continue;
1943 			}
1944 			cur->Pid = cpu_to_le16(li->pid);
1945 			cur->LengthLow = cpu_to_le32((u32)li->length);
1946 			cur->LengthHigh = cpu_to_le32((u32)(li->length>>32));
1947 			cur->OffsetLow = cpu_to_le32((u32)li->offset);
1948 			cur->OffsetHigh = cpu_to_le32((u32)(li->offset>>32));
1949 			/*
1950 			 * We need to save a lock here to let us add it again to
1951 			 * the file's list if the unlock range request fails on
1952 			 * the server.
1953 			 */
1954 			list_move(&li->llist, &tmp_llist);
1955 			if (++num == max_num) {
1956 				stored_rc = cifs_lockv(xid, tcon,
1957 						       cfile->fid.netfid,
1958 						       li->type, num, 0, buf);
1959 				if (stored_rc) {
1960 					/*
1961 					 * We failed on the unlock range
1962 					 * request - add all locks from the tmp
1963 					 * list to the head of the file's list.
1964 					 */
1965 					cifs_move_llist(&tmp_llist,
1966 							&cfile->llist->locks);
1967 					rc = stored_rc;
1968 				} else
1969 					/*
1970 					 * The unlock range request succeed -
1971 					 * free the tmp list.
1972 					 */
1973 					cifs_free_llist(&tmp_llist);
1974 				cur = buf;
1975 				num = 0;
1976 			} else
1977 				cur++;
1978 		}
1979 		if (num) {
1980 			stored_rc = cifs_lockv(xid, tcon, cfile->fid.netfid,
1981 					       types[i], num, 0, buf);
1982 			if (stored_rc) {
1983 				cifs_move_llist(&tmp_llist,
1984 						&cfile->llist->locks);
1985 				rc = stored_rc;
1986 			} else
1987 				cifs_free_llist(&tmp_llist);
1988 		}
1989 	}
1990 
1991 	up_write(&cinode->lock_sem);
1992 	kfree(buf);
1993 	return rc;
1994 }
1995 #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
1996 
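/*
 * Set or clear a byte-range lock.  The POSIX lock path is used when
 * available; otherwise the lock is recorded locally (and simply cached if
 * brlock caching is still allowed) before being sent to the server, and
 * unlocks are handed to the server's mand_unlock_range op.  FL_POSIX and
 * FL_FLOCK requests are also reflected into the local VFS lock state.
 */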
1997 static int
1998 cifs_setlk(struct file *file, struct file_lock *flock, __u32 type,
1999 	   bool wait_flag, bool posix_lck, int lock, int unlock,
2000 	   unsigned int xid)
2001 {
2002 	int rc = 0;
2003 	__u64 length = cifs_flock_len(flock);
2004 	struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
2005 	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
2006 	struct TCP_Server_Info *server = tcon->ses->server;
2007 	struct inode *inode = d_inode(cfile->dentry);
2008 
2009 #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
2010 	if (posix_lck) {
2011 		int posix_lock_type;
2012 
2013 		rc = cifs_posix_lock_set(file, flock);
2014 		if (rc <= FILE_LOCK_DEFERRED)
2015 			return rc;
2016 
2017 		if (type & server->vals->shared_lock_type)
2018 			posix_lock_type = CIFS_RDLCK;
2019 		else
2020 			posix_lock_type = CIFS_WRLCK;
2021 
2022 		if (unlock == 1)
2023 			posix_lock_type = CIFS_UNLCK;
2024 
2025 		rc = CIFSSMBPosixLock(xid, tcon, cfile->fid.netfid,
2026 				      hash_lockowner(flock->fl_owner),
2027 				      flock->fl_start, length,
2028 				      NULL, posix_lock_type, wait_flag);
2029 		goto out;
2030 	}
2031 #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
2032 	if (lock) {
2033 		struct cifsLockInfo *lock;
2034 
2035 		lock = cifs_lock_init(flock->fl_start, length, type,
2036 				      flock->fl_flags);
2037 		if (!lock)
2038 			return -ENOMEM;
2039 
2040 		rc = cifs_lock_add_if(cfile, lock, wait_flag);
2041 		if (rc < 0) {
2042 			kfree(lock);
2043 			return rc;
2044 		}
2045 		if (!rc)
2046 			goto out;
2047 
2048 		/*
2049 		 * Windows 7 server can delay breaking lease from read to None
2050 		 * if we set a byte-range lock on a file - break it explicitly
2051 		 * before sending the lock to the server to be sure the next
2052 		 * read won't conflict with non-overlapping locks due to
2053 		 * page reading.
2054 		 */
2055 		if (!CIFS_CACHE_WRITE(CIFS_I(inode)) &&
2056 					CIFS_CACHE_READ(CIFS_I(inode))) {
2057 			cifs_zap_mapping(inode);
2058 			cifs_dbg(FYI, "Set no oplock for inode=%p due to mand locks\n",
2059 				 inode);
2060 			CIFS_I(inode)->oplock = 0;
2061 		}
2062 
2063 		rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
2064 					    type, 1, 0, wait_flag);
2065 		if (rc) {
2066 			kfree(lock);
2067 			return rc;
2068 		}
2069 
2070 		cifs_lock_add(cfile, lock);
2071 	} else if (unlock)
2072 		rc = server->ops->mand_unlock_range(cfile, flock, xid);
2073 
2074 out:
2075 	if ((flock->fl_flags & FL_POSIX) || (flock->fl_flags & FL_FLOCK)) {
2076 		/*
2077 		 * If this is a request to remove all locks because we
2078 		 * are closing the file, it doesn't matter if the
2079 		 * unlocking failed as both cifs.ko and the SMB server
2080 		 * remove the lock on file close
2081 		 */
2082 		if (rc) {
2083 			cifs_dbg(VFS, "%s failed rc=%d\n", __func__, rc);
2084 			if (!(flock->fl_flags & FL_CLOSE))
2085 				return rc;
2086 		}
2087 		rc = locks_lock_file_wait(file, flock);
2088 	}
2089 	return rc;
2090 }
2091 
2092 int cifs_flock(struct file *file, int cmd, struct file_lock *fl)
2093 {
2094 	int rc, xid;
2095 	int lock = 0, unlock = 0;
2096 	bool wait_flag = false;
2097 	bool posix_lck = false;
2098 	struct cifs_sb_info *cifs_sb;
2099 	struct cifs_tcon *tcon;
2100 	struct cifsFileInfo *cfile;
2101 	__u32 type;
2102 
2103 	xid = get_xid();
2104 
2105 	if (!(fl->fl_flags & FL_FLOCK)) {
2106 		rc = -ENOLCK;
2107 		free_xid(xid);
2108 		return rc;
2109 	}
2110 
2111 	cfile = (struct cifsFileInfo *)file->private_data;
2112 	tcon = tlink_tcon(cfile->tlink);
2113 
2114 	cifs_read_flock(fl, &type, &lock, &unlock, &wait_flag,
2115 			tcon->ses->server);
2116 	cifs_sb = CIFS_FILE_SB(file);
2117 
2118 	if (cap_unix(tcon->ses) &&
2119 	    (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
2120 	    ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
2121 		posix_lck = true;
2122 
2123 	if (!lock && !unlock) {
2124 		/*
2125 		 * if no lock or unlock then nothing to do since we do not
2126 		 * know what it is
2127 		 */
2128 		rc = -EOPNOTSUPP;
2129 		free_xid(xid);
2130 		return rc;
2131 	}
2132 
2133 	rc = cifs_setlk(file, fl, type, wait_flag, posix_lck, lock, unlock,
2134 			xid);
2135 	free_xid(xid);
2136 	return rc;
2137 
2138 
2139 }
2140 
2141 int cifs_lock(struct file *file, int cmd, struct file_lock *flock)
2142 {
2143 	int rc, xid;
2144 	int lock = 0, unlock = 0;
2145 	bool wait_flag = false;
2146 	bool posix_lck = false;
2147 	struct cifs_sb_info *cifs_sb;
2148 	struct cifs_tcon *tcon;
2149 	struct cifsFileInfo *cfile;
2150 	__u32 type;
2151 
2152 	rc = -EACCES;
2153 	xid = get_xid();
2154 
2155 	cifs_dbg(FYI, "%s: %pD2 cmd=0x%x type=0x%x flags=0x%x r=%lld:%lld\n", __func__, file, cmd,
2156 		 flock->fl_flags, flock->fl_type, (long long)flock->fl_start,
2157 		 (long long)flock->fl_end);
2158 
2159 	cfile = (struct cifsFileInfo *)file->private_data;
2160 	tcon = tlink_tcon(cfile->tlink);
2161 
2162 	cifs_read_flock(flock, &type, &lock, &unlock, &wait_flag,
2163 			tcon->ses->server);
2164 	cifs_sb = CIFS_FILE_SB(file);
2165 	set_bit(CIFS_INO_CLOSE_ON_LOCK, &CIFS_I(d_inode(cfile->dentry))->flags);
2166 
2167 	if (cap_unix(tcon->ses) &&
2168 	    (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
2169 	    ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
2170 		posix_lck = true;
2171 	/*
2172 	 * BB add code here to normalize offset and length to account for
2173 	 * negative length which we can not accept over the wire.
2174 	 */
2175 	if (IS_GETLK(cmd)) {
2176 		rc = cifs_getlk(file, flock, type, wait_flag, posix_lck, xid);
2177 		free_xid(xid);
2178 		return rc;
2179 	}
2180 
2181 	if (!lock && !unlock) {
2182 		/*
2183 		 * if no lock or unlock then nothing to do since we do not
2184 		 * know what it is
2185 		 */
2186 		free_xid(xid);
2187 		return -EOPNOTSUPP;
2188 	}
2189 
2190 	rc = cifs_setlk(file, flock, type, wait_flag, posix_lck, lock, unlock,
2191 			xid);
2192 	free_xid(xid);
2193 	return rc;
2194 }
2195 
2196 /*
2197  * update the file size (if needed) after a write. Should be called with
2198  * the inode->i_lock held
2199  */
2200 void
2201 cifs_update_eof(struct cifsInodeInfo *cifsi, loff_t offset,
2202 		      unsigned int bytes_written)
2203 {
2204 	loff_t end_of_write = offset + bytes_written;
2205 
2206 	if (end_of_write > cifsi->server_eof)
2207 		cifsi->server_eof = end_of_write;
2208 }
2209 
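/*
 * Synchronously write write_size bytes at *offset using the given open
 * handle, reopening it if it has been invalidated.  The data is sent in
 * chunks no larger than the server's wp_retry_size, and the cached EOF and
 * i_size are updated as bytes are acknowledged.
 */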
2210 static ssize_t
2211 cifs_write(struct cifsFileInfo *open_file, __u32 pid, const char *write_data,
2212 	   size_t write_size, loff_t *offset)
2213 {
2214 	int rc = 0;
2215 	unsigned int bytes_written = 0;
2216 	unsigned int total_written;
2217 	struct cifs_tcon *tcon;
2218 	struct TCP_Server_Info *server;
2219 	unsigned int xid;
2220 	struct dentry *dentry = open_file->dentry;
2221 	struct cifsInodeInfo *cifsi = CIFS_I(d_inode(dentry));
2222 	struct cifs_io_parms io_parms = {0};
2223 
2224 	cifs_dbg(FYI, "write %zd bytes to offset %lld of %pd\n",
2225 		 write_size, *offset, dentry);
2226 
2227 	tcon = tlink_tcon(open_file->tlink);
2228 	server = tcon->ses->server;
2229 
2230 	if (!server->ops->sync_write)
2231 		return -ENOSYS;
2232 
2233 	xid = get_xid();
2234 
2235 	for (total_written = 0; write_size > total_written;
2236 	     total_written += bytes_written) {
2237 		rc = -EAGAIN;
2238 		while (rc == -EAGAIN) {
2239 			struct kvec iov[2];
2240 			unsigned int len;
2241 
2242 			if (open_file->invalidHandle) {
2243 				/* we could deadlock if we called
2244 				   filemap_fdatawait from here so tell
2245 				   reopen_file not to flush data to
2246 				   server now */
2247 				rc = cifs_reopen_file(open_file, false);
2248 				if (rc != 0)
2249 					break;
2250 			}
2251 
2252 			len = min(server->ops->wp_retry_size(d_inode(dentry)),
2253 				  (unsigned int)write_size - total_written);
2254 			/* iov[0] is reserved for smb header */
2255 			iov[1].iov_base = (char *)write_data + total_written;
2256 			iov[1].iov_len = len;
2257 			io_parms.pid = pid;
2258 			io_parms.tcon = tcon;
2259 			io_parms.offset = *offset;
2260 			io_parms.length = len;
2261 			rc = server->ops->sync_write(xid, &open_file->fid,
2262 					&io_parms, &bytes_written, iov, 1);
2263 		}
2264 		if (rc || (bytes_written == 0)) {
2265 			if (total_written)
2266 				break;
2267 			else {
2268 				free_xid(xid);
2269 				return rc;
2270 			}
2271 		} else {
2272 			spin_lock(&d_inode(dentry)->i_lock);
2273 			cifs_update_eof(cifsi, *offset, bytes_written);
2274 			spin_unlock(&d_inode(dentry)->i_lock);
2275 			*offset += bytes_written;
2276 		}
2277 	}
2278 
2279 	cifs_stats_bytes_written(tcon, total_written);
2280 
2281 	if (total_written > 0) {
2282 		spin_lock(&d_inode(dentry)->i_lock);
2283 		if (*offset > d_inode(dentry)->i_size) {
2284 			i_size_write(d_inode(dentry), *offset);
2285 			d_inode(dentry)->i_blocks = (512 - 1 + *offset) >> 9;
2286 		}
2287 		spin_unlock(&d_inode(dentry)->i_lock);
2288 	}
2289 	mark_inode_dirty_sync(d_inode(dentry));
2290 	free_xid(xid);
2291 	return total_written;
2292 }
2293 
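/*
 * Find an open handle on this inode that has read access and is not marked
 * invalid, taking a reference on it.  On multiuser mounts the search can be
 * restricted to handles owned by the current fsuid.
 */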
2294 struct cifsFileInfo *find_readable_file(struct cifsInodeInfo *cifs_inode,
2295 					bool fsuid_only)
2296 {
2297 	struct cifsFileInfo *open_file = NULL;
2298 	struct cifs_sb_info *cifs_sb = CIFS_SB(cifs_inode->netfs.inode.i_sb);
2299 
2300 	/* only filter by fsuid on multiuser mounts */
2301 	if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER))
2302 		fsuid_only = false;
2303 
2304 	spin_lock(&cifs_inode->open_file_lock);
2305 	/* we could simply get the first_list_entry since write-only entries
2306 	   are always at the end of the list, but since the first entry might
2307 	   have a close pending, we go through the whole list */
2308 	list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
2309 		if (fsuid_only && !uid_eq(open_file->uid, current_fsuid()))
2310 			continue;
2311 		if (OPEN_FMODE(open_file->f_flags) & FMODE_READ) {
2312 			if (!open_file->invalidHandle) {
2313 				/* found a good file */
2314 				/* lock it so it will not be closed on us */
2315 				cifsFileInfo_get(open_file);
2316 				spin_unlock(&cifs_inode->open_file_lock);
2317 				return open_file;
2318 			} /* else might as well continue, and look for
2319 			     another, or simply have the caller reopen it
2320 			     again rather than trying to fix this handle */
2321 		} else /* write only file */
2322 			break; /* write only files are last so must be done */
2323 	}
2324 	spin_unlock(&cifs_inode->open_file_lock);
2325 	return NULL;
2326 }
2327 
2328 /* Return -EBADF if no handle is found and general rc otherwise */
2329 int
2330 cifs_get_writable_file(struct cifsInodeInfo *cifs_inode, int flags,
2331 		       struct cifsFileInfo **ret_file)
2332 {
2333 	struct cifsFileInfo *open_file, *inv_file = NULL;
2334 	struct cifs_sb_info *cifs_sb;
2335 	bool any_available = false;
2336 	int rc = -EBADF;
2337 	unsigned int refind = 0;
2338 	bool fsuid_only = flags & FIND_WR_FSUID_ONLY;
2339 	bool with_delete = flags & FIND_WR_WITH_DELETE;
2340 	*ret_file = NULL;
2341 
2342 	/*
2343 	 * Having a null inode here (because mapping->host was set to zero by
2344 	 * the VFS or MM) should not happen, but we had reports of an oops (due
2345 	 * to it being zero) during stress test cases, so we need to check for it
2346 	 */
2347 
2348 	if (cifs_inode == NULL) {
2349 		cifs_dbg(VFS, "Null inode passed to cifs_writeable_file\n");
2350 		dump_stack();
2351 		return rc;
2352 	}
2353 
2354 	cifs_sb = CIFS_SB(cifs_inode->netfs.inode.i_sb);
2355 
2356 	/* only filter by fsuid on multiuser mounts */
2357 	if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER))
2358 		fsuid_only = false;
2359 
2360 	spin_lock(&cifs_inode->open_file_lock);
2361 refind_writable:
2362 	if (refind > MAX_REOPEN_ATT) {
2363 		spin_unlock(&cifs_inode->open_file_lock);
2364 		return rc;
2365 	}
2366 	list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
2367 		if (!any_available && open_file->pid != current->tgid)
2368 			continue;
2369 		if (fsuid_only && !uid_eq(open_file->uid, current_fsuid()))
2370 			continue;
2371 		if (with_delete && !(open_file->fid.access & DELETE))
2372 			continue;
2373 		if (OPEN_FMODE(open_file->f_flags) & FMODE_WRITE) {
2374 			if (!open_file->invalidHandle) {
2375 				/* found a good writable file */
2376 				cifsFileInfo_get(open_file);
2377 				spin_unlock(&cifs_inode->open_file_lock);
2378 				*ret_file = open_file;
2379 				return 0;
2380 			} else {
2381 				if (!inv_file)
2382 					inv_file = open_file;
2383 			}
2384 		}
2385 	}
2386 	/* couldn't find a usable FH with the same pid, try any available */
2387 	if (!any_available) {
2388 		any_available = true;
2389 		goto refind_writable;
2390 	}
2391 
2392 	if (inv_file) {
2393 		any_available = false;
2394 		cifsFileInfo_get(inv_file);
2395 	}
2396 
2397 	spin_unlock(&cifs_inode->open_file_lock);
2398 
2399 	if (inv_file) {
2400 		rc = cifs_reopen_file(inv_file, false);
2401 		if (!rc) {
2402 			*ret_file = inv_file;
2403 			return 0;
2404 		}
2405 
2406 		spin_lock(&cifs_inode->open_file_lock);
2407 		list_move_tail(&inv_file->flist, &cifs_inode->openFileList);
2408 		spin_unlock(&cifs_inode->open_file_lock);
2409 		cifsFileInfo_put(inv_file);
2410 		++refind;
2411 		inv_file = NULL;
2412 		spin_lock(&cifs_inode->open_file_lock);
2413 		goto refind_writable;
2414 	}
2415 
2416 	return rc;
2417 }
2418 
2419 struct cifsFileInfo *
2420 find_writable_file(struct cifsInodeInfo *cifs_inode, int flags)
2421 {
2422 	struct cifsFileInfo *cfile;
2423 	int rc;
2424 
2425 	rc = cifs_get_writable_file(cifs_inode, flags, &cfile);
2426 	if (rc)
2427 		cifs_dbg(FYI, "Couldn't find writable handle rc=%d\n", rc);
2428 
2429 	return cfile;
2430 }
2431 
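/*
 * Look up an open file on this tcon by pathname and, if found, return a
 * writable handle for its inode via cifs_get_writable_file().
 */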
2432 int
2433 cifs_get_writable_path(struct cifs_tcon *tcon, const char *name,
2434 		       int flags,
2435 		       struct cifsFileInfo **ret_file)
2436 {
2437 	struct cifsFileInfo *cfile;
2438 	void *page = alloc_dentry_path();
2439 
2440 	*ret_file = NULL;
2441 
2442 	spin_lock(&tcon->open_file_lock);
2443 	list_for_each_entry(cfile, &tcon->openFileList, tlist) {
2444 		struct cifsInodeInfo *cinode;
2445 		const char *full_path = build_path_from_dentry(cfile->dentry, page);
2446 		if (IS_ERR(full_path)) {
2447 			spin_unlock(&tcon->open_file_lock);
2448 			free_dentry_path(page);
2449 			return PTR_ERR(full_path);
2450 		}
2451 		if (strcmp(full_path, name))
2452 			continue;
2453 
2454 		cinode = CIFS_I(d_inode(cfile->dentry));
2455 		spin_unlock(&tcon->open_file_lock);
2456 		free_dentry_path(page);
2457 		return cifs_get_writable_file(cinode, flags, ret_file);
2458 	}
2459 
2460 	spin_unlock(&tcon->open_file_lock);
2461 	free_dentry_path(page);
2462 	return -ENOENT;
2463 }
2464 
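/*
 * Look up an open file on this tcon by pathname and, if found, return a
 * readable handle for its inode via find_readable_file().
 */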
2465 int
2466 cifs_get_readable_path(struct cifs_tcon *tcon, const char *name,
2467 		       struct cifsFileInfo **ret_file)
2468 {
2469 	struct cifsFileInfo *cfile;
2470 	void *page = alloc_dentry_path();
2471 
2472 	*ret_file = NULL;
2473 
2474 	spin_lock(&tcon->open_file_lock);
2475 	list_for_each_entry(cfile, &tcon->openFileList, tlist) {
2476 		struct cifsInodeInfo *cinode;
2477 		const char *full_path = build_path_from_dentry(cfile->dentry, page);
2478 		if (IS_ERR(full_path)) {
2479 			spin_unlock(&tcon->open_file_lock);
2480 			free_dentry_path(page);
2481 			return PTR_ERR(full_path);
2482 		}
2483 		if (strcmp(full_path, name))
2484 			continue;
2485 
2486 		cinode = CIFS_I(d_inode(cfile->dentry));
2487 		spin_unlock(&tcon->open_file_lock);
2488 		free_dentry_path(page);
2489 		*ret_file = find_readable_file(cinode, 0);
2490 		return *ret_file ? 0 : -ENOENT;
2491 	}
2492 
2493 	spin_unlock(&tcon->open_file_lock);
2494 	free_dentry_path(page);
2495 	return -ENOENT;
2496 }
2497 
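/*
 * Final put on a cifs_writedata: release the smbdirect memory registration
 * (if any) and the file handle reference, then free the structure.
 */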
2498 void
2499 cifs_writedata_release(struct kref *refcount)
2500 {
2501 	struct cifs_writedata *wdata = container_of(refcount,
2502 					struct cifs_writedata, refcount);
2503 #ifdef CONFIG_CIFS_SMB_DIRECT
2504 	if (wdata->mr) {
2505 		smbd_deregister_mr(wdata->mr);
2506 		wdata->mr = NULL;
2507 	}
2508 #endif
2509 
2510 	if (wdata->cfile)
2511 		cifsFileInfo_put(wdata->cfile);
2512 
2513 	kfree(wdata);
2514 }
2515 
2516 /*
2517  * Write failed with a retryable error. Resend the write request. It's also
2518  * possible that the page was redirtied so re-clean the page.
2519  */
2520 static void
2521 cifs_writev_requeue(struct cifs_writedata *wdata)
2522 {
2523 	int rc = 0;
2524 	struct inode *inode = d_inode(wdata->cfile->dentry);
2525 	struct TCP_Server_Info *server;
2526 	unsigned int rest_len = wdata->bytes;
2527 	loff_t fpos = wdata->offset;
2528 
2529 	server = tlink_tcon(wdata->cfile->tlink)->ses->server;
2530 	do {
2531 		struct cifs_writedata *wdata2;
2532 		unsigned int wsize, cur_len;
2533 
2534 		wsize = server->ops->wp_retry_size(inode);
2535 		if (wsize < rest_len) {
2536 			if (wsize < PAGE_SIZE) {
2537 				rc = -EOPNOTSUPP;
2538 				break;
2539 			}
2540 			cur_len = min(round_down(wsize, PAGE_SIZE), rest_len);
2541 		} else {
2542 			cur_len = rest_len;
2543 		}
2544 
2545 		wdata2 = cifs_writedata_alloc(cifs_writev_complete);
2546 		if (!wdata2) {
2547 			rc = -ENOMEM;
2548 			break;
2549 		}
2550 
2551 		wdata2->sync_mode = wdata->sync_mode;
2552 		wdata2->offset	= fpos;
2553 		wdata2->bytes	= cur_len;
2554 		wdata2->iter	= wdata->iter;
2555 
2556 		iov_iter_advance(&wdata2->iter, fpos - wdata->offset);
2557 		iov_iter_truncate(&wdata2->iter, wdata2->bytes);
2558 
2559 		if (iov_iter_is_xarray(&wdata2->iter))
2560 			/* Check for pages having been redirtied and clean
2561 			 * them.  We can do this by walking the xarray.  If
2562 			 * it's not an xarray, then it's a DIO and we shouldn't
2563 			 * be mucking around with the page bits.
2564 			 */
2565 			cifs_undirty_folios(inode, fpos, cur_len);
2566 
2567 		rc = cifs_get_writable_file(CIFS_I(inode), FIND_WR_ANY,
2568 					    &wdata2->cfile);
2569 		if (!wdata2->cfile) {
2570 			cifs_dbg(VFS, "No writable handle to retry writepages rc=%d\n",
2571 				 rc);
2572 			if (!is_retryable_error(rc))
2573 				rc = -EBADF;
2574 		} else {
2575 			wdata2->pid = wdata2->cfile->pid;
2576 			rc = server->ops->async_writev(wdata2,
2577 						       cifs_writedata_release);
2578 		}
2579 
2580 		kref_put(&wdata2->refcount, cifs_writedata_release);
2581 		if (rc) {
2582 			if (is_retryable_error(rc))
2583 				continue;
2584 			fpos += cur_len;
2585 			rest_len -= cur_len;
2586 			break;
2587 		}
2588 
2589 		fpos += cur_len;
2590 		rest_len -= cur_len;
2591 	} while (rest_len > 0);
2592 
2593 	/* Clean up remaining pages from the original wdata */
2594 	if (iov_iter_is_xarray(&wdata->iter))
2595 		cifs_pages_write_failed(inode, fpos, rest_len);
2596 
2597 	if (rc != 0 && !is_retryable_error(rc))
2598 		mapping_set_error(inode->i_mapping, rc);
2599 	kref_put(&wdata->refcount, cifs_writedata_release);
2600 }
2601 
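/*
 * Work item run when an async writepages request completes: update the
 * cached EOF and write statistics on success, requeue the write for
 * WB_SYNC_ALL on -EAGAIN, and otherwise mark the affected pages written
 * back, redirtied or failed according to the result.
 */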
2602 void
2603 cifs_writev_complete(struct work_struct *work)
2604 {
2605 	struct cifs_writedata *wdata = container_of(work,
2606 						struct cifs_writedata, work);
2607 	struct inode *inode = d_inode(wdata->cfile->dentry);
2608 
2609 	if (wdata->result == 0) {
2610 		spin_lock(&inode->i_lock);
2611 		cifs_update_eof(CIFS_I(inode), wdata->offset, wdata->bytes);
2612 		spin_unlock(&inode->i_lock);
2613 		cifs_stats_bytes_written(tlink_tcon(wdata->cfile->tlink),
2614 					 wdata->bytes);
2615 	} else if (wdata->sync_mode == WB_SYNC_ALL && wdata->result == -EAGAIN)
2616 		return cifs_writev_requeue(wdata);
2617 
2618 	if (wdata->result == -EAGAIN)
2619 		cifs_pages_write_redirty(inode, wdata->offset, wdata->bytes);
2620 	else if (wdata->result < 0)
2621 		cifs_pages_write_failed(inode, wdata->offset, wdata->bytes);
2622 	else
2623 		cifs_pages_written_back(inode, wdata->offset, wdata->bytes);
2624 
2625 	if (wdata->result != -EAGAIN)
2626 		mapping_set_error(inode->i_mapping, wdata->result);
2627 	kref_put(&wdata->refcount, cifs_writedata_release);
2628 }
2629 
2630 struct cifs_writedata *cifs_writedata_alloc(work_func_t complete)
2631 {
2632 	struct cifs_writedata *wdata;
2633 
2634 	wdata = kzalloc(sizeof(*wdata), GFP_NOFS);
2635 	if (wdata != NULL) {
2636 		kref_init(&wdata->refcount);
2637 		INIT_LIST_HEAD(&wdata->list);
2638 		init_completion(&wdata->done);
2639 		INIT_WORK(&wdata->work, complete);
2640 	}
2641 	return wdata;
2642 }
2643 
2644 static int cifs_partialpagewrite(struct page *page, unsigned from, unsigned to)
2645 {
2646 	struct address_space *mapping = page->mapping;
2647 	loff_t offset = (loff_t)page->index << PAGE_SHIFT;
2648 	char *write_data;
2649 	int rc = -EFAULT;
2650 	int bytes_written = 0;
2651 	struct inode *inode;
2652 	struct cifsFileInfo *open_file;
2653 
2654 	if (!mapping || !mapping->host)
2655 		return -EFAULT;
2656 
2657 	inode = page->mapping->host;
2658 
2659 	offset += (loff_t)from;
2660 	write_data = kmap(page);
2661 	write_data += from;
2662 
2663 	if ((to > PAGE_SIZE) || (from > to)) {
2664 		kunmap(page);
2665 		return -EIO;
2666 	}
2667 
2668 	/* racing with truncate? */
2669 	if (offset > mapping->host->i_size) {
2670 		kunmap(page);
2671 		return 0; /* don't care */
2672 	}
2673 
2674 	/* check to make sure that we are not extending the file */
2675 	if (mapping->host->i_size - offset < (loff_t)to)
2676 		to = (unsigned)(mapping->host->i_size - offset);
2677 
2678 	rc = cifs_get_writable_file(CIFS_I(mapping->host), FIND_WR_ANY,
2679 				    &open_file);
2680 	if (!rc) {
2681 		bytes_written = cifs_write(open_file, open_file->pid,
2682 					   write_data, to - from, &offset);
2683 		cifsFileInfo_put(open_file);
2684 		/* Does mm or vfs already set times? */
2685 		simple_inode_init_ts(inode);
2686 		if ((bytes_written > 0) && (offset))
2687 			rc = 0;
2688 		else if (bytes_written < 0)
2689 			rc = bytes_written;
2690 		else
2691 			rc = -EFAULT;
2692 	} else {
2693 		cifs_dbg(FYI, "No writable handle for write page rc=%d\n", rc);
2694 		if (!is_retryable_error(rc))
2695 			rc = -EIO;
2696 	}
2697 
2698 	kunmap(page);
2699 	return rc;
2700 }
2701 
2702 /*
2703  * Extend the region to be written back to include subsequent contiguously
2704  * dirty pages if possible, but don't sleep while doing so.
2705  */
2706 static void cifs_extend_writeback(struct address_space *mapping,
2707 				  struct xa_state *xas,
2708 				  long *_count,
2709 				  loff_t start,
2710 				  int max_pages,
2711 				  loff_t max_len,
2712 				  size_t *_len)
2713 {
2714 	struct folio_batch batch;
2715 	struct folio *folio;
2716 	unsigned int nr_pages;
2717 	pgoff_t index = (start + *_len) / PAGE_SIZE;
2718 	size_t len;
2719 	bool stop = true;
2720 	unsigned int i;
2721 
2722 	folio_batch_init(&batch);
2723 
2724 	do {
2725 		/* Firstly, we gather up a batch of contiguous dirty pages
2726 		 * under the RCU read lock - but we can't clear the dirty flags
2727 		 * there if any of those pages are mapped.
2728 		 */
2729 		rcu_read_lock();
2730 
2731 		xas_for_each(xas, folio, ULONG_MAX) {
2732 			stop = true;
2733 			if (xas_retry(xas, folio))
2734 				continue;
2735 			if (xa_is_value(folio))
2736 				break;
2737 			if (folio->index != index) {
2738 				xas_reset(xas);
2739 				break;
2740 			}
2741 
2742 			if (!folio_try_get_rcu(folio)) {
2743 				xas_reset(xas);
2744 				continue;
2745 			}
2746 			nr_pages = folio_nr_pages(folio);
2747 			if (nr_pages > max_pages) {
2748 				xas_reset(xas);
2749 				break;
2750 			}
2751 
2752 			/* Has the page moved or been split? */
2753 			if (unlikely(folio != xas_reload(xas))) {
2754 				folio_put(folio);
2755 				xas_reset(xas);
2756 				break;
2757 			}
2758 
2759 			if (!folio_trylock(folio)) {
2760 				folio_put(folio);
2761 				xas_reset(xas);
2762 				break;
2763 			}
2764 			if (!folio_test_dirty(folio) ||
2765 			    folio_test_writeback(folio)) {
2766 				folio_unlock(folio);
2767 				folio_put(folio);
2768 				xas_reset(xas);
2769 				break;
2770 			}
2771 
2772 			max_pages -= nr_pages;
2773 			len = folio_size(folio);
2774 			stop = false;
2775 
2776 			index += nr_pages;
2777 			*_count -= nr_pages;
2778 			*_len += len;
2779 			if (max_pages <= 0 || *_len >= max_len || *_count <= 0)
2780 				stop = true;
2781 
2782 			if (!folio_batch_add(&batch, folio))
2783 				break;
2784 			if (stop)
2785 				break;
2786 		}
2787 
2788 		xas_pause(xas);
2789 		rcu_read_unlock();
2790 
2791 		/* Now, if we obtained any folios, we can mark them as being
2792 		 * under writeback and unlock them.
2793 		 */
2794 		if (!folio_batch_count(&batch))
2795 			break;
2796 
2797 		for (i = 0; i < folio_batch_count(&batch); i++) {
2798 			folio = batch.folios[i];
2799 			/* The folio should be locked, dirty and not undergoing
2800 			 * writeback from the loop above.
2801 			 */
2802 			if (!folio_clear_dirty_for_io(folio))
2803 				WARN_ON(1);
2804 			folio_start_writeback(folio);
2805 			folio_unlock(folio);
2806 		}
2807 
2808 		folio_batch_release(&batch);
2809 		cond_resched();
2810 	} while (!stop);
2811 }
2812 
2813 /*
2814  * Write back the locked page and any subsequent non-locked dirty pages.
2815  */
2816 static ssize_t cifs_write_back_from_locked_folio(struct address_space *mapping,
2817 						 struct writeback_control *wbc,
2818 						 struct xa_state *xas,
2819 						 struct folio *folio,
2820 						 unsigned long long start,
2821 						 unsigned long long end)
2822 {
2823 	struct inode *inode = mapping->host;
2824 	struct TCP_Server_Info *server;
2825 	struct cifs_writedata *wdata;
2826 	struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
2827 	struct cifs_credits credits_on_stack;
2828 	struct cifs_credits *credits = &credits_on_stack;
2829 	struct cifsFileInfo *cfile = NULL;
2830 	unsigned long long i_size = i_size_read(inode), max_len;
2831 	unsigned int xid, wsize;
2832 	size_t len = folio_size(folio);
2833 	long count = wbc->nr_to_write;
2834 	int rc;
2835 
2836 	/* The folio should be locked, dirty and not undergoing writeback. */
2837 	if (!folio_clear_dirty_for_io(folio))
2838 		WARN_ON_ONCE(1);
2839 	folio_start_writeback(folio);
2840 
2841 	count -= folio_nr_pages(folio);
2842 
2843 	xid = get_xid();
2844 	server = cifs_pick_channel(cifs_sb_master_tcon(cifs_sb)->ses);
2845 
2846 	rc = cifs_get_writable_file(CIFS_I(inode), FIND_WR_ANY, &cfile);
2847 	if (rc) {
2848 		cifs_dbg(VFS, "No writable handle in writepages rc=%d\n", rc);
2849 		goto err_xid;
2850 	}
2851 
2852 	rc = server->ops->wait_mtu_credits(server, cifs_sb->ctx->wsize,
2853 					   &wsize, credits);
2854 	if (rc != 0)
2855 		goto err_close;
2856 
2857 	wdata = cifs_writedata_alloc(cifs_writev_complete);
2858 	if (!wdata) {
2859 		rc = -ENOMEM;
2860 		goto err_uncredit;
2861 	}
2862 
2863 	wdata->sync_mode = wbc->sync_mode;
2864 	wdata->offset = folio_pos(folio);
2865 	wdata->pid = cfile->pid;
2866 	wdata->credits = credits_on_stack;
2867 	wdata->cfile = cfile;
2868 	wdata->server = server;
2869 	cfile = NULL;
2870 
2871 	/* Find all consecutive lockable dirty pages that have contiguous
2872 	 * written regions, stopping when we find a page that is not
2873 	 * immediately lockable, is not dirty or is missing, or we reach the
2874 	 * end of the range.
2875 	 */
2876 	if (start < i_size) {
2877 		/* Trim the write to the EOF; the extra data is ignored.  Also
2878 		 * put an upper limit on the size of a single storedata op.
2879 		 */
2880 		max_len = wsize;
2881 		max_len = min_t(unsigned long long, max_len, end - start + 1);
2882 		max_len = min_t(unsigned long long, max_len, i_size - start);
2883 
2884 		if (len < max_len) {
2885 			int max_pages = INT_MAX;
2886 
2887 #ifdef CONFIG_CIFS_SMB_DIRECT
2888 			if (server->smbd_conn)
2889 				max_pages = server->smbd_conn->max_frmr_depth;
2890 #endif
2891 			max_pages -= folio_nr_pages(folio);
2892 
2893 			if (max_pages > 0)
2894 				cifs_extend_writeback(mapping, xas, &count, start,
2895 						      max_pages, max_len, &len);
2896 		}
2897 	}
2898 	len = min_t(unsigned long long, len, i_size - start);
2899 
2900 	/* We now have a contiguous set of dirty pages, each with writeback
2901 	 * set; the first page is still locked at this point, but all the rest
2902 	 * have been unlocked.
2903 	 */
2904 	folio_unlock(folio);
2905 	wdata->bytes = len;
2906 
2907 	if (start < i_size) {
2908 		iov_iter_xarray(&wdata->iter, ITER_SOURCE, &mapping->i_pages,
2909 				start, len);
2910 
2911 		rc = adjust_credits(wdata->server, &wdata->credits, wdata->bytes);
2912 		if (rc)
2913 			goto err_wdata;
2914 
2915 		if (wdata->cfile->invalidHandle)
2916 			rc = -EAGAIN;
2917 		else
2918 			rc = wdata->server->ops->async_writev(wdata,
2919 							      cifs_writedata_release);
2920 		if (rc >= 0) {
2921 			kref_put(&wdata->refcount, cifs_writedata_release);
2922 			goto err_close;
2923 		}
2924 	} else {
2925 		/* The dirty region was entirely beyond the EOF. */
2926 		cifs_pages_written_back(inode, start, len);
2927 		rc = 0;
2928 	}
2929 
2930 err_wdata:
2931 	kref_put(&wdata->refcount, cifs_writedata_release);
2932 err_uncredit:
2933 	add_credits_and_wake_if(server, credits, 0);
2934 err_close:
2935 	if (cfile)
2936 		cifsFileInfo_put(cfile);
2937 err_xid:
2938 	free_xid(xid);
2939 	if (rc == 0) {
2940 		wbc->nr_to_write = count;
2941 		rc = len;
2942 	} else if (is_retryable_error(rc)) {
2943 		cifs_pages_write_redirty(inode, start, len);
2944 	} else {
2945 		cifs_pages_write_failed(inode, start, len);
2946 		mapping_set_error(mapping, rc);
2947 	}
2948 	/* Indication to update ctime and mtime as close is deferred */
2949 	set_bit(CIFS_INO_MODIFIED_ATTR, &CIFS_I(inode)->flags);
2950 	return rc;
2951 }
2952 
2953 /*
2954  * write a region of pages back to the server
2955  */
2956 static ssize_t cifs_writepages_begin(struct address_space *mapping,
2957 				     struct writeback_control *wbc,
2958 				     struct xa_state *xas,
2959 				     unsigned long long *_start,
2960 				     unsigned long long end)
2961 {
2962 	struct folio *folio;
2963 	unsigned long long start = *_start;
2964 	ssize_t ret;
2965 	int skips = 0;
2966 
2967 search_again:
2968 	/* Find the first dirty page. */
2969 	rcu_read_lock();
2970 
2971 	for (;;) {
2972 		folio = xas_find_marked(xas, end / PAGE_SIZE, PAGECACHE_TAG_DIRTY);
2973 		if (xas_retry(xas, folio) || xa_is_value(folio))
2974 			continue;
2975 		if (!folio)
2976 			break;
2977 
2978 		if (!folio_try_get_rcu(folio)) {
2979 			xas_reset(xas);
2980 			continue;
2981 		}
2982 
2983 		if (unlikely(folio != xas_reload(xas))) {
2984 			folio_put(folio);
2985 			xas_reset(xas);
2986 			continue;
2987 		}
2988 
2989 		xas_pause(xas);
2990 		break;
2991 	}
2992 	rcu_read_unlock();
2993 	if (!folio)
2994 		return 0;
2995 
2996 	start = folio_pos(folio); /* May regress with THPs */
2997 
2998 	/* At this point we hold neither the i_pages lock nor the page lock:
2999 	 * the page may be truncated or invalidated (changing page->mapping to
3000 	 * NULL), or even swizzled back from swapper_space to tmpfs file
3001 	 * mapping
3002 	 */
3003 lock_again:
3004 	if (wbc->sync_mode != WB_SYNC_NONE) {
3005 		ret = folio_lock_killable(folio);
3006 		if (ret < 0)
3007 			return ret;
3008 	} else {
3009 		if (!folio_trylock(folio))
3010 			goto search_again;
3011 	}
3012 
3013 	if (folio->mapping != mapping ||
3014 	    !folio_test_dirty(folio)) {
3015 		start += folio_size(folio);
3016 		folio_unlock(folio);
3017 		goto search_again;
3018 	}
3019 
3020 	if (folio_test_writeback(folio) ||
3021 	    folio_test_fscache(folio)) {
3022 		folio_unlock(folio);
3023 		if (wbc->sync_mode != WB_SYNC_NONE) {
3024 			folio_wait_writeback(folio);
3025 #ifdef CONFIG_CIFS_FSCACHE
3026 			folio_wait_fscache(folio);
3027 #endif
3028 			goto lock_again;
3029 		}
3030 
3031 		start += folio_size(folio);
3032 		if (wbc->sync_mode == WB_SYNC_NONE) {
3033 			if (skips >= 5 || need_resched()) {
3034 				ret = 0;
3035 				goto out;
3036 			}
3037 			skips++;
3038 		}
3039 		goto search_again;
3040 	}
3041 
3042 	ret = cifs_write_back_from_locked_folio(mapping, wbc, xas, folio, start, end);
3043 out:
3044 	if (ret > 0)
3045 		*_start = start + ret;
3046 	return ret;
3047 }
3048 
3049 /*
3050  * Write a region of pages back to the server
3051  */
3052 static int cifs_writepages_region(struct address_space *mapping,
3053 				  struct writeback_control *wbc,
3054 				  unsigned long long *_start,
3055 				  unsigned long long end)
3056 {
3057 	ssize_t ret;
3058 
3059 	XA_STATE(xas, &mapping->i_pages, *_start / PAGE_SIZE);
3060 
3061 	do {
3062 		ret = cifs_writepages_begin(mapping, wbc, &xas, _start, end);
3063 		if (ret > 0 && wbc->nr_to_write > 0)
3064 			cond_resched();
3065 	} while (ret > 0 && wbc->nr_to_write > 0);
3066 
3067 	return ret > 0 ? 0 : ret;
3068 }
3069 
3070 /*
3071  * Write some of the pending data back to the server
3072  */
3073 static int cifs_writepages(struct address_space *mapping,
3074 			   struct writeback_control *wbc)
3075 {
3076 	loff_t start, end;
3077 	int ret;
3078 
3079 	/* We have to be careful as we can end up racing with setattr()
3080 	 * truncating the pagecache since the caller doesn't take a lock here
3081 	 * to prevent it.
3082 	 */
3083 
3084 	if (wbc->range_cyclic && mapping->writeback_index) {
3085 		start = mapping->writeback_index * PAGE_SIZE;
3086 		ret = cifs_writepages_region(mapping, wbc, &start, LLONG_MAX);
3087 		if (ret < 0)
3088 			goto out;
3089 
3090 		if (wbc->nr_to_write <= 0) {
3091 			mapping->writeback_index = start / PAGE_SIZE;
3092 			goto out;
3093 		}
3094 
3095 		start = 0;
3096 		end = mapping->writeback_index * PAGE_SIZE;
3097 		mapping->writeback_index = 0;
3098 		ret = cifs_writepages_region(mapping, wbc, &start, end);
3099 		if (ret == 0)
3100 			mapping->writeback_index = start / PAGE_SIZE;
3101 	} else if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX) {
3102 		start = 0;
3103 		ret = cifs_writepages_region(mapping, wbc, &start, LLONG_MAX);
3104 		if (wbc->nr_to_write > 0 && ret == 0)
3105 			mapping->writeback_index = start / PAGE_SIZE;
3106 	} else {
3107 		start = wbc->range_start;
3108 		ret = cifs_writepages_region(mapping, wbc, &start, wbc->range_end);
3109 	}
3110 
3111 out:
3112 	return ret;
3113 }
3114 
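/*
 * Write a single locked page back via cifs_partialpagewrite(), retrying on
 * -EAGAIN for data-integrity writeback and redirtying the page for other
 * retryable errors.
 */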
3115 static int
3116 cifs_writepage_locked(struct page *page, struct writeback_control *wbc)
3117 {
3118 	int rc;
3119 	unsigned int xid;
3120 
3121 	xid = get_xid();
3122 /* BB add check for wbc flags */
3123 	get_page(page);
3124 	if (!PageUptodate(page))
3125 		cifs_dbg(FYI, "ppw - page not up to date\n");
3126 
3127 	/*
3128 	 * Set the "writeback" flag, and clear "dirty" in the radix tree.
3129 	 *
3130 	 * A writepage() implementation always needs to do either this,
3131 	 * or re-dirty the page with "redirty_page_for_writepage()" in
3132 	 * the case of a failure.
3133 	 *
3134 	 * Just unlocking the page will cause the radix tree tag-bits
3135 	 * to fail to update with the state of the page correctly.
3136 	 */
3137 	set_page_writeback(page);
3138 retry_write:
3139 	rc = cifs_partialpagewrite(page, 0, PAGE_SIZE);
3140 	if (is_retryable_error(rc)) {
3141 		if (wbc->sync_mode == WB_SYNC_ALL && rc == -EAGAIN)
3142 			goto retry_write;
3143 		redirty_page_for_writepage(wbc, page);
3144 	} else if (rc != 0) {
3145 		SetPageError(page);
3146 		mapping_set_error(page->mapping, rc);
3147 	} else {
3148 		SetPageUptodate(page);
3149 	}
3150 	end_page_writeback(page);
3151 	put_page(page);
3152 	free_xid(xid);
3153 	return rc;
3154 }
3155 
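/*
 * Complete a buffered write.  If the folio is (or has just become)
 * uptodate, the copied data is simply marked dirty for later writeback;
 * otherwise the newly copied bytes are written to the server synchronously.
 * i_size and the block estimate are grown if the write extended the file.
 */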
3156 static int cifs_write_end(struct file *file, struct address_space *mapping,
3157 			loff_t pos, unsigned len, unsigned copied,
3158 			struct page *page, void *fsdata)
3159 {
3160 	int rc;
3161 	struct inode *inode = mapping->host;
3162 	struct cifsFileInfo *cfile = file->private_data;
3163 	struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
3164 	struct folio *folio = page_folio(page);
3165 	__u32 pid;
3166 
3167 	if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
3168 		pid = cfile->pid;
3169 	else
3170 		pid = current->tgid;
3171 
3172 	cifs_dbg(FYI, "write_end for page %p from pos %lld with %d bytes\n",
3173 		 page, pos, copied);
3174 
3175 	if (folio_test_checked(folio)) {
3176 		if (copied == len)
3177 			folio_mark_uptodate(folio);
3178 		folio_clear_checked(folio);
3179 	} else if (!folio_test_uptodate(folio) && copied == PAGE_SIZE)
3180 		folio_mark_uptodate(folio);
3181 
3182 	if (!folio_test_uptodate(folio)) {
3183 		char *page_data;
3184 		unsigned offset = pos & (PAGE_SIZE - 1);
3185 		unsigned int xid;
3186 
3187 		xid = get_xid();
3188 		/* this is probably better than directly calling
3189 		   partialpage_write since in this function the file handle is
3190 		   known, which we might as well leverage */
3191 		/* BB check if anything else missing out of ppw
3192 		   such as updating last write time */
3193 		page_data = kmap(page);
3194 		rc = cifs_write(cfile, pid, page_data + offset, copied, &pos);
3195 		/* if (rc < 0) should we set writebehind rc? */
3196 		kunmap(page);
3197 
3198 		free_xid(xid);
3199 	} else {
3200 		rc = copied;
3201 		pos += copied;
3202 		set_page_dirty(page);
3203 	}
3204 
3205 	if (rc > 0) {
3206 		spin_lock(&inode->i_lock);
3207 		if (pos > inode->i_size) {
3208 			loff_t additional_blocks = (512 - 1 + copied) >> 9;
3209 
3210 			i_size_write(inode, pos);
3211 			/*
3212 			 * Estimate new allocation size based on the amount written.
3213 			 * This will be updated from server on close (and on queryinfo)
3214 			 */
3215 			inode->i_blocks = min_t(blkcnt_t, (512 - 1 + pos) >> 9,
3216 						inode->i_blocks + additional_blocks);
3217 		}
3218 		spin_unlock(&inode->i_lock);
3219 	}
3220 
3221 	unlock_page(page);
3222 	put_page(page);
3223 	/* Indication to update ctime and mtime as close is deferred */
3224 	set_bit(CIFS_INO_MODIFIED_ATTR, &CIFS_I(inode)->flags);
3225 
3226 	return rc;
3227 }
3228 
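/*
 * fsync for strict cache mode: flush and wait on dirty pagecache, drop the
 * local cache if we no longer hold read caching on the inode, then ask the
 * server to flush its copy unless server-side flushing is disabled for
 * this mount.
 */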
3229 int cifs_strict_fsync(struct file *file, loff_t start, loff_t end,
3230 		      int datasync)
3231 {
3232 	unsigned int xid;
3233 	int rc = 0;
3234 	struct cifs_tcon *tcon;
3235 	struct TCP_Server_Info *server;
3236 	struct cifsFileInfo *smbfile = file->private_data;
3237 	struct inode *inode = file_inode(file);
3238 	struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
3239 
3240 	rc = file_write_and_wait_range(file, start, end);
3241 	if (rc) {
3242 		trace_cifs_fsync_err(inode->i_ino, rc);
3243 		return rc;
3244 	}
3245 
3246 	xid = get_xid();
3247 
3248 	cifs_dbg(FYI, "Sync file - name: %pD datasync: 0x%x\n",
3249 		 file, datasync);
3250 
3251 	if (!CIFS_CACHE_READ(CIFS_I(inode))) {
3252 		rc = cifs_zap_mapping(inode);
3253 		if (rc) {
3254 			cifs_dbg(FYI, "rc: %d during invalidate phase\n", rc);
3255 			rc = 0; /* don't care about it in fsync */
3256 		}
3257 	}
3258 
3259 	tcon = tlink_tcon(smbfile->tlink);
3260 	if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC)) {
3261 		server = tcon->ses->server;
3262 		if (server->ops->flush == NULL) {
3263 			rc = -ENOSYS;
3264 			goto strict_fsync_exit;
3265 		}
3266 
3267 		if ((OPEN_FMODE(smbfile->f_flags) & FMODE_WRITE) == 0) {
3268 			smbfile = find_writable_file(CIFS_I(inode), FIND_WR_ANY);
3269 			if (smbfile) {
3270 				rc = server->ops->flush(xid, tcon, &smbfile->fid);
3271 				cifsFileInfo_put(smbfile);
3272 			} else
3273 				cifs_dbg(FYI, "ignore fsync for file not open for write\n");
3274 		} else
3275 			rc = server->ops->flush(xid, tcon, &smbfile->fid);
3276 	}
3277 
3278 strict_fsync_exit:
3279 	free_xid(xid);
3280 	return rc;
3281 }
3282 
3283 int cifs_fsync(struct file *file, loff_t start, loff_t end, int datasync)
3284 {
3285 	unsigned int xid;
3286 	int rc = 0;
3287 	struct cifs_tcon *tcon;
3288 	struct TCP_Server_Info *server;
3289 	struct cifsFileInfo *smbfile = file->private_data;
3290 	struct inode *inode = file_inode(file);
3291 	struct cifs_sb_info *cifs_sb = CIFS_FILE_SB(file);
3292 
3293 	rc = file_write_and_wait_range(file, start, end);
3294 	if (rc) {
3295 		trace_cifs_fsync_err(file_inode(file)->i_ino, rc);
3296 		return rc;
3297 	}
3298 
3299 	xid = get_xid();
3300 
3301 	cifs_dbg(FYI, "Sync file - name: %pD datasync: 0x%x\n",
3302 		 file, datasync);
3303 
3304 	tcon = tlink_tcon(smbfile->tlink);
3305 	if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC)) {
3306 		server = tcon->ses->server;
3307 		if (server->ops->flush == NULL) {
3308 			rc = -ENOSYS;
3309 			goto fsync_exit;
3310 		}
3311 
3312 		if ((OPEN_FMODE(smbfile->f_flags) & FMODE_WRITE) == 0) {
3313 			smbfile = find_writable_file(CIFS_I(inode), FIND_WR_ANY);
3314 			if (smbfile) {
3315 				rc = server->ops->flush(xid, tcon, &smbfile->fid);
3316 				cifsFileInfo_put(smbfile);
3317 			} else
3318 				cifs_dbg(FYI, "ignore fsync for file not open for write\n");
3319 		} else
3320 			rc = server->ops->flush(xid, tcon, &smbfile->fid);
3321 	}
3322 
3323 fsync_exit:
3324 	free_xid(xid);
3325 	return rc;
3326 }
3327 
3328 /*
3329  * As the file closes, flush all cached write data for this inode, checking
3330  * for write-behind errors.
3331  */
3332 int cifs_flush(struct file *file, fl_owner_t id)
3333 {
3334 	struct inode *inode = file_inode(file);
3335 	int rc = 0;
3336 
3337 	if (file->f_mode & FMODE_WRITE)
3338 		rc = filemap_write_and_wait(inode->i_mapping);
3339 
3340 	cifs_dbg(FYI, "Flush inode %p file %p rc %d\n", inode, file, rc);
3341 	if (rc) {
3342 		/* get more nuanced writeback errors */
3343 		rc = filemap_check_wb_err(file->f_mapping, 0);
3344 		trace_cifs_flush_err(inode->i_ino, rc);
3345 	}
3346 	return rc;
3347 }
3348 
3349 static void
3350 cifs_uncached_writedata_release(struct kref *refcount)
3351 {
3352 	struct cifs_writedata *wdata = container_of(refcount,
3353 					struct cifs_writedata, refcount);
3354 
3355 	kref_put(&wdata->ctx->refcount, cifs_aio_ctx_release);
3356 	cifs_writedata_release(refcount);
3357 }
3358 
3359 static void collect_uncached_write_data(struct cifs_aio_ctx *ctx);
3360 
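/*
 * Completion work for an uncached (direct or aio) write: fold the result
 * into the cached EOF and i_size, signal the waiter and let the aio
 * context collect any finished requests.
 */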
3361 static void
3362 cifs_uncached_writev_complete(struct work_struct *work)
3363 {
3364 	struct cifs_writedata *wdata = container_of(work,
3365 					struct cifs_writedata, work);
3366 	struct inode *inode = d_inode(wdata->cfile->dentry);
3367 	struct cifsInodeInfo *cifsi = CIFS_I(inode);
3368 
3369 	spin_lock(&inode->i_lock);
3370 	cifs_update_eof(cifsi, wdata->offset, wdata->bytes);
3371 	if (cifsi->server_eof > inode->i_size)
3372 		i_size_write(inode, cifsi->server_eof);
3373 	spin_unlock(&inode->i_lock);
3374 
3375 	complete(&wdata->done);
3376 	collect_uncached_write_data(wdata->ctx);
3377 	/* the below call can possibly free the last ref to aio ctx */
3378 	kref_put(&wdata->refcount, cifs_uncached_writedata_release);
3379 }
3380 
3381 static int
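/*
 * Retry an uncached write request that failed with a retryable error.  The
 * whole wdata is resent in one piece: we wait (sleeping and retrying) until
 * the server grants enough credits for its full length before reissuing
 * the async write.
 */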
3382 cifs_resend_wdata(struct cifs_writedata *wdata, struct list_head *wdata_list,
3383 	struct cifs_aio_ctx *ctx)
3384 {
3385 	unsigned int wsize;
3386 	struct cifs_credits credits;
3387 	int rc;
3388 	struct TCP_Server_Info *server = wdata->server;
3389 
3390 	do {
3391 		if (wdata->cfile->invalidHandle) {
3392 			rc = cifs_reopen_file(wdata->cfile, false);
3393 			if (rc == -EAGAIN)
3394 				continue;
3395 			else if (rc)
3396 				break;
3397 		}
3398 
3399 
3400 		/*
3401 		 * Wait for credits to resend this wdata.
3402 		 * Note: we are attempting to resend the whole wdata not in
3403 		 * segments
3404 		 */
3405 		do {
3406 			rc = server->ops->wait_mtu_credits(server, wdata->bytes,
3407 						&wsize, &credits);
3408 			if (rc)
3409 				goto fail;
3410 
3411 			if (wsize < wdata->bytes) {
3412 				add_credits_and_wake_if(server, &credits, 0);
3413 				msleep(1000);
3414 			}
3415 		} while (wsize < wdata->bytes);
3416 		wdata->credits = credits;
3417 
3418 		rc = adjust_credits(server, &wdata->credits, wdata->bytes);
3419 
3420 		if (!rc) {
3421 			if (wdata->cfile->invalidHandle)
3422 				rc = -EAGAIN;
3423 			else {
3424 				wdata->replay = true;
3425 #ifdef CONFIG_CIFS_SMB_DIRECT
3426 				if (wdata->mr) {
3427 					wdata->mr->need_invalidate = true;
3428 					smbd_deregister_mr(wdata->mr);
3429 					wdata->mr = NULL;
3430 				}
3431 #endif
3432 				rc = server->ops->async_writev(wdata,
3433 					cifs_uncached_writedata_release);
3434 			}
3435 		}
3436 
3437 		/* If the write was successfully sent, we are done */
3438 		if (!rc) {
3439 			list_add_tail(&wdata->list, wdata_list);
3440 			return 0;
3441 		}
3442 
3443 		/* Roll back credits and retry if needed */
3444 		add_credits_and_wake_if(server, &wdata->credits, 0);
3445 	} while (rc == -EAGAIN);
3446 
3447 fail:
3448 	kref_put(&wdata->refcount, cifs_uncached_writedata_release);
3449 	return rc;
3450 }
3451 
3452 /*
3453  * Select span of a bvec iterator we're going to use.  Limit it by both maximum
3454  * size and maximum number of segments.
3455  */
3456 static size_t cifs_limit_bvec_subset(const struct iov_iter *iter, size_t max_size,
3457 				     size_t max_segs, unsigned int *_nsegs)
3458 {
3459 	const struct bio_vec *bvecs = iter->bvec;
3460 	unsigned int nbv = iter->nr_segs, ix = 0, nsegs = 0;
3461 	size_t len, span = 0, n = iter->count;
3462 	size_t skip = iter->iov_offset;
3463 
3464 	if (WARN_ON(!iov_iter_is_bvec(iter)) || n == 0)
3465 		return 0;
3466 
3467 	while (n && ix < nbv && skip) {
3468 		len = bvecs[ix].bv_len;
3469 		if (skip < len)
3470 			break;
3471 		skip -= len;
3472 		n -= len;
3473 		ix++;
3474 	}
3475 
3476 	while (n && ix < nbv) {
3477 		len = min3(n, bvecs[ix].bv_len - skip, max_size);
3478 		span += len;
3479 		max_size -= len;
3480 		nsegs++;
3481 		ix++;
3482 		if (max_size == 0 || nsegs >= max_segs)
3483 			break;
3484 		skip = 0;
3485 		n -= len;
3486 	}
3487 
3488 	*_nsegs = nsegs;
3489 	return span;
3490 }
3491 
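/*
 * Carve the source iterator into chunks bounded by the negotiated wsize
 * and (for smbdirect) the maximum number of segments, allocate a
 * cifs_writedata for each chunk and dispatch it as an async write,
 * collecting the requests on wdata_list for the caller to wait on.
 */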
3492 static int
3493 cifs_write_from_iter(loff_t fpos, size_t len, struct iov_iter *from,
3494 		     struct cifsFileInfo *open_file,
3495 		     struct cifs_sb_info *cifs_sb, struct list_head *wdata_list,
3496 		     struct cifs_aio_ctx *ctx)
3497 {
3498 	int rc = 0;
3499 	size_t cur_len, max_len;
3500 	struct cifs_writedata *wdata;
3501 	pid_t pid;
3502 	struct TCP_Server_Info *server;
3503 	unsigned int xid, max_segs = INT_MAX;
3504 
3505 	if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
3506 		pid = open_file->pid;
3507 	else
3508 		pid = current->tgid;
3509 
3510 	server = cifs_pick_channel(tlink_tcon(open_file->tlink)->ses);
3511 	xid = get_xid();
3512 
3513 #ifdef CONFIG_CIFS_SMB_DIRECT
3514 	if (server->smbd_conn)
3515 		max_segs = server->smbd_conn->max_frmr_depth;
3516 #endif
3517 
3518 	do {
3519 		struct cifs_credits credits_on_stack;
3520 		struct cifs_credits *credits = &credits_on_stack;
3521 		unsigned int wsize, nsegs = 0;
3522 
3523 		if (signal_pending(current)) {
3524 			rc = -EINTR;
3525 			break;
3526 		}
3527 
3528 		if (open_file->invalidHandle) {
3529 			rc = cifs_reopen_file(open_file, false);
3530 			if (rc == -EAGAIN)
3531 				continue;
3532 			else if (rc)
3533 				break;
3534 		}
3535 
3536 		rc = server->ops->wait_mtu_credits(server, cifs_sb->ctx->wsize,
3537 						   &wsize, credits);
3538 		if (rc)
3539 			break;
3540 
3541 		max_len = min_t(const size_t, len, wsize);
3542 		if (!max_len) {
3543 			rc = -EAGAIN;
3544 			add_credits_and_wake_if(server, credits, 0);
3545 			break;
3546 		}
3547 
3548 		cur_len = cifs_limit_bvec_subset(from, max_len, max_segs, &nsegs);
3549 		cifs_dbg(FYI, "write_from_iter len=%zx/%zx nsegs=%u/%lu/%u\n",
3550 			 cur_len, max_len, nsegs, from->nr_segs, max_segs);
3551 		if (cur_len == 0) {
3552 			rc = -EIO;
3553 			add_credits_and_wake_if(server, credits, 0);
3554 			break;
3555 		}
3556 
3557 		wdata = cifs_writedata_alloc(cifs_uncached_writev_complete);
3558 		if (!wdata) {
3559 			rc = -ENOMEM;
3560 			add_credits_and_wake_if(server, credits, 0);
3561 			break;
3562 		}
3563 
3564 		wdata->sync_mode = WB_SYNC_ALL;
3565 		wdata->offset	= (__u64)fpos;
3566 		wdata->cfile	= cifsFileInfo_get(open_file);
3567 		wdata->server	= server;
3568 		wdata->pid	= pid;
3569 		wdata->bytes	= cur_len;
3570 		wdata->credits	= credits_on_stack;
3571 		wdata->iter	= *from;
3572 		wdata->ctx	= ctx;
3573 		kref_get(&ctx->refcount);
3574 
3575 		iov_iter_truncate(&wdata->iter, cur_len);
3576 
3577 		rc = adjust_credits(server, &wdata->credits, wdata->bytes);
3578 
3579 		if (!rc) {
3580 			if (wdata->cfile->invalidHandle)
3581 				rc = -EAGAIN;
3582 			else
3583 				rc = server->ops->async_writev(wdata,
3584 					cifs_uncached_writedata_release);
3585 		}
3586 
3587 		if (rc) {
3588 			add_credits_and_wake_if(server, &wdata->credits, 0);
3589 			kref_put(&wdata->refcount,
3590 				 cifs_uncached_writedata_release);
3591 			if (rc == -EAGAIN)
3592 				continue;
3593 			break;
3594 		}
3595 
3596 		list_add_tail(&wdata->list, wdata_list);
3597 		iov_iter_advance(from, cur_len);
3598 		fpos += cur_len;
3599 		len -= cur_len;
3600 	} while (len > 0);
3601 
3602 	free_xid(xid);
3603 	return rc;
3604 }
3605 
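/*
 * Gather the results of the outstanding uncached writes attached to an aio
 * context in order of increasing offset, resending any that failed with
 * -EAGAIN, then complete the iocb (or the synchronous waiter) with either
 * the total number of bytes written or the first error seen.
 */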
3606 static void collect_uncached_write_data(struct cifs_aio_ctx *ctx)
3607 {
3608 	struct cifs_writedata *wdata, *tmp;
3609 	struct cifs_tcon *tcon;
3610 	struct cifs_sb_info *cifs_sb;
3611 	struct dentry *dentry = ctx->cfile->dentry;
3612 	ssize_t rc;
3613 
3614 	tcon = tlink_tcon(ctx->cfile->tlink);
3615 	cifs_sb = CIFS_SB(dentry->d_sb);
3616 
3617 	mutex_lock(&ctx->aio_mutex);
3618 
3619 	if (list_empty(&ctx->list)) {
3620 		mutex_unlock(&ctx->aio_mutex);
3621 		return;
3622 	}
3623 
3624 	rc = ctx->rc;
3625 	/*
3626 	 * Wait for and collect replies for any successful sends in order of
3627 	 * increasing offset. Once an error is hit, then return without waiting
3628 	 * for any more replies.
3629 	 */
3630 restart_loop:
3631 	list_for_each_entry_safe(wdata, tmp, &ctx->list, list) {
3632 		if (!rc) {
3633 			if (!try_wait_for_completion(&wdata->done)) {
3634 				mutex_unlock(&ctx->aio_mutex);
3635 				return;
3636 			}
3637 
3638 			if (wdata->result)
3639 				rc = wdata->result;
3640 			else
3641 				ctx->total_len += wdata->bytes;
3642 
3643 			/* resend call if it's a retryable error */
3644 			if (rc == -EAGAIN) {
3645 				struct list_head tmp_list;
3646 				struct iov_iter tmp_from = ctx->iter;
3647 
3648 				INIT_LIST_HEAD(&tmp_list);
3649 				list_del_init(&wdata->list);
3650 
3651 				if (ctx->direct_io)
3652 					rc = cifs_resend_wdata(
3653 						wdata, &tmp_list, ctx);
3654 				else {
3655 					iov_iter_advance(&tmp_from,
3656 						 wdata->offset - ctx->pos);
3657 
3658 					rc = cifs_write_from_iter(wdata->offset,
3659 						wdata->bytes, &tmp_from,
3660 						ctx->cfile, cifs_sb, &tmp_list,
3661 						ctx);
3662 
3663 					kref_put(&wdata->refcount,
3664 						cifs_uncached_writedata_release);
3665 				}
3666 
3667 				list_splice(&tmp_list, &ctx->list);
3668 				goto restart_loop;
3669 			}
3670 		}
3671 		list_del_init(&wdata->list);
3672 		kref_put(&wdata->refcount, cifs_uncached_writedata_release);
3673 	}
3674 
3675 	cifs_stats_bytes_written(tcon, ctx->total_len);
3676 	set_bit(CIFS_INO_INVALID_MAPPING, &CIFS_I(dentry->d_inode)->flags);
3677 
3678 	ctx->rc = (rc == 0) ? ctx->total_len : rc;
3679 
3680 	mutex_unlock(&ctx->aio_mutex);
3681 
3682 	if (ctx->iocb && ctx->iocb->ki_complete)
3683 		ctx->iocb->ki_complete(ctx->iocb, ctx->rc);
3684 	else
3685 		complete(&ctx->done);
3686 }
3687 
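/*
 * Common helper for the uncached and direct write paths: validate the
 * request, capture the source iterator in a cifs_aio_ctx (extracting or
 * duplicating it if the caller's buffers may go away), kick off the async
 * writes and then either return -EIOCBQUEUED or wait for completion.
 */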
3688 static ssize_t __cifs_writev(
3689 	struct kiocb *iocb, struct iov_iter *from, bool direct)
3690 {
3691 	struct file *file = iocb->ki_filp;
3692 	ssize_t total_written = 0;
3693 	struct cifsFileInfo *cfile;
3694 	struct cifs_tcon *tcon;
3695 	struct cifs_sb_info *cifs_sb;
3696 	struct cifs_aio_ctx *ctx;
3697 	int rc;
3698 
3699 	rc = generic_write_checks(iocb, from);
3700 	if (rc <= 0)
3701 		return rc;
3702 
3703 	cifs_sb = CIFS_FILE_SB(file);
3704 	cfile = file->private_data;
3705 	tcon = tlink_tcon(cfile->tlink);
3706 
3707 	if (!tcon->ses->server->ops->async_writev)
3708 		return -ENOSYS;
3709 
3710 	ctx = cifs_aio_ctx_alloc();
3711 	if (!ctx)
3712 		return -ENOMEM;
3713 
3714 	ctx->cfile = cifsFileInfo_get(cfile);
3715 
3716 	if (!is_sync_kiocb(iocb))
3717 		ctx->iocb = iocb;
3718 
3719 	ctx->pos = iocb->ki_pos;
3720 	ctx->direct_io = direct;
3721 	ctx->nr_pinned_pages = 0;
3722 
3723 	if (user_backed_iter(from)) {
3724 		/*
3725 		 * Extract IOVEC/UBUF-type iterators to a BVEC-type iterator as
3726 		 * they contain references to the calling process's virtual
3727 		 * memory layout which won't be available in an async worker
3728 		 * thread.  This also takes a pin on every folio involved.
3729 		 */
3730 		rc = netfs_extract_user_iter(from, iov_iter_count(from),
3731 					     &ctx->iter, 0);
3732 		if (rc < 0) {
3733 			kref_put(&ctx->refcount, cifs_aio_ctx_release);
3734 			return rc;
3735 		}
3736 
3737 		ctx->nr_pinned_pages = rc;
3738 		ctx->bv = (void *)ctx->iter.bvec;
3739 		ctx->bv_need_unpin = iov_iter_extract_will_pin(from);
3740 	} else if ((iov_iter_is_bvec(from) || iov_iter_is_kvec(from)) &&
3741 		   !is_sync_kiocb(iocb)) {
3742 		/*
3743 		 * If the op is asynchronous, we need to copy the list attached
3744 		 * to a BVEC/KVEC-type iterator, but we assume that the storage
3745 		 * will be pinned by the caller; in any case, we may or may not
3746 		 * be able to pin the pages, so we don't try.
3747 		 */
3748 		ctx->bv = (void *)dup_iter(&ctx->iter, from, GFP_KERNEL);
3749 		if (!ctx->bv) {
3750 			kref_put(&ctx->refcount, cifs_aio_ctx_release);
3751 			return -ENOMEM;
3752 		}
3753 	} else {
3754 		/*
3755 		 * Otherwise, we just pass the iterator down as-is and rely on
3756 		 * the caller to make sure the pages referred to by the
3757 		 * iterator don't evaporate.
3758 		 */
3759 		ctx->iter = *from;
3760 	}
3761 
3762 	ctx->len = iov_iter_count(&ctx->iter);
3763 
3764 	/* grab a lock here because the write response handlers can access ctx */
3765 	mutex_lock(&ctx->aio_mutex);
3766 
3767 	rc = cifs_write_from_iter(iocb->ki_pos, ctx->len, &ctx->iter,
3768 				  cfile, cifs_sb, &ctx->list, ctx);
3769 
3770 	/*
3771 	 * If at least one write was successfully sent, then discard any rc
3772 	 * value from the later writes. If the remaining writes succeed, then
3773 	 * we'll end up returning whatever was written. If they fail, then
3774 	 * we'll get a new rc value from them.
3775 	 */
3776 	if (!list_empty(&ctx->list))
3777 		rc = 0;
3778 
3779 	mutex_unlock(&ctx->aio_mutex);
3780 
3781 	if (rc) {
3782 		kref_put(&ctx->refcount, cifs_aio_ctx_release);
3783 		return rc;
3784 	}
3785 
3786 	if (!is_sync_kiocb(iocb)) {
3787 		kref_put(&ctx->refcount, cifs_aio_ctx_release);
3788 		return -EIOCBQUEUED;
3789 	}
3790 
3791 	rc = wait_for_completion_killable(&ctx->done);
3792 	if (rc) {
3793 		mutex_lock(&ctx->aio_mutex);
3794 		ctx->rc = rc = -EINTR;
3795 		total_written = ctx->total_len;
3796 		mutex_unlock(&ctx->aio_mutex);
3797 	} else {
3798 		rc = ctx->rc;
3799 		total_written = ctx->total_len;
3800 	}
3801 
3802 	kref_put(&ctx->refcount, cifs_aio_ctx_release);
3803 
3804 	if (unlikely(!total_written))
3805 		return rc;
3806 
3807 	iocb->ki_pos += total_written;
3808 	return total_written;
3809 }
3810 
3811 ssize_t cifs_direct_writev(struct kiocb *iocb, struct iov_iter *from)
3812 {
3813 	struct file *file = iocb->ki_filp;
3814 
3815 	cifs_revalidate_mapping(file->f_inode);
3816 	return __cifs_writev(iocb, from, true);
3817 }
3818 
3819 ssize_t cifs_user_writev(struct kiocb *iocb, struct iov_iter *from)
3820 {
3821 	return __cifs_writev(iocb, from, false);
3822 }
3823 
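/*
 * Writeback-cached write used when we hold a write oplock/lease: take the
 * inode lock and lock_sem, refuse the write if it conflicts with a
 * mandatory brlock, otherwise go through the generic page-cache write.
 */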
3824 static ssize_t
3825 cifs_writev(struct kiocb *iocb, struct iov_iter *from)
3826 {
3827 	struct file *file = iocb->ki_filp;
3828 	struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
3829 	struct inode *inode = file->f_mapping->host;
3830 	struct cifsInodeInfo *cinode = CIFS_I(inode);
3831 	struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;
3832 	ssize_t rc;
3833 
3834 	inode_lock(inode);
3835 	/*
3836 	 * We need to hold the sem to be sure nobody modifies lock list
3837 	 * with a brlock that prevents writing.
3838 	 */
3839 	down_read(&cinode->lock_sem);
3840 
3841 	rc = generic_write_checks(iocb, from);
3842 	if (rc <= 0)
3843 		goto out;
3844 
3845 	if (!cifs_find_lock_conflict(cfile, iocb->ki_pos, iov_iter_count(from),
3846 				     server->vals->exclusive_lock_type, 0,
3847 				     NULL, CIFS_WRITE_OP))
3848 		rc = __generic_file_write_iter(iocb, from);
3849 	else
3850 		rc = -EACCES;
3851 out:
3852 	up_read(&cinode->lock_sem);
3853 	inode_unlock(inode);
3854 
3855 	if (rc > 0)
3856 		rc = generic_write_sync(iocb, rc);
3857 	return rc;
3858 }
3859 
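/*
 * strictcache write entry point: if we hold a write oplock/lease, write
 * through the page cache (mounts with POSIX brlock support can use the
 * generic path); otherwise send the data uncached and, if we still cache
 * reads, zap the mapping and drop the oplock so stale data is not read back.
 */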
3860 ssize_t
3861 cifs_strict_writev(struct kiocb *iocb, struct iov_iter *from)
3862 {
3863 	struct inode *inode = file_inode(iocb->ki_filp);
3864 	struct cifsInodeInfo *cinode = CIFS_I(inode);
3865 	struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
3866 	struct cifsFileInfo *cfile = (struct cifsFileInfo *)
3867 						iocb->ki_filp->private_data;
3868 	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
3869 	ssize_t written;
3870 
3871 	written = cifs_get_writer(cinode);
3872 	if (written)
3873 		return written;
3874 
3875 	if (CIFS_CACHE_WRITE(cinode)) {
3876 		if (cap_unix(tcon->ses) &&
3877 		(CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability))
3878 		  && ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0)) {
3879 			written = generic_file_write_iter(iocb, from);
3880 			goto out;
3881 		}
3882 		written = cifs_writev(iocb, from);
3883 		goto out;
3884 	}
3885 	/*
3886 	 * For non-oplocked files in strict cache mode we need to write the data
3887 	 * to the server exactly from the pos to pos+len-1 rather than flush all
3888 	 * affected pages, because that may cause an error with mandatory locks on
3889 	 * these pages but not on the region from pos to pos+len-1.
3890 	 */
3891 	written = cifs_user_writev(iocb, from);
3892 	if (CIFS_CACHE_READ(cinode)) {
3893 		/*
3894 		 * We have read level caching and we have just sent a write
3895 		 * request to the server thus making data in the cache stale.
3896 		 * Zap the cache and set oplock/lease level to NONE to avoid
3897 		 * reading stale data from the cache. All subsequent read
3898 		 * operations will read new data from the server.
3899 		 */
3900 		cifs_zap_mapping(inode);
3901 		cifs_dbg(FYI, "Set Oplock/Lease to NONE for inode=%p after write\n",
3902 			 inode);
3903 		cinode->oplock = 0;
3904 	}
3905 out:
3906 	cifs_put_writer(cinode);
3907 	return written;
3908 }
3909 
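/*
 * Allocate a zeroed cifs_readdata and initialise its refcount, list,
 * completion and work item with the given completion handler.
 */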
3910 static struct cifs_readdata *cifs_readdata_alloc(work_func_t complete)
3911 {
3912 	struct cifs_readdata *rdata;
3913 
3914 	rdata = kzalloc(sizeof(*rdata), GFP_KERNEL);
3915 	if (rdata) {
3916 		kref_init(&rdata->refcount);
3917 		INIT_LIST_HEAD(&rdata->list);
3918 		init_completion(&rdata->done);
3919 		INIT_WORK(&rdata->work, complete);
3920 	}
3921 
3922 	return rdata;
3923 }
3924 
3925 void
3926 cifs_readdata_release(struct kref *refcount)
3927 {
3928 	struct cifs_readdata *rdata = container_of(refcount,
3929 					struct cifs_readdata, refcount);
3930 
3931 	if (rdata->ctx)
3932 		kref_put(&rdata->ctx->refcount, cifs_aio_ctx_release);
3933 #ifdef CONFIG_CIFS_SMB_DIRECT
3934 	if (rdata->mr) {
3935 		smbd_deregister_mr(rdata->mr);
3936 		rdata->mr = NULL;
3937 	}
3938 #endif
3939 	if (rdata->cfile)
3940 		cifsFileInfo_put(rdata->cfile);
3941 
3942 	kfree(rdata);
3943 }
3944 
3945 static void collect_uncached_read_data(struct cifs_aio_ctx *ctx);
3946 
3947 static void
3948 cifs_uncached_readv_complete(struct work_struct *work)
3949 {
3950 	struct cifs_readdata *rdata = container_of(work,
3951 						struct cifs_readdata, work);
3952 
3953 	complete(&rdata->done);
3954 	collect_uncached_read_data(rdata->ctx);
3955 	/* the below call can possibly free the last ref to aio ctx */
3956 	kref_put(&rdata->refcount, cifs_readdata_release);
3957 }
3958 
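/*
 * Resend a previously failed uncached read as a single request: reopen the
 * handle if needed, wait until enough credits are available to cover the
 * whole rdata, then reissue the async read. On success the rdata is added
 * back to the caller's pending list; otherwise the reference is dropped.
 */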
3959 static int cifs_resend_rdata(struct cifs_readdata *rdata,
3960 			struct list_head *rdata_list,
3961 			struct cifs_aio_ctx *ctx)
3962 {
3963 	unsigned int rsize;
3964 	struct cifs_credits credits;
3965 	int rc;
3966 	struct TCP_Server_Info *server;
3967 
3968 	/* XXX: should we pick a new channel here? */
3969 	server = rdata->server;
3970 
3971 	do {
3972 		if (rdata->cfile->invalidHandle) {
3973 			rc = cifs_reopen_file(rdata->cfile, true);
3974 			if (rc == -EAGAIN)
3975 				continue;
3976 			else if (rc)
3977 				break;
3978 		}
3979 
3980 		/*
3981 		 * Wait for credits to resend this rdata.
3982 		 * Note: we are attempting to resend the whole rdata rather
3983 		 * than in segments.
3984 		 */
3985 		do {
3986 			rc = server->ops->wait_mtu_credits(server, rdata->bytes,
3987 						&rsize, &credits);
3988 
3989 			if (rc)
3990 				goto fail;
3991 
3992 			if (rsize < rdata->bytes) {
3993 				add_credits_and_wake_if(server, &credits, 0);
3994 				msleep(1000);
3995 			}
3996 		} while (rsize < rdata->bytes);
3997 		rdata->credits = credits;
3998 
3999 		rc = adjust_credits(server, &rdata->credits, rdata->bytes);
4000 		if (!rc) {
4001 			if (rdata->cfile->invalidHandle)
4002 				rc = -EAGAIN;
4003 			else {
4004 #ifdef CONFIG_CIFS_SMB_DIRECT
4005 				if (rdata->mr) {
4006 					rdata->mr->need_invalidate = true;
4007 					smbd_deregister_mr(rdata->mr);
4008 					rdata->mr = NULL;
4009 				}
4010 #endif
4011 				rc = server->ops->async_readv(rdata);
4012 			}
4013 		}
4014 
4015 		/* If the read was successfully sent, we are done */
4016 		if (!rc) {
4017 			/* Add to aio pending list */
4018 			list_add_tail(&rdata->list, rdata_list);
4019 			return 0;
4020 		}
4021 
4022 		/* Roll back credits and retry if needed */
4023 		add_credits_and_wake_if(server, &rdata->credits, 0);
4024 	} while (rc == -EAGAIN);
4025 
4026 fail:
4027 	kref_put(&rdata->refcount, cifs_readdata_release);
4028 	return rc;
4029 }
4030 
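/*
 * Split an uncached read into rsize- and segment-limited chunks, allocate a
 * cifs_readdata for each chunk and issue it via ->async_readv(), queueing
 * the requests on rdata_list for collect_uncached_read_data() to reap.
 */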
4031 static int
4032 cifs_send_async_read(loff_t fpos, size_t len, struct cifsFileInfo *open_file,
4033 		     struct cifs_sb_info *cifs_sb, struct list_head *rdata_list,
4034 		     struct cifs_aio_ctx *ctx)
4035 {
4036 	struct cifs_readdata *rdata;
4037 	unsigned int rsize, nsegs, max_segs = INT_MAX;
4038 	struct cifs_credits credits_on_stack;
4039 	struct cifs_credits *credits = &credits_on_stack;
4040 	size_t cur_len, max_len;
4041 	int rc;
4042 	pid_t pid;
4043 	struct TCP_Server_Info *server;
4044 
4045 	server = cifs_pick_channel(tlink_tcon(open_file->tlink)->ses);
4046 
4047 #ifdef CONFIG_CIFS_SMB_DIRECT
4048 	if (server->smbd_conn)
4049 		max_segs = server->smbd_conn->max_frmr_depth;
4050 #endif
4051 
4052 	if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
4053 		pid = open_file->pid;
4054 	else
4055 		pid = current->tgid;
4056 
4057 	do {
4058 		if (open_file->invalidHandle) {
4059 			rc = cifs_reopen_file(open_file, true);
4060 			if (rc == -EAGAIN)
4061 				continue;
4062 			else if (rc)
4063 				break;
4064 		}
4065 
4066 		if (cifs_sb->ctx->rsize == 0)
4067 			cifs_sb->ctx->rsize =
4068 				server->ops->negotiate_rsize(tlink_tcon(open_file->tlink),
4069 							     cifs_sb->ctx);
4070 
4071 		rc = server->ops->wait_mtu_credits(server, cifs_sb->ctx->rsize,
4072 						   &rsize, credits);
4073 		if (rc)
4074 			break;
4075 
4076 		max_len = min_t(size_t, len, rsize);
4077 
4078 		cur_len = cifs_limit_bvec_subset(&ctx->iter, max_len,
4079 						 max_segs, &nsegs);
4080 		cifs_dbg(FYI, "read-to-iter len=%zx/%zx nsegs=%u/%lu/%u\n",
4081 			 cur_len, max_len, nsegs, ctx->iter.nr_segs, max_segs);
4082 		if (cur_len == 0) {
4083 			rc = -EIO;
4084 			add_credits_and_wake_if(server, credits, 0);
4085 			break;
4086 		}
4087 
4088 		rdata = cifs_readdata_alloc(cifs_uncached_readv_complete);
4089 		if (!rdata) {
4090 			add_credits_and_wake_if(server, credits, 0);
4091 			rc = -ENOMEM;
4092 			break;
4093 		}
4094 
4095 		rdata->server	= server;
4096 		rdata->cfile	= cifsFileInfo_get(open_file);
4097 		rdata->offset	= fpos;
4098 		rdata->bytes	= cur_len;
4099 		rdata->pid	= pid;
4100 		rdata->credits	= credits_on_stack;
4101 		rdata->ctx	= ctx;
4102 		kref_get(&ctx->refcount);
4103 
4104 		rdata->iter	= ctx->iter;
4105 		iov_iter_truncate(&rdata->iter, cur_len);
4106 
4107 		rc = adjust_credits(server, &rdata->credits, rdata->bytes);
4108 
4109 		if (!rc) {
4110 			if (rdata->cfile->invalidHandle)
4111 				rc = -EAGAIN;
4112 			else
4113 				rc = server->ops->async_readv(rdata);
4114 		}
4115 
4116 		if (rc) {
4117 			add_credits_and_wake_if(server, &rdata->credits, 0);
4118 			kref_put(&rdata->refcount, cifs_readdata_release);
4119 			if (rc == -EAGAIN)
4120 				continue;
4121 			break;
4122 		}
4123 
4124 		list_add_tail(&rdata->list, rdata_list);
4125 		iov_iter_advance(&ctx->iter, cur_len);
4126 		fpos += cur_len;
4127 		len -= cur_len;
4128 	} while (len > 0);
4129 
4130 	return rc;
4131 }
4132 
4133 static void
4134 collect_uncached_read_data(struct cifs_aio_ctx *ctx)
4135 {
4136 	struct cifs_readdata *rdata, *tmp;
4137 	struct cifs_sb_info *cifs_sb;
4138 	int rc;
4139 
4140 	cifs_sb = CIFS_SB(ctx->cfile->dentry->d_sb);
4141 
4142 	mutex_lock(&ctx->aio_mutex);
4143 
4144 	if (list_empty(&ctx->list)) {
4145 		mutex_unlock(&ctx->aio_mutex);
4146 		return;
4147 	}
4148 
4149 	rc = ctx->rc;
4150 	/* the loop below should proceed in the order of increasing offsets */
4151 again:
4152 	list_for_each_entry_safe(rdata, tmp, &ctx->list, list) {
4153 		if (!rc) {
4154 			if (!try_wait_for_completion(&rdata->done)) {
4155 				mutex_unlock(&ctx->aio_mutex);
4156 				return;
4157 			}
4158 
4159 			if (rdata->result == -EAGAIN) {
4160 				/* resend call if it's a retryable error */
4161 				struct list_head tmp_list;
4162 				unsigned int got_bytes = rdata->got_bytes;
4163 
4164 				list_del_init(&rdata->list);
4165 				INIT_LIST_HEAD(&tmp_list);
4166 
4167 				if (ctx->direct_io) {
4168 					/*
4169 					 * Re-use rdata as this is a
4170 					 * direct I/O
4171 					 */
4172 					rc = cifs_resend_rdata(
4173 						rdata,
4174 						&tmp_list, ctx);
4175 				} else {
4176 					rc = cifs_send_async_read(
4177 						rdata->offset + got_bytes,
4178 						rdata->bytes - got_bytes,
4179 						rdata->cfile, cifs_sb,
4180 						&tmp_list, ctx);
4181 
4182 					kref_put(&rdata->refcount,
4183 						cifs_readdata_release);
4184 				}
4185 
4186 				list_splice(&tmp_list, &ctx->list);
4187 
4188 				goto again;
4189 			} else if (rdata->result)
4190 				rc = rdata->result;
4191 
4192 			/* if there was a short read -- discard anything left */
4193 			if (rdata->got_bytes && rdata->got_bytes < rdata->bytes)
4194 				rc = -ENODATA;
4195 
4196 			ctx->total_len += rdata->got_bytes;
4197 		}
4198 		list_del_init(&rdata->list);
4199 		kref_put(&rdata->refcount, cifs_readdata_release);
4200 	}
4201 
4202 	/* mask nodata case */
4203 	if (rc == -ENODATA)
4204 		rc = 0;
4205 
4206 	ctx->rc = (rc == 0) ? (ssize_t)ctx->total_len : rc;
4207 
4208 	mutex_unlock(&ctx->aio_mutex);
4209 
4210 	if (ctx->iocb && ctx->iocb->ki_complete)
4211 		ctx->iocb->ki_complete(ctx->iocb, ctx->rc);
4212 	else
4213 		complete(&ctx->done);
4214 }
4215 
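/*
 * Common helper for the uncached and direct read paths: capture the
 * destination iterator in a cifs_aio_ctx, flush the page cache first for
 * direct I/O, issue the async reads and then either return -EIOCBQUEUED or
 * wait for the reads to be collected.
 */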
4216 static ssize_t __cifs_readv(
4217 	struct kiocb *iocb, struct iov_iter *to, bool direct)
4218 {
4219 	size_t len;
4220 	struct file *file = iocb->ki_filp;
4221 	struct cifs_sb_info *cifs_sb;
4222 	struct cifsFileInfo *cfile;
4223 	struct cifs_tcon *tcon;
4224 	ssize_t rc, total_read = 0;
4225 	loff_t offset = iocb->ki_pos;
4226 	struct cifs_aio_ctx *ctx;
4227 
4228 	len = iov_iter_count(to);
4229 	if (!len)
4230 		return 0;
4231 
4232 	cifs_sb = CIFS_FILE_SB(file);
4233 	cfile = file->private_data;
4234 	tcon = tlink_tcon(cfile->tlink);
4235 
4236 	if (!tcon->ses->server->ops->async_readv)
4237 		return -ENOSYS;
4238 
4239 	if ((file->f_flags & O_ACCMODE) == O_WRONLY)
4240 		cifs_dbg(FYI, "attempting read on write only file instance\n");
4241 
4242 	ctx = cifs_aio_ctx_alloc();
4243 	if (!ctx)
4244 		return -ENOMEM;
4245 
4246 	ctx->pos	= offset;
4247 	ctx->direct_io	= direct;
4248 	ctx->len	= len;
4249 	ctx->cfile	= cifsFileInfo_get(cfile);
4250 	ctx->nr_pinned_pages = 0;
4251 
4252 	if (!is_sync_kiocb(iocb))
4253 		ctx->iocb = iocb;
4254 
4255 	if (user_backed_iter(to)) {
4256 		/*
4257 		 * Extract IOVEC/UBUF-type iterators to a BVEC-type iterator as
4258 		 * they contain references to the calling process's virtual
4259 		 * memory layout which won't be available in an async worker
4260 		 * thread.  This also takes a pin on every folio involved.
4261 		 */
4262 		rc = netfs_extract_user_iter(to, iov_iter_count(to),
4263 					     &ctx->iter, 0);
4264 		if (rc < 0) {
4265 			kref_put(&ctx->refcount, cifs_aio_ctx_release);
4266 			return rc;
4267 		}
4268 
4269 		ctx->nr_pinned_pages = rc;
4270 		ctx->bv = (void *)ctx->iter.bvec;
4271 		ctx->bv_need_unpin = iov_iter_extract_will_pin(to);
4272 		ctx->should_dirty = true;
4273 	} else if ((iov_iter_is_bvec(to) || iov_iter_is_kvec(to)) &&
4274 		   !is_sync_kiocb(iocb)) {
4275 		/*
4276 		 * If the op is asynchronous, we need to copy the list attached
4277 		 * to a BVEC/KVEC-type iterator, but we assume that the storage
4278 		 * will be retained by the caller; in any case, we may or may
4279 		 * not be able to pin the pages, so we don't try.
4280 		 */
4281 		ctx->bv = (void *)dup_iter(&ctx->iter, to, GFP_KERNEL);
4282 		if (!ctx->bv) {
4283 			kref_put(&ctx->refcount, cifs_aio_ctx_release);
4284 			return -ENOMEM;
4285 		}
4286 	} else {
4287 		/*
4288 		 * Otherwise, we just pass the iterator down as-is and rely on
4289 		 * the caller to make sure the pages referred to by the
4290 		 * iterator don't evaporate.
4291 		 */
4292 		ctx->iter = *to;
4293 	}
4294 
4295 	if (direct) {
4296 		rc = filemap_write_and_wait_range(file->f_inode->i_mapping,
4297 						  offset, offset + len - 1);
4298 		if (rc) {
4299 			kref_put(&ctx->refcount, cifs_aio_ctx_release);
4300 			return -EAGAIN;
4301 		}
4302 	}
4303 
4304 	/* grab a lock here because the read response handlers can access ctx */
4305 	mutex_lock(&ctx->aio_mutex);
4306 
4307 	rc = cifs_send_async_read(offset, len, cfile, cifs_sb, &ctx->list, ctx);
4308 
4309 	/* if at least one read request was sent successfully, then reset rc */
4310 	if (!list_empty(&ctx->list))
4311 		rc = 0;
4312 
4313 	mutex_unlock(&ctx->aio_mutex);
4314 
4315 	if (rc) {
4316 		kref_put(&ctx->refcount, cifs_aio_ctx_release);
4317 		return rc;
4318 	}
4319 
4320 	if (!is_sync_kiocb(iocb)) {
4321 		kref_put(&ctx->refcount, cifs_aio_ctx_release);
4322 		return -EIOCBQUEUED;
4323 	}
4324 
4325 	rc = wait_for_completion_killable(&ctx->done);
4326 	if (rc) {
4327 		mutex_lock(&ctx->aio_mutex);
4328 		ctx->rc = rc = -EINTR;
4329 		total_read = ctx->total_len;
4330 		mutex_unlock(&ctx->aio_mutex);
4331 	} else {
4332 		rc = ctx->rc;
4333 		total_read = ctx->total_len;
4334 	}
4335 
4336 	kref_put(&ctx->refcount, cifs_aio_ctx_release);
4337 
4338 	if (total_read) {
4339 		iocb->ki_pos += total_read;
4340 		return total_read;
4341 	}
4342 	return rc;
4343 }
4344 
4345 ssize_t cifs_direct_readv(struct kiocb *iocb, struct iov_iter *to)
4346 {
4347 	return __cifs_readv(iocb, to, true);
4348 }
4349 
4350 ssize_t cifs_user_readv(struct kiocb *iocb, struct iov_iter *to)
4351 {
4352 	return __cifs_readv(iocb, to, false);
4353 }
4354 
4355 ssize_t
4356 cifs_strict_readv(struct kiocb *iocb, struct iov_iter *to)
4357 {
4358 	struct inode *inode = file_inode(iocb->ki_filp);
4359 	struct cifsInodeInfo *cinode = CIFS_I(inode);
4360 	struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
4361 	struct cifsFileInfo *cfile = (struct cifsFileInfo *)
4362 						iocb->ki_filp->private_data;
4363 	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
4364 	int rc = -EACCES;
4365 
4366 	/*
4367 	 * In strict cache mode we need to read from the server all the time
4368 	 * if we don't have level II oplock because the server can delay mtime
4369 	 * change - so we can't make a decision about inode invalidating.
4370 	 * We can also fail on page reads if there are mandatory locks
4371 	 * on pages affected by this read but not on the region from pos to
4372 	 * pos+len-1.
4373 	 */
4374 	if (!CIFS_CACHE_READ(cinode))
4375 		return cifs_user_readv(iocb, to);
4376 
4377 	if (cap_unix(tcon->ses) &&
4378 	    (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
4379 	    ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
4380 		return generic_file_read_iter(iocb, to);
4381 
4382 	/*
4383 	 * We need to hold the sem to be sure nobody modifies lock list
4384 	 * with a brlock that prevents reading.
4385 	 */
4386 	down_read(&cinode->lock_sem);
4387 	if (!cifs_find_lock_conflict(cfile, iocb->ki_pos, iov_iter_count(to),
4388 				     tcon->ses->server->vals->shared_lock_type,
4389 				     0, NULL, CIFS_READ_OP))
4390 		rc = generic_file_read_iter(iocb, to);
4391 	up_read(&cinode->lock_sem);
4392 	return rc;
4393 }
4394 
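/*
 * Synchronous read helper (used by cifs_readpage_worker()): loop issuing
 * ->sync_read() calls of at most rsize bytes until read_size bytes have
 * been copied into read_data or the server returns an error or short read.
 */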
4395 static ssize_t
4396 cifs_read(struct file *file, char *read_data, size_t read_size, loff_t *offset)
4397 {
4398 	int rc = -EACCES;
4399 	unsigned int bytes_read = 0;
4400 	unsigned int total_read;
4401 	unsigned int current_read_size;
4402 	unsigned int rsize;
4403 	struct cifs_sb_info *cifs_sb;
4404 	struct cifs_tcon *tcon;
4405 	struct TCP_Server_Info *server;
4406 	unsigned int xid;
4407 	char *cur_offset;
4408 	struct cifsFileInfo *open_file;
4409 	struct cifs_io_parms io_parms = {0};
4410 	int buf_type = CIFS_NO_BUFFER;
4411 	__u32 pid;
4412 
4413 	xid = get_xid();
4414 	cifs_sb = CIFS_FILE_SB(file);
4415 
4416 	/* FIXME: set up handlers for larger reads and/or convert to async */
4417 	rsize = min_t(unsigned int, cifs_sb->ctx->rsize, CIFSMaxBufSize);
4418 
4419 	if (file->private_data == NULL) {
4420 		rc = -EBADF;
4421 		free_xid(xid);
4422 		return rc;
4423 	}
4424 	open_file = file->private_data;
4425 	tcon = tlink_tcon(open_file->tlink);
4426 	server = cifs_pick_channel(tcon->ses);
4427 
4428 	if (!server->ops->sync_read) {
4429 		free_xid(xid);
4430 		return -ENOSYS;
4431 	}
4432 
4433 	if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
4434 		pid = open_file->pid;
4435 	else
4436 		pid = current->tgid;
4437 
4438 	if ((file->f_flags & O_ACCMODE) == O_WRONLY)
4439 		cifs_dbg(FYI, "attempting read on write only file instance\n");
4440 
4441 	for (total_read = 0, cur_offset = read_data; read_size > total_read;
4442 	     total_read += bytes_read, cur_offset += bytes_read) {
4443 		do {
4444 			current_read_size = min_t(uint, read_size - total_read,
4445 						  rsize);
4446 			/*
4447 			 * For Windows ME and 9x we do not want to request more
4448 			 * than was negotiated, since the server will refuse
4449 			 * the read otherwise.
4450 			 */
4451 			if (!(tcon->ses->capabilities &
4452 				tcon->ses->server->vals->cap_large_files)) {
4453 				current_read_size = min_t(uint,
4454 					current_read_size, CIFSMaxBufSize);
4455 			}
4456 			if (open_file->invalidHandle) {
4457 				rc = cifs_reopen_file(open_file, true);
4458 				if (rc != 0)
4459 					break;
4460 			}
4461 			io_parms.pid = pid;
4462 			io_parms.tcon = tcon;
4463 			io_parms.offset = *offset;
4464 			io_parms.length = current_read_size;
4465 			io_parms.server = server;
4466 			rc = server->ops->sync_read(xid, &open_file->fid, &io_parms,
4467 						    &bytes_read, &cur_offset,
4468 						    &buf_type);
4469 		} while (rc == -EAGAIN);
4470 
4471 		if (rc || (bytes_read == 0)) {
4472 			if (total_read) {
4473 				break;
4474 			} else {
4475 				free_xid(xid);
4476 				return rc;
4477 			}
4478 		} else {
4479 			cifs_stats_bytes_read(tcon, total_read);
4480 			*offset += bytes_read;
4481 		}
4482 	}
4483 	free_xid(xid);
4484 	return total_read;
4485 }
4486 
4487 /*
4488  * If the page is mmap'ed into a process' page tables, then we need to make
4489  * sure that it doesn't change while being written back.
4490  */
4491 static vm_fault_t cifs_page_mkwrite(struct vm_fault *vmf)
4492 {
4493 	struct folio *folio = page_folio(vmf->page);
4494 
4495 	/* Wait for the folio to be written to the cache before we allow it to
4496 	 * be modified.  We then assume the entire folio will need writing back.
4497 	 */
4498 #ifdef CONFIG_CIFS_FSCACHE
4499 	if (folio_test_fscache(folio) &&
4500 	    folio_wait_fscache_killable(folio) < 0)
4501 		return VM_FAULT_RETRY;
4502 #endif
4503 
4504 	folio_wait_writeback(folio);
4505 
4506 	if (folio_lock_killable(folio) < 0)
4507 		return VM_FAULT_RETRY;
4508 	return VM_FAULT_LOCKED;
4509 }
4510 
4511 static const struct vm_operations_struct cifs_file_vm_ops = {
4512 	.fault = filemap_fault,
4513 	.map_pages = filemap_map_pages,
4514 	.page_mkwrite = cifs_page_mkwrite,
4515 };
4516 
4517 int cifs_file_strict_mmap(struct file *file, struct vm_area_struct *vma)
4518 {
4519 	int xid, rc = 0;
4520 	struct inode *inode = file_inode(file);
4521 
4522 	xid = get_xid();
4523 
4524 	if (!CIFS_CACHE_READ(CIFS_I(inode)))
4525 		rc = cifs_zap_mapping(inode);
4526 	if (!rc)
4527 		rc = generic_file_mmap(file, vma);
4528 	if (!rc)
4529 		vma->vm_ops = &cifs_file_vm_ops;
4530 
4531 	free_xid(xid);
4532 	return rc;
4533 }
4534 
4535 int cifs_file_mmap(struct file *file, struct vm_area_struct *vma)
4536 {
4537 	int rc, xid;
4538 
4539 	xid = get_xid();
4540 
4541 	rc = cifs_revalidate_file(file);
4542 	if (rc)
4543 		cifs_dbg(FYI, "Validation prior to mmap failed, error=%d\n",
4544 			 rc);
4545 	if (!rc)
4546 		rc = generic_file_mmap(file, vma);
4547 	if (!rc)
4548 		vma->vm_ops = &cifs_file_vm_ops;
4549 
4550 	free_xid(xid);
4551 	return rc;
4552 }
4553 
4554 /*
4555  * Unlock a bunch of folios in the pagecache.
4556  */
4557 static void cifs_unlock_folios(struct address_space *mapping, pgoff_t first, pgoff_t last)
4558 {
4559 	struct folio *folio;
4560 	XA_STATE(xas, &mapping->i_pages, first);
4561 
4562 	rcu_read_lock();
4563 	xas_for_each(&xas, folio, last) {
4564 		folio_unlock(folio);
4565 	}
4566 	rcu_read_unlock();
4567 }
4568 
4569 static void cifs_readahead_complete(struct work_struct *work)
4570 {
4571 	struct cifs_readdata *rdata = container_of(work,
4572 						   struct cifs_readdata, work);
4573 	struct folio *folio;
4574 	pgoff_t last;
4575 	bool good = rdata->result == 0 || (rdata->result == -EAGAIN && rdata->got_bytes);
4576 
4577 	XA_STATE(xas, &rdata->mapping->i_pages, rdata->offset / PAGE_SIZE);
4578 
4579 	if (good)
4580 		cifs_readahead_to_fscache(rdata->mapping->host,
4581 					  rdata->offset, rdata->bytes);
4582 
4583 	if (iov_iter_count(&rdata->iter) > 0)
4584 		iov_iter_zero(iov_iter_count(&rdata->iter), &rdata->iter);
4585 
4586 	last = (rdata->offset + rdata->bytes - 1) / PAGE_SIZE;
4587 
4588 	rcu_read_lock();
4589 	xas_for_each(&xas, folio, last) {
4590 		if (good) {
4591 			flush_dcache_folio(folio);
4592 			folio_mark_uptodate(folio);
4593 		}
4594 		folio_unlock(folio);
4595 	}
4596 	rcu_read_unlock();
4597 
4598 	kref_put(&rdata->refcount, cifs_readdata_release);
4599 }
4600 
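/*
 * Readahead: satisfy as much of the request as possible from fscache, then
 * chop the remainder into rsize-sized async reads; the folios are unlocked
 * from cifs_readahead_complete() when each read finishes.
 */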
4601 static void cifs_readahead(struct readahead_control *ractl)
4602 {
4603 	struct cifsFileInfo *open_file = ractl->file->private_data;
4604 	struct cifs_sb_info *cifs_sb = CIFS_FILE_SB(ractl->file);
4605 	struct TCP_Server_Info *server;
4606 	unsigned int xid, nr_pages, cache_nr_pages = 0;
4607 	unsigned int ra_pages;
4608 	pgoff_t next_cached = ULONG_MAX, ra_index;
4609 	bool caching = fscache_cookie_enabled(cifs_inode_cookie(ractl->mapping->host)) &&
4610 		cifs_inode_cookie(ractl->mapping->host)->cache_priv;
4611 	bool check_cache = caching;
4612 	pid_t pid;
4613 	int rc = 0;
4614 
4615 	/* Note that readahead_count() lags behind our dequeuing of pages from
4616 	 * the ractl, so we have to keep track for ourselves.
4617 	 */
4618 	ra_pages = readahead_count(ractl);
4619 	ra_index = readahead_index(ractl);
4620 
4621 	xid = get_xid();
4622 
4623 	if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
4624 		pid = open_file->pid;
4625 	else
4626 		pid = current->tgid;
4627 
4628 	server = cifs_pick_channel(tlink_tcon(open_file->tlink)->ses);
4629 
4630 	cifs_dbg(FYI, "%s: file=%p mapping=%p num_pages=%u\n",
4631 		 __func__, ractl->file, ractl->mapping, ra_pages);
4632 
4633 	/*
4634 	 * Chop the readahead request up into rsize-sized read requests.
4635 	 */
4636 	while ((nr_pages = ra_pages)) {
4637 		unsigned int i, rsize;
4638 		struct cifs_readdata *rdata;
4639 		struct cifs_credits credits_on_stack;
4640 		struct cifs_credits *credits = &credits_on_stack;
4641 		struct folio *folio;
4642 		pgoff_t fsize;
4643 
4644 		/*
4645 		 * Find out if we have anything cached in the range of
4646 		 * interest, and if so, where the next chunk of cached data is.
4647 		 */
4648 		if (caching) {
4649 			if (check_cache) {
4650 				rc = cifs_fscache_query_occupancy(
4651 					ractl->mapping->host, ra_index, nr_pages,
4652 					&next_cached, &cache_nr_pages);
4653 				if (rc < 0)
4654 					caching = false;
4655 				check_cache = false;
4656 			}
4657 
4658 			if (ra_index == next_cached) {
4659 				/*
4660 				 * TODO: Send a whole batch of pages to be read
4661 				 * by the cache.
4662 				 */
4663 				folio = readahead_folio(ractl);
4664 				fsize = folio_nr_pages(folio);
4665 				ra_pages -= fsize;
4666 				ra_index += fsize;
4667 				if (cifs_readpage_from_fscache(ractl->mapping->host,
4668 							       &folio->page) < 0) {
4669 					/*
4670 					 * TODO: Deal with cache read failure
4671 					 * here, but for the moment, delegate
4672 					 * that to readpage.
4673 					 */
4674 					caching = false;
4675 				}
4676 				folio_unlock(folio);
4677 				next_cached += fsize;
4678 				cache_nr_pages -= fsize;
4679 				if (cache_nr_pages == 0)
4680 					check_cache = true;
4681 				continue;
4682 			}
4683 		}
4684 
4685 		if (open_file->invalidHandle) {
4686 			rc = cifs_reopen_file(open_file, true);
4687 			if (rc) {
4688 				if (rc == -EAGAIN)
4689 					continue;
4690 				break;
4691 			}
4692 		}
4693 
4694 		if (cifs_sb->ctx->rsize == 0)
4695 			cifs_sb->ctx->rsize =
4696 				server->ops->negotiate_rsize(tlink_tcon(open_file->tlink),
4697 							     cifs_sb->ctx);
4698 
4699 		rc = server->ops->wait_mtu_credits(server, cifs_sb->ctx->rsize,
4700 						   &rsize, credits);
4701 		if (rc)
4702 			break;
4703 		nr_pages = min_t(size_t, rsize / PAGE_SIZE, ra_pages);
4704 		if (next_cached != ULONG_MAX)
4705 			nr_pages = min_t(size_t, nr_pages, next_cached - ra_index);
4706 
4707 		/*
4708 		 * Give up immediately if rsize is too small to read an entire
4709 		 * page. The VFS will fall back to readpage. We should never
4710 		 * reach this point however since we set ra_pages to 0 when the
4711 		 * rsize is smaller than a cache page.
4712 		 */
4713 		if (unlikely(!nr_pages)) {
4714 			add_credits_and_wake_if(server, credits, 0);
4715 			break;
4716 		}
4717 
4718 		rdata = cifs_readdata_alloc(cifs_readahead_complete);
4719 		if (!rdata) {
4720 			/* best to give up if we're out of mem */
4721 			add_credits_and_wake_if(server, credits, 0);
4722 			break;
4723 		}
4724 
4725 		rdata->offset	= ra_index * PAGE_SIZE;
4726 		rdata->bytes	= nr_pages * PAGE_SIZE;
4727 		rdata->cfile	= cifsFileInfo_get(open_file);
4728 		rdata->server	= server;
4729 		rdata->mapping	= ractl->mapping;
4730 		rdata->pid	= pid;
4731 		rdata->credits	= credits_on_stack;
4732 
4733 		for (i = 0; i < nr_pages; i++) {
4734 			if (!readahead_folio(ractl))
4735 				WARN_ON(1);
4736 		}
4737 		ra_pages -= nr_pages;
4738 		ra_index += nr_pages;
4739 
4740 		iov_iter_xarray(&rdata->iter, ITER_DEST, &rdata->mapping->i_pages,
4741 				rdata->offset, rdata->bytes);
4742 
4743 		rc = adjust_credits(server, &rdata->credits, rdata->bytes);
4744 		if (!rc) {
4745 			if (rdata->cfile->invalidHandle)
4746 				rc = -EAGAIN;
4747 			else
4748 				rc = server->ops->async_readv(rdata);
4749 		}
4750 
4751 		if (rc) {
4752 			add_credits_and_wake_if(server, &rdata->credits, 0);
4753 			cifs_unlock_folios(rdata->mapping,
4754 					   rdata->offset / PAGE_SIZE,
4755 					   (rdata->offset + rdata->bytes - 1) / PAGE_SIZE);
4756 			/* Fallback to the readpage in error/reconnect cases */
4757 			kref_put(&rdata->refcount, cifs_readdata_release);
4758 			break;
4759 		}
4760 
4761 		kref_put(&rdata->refcount, cifs_readdata_release);
4762 	}
4763 
4764 	free_xid(xid);
4765 }
4766 
4767 /*
4768  * cifs_readpage_worker must be called with the page pinned
4769  */
4770 static int cifs_readpage_worker(struct file *file, struct page *page,
4771 	loff_t *poffset)
4772 {
4773 	struct inode *inode = file_inode(file);
4774 	struct timespec64 atime, mtime;
4775 	char *read_data;
4776 	int rc;
4777 
4778 	/* Is the page cached? */
4779 	rc = cifs_readpage_from_fscache(inode, page);
4780 	if (rc == 0)
4781 		goto read_complete;
4782 
4783 	read_data = kmap(page);
4784 	/* for reads over a certain size we could initiate async readahead */
4785 
4786 	rc = cifs_read(file, read_data, PAGE_SIZE, poffset);
4787 
4788 	if (rc < 0)
4789 		goto io_error;
4790 	else
4791 		cifs_dbg(FYI, "Bytes read %d\n", rc);
4792 
4793 	/* we do not want atime to be less than mtime, it broke some apps */
4794 	atime = inode_set_atime_to_ts(inode, current_time(inode));
4795 	mtime = inode_get_mtime(inode);
4796 	if (timespec64_compare(&atime, &mtime) < 0)
4797 		inode_set_atime_to_ts(inode, inode_get_mtime(inode));
4798 
4799 	if (PAGE_SIZE > rc)
4800 		memset(read_data + rc, 0, PAGE_SIZE - rc);
4801 
4802 	flush_dcache_page(page);
4803 	SetPageUptodate(page);
4804 	rc = 0;
4805 
4806 io_error:
4807 	kunmap(page);
4808 
4809 read_complete:
4810 	unlock_page(page);
4811 	return rc;
4812 }
4813 
4814 static int cifs_read_folio(struct file *file, struct folio *folio)
4815 {
4816 	struct page *page = &folio->page;
4817 	loff_t offset = page_file_offset(page);
4818 	int rc = -EACCES;
4819 	unsigned int xid;
4820 
4821 	xid = get_xid();
4822 
4823 	if (file->private_data == NULL) {
4824 		rc = -EBADF;
4825 		free_xid(xid);
4826 		return rc;
4827 	}
4828 
4829 	cifs_dbg(FYI, "read_folio %p at offset %d 0x%x\n",
4830 		 page, (int)offset, (int)offset);
4831 
4832 	rc = cifs_readpage_worker(file, page, &offset);
4833 
4834 	free_xid(xid);
4835 	return rc;
4836 }
4837 
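/* Return 1 if the inode has at least one open file with write access, else 0. */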
4838 static int is_inode_writable(struct cifsInodeInfo *cifs_inode)
4839 {
4840 	struct cifsFileInfo *open_file;
4841 
4842 	spin_lock(&cifs_inode->open_file_lock);
4843 	list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
4844 		if (OPEN_FMODE(open_file->f_flags) & FMODE_WRITE) {
4845 			spin_unlock(&cifs_inode->open_file_lock);
4846 			return 1;
4847 		}
4848 	}
4849 	spin_unlock(&cifs_inode->open_file_lock);
4850 	return 0;
4851 }
4852 
4853 /* We do not want to update the file size from the server for inodes
4854    open for write, to avoid races with writepage extending the file.
4855    In the future we could consider refreshing the inode only on
4856    increases in the file size, but this is tricky to do without
4857    racing with writebehind page caching in the current Linux kernel
4858    design. */
4859 bool is_size_safe_to_change(struct cifsInodeInfo *cifsInode, __u64 end_of_file,
4860 			    bool from_readdir)
4861 {
4862 	if (!cifsInode)
4863 		return true;
4864 
4865 	if (is_inode_writable(cifsInode) ||
4866 		((cifsInode->oplock & CIFS_CACHE_RW_FLG) != 0 && from_readdir)) {
4867 		/* This inode is open for write at least once */
4868 		struct cifs_sb_info *cifs_sb;
4869 
4870 		cifs_sb = CIFS_SB(cifsInode->netfs.inode.i_sb);
4871 		if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DIRECT_IO) {
4872 			/* since there is no page cache to corrupt on direct I/O
4873 			   we can change the size safely */
4874 			return true;
4875 		}
4876 
4877 		if (i_size_read(&cifsInode->netfs.inode) < end_of_file)
4878 			return true;
4879 
4880 		return false;
4881 	} else
4882 		return true;
4883 }
4884 
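/*
 * ->write_begin(): grab (and, if necessary, read or zero) the page that the
 * copy in cifs_write_end() will land in.
 */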
4885 static int cifs_write_begin(struct file *file, struct address_space *mapping,
4886 			loff_t pos, unsigned len,
4887 			struct page **pagep, void **fsdata)
4888 {
4889 	int oncethru = 0;
4890 	pgoff_t index = pos >> PAGE_SHIFT;
4891 	loff_t offset = pos & (PAGE_SIZE - 1);
4892 	loff_t page_start = pos & PAGE_MASK;
4893 	loff_t i_size;
4894 	struct page *page;
4895 	int rc = 0;
4896 
4897 	cifs_dbg(FYI, "write_begin from %lld len %d\n", (long long)pos, len);
4898 
4899 start:
4900 	page = grab_cache_page_write_begin(mapping, index);
4901 	if (!page) {
4902 		rc = -ENOMEM;
4903 		goto out;
4904 	}
4905 
4906 	if (PageUptodate(page))
4907 		goto out;
4908 
4909 	/*
4910 	 * If we write a full page it will be up to date, no need to read from
4911 	 * the server. If the write is short, we'll end up doing a sync write
4912 	 * instead.
4913 	 */
4914 	if (len == PAGE_SIZE)
4915 		goto out;
4916 
4917 	/*
4918 	 * optimize away the read when we have an oplock, and we're not
4919 	 * expecting to use any of the data we'd be reading in. That
4920 	 * is, when the page lies beyond the EOF, or straddles the EOF
4921 	 * and the write will cover all of the existing data.
4922 	 */
4923 	if (CIFS_CACHE_READ(CIFS_I(mapping->host))) {
4924 		i_size = i_size_read(mapping->host);
4925 		if (page_start >= i_size ||
4926 		    (offset == 0 && (pos + len) >= i_size)) {
4927 			zero_user_segments(page, 0, offset,
4928 					   offset + len,
4929 					   PAGE_SIZE);
4930 			/*
4931 			 * PageChecked means that the parts of the page
4932 			 * to which we're not writing are considered up
4933 			 * to date. Once the data is copied to the
4934 			 * page, it can be set uptodate.
4935 			 */
4936 			SetPageChecked(page);
4937 			goto out;
4938 		}
4939 	}
4940 
4941 	if ((file->f_flags & O_ACCMODE) != O_WRONLY && !oncethru) {
4942 		/*
4943 		 * might as well read a page, it is fast enough. If we get
4944 		 * an error, we don't need to return it. cifs_write_end will
4945 		 * do a sync write instead since PG_uptodate isn't set.
4946 		 */
4947 		cifs_readpage_worker(file, page, &page_start);
4948 		put_page(page);
4949 		oncethru = 1;
4950 		goto start;
4951 	} else {
4952 		/* we could try using another file handle if there is one -
4953 		   but how would we lock it to prevent close of that handle
4954 		   racing with this read? In any case
4955 		   this will be written out by write_end so is fine */
4956 	}
4957 out:
4958 	*pagep = page;
4959 	return rc;
4960 }
4961 
4962 static bool cifs_release_folio(struct folio *folio, gfp_t gfp)
4963 {
4964 	if (folio_test_private(folio))
4965 		return 0;
4966 	if (folio_test_fscache(folio)) {
4967 		if (current_is_kswapd() || !(gfp & __GFP_FS))
4968 			return false;
4969 		folio_wait_fscache(folio);
4970 	}
4971 	fscache_note_page_release(cifs_inode_cookie(folio->mapping->host));
4972 	return true;
4973 }
4974 
4975 static void cifs_invalidate_folio(struct folio *folio, size_t offset,
4976 				 size_t length)
4977 {
4978 	folio_wait_fscache(folio);
4979 }
4980 
4981 static int cifs_launder_folio(struct folio *folio)
4982 {
4983 	int rc = 0;
4984 	loff_t range_start = folio_pos(folio);
4985 	loff_t range_end = range_start + folio_size(folio);
4986 	struct writeback_control wbc = {
4987 		.sync_mode = WB_SYNC_ALL,
4988 		.nr_to_write = 0,
4989 		.range_start = range_start,
4990 		.range_end = range_end,
4991 	};
4992 
4993 	cifs_dbg(FYI, "Launder page: %lu\n", folio->index);
4994 
4995 	if (folio_clear_dirty_for_io(folio))
4996 		rc = cifs_writepage_locked(&folio->page, &wbc);
4997 
4998 	folio_wait_fscache(folio);
4999 	return rc;
5000 }
5001 
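/*
 * Worker invoked when the server sends an oplock/lease break: downgrade the
 * cached state, flush or invalidate the page cache as needed, push any
 * cached byte-range locks back to the server and, unless the handle has
 * already been closed, send the oplock break acknowledgment.
 */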
5002 void cifs_oplock_break(struct work_struct *work)
5003 {
5004 	struct cifsFileInfo *cfile = container_of(work, struct cifsFileInfo,
5005 						  oplock_break);
5006 	struct inode *inode = d_inode(cfile->dentry);
5007 	struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
5008 	struct cifsInodeInfo *cinode = CIFS_I(inode);
5009 	struct cifs_tcon *tcon;
5010 	struct TCP_Server_Info *server;
5011 	struct tcon_link *tlink;
5012 	int rc = 0;
5013 	bool purge_cache = false, oplock_break_cancelled;
5014 	__u64 persistent_fid, volatile_fid;
5015 	__u16 net_fid;
5016 
5017 	wait_on_bit(&cinode->flags, CIFS_INODE_PENDING_WRITERS,
5018 			TASK_UNINTERRUPTIBLE);
5019 
5020 	tlink = cifs_sb_tlink(cifs_sb);
5021 	if (IS_ERR(tlink))
5022 		goto out;
5023 	tcon = tlink_tcon(tlink);
5024 	server = tcon->ses->server;
5025 
5026 	server->ops->downgrade_oplock(server, cinode, cfile->oplock_level,
5027 				      cfile->oplock_epoch, &purge_cache);
5028 
5029 	if (!CIFS_CACHE_WRITE(cinode) && CIFS_CACHE_READ(cinode) &&
5030 						cifs_has_mand_locks(cinode)) {
5031 		cifs_dbg(FYI, "Reset oplock to None for inode=%p due to mand locks\n",
5032 			 inode);
5033 		cinode->oplock = 0;
5034 	}
5035 
5036 	if (inode && S_ISREG(inode->i_mode)) {
5037 		if (CIFS_CACHE_READ(cinode))
5038 			break_lease(inode, O_RDONLY);
5039 		else
5040 			break_lease(inode, O_WRONLY);
5041 		rc = filemap_fdatawrite(inode->i_mapping);
5042 		if (!CIFS_CACHE_READ(cinode) || purge_cache) {
5043 			rc = filemap_fdatawait(inode->i_mapping);
5044 			mapping_set_error(inode->i_mapping, rc);
5045 			cifs_zap_mapping(inode);
5046 		}
5047 		cifs_dbg(FYI, "Oplock flush inode %p rc %d\n", inode, rc);
5048 		if (CIFS_CACHE_WRITE(cinode))
5049 			goto oplock_break_ack;
5050 	}
5051 
5052 	rc = cifs_push_locks(cfile);
5053 	if (rc)
5054 		cifs_dbg(VFS, "Push locks rc = %d\n", rc);
5055 
5056 oplock_break_ack:
5057 	/*
5058 	 * When an oplock break is received and there are no active file
5059 	 * handles, only cached ones, schedule the deferred close immediately
5060 	 * so that a new open will not use a cached handle.
5061 	 */
5062 
5063 	if (!CIFS_CACHE_HANDLE(cinode) && !list_empty(&cinode->deferred_closes))
5064 		cifs_close_deferred_file(cinode);
5065 
5066 	persistent_fid = cfile->fid.persistent_fid;
5067 	volatile_fid = cfile->fid.volatile_fid;
5068 	net_fid = cfile->fid.netfid;
5069 	oplock_break_cancelled = cfile->oplock_break_cancelled;
5070 
5071 	_cifsFileInfo_put(cfile, false /* do not wait for ourself */, false);
5072 	/*
5073 	 * MS-SMB2 3.2.5.19.1 and 3.2.5.19.2 (and MS-CIFS 3.2.5.42) do not require
5074 	 * an acknowledgment to be sent when the file has already been closed.
5075 	 */
5076 	spin_lock(&cinode->open_file_lock);
5077 	/* check list empty since can race with kill_sb calling tree disconnect */
5078 	if (!oplock_break_cancelled && !list_empty(&cinode->openFileList)) {
5079 		spin_unlock(&cinode->open_file_lock);
5080 		rc = server->ops->oplock_response(tcon, persistent_fid,
5081 						  volatile_fid, net_fid, cinode);
5082 		cifs_dbg(FYI, "Oplock release rc = %d\n", rc);
5083 	} else
5084 		spin_unlock(&cinode->open_file_lock);
5085 
5086 	cifs_put_tlink(tlink);
5087 out:
5088 	cifs_done_oplock_break(cinode);
5089 }
5090 
5091 /*
5092  * The presence of cifs_direct_io() in the address space ops vector
5093  * allows open() with O_DIRECT flags which would have failed otherwise.
5094  *
5095  * In the non-cached mode (mount with cache=none), we shunt off direct read and write requests
5096  * so this method should never be called.
5097  *
5098  * Direct IO is not yet supported in the cached mode.
5099  */
5100 static ssize_t
5101 cifs_direct_io(struct kiocb *iocb, struct iov_iter *iter)
5102 {
5103 	/*
5104 	 * FIXME
5105 	 * Eventually need to support direct IO for non forcedirectio mounts
5106 	 */
5107 	return -EINVAL;
5108 }
5109 
5110 static int cifs_swap_activate(struct swap_info_struct *sis,
5111 			      struct file *swap_file, sector_t *span)
5112 {
5113 	struct cifsFileInfo *cfile = swap_file->private_data;
5114 	struct inode *inode = swap_file->f_mapping->host;
5115 	unsigned long blocks;
5116 	long long isize;
5117 
5118 	cifs_dbg(FYI, "swap activate\n");
5119 
5120 	if (!swap_file->f_mapping->a_ops->swap_rw)
5121 		/* Cannot support swap */
5122 		return -EINVAL;
5123 
5124 	spin_lock(&inode->i_lock);
5125 	blocks = inode->i_blocks;
5126 	isize = inode->i_size;
5127 	spin_unlock(&inode->i_lock);
5128 	if (blocks*512 < isize) {
5129 		pr_warn("swap activate: swapfile has holes\n");
5130 		return -EINVAL;
5131 	}
5132 	*span = sis->pages;
5133 
5134 	pr_warn_once("Swap support over SMB3 is experimental\n");
5135 
5136 	/*
5137 	 * TODO: consider adding ACL (or documenting how) to prevent other
5138 	 * users (on this or other systems) from reading it
5139 	 */
5140 
5141 
5142 	/* TODO: add sk_set_memalloc(inet) or similar */
5143 
5144 	if (cfile)
5145 		cfile->swapfile = true;
5146 	/*
5147 	 * TODO: Since file already open, we can't open with DENY_ALL here
5148 	 * but we could add call to grab a byte range lock to prevent others
5149 	 * from reading or writing the file
5150 	 */
5151 
5152 	sis->flags |= SWP_FS_OPS;
5153 	return add_swap_extent(sis, 0, sis->max, 0);
5154 }
5155 
5156 static void cifs_swap_deactivate(struct file *file)
5157 {
5158 	struct cifsFileInfo *cfile = file->private_data;
5159 
5160 	cifs_dbg(FYI, "swap deactivate\n");
5161 
5162 	/* TODO: undo sk_set_memalloc(inet) will eventually be needed */
5163 
5164 	if (cfile)
5165 		cfile->swapfile = false;
5166 
5167 	/* do we need to unpin (or unlock) the file */
5168 }
5169 
5170 /*
5171  * Mark a page as having been made dirty and thus needing writeback.  We also
5172  * need to pin the cache object to write back to.
5173  */
5174 #ifdef CONFIG_CIFS_FSCACHE
5175 static bool cifs_dirty_folio(struct address_space *mapping, struct folio *folio)
5176 {
5177 	return fscache_dirty_folio(mapping, folio,
5178 					cifs_inode_cookie(mapping->host));
5179 }
5180 #else
5181 #define cifs_dirty_folio filemap_dirty_folio
5182 #endif
5183 
5184 const struct address_space_operations cifs_addr_ops = {
5185 	.read_folio = cifs_read_folio,
5186 	.readahead = cifs_readahead,
5187 	.writepages = cifs_writepages,
5188 	.write_begin = cifs_write_begin,
5189 	.write_end = cifs_write_end,
5190 	.dirty_folio = cifs_dirty_folio,
5191 	.release_folio = cifs_release_folio,
5192 	.direct_IO = cifs_direct_io,
5193 	.invalidate_folio = cifs_invalidate_folio,
5194 	.launder_folio = cifs_launder_folio,
5195 	.migrate_folio = filemap_migrate_folio,
5196 	/*
5197 	 * TODO: investigate and if useful we could add an is_dirty_writeback
5198 	 * helper if needed
5199 	 */
5200 	.swap_activate = cifs_swap_activate,
5201 	.swap_deactivate = cifs_swap_deactivate,
5202 };
5203 
5204 /*
5205  * cifs_readahead requires the server to support a buffer large enough to
5206  * contain the header plus one complete page of data.  Otherwise, we need
5207  * to leave cifs_readahead out of the address space operations.
5208  */
5209 const struct address_space_operations cifs_addr_ops_smallbuf = {
5210 	.read_folio = cifs_read_folio,
5211 	.writepages = cifs_writepages,
5212 	.write_begin = cifs_write_begin,
5213 	.write_end = cifs_write_end,
5214 	.dirty_folio = cifs_dirty_folio,
5215 	.release_folio = cifs_release_folio,
5216 	.invalidate_folio = cifs_invalidate_folio,
5217 	.launder_folio = cifs_launder_folio,
5218 	.migrate_folio = filemap_migrate_folio,
5219 };
5220