xref: /openbmc/linux/fs/smb/client/file.c (revision 051a246b)
1 // SPDX-License-Identifier: LGPL-2.1
2 /*
3  *
4  *   vfs operations that deal with files
5  *
6  *   Copyright (C) International Business Machines  Corp., 2002,2010
7  *   Author(s): Steve French (sfrench@us.ibm.com)
8  *              Jeremy Allison (jra@samba.org)
9  *
10  */
11 #include <linux/fs.h>
12 #include <linux/filelock.h>
13 #include <linux/backing-dev.h>
14 #include <linux/stat.h>
15 #include <linux/fcntl.h>
16 #include <linux/pagemap.h>
17 #include <linux/pagevec.h>
18 #include <linux/writeback.h>
19 #include <linux/task_io_accounting_ops.h>
20 #include <linux/delay.h>
21 #include <linux/mount.h>
22 #include <linux/slab.h>
23 #include <linux/swap.h>
24 #include <linux/mm.h>
25 #include <asm/div64.h>
26 #include "cifsfs.h"
27 #include "cifspdu.h"
28 #include "cifsglob.h"
29 #include "cifsproto.h"
30 #include "smb2proto.h"
31 #include "cifs_unicode.h"
32 #include "cifs_debug.h"
33 #include "cifs_fs_sb.h"
34 #include "fscache.h"
35 #include "smbdirect.h"
36 #include "fs_context.h"
37 #include "cifs_ioctl.h"
38 #include "cached_dir.h"
39 
40 /*
41  * Remove the dirty flags from a span of pages.
42  */
43 static void cifs_undirty_folios(struct inode *inode, loff_t start, unsigned int len)
44 {
45 	struct address_space *mapping = inode->i_mapping;
46 	struct folio *folio;
47 	pgoff_t end;
48 
49 	XA_STATE(xas, &mapping->i_pages, start / PAGE_SIZE);
50 
51 	rcu_read_lock();
52 
53 	end = (start + len - 1) / PAGE_SIZE;
54 	xas_for_each_marked(&xas, folio, end, PAGECACHE_TAG_DIRTY) {
55 		if (xas_retry(&xas, folio))
56 			continue;
57 		xas_pause(&xas);
58 		rcu_read_unlock();
59 		folio_lock(folio);
60 		folio_clear_dirty_for_io(folio);
61 		folio_unlock(folio);
62 		rcu_read_lock();
63 	}
64 
65 	rcu_read_unlock();
66 }
67 
68 /*
69  * Completion of write to server.
70  */
71 void cifs_pages_written_back(struct inode *inode, loff_t start, unsigned int len)
72 {
73 	struct address_space *mapping = inode->i_mapping;
74 	struct folio *folio;
75 	pgoff_t end;
76 
77 	XA_STATE(xas, &mapping->i_pages, start / PAGE_SIZE);
78 
79 	if (!len)
80 		return;
81 
82 	rcu_read_lock();
83 
84 	end = (start + len - 1) / PAGE_SIZE;
85 	xas_for_each(&xas, folio, end) {
86 		if (xas_retry(&xas, folio))
87 			continue;
88 		if (!folio_test_writeback(folio)) {
89 			WARN_ONCE(1, "bad %x @%llx page %lx %lx\n",
90 				  len, start, folio->index, end);
91 			continue;
92 		}
93 
94 		folio_detach_private(folio);
95 		folio_end_writeback(folio);
96 	}
97 
98 	rcu_read_unlock();
99 }
100 
101 /*
102  * Failure of write to server.
103  */
104 void cifs_pages_write_failed(struct inode *inode, loff_t start, unsigned int len)
105 {
106 	struct address_space *mapping = inode->i_mapping;
107 	struct folio *folio;
108 	pgoff_t end;
109 
110 	XA_STATE(xas, &mapping->i_pages, start / PAGE_SIZE);
111 
112 	if (!len)
113 		return;
114 
115 	rcu_read_lock();
116 
117 	end = (start + len - 1) / PAGE_SIZE;
118 	xas_for_each(&xas, folio, end) {
119 		if (xas_retry(&xas, folio))
120 			continue;
121 		if (!folio_test_writeback(folio)) {
122 			WARN_ONCE(1, "bad %x @%llx page %lx %lx\n",
123 				  len, start, folio->index, end);
124 			continue;
125 		}
126 
127 		folio_set_error(folio);
128 		folio_end_writeback(folio);
129 	}
130 
131 	rcu_read_unlock();
132 }
133 
134 /*
135  * Redirty pages after a temporary failure.
136  */
137 void cifs_pages_write_redirty(struct inode *inode, loff_t start, unsigned int len)
138 {
139 	struct address_space *mapping = inode->i_mapping;
140 	struct folio *folio;
141 	pgoff_t end;
142 
143 	XA_STATE(xas, &mapping->i_pages, start / PAGE_SIZE);
144 
145 	if (!len)
146 		return;
147 
148 	rcu_read_lock();
149 
150 	end = (start + len - 1) / PAGE_SIZE;
151 	xas_for_each(&xas, folio, end) {
152 		if (!folio_test_writeback(folio)) {
153 			WARN_ONCE(1, "bad %x @%llx page %lx %lx\n",
154 				  len, start, folio->index, end);
155 			continue;
156 		}
157 
158 		filemap_dirty_folio(folio->mapping, folio);
159 		folio_end_writeback(folio);
160 	}
161 
162 	rcu_read_unlock();
163 }
164 
165 /*
166  * Mark as invalid, all open files on tree connections since they
167  * were closed when session to server was lost.
168  */
169 void
170 cifs_mark_open_files_invalid(struct cifs_tcon *tcon)
171 {
172 	struct cifsFileInfo *open_file = NULL;
173 	struct list_head *tmp;
174 	struct list_head *tmp1;
175 
176 	/* only send once per connect */
177 	spin_lock(&tcon->tc_lock);
178 	if (tcon->need_reconnect)
179 		tcon->status = TID_NEED_RECON;
180 
181 	if (tcon->status != TID_NEED_RECON) {
182 		spin_unlock(&tcon->tc_lock);
183 		return;
184 	}
185 	tcon->status = TID_IN_FILES_INVALIDATE;
186 	spin_unlock(&tcon->tc_lock);
187 
188 	/* list all files open on tree connection and mark them invalid */
189 	spin_lock(&tcon->open_file_lock);
190 	list_for_each_safe(tmp, tmp1, &tcon->openFileList) {
191 		open_file = list_entry(tmp, struct cifsFileInfo, tlist);
192 		open_file->invalidHandle = true;
193 		open_file->oplock_break_cancelled = true;
194 	}
195 	spin_unlock(&tcon->open_file_lock);
196 
197 	invalidate_all_cached_dirs(tcon);
198 	spin_lock(&tcon->tc_lock);
199 	if (tcon->status == TID_IN_FILES_INVALIDATE)
200 		tcon->status = TID_NEED_TCON;
201 	spin_unlock(&tcon->tc_lock);
202 
203 	/*
204 	 * BB Add call to invalidate_inodes(sb) for all superblocks mounted
205 	 * to this tcon.
206 	 */
207 }
208 
209 static inline int cifs_convert_flags(unsigned int flags, int rdwr_for_fscache)
210 {
211 	if ((flags & O_ACCMODE) == O_RDONLY)
212 		return GENERIC_READ;
213 	else if ((flags & O_ACCMODE) == O_WRONLY)
214 		return rdwr_for_fscache == 1 ? (GENERIC_READ | GENERIC_WRITE) : GENERIC_WRITE;
215 	else if ((flags & O_ACCMODE) == O_RDWR) {
216 		/* GENERIC_ALL is too much permission to request
217 		   can cause unnecessary access denied on create */
218 		/* return GENERIC_ALL; */
219 		return (GENERIC_READ | GENERIC_WRITE);
220 	}
221 
222 	return (READ_CONTROL | FILE_WRITE_ATTRIBUTES | FILE_READ_ATTRIBUTES |
223 		FILE_WRITE_EA | FILE_APPEND_DATA | FILE_WRITE_DATA |
224 		FILE_READ_DATA);
225 }
226 
227 #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
228 static u32 cifs_posix_convert_flags(unsigned int flags)
229 {
230 	u32 posix_flags = 0;
231 
232 	if ((flags & O_ACCMODE) == O_RDONLY)
233 		posix_flags = SMB_O_RDONLY;
234 	else if ((flags & O_ACCMODE) == O_WRONLY)
235 		posix_flags = SMB_O_WRONLY;
236 	else if ((flags & O_ACCMODE) == O_RDWR)
237 		posix_flags = SMB_O_RDWR;
238 
239 	if (flags & O_CREAT) {
240 		posix_flags |= SMB_O_CREAT;
241 		if (flags & O_EXCL)
242 			posix_flags |= SMB_O_EXCL;
243 	} else if (flags & O_EXCL)
244 		cifs_dbg(FYI, "Application %s pid %d has incorrectly set O_EXCL flag but not O_CREAT on file open. Ignoring O_EXCL\n",
245 			 current->comm, current->tgid);
246 
247 	if (flags & O_TRUNC)
248 		posix_flags |= SMB_O_TRUNC;
249 	/* be safe and imply O_SYNC for O_DSYNC */
250 	if (flags & O_DSYNC)
251 		posix_flags |= SMB_O_SYNC;
252 	if (flags & O_DIRECTORY)
253 		posix_flags |= SMB_O_DIRECTORY;
254 	if (flags & O_NOFOLLOW)
255 		posix_flags |= SMB_O_NOFOLLOW;
256 	if (flags & O_DIRECT)
257 		posix_flags |= SMB_O_DIRECT;
258 
259 	return posix_flags;
260 }
261 #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
262 
263 static inline int cifs_get_disposition(unsigned int flags)
264 {
265 	if ((flags & (O_CREAT | O_EXCL)) == (O_CREAT | O_EXCL))
266 		return FILE_CREATE;
267 	else if ((flags & (O_CREAT | O_TRUNC)) == (O_CREAT | O_TRUNC))
268 		return FILE_OVERWRITE_IF;
269 	else if ((flags & O_CREAT) == O_CREAT)
270 		return FILE_OPEN_IF;
271 	else if ((flags & O_TRUNC) == O_TRUNC)
272 		return FILE_OVERWRITE;
273 	else
274 		return FILE_OPEN;
275 }
276 
277 #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
278 int cifs_posix_open(const char *full_path, struct inode **pinode,
279 			struct super_block *sb, int mode, unsigned int f_flags,
280 			__u32 *poplock, __u16 *pnetfid, unsigned int xid)
281 {
282 	int rc;
283 	FILE_UNIX_BASIC_INFO *presp_data;
284 	__u32 posix_flags = 0;
285 	struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
286 	struct cifs_fattr fattr;
287 	struct tcon_link *tlink;
288 	struct cifs_tcon *tcon;
289 
290 	cifs_dbg(FYI, "posix open %s\n", full_path);
291 
292 	presp_data = kzalloc(sizeof(FILE_UNIX_BASIC_INFO), GFP_KERNEL);
293 	if (presp_data == NULL)
294 		return -ENOMEM;
295 
296 	tlink = cifs_sb_tlink(cifs_sb);
297 	if (IS_ERR(tlink)) {
298 		rc = PTR_ERR(tlink);
299 		goto posix_open_ret;
300 	}
301 
302 	tcon = tlink_tcon(tlink);
303 	mode &= ~current_umask();
304 
305 	posix_flags = cifs_posix_convert_flags(f_flags);
306 	rc = CIFSPOSIXCreate(xid, tcon, posix_flags, mode, pnetfid, presp_data,
307 			     poplock, full_path, cifs_sb->local_nls,
308 			     cifs_remap(cifs_sb));
309 	cifs_put_tlink(tlink);
310 
311 	if (rc)
312 		goto posix_open_ret;
313 
314 	if (presp_data->Type == cpu_to_le32(-1))
315 		goto posix_open_ret; /* open ok, caller does qpathinfo */
316 
317 	if (!pinode)
318 		goto posix_open_ret; /* caller does not need info */
319 
320 	cifs_unix_basic_to_fattr(&fattr, presp_data, cifs_sb);
321 
322 	/* get new inode and set it up */
323 	if (*pinode == NULL) {
324 		cifs_fill_uniqueid(sb, &fattr);
325 		*pinode = cifs_iget(sb, &fattr);
326 		if (!*pinode) {
327 			rc = -ENOMEM;
328 			goto posix_open_ret;
329 		}
330 	} else {
331 		cifs_revalidate_mapping(*pinode);
332 		rc = cifs_fattr_to_inode(*pinode, &fattr, false);
333 	}
334 
335 posix_open_ret:
336 	kfree(presp_data);
337 	return rc;
338 }
339 #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
340 
341 static int cifs_nt_open(const char *full_path, struct inode *inode, struct cifs_sb_info *cifs_sb,
342 			struct cifs_tcon *tcon, unsigned int f_flags, __u32 *oplock,
343 			struct cifs_fid *fid, unsigned int xid, struct cifs_open_info_data *buf)
344 {
345 	int rc;
346 	int desired_access;
347 	int disposition;
348 	int create_options = CREATE_NOT_DIR;
349 	struct TCP_Server_Info *server = tcon->ses->server;
350 	struct cifs_open_parms oparms;
351 	int rdwr_for_fscache = 0;
352 
353 	if (!server->ops->open)
354 		return -ENOSYS;
355 
356 	/* If we're caching, we need to be able to fill in around partial writes. */
357 	if (cifs_fscache_enabled(inode) && (f_flags & O_ACCMODE) == O_WRONLY)
358 		rdwr_for_fscache = 1;
359 
360 	desired_access = cifs_convert_flags(f_flags, rdwr_for_fscache);
361 
362 /*********************************************************************
363  *  open flag mapping table:
364  *
365  *	POSIX Flag            CIFS Disposition
366  *	----------            ----------------
367  *	O_CREAT               FILE_OPEN_IF
368  *	O_CREAT | O_EXCL      FILE_CREATE
369  *	O_CREAT | O_TRUNC     FILE_OVERWRITE_IF
370  *	O_TRUNC               FILE_OVERWRITE
371  *	none of the above     FILE_OPEN
372  *
373  *	Note that there is not a direct match between disposition
374  *	FILE_SUPERSEDE (ie create whether or not file exists although
375  *	O_CREAT | O_TRUNC is similar but truncates the existing
376  *	file rather than creating a new file as FILE_SUPERSEDE does
377  *	(which uses the attributes / metadata passed in on open call)
378  *?
379  *?  O_SYNC is a reasonable match to CIFS writethrough flag
380  *?  and the read write flags match reasonably.  O_LARGEFILE
381  *?  is irrelevant because largefile support is always used
382  *?  by this client. Flags O_APPEND, O_DIRECT, O_DIRECTORY,
383  *	 O_FASYNC, O_NOFOLLOW, O_NONBLOCK need further investigation
384  *********************************************************************/
385 
386 	disposition = cifs_get_disposition(f_flags);
387 
388 	/* BB pass O_SYNC flag through on file attributes .. BB */
389 
390 	/* O_SYNC also has bit for O_DSYNC so following check picks up either */
391 	if (f_flags & O_SYNC)
392 		create_options |= CREATE_WRITE_THROUGH;
393 
394 	if (f_flags & O_DIRECT)
395 		create_options |= CREATE_NO_BUFFER;
396 
397 retry_open:
398 	oparms = (struct cifs_open_parms) {
399 		.tcon = tcon,
400 		.cifs_sb = cifs_sb,
401 		.desired_access = desired_access,
402 		.create_options = cifs_create_options(cifs_sb, create_options),
403 		.disposition = disposition,
404 		.path = full_path,
405 		.fid = fid,
406 	};
407 
408 	rc = server->ops->open(xid, &oparms, oplock, buf);
409 	if (rc) {
410 		if (rc == -EACCES && rdwr_for_fscache == 1) {
411 			desired_access = cifs_convert_flags(f_flags, 0);
412 			rdwr_for_fscache = 2;
413 			goto retry_open;
414 		}
415 		return rc;
416 	}
417 	if (rdwr_for_fscache == 2)
418 		cifs_invalidate_cache(inode, FSCACHE_INVAL_DIO_WRITE);
419 
420 	/* TODO: Add support for calling posix query info but with passing in fid */
421 	if (tcon->unix_ext)
422 		rc = cifs_get_inode_info_unix(&inode, full_path, inode->i_sb,
423 					      xid);
424 	else
425 		rc = cifs_get_inode_info(&inode, full_path, buf, inode->i_sb,
426 					 xid, fid);
427 
428 	if (rc) {
429 		server->ops->close(xid, tcon, fid);
430 		if (rc == -ESTALE)
431 			rc = -EOPENSTALE;
432 	}
433 
434 	return rc;
435 }
436 
437 static bool
438 cifs_has_mand_locks(struct cifsInodeInfo *cinode)
439 {
440 	struct cifs_fid_locks *cur;
441 	bool has_locks = false;
442 
443 	down_read(&cinode->lock_sem);
444 	list_for_each_entry(cur, &cinode->llist, llist) {
445 		if (!list_empty(&cur->locks)) {
446 			has_locks = true;
447 			break;
448 		}
449 	}
450 	up_read(&cinode->lock_sem);
451 	return has_locks;
452 }
453 
/*
 * Take @sem for writing by polling: keep trying down_write_trylock() and
 * sleep for 10ms between failed attempts.  Returns once the semaphore is
 * held.
 */
void
cifs_down_write(struct rw_semaphore *sem)
{
	for (;;) {
		if (down_write_trylock(sem))
			return;
		msleep(10);
	}
}
460 
461 static void cifsFileInfo_put_work(struct work_struct *work);
462 void serverclose_work(struct work_struct *work);
463 
464 struct cifsFileInfo *cifs_new_fileinfo(struct cifs_fid *fid, struct file *file,
465 				       struct tcon_link *tlink, __u32 oplock,
466 				       const char *symlink_target)
467 {
468 	struct dentry *dentry = file_dentry(file);
469 	struct inode *inode = d_inode(dentry);
470 	struct cifsInodeInfo *cinode = CIFS_I(inode);
471 	struct cifsFileInfo *cfile;
472 	struct cifs_fid_locks *fdlocks;
473 	struct cifs_tcon *tcon = tlink_tcon(tlink);
474 	struct TCP_Server_Info *server = tcon->ses->server;
475 
476 	cfile = kzalloc(sizeof(struct cifsFileInfo), GFP_KERNEL);
477 	if (cfile == NULL)
478 		return cfile;
479 
480 	fdlocks = kzalloc(sizeof(struct cifs_fid_locks), GFP_KERNEL);
481 	if (!fdlocks) {
482 		kfree(cfile);
483 		return NULL;
484 	}
485 
486 	if (symlink_target) {
487 		cfile->symlink_target = kstrdup(symlink_target, GFP_KERNEL);
488 		if (!cfile->symlink_target) {
489 			kfree(fdlocks);
490 			kfree(cfile);
491 			return NULL;
492 		}
493 	}
494 
495 	INIT_LIST_HEAD(&fdlocks->locks);
496 	fdlocks->cfile = cfile;
497 	cfile->llist = fdlocks;
498 
499 	cfile->count = 1;
500 	cfile->pid = current->tgid;
501 	cfile->uid = current_fsuid();
502 	cfile->dentry = dget(dentry);
503 	cfile->f_flags = file->f_flags;
504 	cfile->invalidHandle = false;
505 	cfile->deferred_close_scheduled = false;
506 	cfile->tlink = cifs_get_tlink(tlink);
507 	INIT_WORK(&cfile->oplock_break, cifs_oplock_break);
508 	INIT_WORK(&cfile->put, cifsFileInfo_put_work);
509 	INIT_WORK(&cfile->serverclose, serverclose_work);
510 	INIT_DELAYED_WORK(&cfile->deferred, smb2_deferred_work_close);
511 	mutex_init(&cfile->fh_mutex);
512 	spin_lock_init(&cfile->file_info_lock);
513 
514 	cifs_sb_active(inode->i_sb);
515 
516 	/*
517 	 * If the server returned a read oplock and we have mandatory brlocks,
518 	 * set oplock level to None.
519 	 */
520 	if (server->ops->is_read_op(oplock) && cifs_has_mand_locks(cinode)) {
521 		cifs_dbg(FYI, "Reset oplock val from read to None due to mand locks\n");
522 		oplock = 0;
523 	}
524 
525 	cifs_down_write(&cinode->lock_sem);
526 	list_add(&fdlocks->llist, &cinode->llist);
527 	up_write(&cinode->lock_sem);
528 
529 	spin_lock(&tcon->open_file_lock);
530 	if (fid->pending_open->oplock != CIFS_OPLOCK_NO_CHANGE && oplock)
531 		oplock = fid->pending_open->oplock;
532 	list_del(&fid->pending_open->olist);
533 
534 	fid->purge_cache = false;
535 	server->ops->set_fid(cfile, fid, oplock);
536 
537 	list_add(&cfile->tlist, &tcon->openFileList);
538 	atomic_inc(&tcon->num_local_opens);
539 
540 	/* if readable file instance put first in list*/
541 	spin_lock(&cinode->open_file_lock);
542 	if (file->f_mode & FMODE_READ)
543 		list_add(&cfile->flist, &cinode->openFileList);
544 	else
545 		list_add_tail(&cfile->flist, &cinode->openFileList);
546 	spin_unlock(&cinode->open_file_lock);
547 	spin_unlock(&tcon->open_file_lock);
548 
549 	if (fid->purge_cache)
550 		cifs_zap_mapping(inode);
551 
552 	file->private_data = cfile;
553 	return cfile;
554 }
555 
556 struct cifsFileInfo *
557 cifsFileInfo_get(struct cifsFileInfo *cifs_file)
558 {
559 	spin_lock(&cifs_file->file_info_lock);
560 	cifsFileInfo_get_locked(cifs_file);
561 	spin_unlock(&cifs_file->file_info_lock);
562 	return cifs_file;
563 }
564 
565 static void cifsFileInfo_put_final(struct cifsFileInfo *cifs_file)
566 {
567 	struct inode *inode = d_inode(cifs_file->dentry);
568 	struct cifsInodeInfo *cifsi = CIFS_I(inode);
569 	struct cifsLockInfo *li, *tmp;
570 	struct super_block *sb = inode->i_sb;
571 
572 	/*
573 	 * Delete any outstanding lock records. We'll lose them when the file
574 	 * is closed anyway.
575 	 */
576 	cifs_down_write(&cifsi->lock_sem);
577 	list_for_each_entry_safe(li, tmp, &cifs_file->llist->locks, llist) {
578 		list_del(&li->llist);
579 		cifs_del_lock_waiters(li);
580 		kfree(li);
581 	}
582 	list_del(&cifs_file->llist->llist);
583 	kfree(cifs_file->llist);
584 	up_write(&cifsi->lock_sem);
585 
586 	cifs_put_tlink(cifs_file->tlink);
587 	dput(cifs_file->dentry);
588 	cifs_sb_deactive(sb);
589 	kfree(cifs_file->symlink_target);
590 	kfree(cifs_file);
591 }
592 
593 static void cifsFileInfo_put_work(struct work_struct *work)
594 {
595 	struct cifsFileInfo *cifs_file = container_of(work,
596 			struct cifsFileInfo, put);
597 
598 	cifsFileInfo_put_final(cifs_file);
599 }
600 
601 void serverclose_work(struct work_struct *work)
602 {
603 	struct cifsFileInfo *cifs_file = container_of(work,
604 			struct cifsFileInfo, serverclose);
605 
606 	struct cifs_tcon *tcon = tlink_tcon(cifs_file->tlink);
607 
608 	struct TCP_Server_Info *server = tcon->ses->server;
609 	int rc = 0;
610 	int retries = 0;
611 	int MAX_RETRIES = 4;
612 
613 	do {
614 		if (server->ops->close_getattr)
615 			rc = server->ops->close_getattr(0, tcon, cifs_file);
616 		else if (server->ops->close)
617 			rc = server->ops->close(0, tcon, &cifs_file->fid);
618 
619 		if (rc == -EBUSY || rc == -EAGAIN) {
620 			retries++;
621 			msleep(250);
622 		}
623 	} while ((rc == -EBUSY || rc == -EAGAIN) && (retries < MAX_RETRIES)
624 	);
625 
626 	if (retries == MAX_RETRIES)
627 		pr_warn("Serverclose failed %d times, giving up\n", MAX_RETRIES);
628 
629 	if (cifs_file->offload)
630 		queue_work(fileinfo_put_wq, &cifs_file->put);
631 	else
632 		cifsFileInfo_put_final(cifs_file);
633 }
634 
635 /**
636  * cifsFileInfo_put - release a reference of file priv data
637  *
638  * Always potentially wait for oplock handler. See _cifsFileInfo_put().
639  *
640  * @cifs_file:	cifs/smb3 specific info (eg refcounts) for an open file
641  */
642 void cifsFileInfo_put(struct cifsFileInfo *cifs_file)
643 {
644 	_cifsFileInfo_put(cifs_file, true, true);
645 }
646 
647 /**
648  * _cifsFileInfo_put - release a reference of file priv data
649  *
650  * This may involve closing the filehandle @cifs_file out on the
651  * server. Must be called without holding tcon->open_file_lock,
652  * cinode->open_file_lock and cifs_file->file_info_lock.
653  *
654  * If @wait_for_oplock_handler is true and we are releasing the last
655  * reference, wait for any running oplock break handler of the file
656  * and cancel any pending one.
657  *
658  * @cifs_file:	cifs/smb3 specific info (eg refcounts) for an open file
659  * @wait_oplock_handler: must be false if called from oplock_break_handler
660  * @offload:	not offloaded on close and oplock breaks
661  *
662  */
663 void _cifsFileInfo_put(struct cifsFileInfo *cifs_file,
664 		       bool wait_oplock_handler, bool offload)
665 {
666 	struct inode *inode = d_inode(cifs_file->dentry);
667 	struct cifs_tcon *tcon = tlink_tcon(cifs_file->tlink);
668 	struct TCP_Server_Info *server = tcon->ses->server;
669 	struct cifsInodeInfo *cifsi = CIFS_I(inode);
670 	struct super_block *sb = inode->i_sb;
671 	struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
672 	struct cifs_fid fid = {};
673 	struct cifs_pending_open open;
674 	bool oplock_break_cancelled;
675 	bool serverclose_offloaded = false;
676 
677 	spin_lock(&tcon->open_file_lock);
678 	spin_lock(&cifsi->open_file_lock);
679 	spin_lock(&cifs_file->file_info_lock);
680 
681 	cifs_file->offload = offload;
682 	if (--cifs_file->count > 0) {
683 		spin_unlock(&cifs_file->file_info_lock);
684 		spin_unlock(&cifsi->open_file_lock);
685 		spin_unlock(&tcon->open_file_lock);
686 		return;
687 	}
688 	spin_unlock(&cifs_file->file_info_lock);
689 
690 	if (server->ops->get_lease_key)
691 		server->ops->get_lease_key(inode, &fid);
692 
693 	/* store open in pending opens to make sure we don't miss lease break */
694 	cifs_add_pending_open_locked(&fid, cifs_file->tlink, &open);
695 
696 	/* remove it from the lists */
697 	list_del(&cifs_file->flist);
698 	list_del(&cifs_file->tlist);
699 	atomic_dec(&tcon->num_local_opens);
700 
701 	if (list_empty(&cifsi->openFileList)) {
702 		cifs_dbg(FYI, "closing last open instance for inode %p\n",
703 			 d_inode(cifs_file->dentry));
704 		/*
705 		 * In strict cache mode we need invalidate mapping on the last
706 		 * close  because it may cause a error when we open this file
707 		 * again and get at least level II oplock.
708 		 */
709 		if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_STRICT_IO)
710 			set_bit(CIFS_INO_INVALID_MAPPING, &cifsi->flags);
711 		cifs_set_oplock_level(cifsi, 0);
712 	}
713 
714 	spin_unlock(&cifsi->open_file_lock);
715 	spin_unlock(&tcon->open_file_lock);
716 
717 	oplock_break_cancelled = wait_oplock_handler ?
718 		cancel_work_sync(&cifs_file->oplock_break) : false;
719 
720 	if (!tcon->need_reconnect && !cifs_file->invalidHandle) {
721 		struct TCP_Server_Info *server = tcon->ses->server;
722 		unsigned int xid;
723 		int rc = 0;
724 
725 		xid = get_xid();
726 		if (server->ops->close_getattr)
727 			rc = server->ops->close_getattr(xid, tcon, cifs_file);
728 		else if (server->ops->close)
729 			rc = server->ops->close(xid, tcon, &cifs_file->fid);
730 		_free_xid(xid);
731 
732 		if (rc == -EBUSY || rc == -EAGAIN) {
733 			// Server close failed, hence offloading it as an async op
734 			queue_work(serverclose_wq, &cifs_file->serverclose);
735 			serverclose_offloaded = true;
736 		}
737 	}
738 
739 	if (oplock_break_cancelled)
740 		cifs_done_oplock_break(cifsi);
741 
742 	cifs_del_pending_open(&open);
743 
744 	// if serverclose has been offloaded to wq (on failure), it will
745 	// handle offloading put as well. If serverclose not offloaded,
746 	// we need to handle offloading put here.
747 	if (!serverclose_offloaded) {
748 		if (offload)
749 			queue_work(fileinfo_put_wq, &cifs_file->put);
750 		else
751 			cifsFileInfo_put_final(cifs_file);
752 	}
753 }
754 
755 int cifs_open(struct inode *inode, struct file *file)
756 
757 {
758 	int rc = -EACCES;
759 	unsigned int xid;
760 	__u32 oplock;
761 	struct cifs_sb_info *cifs_sb;
762 	struct TCP_Server_Info *server;
763 	struct cifs_tcon *tcon;
764 	struct tcon_link *tlink;
765 	struct cifsFileInfo *cfile = NULL;
766 	void *page;
767 	const char *full_path;
768 	bool posix_open_ok = false;
769 	struct cifs_fid fid = {};
770 	struct cifs_pending_open open;
771 	struct cifs_open_info_data data = {};
772 
773 	xid = get_xid();
774 
775 	cifs_sb = CIFS_SB(inode->i_sb);
776 	if (unlikely(cifs_forced_shutdown(cifs_sb))) {
777 		free_xid(xid);
778 		return -EIO;
779 	}
780 
781 	tlink = cifs_sb_tlink(cifs_sb);
782 	if (IS_ERR(tlink)) {
783 		free_xid(xid);
784 		return PTR_ERR(tlink);
785 	}
786 	tcon = tlink_tcon(tlink);
787 	server = tcon->ses->server;
788 
789 	page = alloc_dentry_path();
790 	full_path = build_path_from_dentry(file_dentry(file), page);
791 	if (IS_ERR(full_path)) {
792 		rc = PTR_ERR(full_path);
793 		goto out;
794 	}
795 
796 	cifs_dbg(FYI, "inode = 0x%p file flags are 0x%x for %s\n",
797 		 inode, file->f_flags, full_path);
798 
799 	if (file->f_flags & O_DIRECT &&
800 	    cifs_sb->mnt_cifs_flags & CIFS_MOUNT_STRICT_IO) {
801 		if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_BRL)
802 			file->f_op = &cifs_file_direct_nobrl_ops;
803 		else
804 			file->f_op = &cifs_file_direct_ops;
805 	}
806 
807 	/* Get the cached handle as SMB2 close is deferred */
808 	rc = cifs_get_readable_path(tcon, full_path, &cfile);
809 	if (rc == 0) {
810 		if (file->f_flags == cfile->f_flags) {
811 			file->private_data = cfile;
812 			spin_lock(&CIFS_I(inode)->deferred_lock);
813 			cifs_del_deferred_close(cfile);
814 			spin_unlock(&CIFS_I(inode)->deferred_lock);
815 			goto use_cache;
816 		} else {
817 			_cifsFileInfo_put(cfile, true, false);
818 		}
819 	}
820 
821 	if (server->oplocks)
822 		oplock = REQ_OPLOCK;
823 	else
824 		oplock = 0;
825 
826 #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
827 	if (!tcon->broken_posix_open && tcon->unix_ext &&
828 	    cap_unix(tcon->ses) && (CIFS_UNIX_POSIX_PATH_OPS_CAP &
829 				le64_to_cpu(tcon->fsUnixInfo.Capability))) {
830 		/* can not refresh inode info since size could be stale */
831 		rc = cifs_posix_open(full_path, &inode, inode->i_sb,
832 				cifs_sb->ctx->file_mode /* ignored */,
833 				file->f_flags, &oplock, &fid.netfid, xid);
834 		if (rc == 0) {
835 			cifs_dbg(FYI, "posix open succeeded\n");
836 			posix_open_ok = true;
837 		} else if ((rc == -EINVAL) || (rc == -EOPNOTSUPP)) {
838 			if (tcon->ses->serverNOS)
839 				cifs_dbg(VFS, "server %s of type %s returned unexpected error on SMB posix open, disabling posix open support. Check if server update available.\n",
840 					 tcon->ses->ip_addr,
841 					 tcon->ses->serverNOS);
842 			tcon->broken_posix_open = true;
843 		} else if ((rc != -EIO) && (rc != -EREMOTE) &&
844 			 (rc != -EOPNOTSUPP)) /* path not found or net err */
845 			goto out;
846 		/*
847 		 * Else fallthrough to retry open the old way on network i/o
848 		 * or DFS errors.
849 		 */
850 	}
851 #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
852 
853 	if (server->ops->get_lease_key)
854 		server->ops->get_lease_key(inode, &fid);
855 
856 	cifs_add_pending_open(&fid, tlink, &open);
857 
858 	if (!posix_open_ok) {
859 		if (server->ops->get_lease_key)
860 			server->ops->get_lease_key(inode, &fid);
861 
862 		rc = cifs_nt_open(full_path, inode, cifs_sb, tcon, file->f_flags, &oplock, &fid,
863 				  xid, &data);
864 		if (rc) {
865 			cifs_del_pending_open(&open);
866 			goto out;
867 		}
868 	}
869 
870 	cfile = cifs_new_fileinfo(&fid, file, tlink, oplock, data.symlink_target);
871 	if (cfile == NULL) {
872 		if (server->ops->close)
873 			server->ops->close(xid, tcon, &fid);
874 		cifs_del_pending_open(&open);
875 		rc = -ENOMEM;
876 		goto out;
877 	}
878 
879 #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
880 	if ((oplock & CIFS_CREATE_ACTION) && !posix_open_ok && tcon->unix_ext) {
881 		/*
882 		 * Time to set mode which we can not set earlier due to
883 		 * problems creating new read-only files.
884 		 */
885 		struct cifs_unix_set_info_args args = {
886 			.mode	= inode->i_mode,
887 			.uid	= INVALID_UID, /* no change */
888 			.gid	= INVALID_GID, /* no change */
889 			.ctime	= NO_CHANGE_64,
890 			.atime	= NO_CHANGE_64,
891 			.mtime	= NO_CHANGE_64,
892 			.device	= 0,
893 		};
894 		CIFSSMBUnixSetFileInfo(xid, tcon, &args, fid.netfid,
895 				       cfile->pid);
896 	}
897 #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
898 
899 use_cache:
900 	fscache_use_cookie(cifs_inode_cookie(file_inode(file)),
901 			   file->f_mode & FMODE_WRITE);
902 	if (!(file->f_flags & O_DIRECT))
903 		goto out;
904 	if ((file->f_flags & (O_ACCMODE | O_APPEND)) == O_RDONLY)
905 		goto out;
906 	cifs_invalidate_cache(file_inode(file), FSCACHE_INVAL_DIO_WRITE);
907 
908 out:
909 	free_dentry_path(page);
910 	free_xid(xid);
911 	cifs_put_tlink(tlink);
912 	cifs_free_open_info(&data);
913 	return rc;
914 }
915 
916 #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
917 static int cifs_push_posix_locks(struct cifsFileInfo *cfile);
918 #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
919 
920 /*
921  * Try to reacquire byte range locks that were released when session
922  * to server was lost.
923  */
924 static int
925 cifs_relock_file(struct cifsFileInfo *cfile)
926 {
927 	struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
928 	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
929 	int rc = 0;
930 #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
931 	struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
932 #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
933 
934 	down_read_nested(&cinode->lock_sem, SINGLE_DEPTH_NESTING);
935 	if (cinode->can_cache_brlcks) {
936 		/* can cache locks - no need to relock */
937 		up_read(&cinode->lock_sem);
938 		return rc;
939 	}
940 
941 #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
942 	if (cap_unix(tcon->ses) &&
943 	    (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
944 	    ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
945 		rc = cifs_push_posix_locks(cfile);
946 	else
947 #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
948 		rc = tcon->ses->server->ops->push_mand_locks(cfile);
949 
950 	up_read(&cinode->lock_sem);
951 	return rc;
952 }
953 
954 static int
955 cifs_reopen_file(struct cifsFileInfo *cfile, bool can_flush)
956 {
957 	int rc = -EACCES;
958 	unsigned int xid;
959 	__u32 oplock;
960 	struct cifs_sb_info *cifs_sb;
961 	struct cifs_tcon *tcon;
962 	struct TCP_Server_Info *server;
963 	struct cifsInodeInfo *cinode;
964 	struct inode *inode;
965 	void *page;
966 	const char *full_path;
967 	int desired_access;
968 	int disposition = FILE_OPEN;
969 	int create_options = CREATE_NOT_DIR;
970 	struct cifs_open_parms oparms;
971 	int rdwr_for_fscache = 0;
972 
973 	xid = get_xid();
974 	mutex_lock(&cfile->fh_mutex);
975 	if (!cfile->invalidHandle) {
976 		mutex_unlock(&cfile->fh_mutex);
977 		free_xid(xid);
978 		return 0;
979 	}
980 
981 	inode = d_inode(cfile->dentry);
982 	cifs_sb = CIFS_SB(inode->i_sb);
983 	tcon = tlink_tcon(cfile->tlink);
984 	server = tcon->ses->server;
985 
986 	/*
987 	 * Can not grab rename sem here because various ops, including those
988 	 * that already have the rename sem can end up causing writepage to get
989 	 * called and if the server was down that means we end up here, and we
990 	 * can never tell if the caller already has the rename_sem.
991 	 */
992 	page = alloc_dentry_path();
993 	full_path = build_path_from_dentry(cfile->dentry, page);
994 	if (IS_ERR(full_path)) {
995 		mutex_unlock(&cfile->fh_mutex);
996 		free_dentry_path(page);
997 		free_xid(xid);
998 		return PTR_ERR(full_path);
999 	}
1000 
1001 	cifs_dbg(FYI, "inode = 0x%p file flags 0x%x for %s\n",
1002 		 inode, cfile->f_flags, full_path);
1003 
1004 	if (tcon->ses->server->oplocks)
1005 		oplock = REQ_OPLOCK;
1006 	else
1007 		oplock = 0;
1008 
1009 #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
1010 	if (tcon->unix_ext && cap_unix(tcon->ses) &&
1011 	    (CIFS_UNIX_POSIX_PATH_OPS_CAP &
1012 				le64_to_cpu(tcon->fsUnixInfo.Capability))) {
1013 		/*
1014 		 * O_CREAT, O_EXCL and O_TRUNC already had their effect on the
1015 		 * original open. Must mask them off for a reopen.
1016 		 */
1017 		unsigned int oflags = cfile->f_flags &
1018 						~(O_CREAT | O_EXCL | O_TRUNC);
1019 
1020 		rc = cifs_posix_open(full_path, NULL, inode->i_sb,
1021 				     cifs_sb->ctx->file_mode /* ignored */,
1022 				     oflags, &oplock, &cfile->fid.netfid, xid);
1023 		if (rc == 0) {
1024 			cifs_dbg(FYI, "posix reopen succeeded\n");
1025 			oparms.reconnect = true;
1026 			goto reopen_success;
1027 		}
1028 		/*
1029 		 * fallthrough to retry open the old way on errors, especially
1030 		 * in the reconnect path it is important to retry hard
1031 		 */
1032 	}
1033 #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
1034 
1035 	/* If we're caching, we need to be able to fill in around partial writes. */
1036 	if (cifs_fscache_enabled(inode) && (cfile->f_flags & O_ACCMODE) == O_WRONLY)
1037 		rdwr_for_fscache = 1;
1038 
1039 	desired_access = cifs_convert_flags(cfile->f_flags, rdwr_for_fscache);
1040 
1041 	/* O_SYNC also has bit for O_DSYNC so following check picks up either */
1042 	if (cfile->f_flags & O_SYNC)
1043 		create_options |= CREATE_WRITE_THROUGH;
1044 
1045 	if (cfile->f_flags & O_DIRECT)
1046 		create_options |= CREATE_NO_BUFFER;
1047 
1048 	if (server->ops->get_lease_key)
1049 		server->ops->get_lease_key(inode, &cfile->fid);
1050 
1051 retry_open:
1052 	oparms = (struct cifs_open_parms) {
1053 		.tcon = tcon,
1054 		.cifs_sb = cifs_sb,
1055 		.desired_access = desired_access,
1056 		.create_options = cifs_create_options(cifs_sb, create_options),
1057 		.disposition = disposition,
1058 		.path = full_path,
1059 		.fid = &cfile->fid,
1060 		.reconnect = true,
1061 	};
1062 
1063 	/*
1064 	 * Can not refresh inode by passing in file_info buf to be returned by
1065 	 * ops->open and then calling get_inode_info with returned buf since
1066 	 * file might have write behind data that needs to be flushed and server
1067 	 * version of file size can be stale. If we knew for sure that inode was
1068 	 * not dirty locally we could do this.
1069 	 */
1070 	rc = server->ops->open(xid, &oparms, &oplock, NULL);
1071 	if (rc == -ENOENT && oparms.reconnect == false) {
1072 		/* durable handle timeout is expired - open the file again */
1073 		rc = server->ops->open(xid, &oparms, &oplock, NULL);
1074 		/* indicate that we need to relock the file */
1075 		oparms.reconnect = true;
1076 	}
1077 	if (rc == -EACCES && rdwr_for_fscache == 1) {
1078 		desired_access = cifs_convert_flags(cfile->f_flags, 0);
1079 		rdwr_for_fscache = 2;
1080 		goto retry_open;
1081 	}
1082 
1083 	if (rc) {
1084 		mutex_unlock(&cfile->fh_mutex);
1085 		cifs_dbg(FYI, "cifs_reopen returned 0x%x\n", rc);
1086 		cifs_dbg(FYI, "oplock: %d\n", oplock);
1087 		goto reopen_error_exit;
1088 	}
1089 
1090 	if (rdwr_for_fscache == 2)
1091 		cifs_invalidate_cache(inode, FSCACHE_INVAL_DIO_WRITE);
1092 
1093 #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
1094 reopen_success:
1095 #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
1096 	cfile->invalidHandle = false;
1097 	mutex_unlock(&cfile->fh_mutex);
1098 	cinode = CIFS_I(inode);
1099 
1100 	if (can_flush) {
1101 		rc = filemap_write_and_wait(inode->i_mapping);
1102 		if (!is_interrupt_error(rc))
1103 			mapping_set_error(inode->i_mapping, rc);
1104 
1105 		if (tcon->posix_extensions) {
1106 			rc = smb311_posix_get_inode_info(&inode, full_path,
1107 							 NULL, inode->i_sb, xid);
1108 		} else if (tcon->unix_ext) {
1109 			rc = cifs_get_inode_info_unix(&inode, full_path,
1110 						      inode->i_sb, xid);
1111 		} else {
1112 			rc = cifs_get_inode_info(&inode, full_path, NULL,
1113 						 inode->i_sb, xid, NULL);
1114 		}
1115 	}
1116 	/*
1117 	 * Else we are writing out data to server already and could deadlock if
1118 	 * we tried to flush data, and since we do not know if we have data that
1119 	 * would invalidate the current end of file on the server we can not go
1120 	 * to the server to get the new inode info.
1121 	 */
1122 
1123 	/*
1124 	 * If the server returned a read oplock and we have mandatory brlocks,
1125 	 * set oplock level to None.
1126 	 */
1127 	if (server->ops->is_read_op(oplock) && cifs_has_mand_locks(cinode)) {
1128 		cifs_dbg(FYI, "Reset oplock val from read to None due to mand locks\n");
1129 		oplock = 0;
1130 	}
1131 
1132 	server->ops->set_fid(cfile, &cfile->fid, oplock);
1133 	if (oparms.reconnect)
1134 		cifs_relock_file(cfile);
1135 
1136 reopen_error_exit:
1137 	free_dentry_path(page);
1138 	free_xid(xid);
1139 	return rc;
1140 }
1141 
1142 void smb2_deferred_work_close(struct work_struct *work)
1143 {
1144 	struct cifsFileInfo *cfile = container_of(work,
1145 			struct cifsFileInfo, deferred.work);
1146 
1147 	spin_lock(&CIFS_I(d_inode(cfile->dentry))->deferred_lock);
1148 	cifs_del_deferred_close(cfile);
1149 	cfile->deferred_close_scheduled = false;
1150 	spin_unlock(&CIFS_I(d_inode(cfile->dentry))->deferred_lock);
1151 	_cifsFileInfo_put(cfile, true, false);
1152 }
1153 
1154 static bool
1155 smb2_can_defer_close(struct inode *inode, struct cifs_deferred_close *dclose)
1156 {
1157 	struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
1158 	struct cifsInodeInfo *cinode = CIFS_I(inode);
1159 
1160 	return (cifs_sb->ctx->closetimeo && cinode->lease_granted && dclose &&
1161 			(cinode->oplock == CIFS_CACHE_RHW_FLG ||
1162 			 cinode->oplock == CIFS_CACHE_RH_FLG) &&
1163 			!test_bit(CIFS_INO_CLOSE_ON_LOCK, &cinode->flags));
1164 
1165 }
1166 
1167 int cifs_close(struct inode *inode, struct file *file)
1168 {
1169 	struct cifsFileInfo *cfile;
1170 	struct cifsInodeInfo *cinode = CIFS_I(inode);
1171 	struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
1172 	struct cifs_deferred_close *dclose;
1173 
1174 	cifs_fscache_unuse_inode_cookie(inode, file->f_mode & FMODE_WRITE);
1175 
1176 	if (file->private_data != NULL) {
1177 		cfile = file->private_data;
1178 		file->private_data = NULL;
1179 		dclose = kmalloc(sizeof(struct cifs_deferred_close), GFP_KERNEL);
1180 		if ((cfile->status_file_deleted == false) &&
1181 		    (smb2_can_defer_close(inode, dclose))) {
1182 			if (test_and_clear_bit(CIFS_INO_MODIFIED_ATTR, &cinode->flags)) {
1183 				inode_set_mtime_to_ts(inode,
1184 						      inode_set_ctime_current(inode));
1185 			}
1186 			spin_lock(&cinode->deferred_lock);
1187 			cifs_add_deferred_close(cfile, dclose);
1188 			if (cfile->deferred_close_scheduled &&
1189 			    delayed_work_pending(&cfile->deferred)) {
1190 				/*
1191 				 * If there is no pending work, mod_delayed_work queues new work.
1192 				 * So, Increase the ref count to avoid use-after-free.
1193 				 */
1194 				if (!mod_delayed_work(deferredclose_wq,
1195 						&cfile->deferred, cifs_sb->ctx->closetimeo))
1196 					cifsFileInfo_get(cfile);
1197 			} else {
1198 				/* Deferred close for files */
1199 				queue_delayed_work(deferredclose_wq,
1200 						&cfile->deferred, cifs_sb->ctx->closetimeo);
1201 				cfile->deferred_close_scheduled = true;
1202 				spin_unlock(&cinode->deferred_lock);
1203 				return 0;
1204 			}
1205 			spin_unlock(&cinode->deferred_lock);
1206 			_cifsFileInfo_put(cfile, true, false);
1207 		} else {
1208 			_cifsFileInfo_put(cfile, true, false);
1209 			kfree(dclose);
1210 		}
1211 	}
1212 
1213 	/* return code from the ->release op is always ignored */
1214 	return 0;
1215 }
1216 
1217 void
1218 cifs_reopen_persistent_handles(struct cifs_tcon *tcon)
1219 {
1220 	struct cifsFileInfo *open_file, *tmp;
1221 	struct list_head tmp_list;
1222 
1223 	if (!tcon->use_persistent || !tcon->need_reopen_files)
1224 		return;
1225 
1226 	tcon->need_reopen_files = false;
1227 
1228 	cifs_dbg(FYI, "Reopen persistent handles\n");
1229 	INIT_LIST_HEAD(&tmp_list);
1230 
1231 	/* list all files open on tree connection, reopen resilient handles  */
1232 	spin_lock(&tcon->open_file_lock);
1233 	list_for_each_entry(open_file, &tcon->openFileList, tlist) {
1234 		if (!open_file->invalidHandle)
1235 			continue;
1236 		cifsFileInfo_get(open_file);
1237 		list_add_tail(&open_file->rlist, &tmp_list);
1238 	}
1239 	spin_unlock(&tcon->open_file_lock);
1240 
1241 	list_for_each_entry_safe(open_file, tmp, &tmp_list, rlist) {
1242 		if (cifs_reopen_file(open_file, false /* do not flush */))
1243 			tcon->need_reopen_files = true;
1244 		list_del_init(&open_file->rlist);
1245 		cifsFileInfo_put(open_file);
1246 	}
1247 }
1248 
1249 int cifs_closedir(struct inode *inode, struct file *file)
1250 {
1251 	int rc = 0;
1252 	unsigned int xid;
1253 	struct cifsFileInfo *cfile = file->private_data;
1254 	struct cifs_tcon *tcon;
1255 	struct TCP_Server_Info *server;
1256 	char *buf;
1257 
1258 	cifs_dbg(FYI, "Closedir inode = 0x%p\n", inode);
1259 
1260 	if (cfile == NULL)
1261 		return rc;
1262 
1263 	xid = get_xid();
1264 	tcon = tlink_tcon(cfile->tlink);
1265 	server = tcon->ses->server;
1266 
1267 	cifs_dbg(FYI, "Freeing private data in close dir\n");
1268 	spin_lock(&cfile->file_info_lock);
1269 	if (server->ops->dir_needs_close(cfile)) {
1270 		cfile->invalidHandle = true;
1271 		spin_unlock(&cfile->file_info_lock);
1272 		if (server->ops->close_dir)
1273 			rc = server->ops->close_dir(xid, tcon, &cfile->fid);
1274 		else
1275 			rc = -ENOSYS;
1276 		cifs_dbg(FYI, "Closing uncompleted readdir with rc %d\n", rc);
1277 		/* not much we can do if it fails anyway, ignore rc */
1278 		rc = 0;
1279 	} else
1280 		spin_unlock(&cfile->file_info_lock);
1281 
1282 	buf = cfile->srch_inf.ntwrk_buf_start;
1283 	if (buf) {
1284 		cifs_dbg(FYI, "closedir free smb buf in srch struct\n");
1285 		cfile->srch_inf.ntwrk_buf_start = NULL;
1286 		if (cfile->srch_inf.smallBuf)
1287 			cifs_small_buf_release(buf);
1288 		else
1289 			cifs_buf_release(buf);
1290 	}
1291 
1292 	cifs_put_tlink(cfile->tlink);
1293 	kfree(file->private_data);
1294 	file->private_data = NULL;
1295 	/* BB can we lock the filestruct while this is going on? */
1296 	free_xid(xid);
1297 	return rc;
1298 }
1299 
1300 static struct cifsLockInfo *
1301 cifs_lock_init(__u64 offset, __u64 length, __u8 type, __u16 flags)
1302 {
1303 	struct cifsLockInfo *lock =
1304 		kmalloc(sizeof(struct cifsLockInfo), GFP_KERNEL);
1305 	if (!lock)
1306 		return lock;
1307 	lock->offset = offset;
1308 	lock->length = length;
1309 	lock->type = type;
1310 	lock->pid = current->tgid;
1311 	lock->flags = flags;
1312 	INIT_LIST_HEAD(&lock->blist);
1313 	init_waitqueue_head(&lock->block_q);
1314 	return lock;
1315 }
1316 
1317 void
1318 cifs_del_lock_waiters(struct cifsLockInfo *lock)
1319 {
1320 	struct cifsLockInfo *li, *tmp;
1321 	list_for_each_entry_safe(li, tmp, &lock->blist, blist) {
1322 		list_del_init(&li->blist);
1323 		wake_up(&li->block_q);
1324 	}
1325 }
1326 
/* Values for the @rw_check argument of the lock-conflict helpers below. */
#define CIFS_LOCK_OP	0
#define CIFS_READ_OP	1
#define CIFS_WRITE_OP	2

/*
 * Search the locks recorded for one fid (@fdlocks) for a lock that conflicts
 * with the range [@offset, @offset + @length) requested through @cfile.
 *
 * @type and @flags describe the requested lock; @rw_check is one of
 * CIFS_LOCK_OP (0 - lock request), CIFS_READ_OP (1 - read) or
 * CIFS_WRITE_OP (2 - write).
 *
 * Returns true at the first conflicting lock, storing it in *@conf_lock when
 * @conf_lock is non-NULL; returns false when nothing conflicts.
 */
static bool
cifs_find_fid_lock_conflict(struct cifs_fid_locks *fdlocks, __u64 offset,
			    __u64 length, __u8 type, __u16 flags,
			    struct cifsFileInfo *cfile,
			    struct cifsLockInfo **conf_lock, int rw_check)
{
	struct cifsLockInfo *li;
	struct cifsFileInfo *cur_cfile = fdlocks->cfile;
	struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;

	list_for_each_entry(li, &fdlocks->locks, llist) {
		/* Ranges that do not overlap can never conflict. */
		if (offset + length <= li->offset ||
		    offset >= li->offset + li->length)
			continue;
		/*
		 * Read/write check against a lock held by this process on the
		 * same fid: only a shared lock combined with a write op is
		 * allowed to fall through to the remaining checks.
		 */
		if (rw_check != CIFS_LOCK_OP && current->tgid == li->pid &&
		    server->ops->compare_fids(cfile, cur_cfile)) {
			/* shared lock prevents write op through the same fid */
			if (!(li->type & server->vals->shared_lock_type) ||
			    rw_check != CIFS_WRITE_OP)
				continue;
		}
		/*
		 * A shared request does not conflict with a lock of the same
		 * process on the same fid, nor with a lock of identical type.
		 */
		if ((type & server->vals->shared_lock_type) &&
		    ((server->ops->compare_fids(cfile, cur_cfile) &&
		     current->tgid == li->pid) || type == li->type))
			continue;
		/*
		 * For lock requests, two FL_OFDLCK locks on the same fid do
		 * not conflict with each other.
		 */
		if (rw_check == CIFS_LOCK_OP &&
		    (flags & FL_OFDLCK) && (li->flags & FL_OFDLCK) &&
		    server->ops->compare_fids(cfile, cur_cfile))
			continue;
		if (conf_lock)
			*conf_lock = li;
		return true;
	}
	return false;
}
1367 
1368 bool
1369 cifs_find_lock_conflict(struct cifsFileInfo *cfile, __u64 offset, __u64 length,
1370 			__u8 type, __u16 flags,
1371 			struct cifsLockInfo **conf_lock, int rw_check)
1372 {
1373 	bool rc = false;
1374 	struct cifs_fid_locks *cur;
1375 	struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
1376 
1377 	list_for_each_entry(cur, &cinode->llist, llist) {
1378 		rc = cifs_find_fid_lock_conflict(cur, offset, length, type,
1379 						 flags, cfile, conf_lock,
1380 						 rw_check);
1381 		if (rc)
1382 			break;
1383 	}
1384 
1385 	return rc;
1386 }
1387 
1388 /*
1389  * Check if there is another lock that prevents us to set the lock (mandatory
1390  * style). If such a lock exists, update the flock structure with its
1391  * properties. Otherwise, set the flock type to F_UNLCK if we can cache brlocks
1392  * or leave it the same if we can't. Returns 0 if we don't need to request to
1393  * the server or 1 otherwise.
1394  */
1395 static int
1396 cifs_lock_test(struct cifsFileInfo *cfile, __u64 offset, __u64 length,
1397 	       __u8 type, struct file_lock *flock)
1398 {
1399 	int rc = 0;
1400 	struct cifsLockInfo *conf_lock;
1401 	struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
1402 	struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;
1403 	bool exist;
1404 
1405 	down_read(&cinode->lock_sem);
1406 
1407 	exist = cifs_find_lock_conflict(cfile, offset, length, type,
1408 					flock->fl_flags, &conf_lock,
1409 					CIFS_LOCK_OP);
1410 	if (exist) {
1411 		flock->fl_start = conf_lock->offset;
1412 		flock->fl_end = conf_lock->offset + conf_lock->length - 1;
1413 		flock->fl_pid = conf_lock->pid;
1414 		if (conf_lock->type & server->vals->shared_lock_type)
1415 			flock->fl_type = F_RDLCK;
1416 		else
1417 			flock->fl_type = F_WRLCK;
1418 	} else if (!cinode->can_cache_brlcks)
1419 		rc = 1;
1420 	else
1421 		flock->fl_type = F_UNLCK;
1422 
1423 	up_read(&cinode->lock_sem);
1424 	return rc;
1425 }
1426 
1427 static void
1428 cifs_lock_add(struct cifsFileInfo *cfile, struct cifsLockInfo *lock)
1429 {
1430 	struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
1431 	cifs_down_write(&cinode->lock_sem);
1432 	list_add_tail(&lock->llist, &cfile->llist->locks);
1433 	up_write(&cinode->lock_sem);
1434 }
1435 
/*
 * Set the byte-range lock (mandatory style). Returns:
 * 1) 0, if we set the lock and don't need to request to the server;
 * 2) 1, if no locks prevent us but we need to request to the server;
 * 3) -EACCES, if there is a lock that prevents us and wait is false;
 * 4) the non-zero result of wait_event_interruptible(), if wait is true and
 *    the wait for the conflicting lock did not complete normally.
 */
static int
cifs_lock_add_if(struct cifsFileInfo *cfile, struct cifsLockInfo *lock,
		 bool wait)
{
	struct cifsLockInfo *conf_lock;
	struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
	bool exist;
	int rc = 0;

try_again:
	exist = false;
	cifs_down_write(&cinode->lock_sem);

	exist = cifs_find_lock_conflict(cfile, lock->offset, lock->length,
					lock->type, lock->flags, &conf_lock,
					CIFS_LOCK_OP);
	/* No conflict and brlocks are cached: record the lock locally only. */
	if (!exist && cinode->can_cache_brlcks) {
		list_add_tail(&lock->llist, &cfile->llist->locks);
		up_write(&cinode->lock_sem);
		return rc;
	}

	if (!exist)
		rc = 1;
	else if (!wait)
		rc = -EACCES;
	else {
		/*
		 * Queue ourselves on the conflicting lock and sleep, with
		 * lock_sem dropped, until our blist entry is unlinked again
		 * (both prev and next point back at the entry itself).
		 */
		list_add_tail(&lock->blist, &conf_lock->blist);
		up_write(&cinode->lock_sem);
		rc = wait_event_interruptible(lock->block_q,
					(lock->blist.prev == &lock->blist) &&
					(lock->blist.next == &lock->blist));
		/* Woken normally: re-run the conflict check from scratch. */
		if (!rc)
			goto try_again;
		/* Wait was cut short: remove ourselves from the waiters list. */
		cifs_down_write(&cinode->lock_sem);
		list_del_init(&lock->blist);
	}

	up_write(&cinode->lock_sem);
	return rc;
}
1483 
1484 #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
1485 /*
1486  * Check if there is another lock that prevents us to set the lock (posix
1487  * style). If such a lock exists, update the flock structure with its
1488  * properties. Otherwise, set the flock type to F_UNLCK if we can cache brlocks
1489  * or leave it the same if we can't. Returns 0 if we don't need to request to
1490  * the server or 1 otherwise.
1491  */
1492 static int
1493 cifs_posix_lock_test(struct file *file, struct file_lock *flock)
1494 {
1495 	int rc = 0;
1496 	struct cifsInodeInfo *cinode = CIFS_I(file_inode(file));
1497 	unsigned char saved_type = flock->fl_type;
1498 
1499 	if ((flock->fl_flags & FL_POSIX) == 0)
1500 		return 1;
1501 
1502 	down_read(&cinode->lock_sem);
1503 	posix_test_lock(file, flock);
1504 
1505 	if (flock->fl_type == F_UNLCK && !cinode->can_cache_brlcks) {
1506 		flock->fl_type = saved_type;
1507 		rc = 1;
1508 	}
1509 
1510 	up_read(&cinode->lock_sem);
1511 	return rc;
1512 }
1513 
1514 /*
1515  * Set the byte-range lock (posix style). Returns:
1516  * 1) <0, if the error occurs while setting the lock;
1517  * 2) 0, if we set the lock and don't need to request to the server;
1518  * 3) FILE_LOCK_DEFERRED, if we will wait for some other file_lock;
1519  * 4) FILE_LOCK_DEFERRED + 1, if we need to request to the server.
1520  */
1521 static int
1522 cifs_posix_lock_set(struct file *file, struct file_lock *flock)
1523 {
1524 	struct cifsInodeInfo *cinode = CIFS_I(file_inode(file));
1525 	int rc = FILE_LOCK_DEFERRED + 1;
1526 
1527 	if ((flock->fl_flags & FL_POSIX) == 0)
1528 		return rc;
1529 
1530 	cifs_down_write(&cinode->lock_sem);
1531 	if (!cinode->can_cache_brlcks) {
1532 		up_write(&cinode->lock_sem);
1533 		return rc;
1534 	}
1535 
1536 	rc = posix_lock_file(file, flock, NULL);
1537 	up_write(&cinode->lock_sem);
1538 	return rc;
1539 }
1540 
1541 int
1542 cifs_push_mandatory_locks(struct cifsFileInfo *cfile)
1543 {
1544 	unsigned int xid;
1545 	int rc = 0, stored_rc;
1546 	struct cifsLockInfo *li, *tmp;
1547 	struct cifs_tcon *tcon;
1548 	unsigned int num, max_num, max_buf;
1549 	LOCKING_ANDX_RANGE *buf, *cur;
1550 	static const int types[] = {
1551 		LOCKING_ANDX_LARGE_FILES,
1552 		LOCKING_ANDX_SHARED_LOCK | LOCKING_ANDX_LARGE_FILES
1553 	};
1554 	int i;
1555 
1556 	xid = get_xid();
1557 	tcon = tlink_tcon(cfile->tlink);
1558 
1559 	/*
1560 	 * Accessing maxBuf is racy with cifs_reconnect - need to store value
1561 	 * and check it before using.
1562 	 */
1563 	max_buf = tcon->ses->server->maxBuf;
1564 	if (max_buf < (sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE))) {
1565 		free_xid(xid);
1566 		return -EINVAL;
1567 	}
1568 
1569 	BUILD_BUG_ON(sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE) >
1570 		     PAGE_SIZE);
1571 	max_buf = min_t(unsigned int, max_buf - sizeof(struct smb_hdr),
1572 			PAGE_SIZE);
1573 	max_num = (max_buf - sizeof(struct smb_hdr)) /
1574 						sizeof(LOCKING_ANDX_RANGE);
1575 	buf = kcalloc(max_num, sizeof(LOCKING_ANDX_RANGE), GFP_KERNEL);
1576 	if (!buf) {
1577 		free_xid(xid);
1578 		return -ENOMEM;
1579 	}
1580 
1581 	for (i = 0; i < 2; i++) {
1582 		cur = buf;
1583 		num = 0;
1584 		list_for_each_entry_safe(li, tmp, &cfile->llist->locks, llist) {
1585 			if (li->type != types[i])
1586 				continue;
1587 			cur->Pid = cpu_to_le16(li->pid);
1588 			cur->LengthLow = cpu_to_le32((u32)li->length);
1589 			cur->LengthHigh = cpu_to_le32((u32)(li->length>>32));
1590 			cur->OffsetLow = cpu_to_le32((u32)li->offset);
1591 			cur->OffsetHigh = cpu_to_le32((u32)(li->offset>>32));
1592 			if (++num == max_num) {
1593 				stored_rc = cifs_lockv(xid, tcon,
1594 						       cfile->fid.netfid,
1595 						       (__u8)li->type, 0, num,
1596 						       buf);
1597 				if (stored_rc)
1598 					rc = stored_rc;
1599 				cur = buf;
1600 				num = 0;
1601 			} else
1602 				cur++;
1603 		}
1604 
1605 		if (num) {
1606 			stored_rc = cifs_lockv(xid, tcon, cfile->fid.netfid,
1607 					       (__u8)types[i], 0, num, buf);
1608 			if (stored_rc)
1609 				rc = stored_rc;
1610 		}
1611 	}
1612 
1613 	kfree(buf);
1614 	free_xid(xid);
1615 	return rc;
1616 }
1617 
1618 static __u32
1619 hash_lockowner(fl_owner_t owner)
1620 {
1621 	return cifs_lock_secret ^ hash32_ptr((const void *)owner);
1622 }
1623 #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
1624 
/*
 * Snapshot of one posix lock, taken under the file_lock_context spinlock so
 * that the lock request can be sent to the server after the spinlock has been
 * dropped (see cifs_push_posix_locks()).
 */
struct lock_to_push {
	struct list_head llist;	/* linkage in the local list of locks to send */
	__u64 offset;		/* start of the locked range (fl_start) */
	__u64 length;		/* length of the range (cifs_flock_len()) */
	__u32 pid;		/* hashed lock owner (hash_lockowner()) */
	__u16 netfid;		/* fid of the handle the lock is pushed through */
	__u8 type;		/* CIFS_RDLCK or CIFS_WRLCK */
};
1633 
1634 #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
1635 static int
1636 cifs_push_posix_locks(struct cifsFileInfo *cfile)
1637 {
1638 	struct inode *inode = d_inode(cfile->dentry);
1639 	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1640 	struct file_lock *flock;
1641 	struct file_lock_context *flctx = locks_inode_context(inode);
1642 	unsigned int count = 0, i;
1643 	int rc = 0, xid, type;
1644 	struct list_head locks_to_send, *el;
1645 	struct lock_to_push *lck, *tmp;
1646 	__u64 length;
1647 
1648 	xid = get_xid();
1649 
1650 	if (!flctx)
1651 		goto out;
1652 
1653 	spin_lock(&flctx->flc_lock);
1654 	list_for_each(el, &flctx->flc_posix) {
1655 		count++;
1656 	}
1657 	spin_unlock(&flctx->flc_lock);
1658 
1659 	INIT_LIST_HEAD(&locks_to_send);
1660 
1661 	/*
1662 	 * Allocating count locks is enough because no FL_POSIX locks can be
1663 	 * added to the list while we are holding cinode->lock_sem that
1664 	 * protects locking operations of this inode.
1665 	 */
1666 	for (i = 0; i < count; i++) {
1667 		lck = kmalloc(sizeof(struct lock_to_push), GFP_KERNEL);
1668 		if (!lck) {
1669 			rc = -ENOMEM;
1670 			goto err_out;
1671 		}
1672 		list_add_tail(&lck->llist, &locks_to_send);
1673 	}
1674 
1675 	el = locks_to_send.next;
1676 	spin_lock(&flctx->flc_lock);
1677 	list_for_each_entry(flock, &flctx->flc_posix, fl_list) {
1678 		if (el == &locks_to_send) {
1679 			/*
1680 			 * The list ended. We don't have enough allocated
1681 			 * structures - something is really wrong.
1682 			 */
1683 			cifs_dbg(VFS, "Can't push all brlocks!\n");
1684 			break;
1685 		}
1686 		length = cifs_flock_len(flock);
1687 		if (flock->fl_type == F_RDLCK || flock->fl_type == F_SHLCK)
1688 			type = CIFS_RDLCK;
1689 		else
1690 			type = CIFS_WRLCK;
1691 		lck = list_entry(el, struct lock_to_push, llist);
1692 		lck->pid = hash_lockowner(flock->fl_owner);
1693 		lck->netfid = cfile->fid.netfid;
1694 		lck->length = length;
1695 		lck->type = type;
1696 		lck->offset = flock->fl_start;
1697 	}
1698 	spin_unlock(&flctx->flc_lock);
1699 
1700 	list_for_each_entry_safe(lck, tmp, &locks_to_send, llist) {
1701 		int stored_rc;
1702 
1703 		stored_rc = CIFSSMBPosixLock(xid, tcon, lck->netfid, lck->pid,
1704 					     lck->offset, lck->length, NULL,
1705 					     lck->type, 0);
1706 		if (stored_rc)
1707 			rc = stored_rc;
1708 		list_del(&lck->llist);
1709 		kfree(lck);
1710 	}
1711 
1712 out:
1713 	free_xid(xid);
1714 	return rc;
1715 err_out:
1716 	list_for_each_entry_safe(lck, tmp, &locks_to_send, llist) {
1717 		list_del(&lck->llist);
1718 		kfree(lck);
1719 	}
1720 	goto out;
1721 }
1722 #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
1723 
1724 static int
1725 cifs_push_locks(struct cifsFileInfo *cfile)
1726 {
1727 	struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
1728 	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1729 	int rc = 0;
1730 #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
1731 	struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
1732 #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
1733 
1734 	/* we are going to update can_cache_brlcks here - need a write access */
1735 	cifs_down_write(&cinode->lock_sem);
1736 	if (!cinode->can_cache_brlcks) {
1737 		up_write(&cinode->lock_sem);
1738 		return rc;
1739 	}
1740 
1741 #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
1742 	if (cap_unix(tcon->ses) &&
1743 	    (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
1744 	    ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
1745 		rc = cifs_push_posix_locks(cfile);
1746 	else
1747 #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
1748 		rc = tcon->ses->server->ops->push_mand_locks(cfile);
1749 
1750 	cinode->can_cache_brlcks = false;
1751 	up_write(&cinode->lock_sem);
1752 	return rc;
1753 }
1754 
1755 static void
1756 cifs_read_flock(struct file_lock *flock, __u32 *type, int *lock, int *unlock,
1757 		bool *wait_flag, struct TCP_Server_Info *server)
1758 {
1759 	if (flock->fl_flags & FL_POSIX)
1760 		cifs_dbg(FYI, "Posix\n");
1761 	if (flock->fl_flags & FL_FLOCK)
1762 		cifs_dbg(FYI, "Flock\n");
1763 	if (flock->fl_flags & FL_SLEEP) {
1764 		cifs_dbg(FYI, "Blocking lock\n");
1765 		*wait_flag = true;
1766 	}
1767 	if (flock->fl_flags & FL_ACCESS)
1768 		cifs_dbg(FYI, "Process suspended by mandatory locking - not implemented yet\n");
1769 	if (flock->fl_flags & FL_LEASE)
1770 		cifs_dbg(FYI, "Lease on file - not implemented yet\n");
1771 	if (flock->fl_flags &
1772 	    (~(FL_POSIX | FL_FLOCK | FL_SLEEP |
1773 	       FL_ACCESS | FL_LEASE | FL_CLOSE | FL_OFDLCK)))
1774 		cifs_dbg(FYI, "Unknown lock flags 0x%x\n", flock->fl_flags);
1775 
1776 	*type = server->vals->large_lock_type;
1777 	if (flock->fl_type == F_WRLCK) {
1778 		cifs_dbg(FYI, "F_WRLCK\n");
1779 		*type |= server->vals->exclusive_lock_type;
1780 		*lock = 1;
1781 	} else if (flock->fl_type == F_UNLCK) {
1782 		cifs_dbg(FYI, "F_UNLCK\n");
1783 		*type |= server->vals->unlock_lock_type;
1784 		*unlock = 1;
1785 		/* Check if unlock includes more than one lock range */
1786 	} else if (flock->fl_type == F_RDLCK) {
1787 		cifs_dbg(FYI, "F_RDLCK\n");
1788 		*type |= server->vals->shared_lock_type;
1789 		*lock = 1;
1790 	} else if (flock->fl_type == F_EXLCK) {
1791 		cifs_dbg(FYI, "F_EXLCK\n");
1792 		*type |= server->vals->exclusive_lock_type;
1793 		*lock = 1;
1794 	} else if (flock->fl_type == F_SHLCK) {
1795 		cifs_dbg(FYI, "F_SHLCK\n");
1796 		*type |= server->vals->shared_lock_type;
1797 		*lock = 1;
1798 	} else
1799 		cifs_dbg(FYI, "Unknown type of lock\n");
1800 }
1801 
/*
 * Handle a "test lock" request: report in @flock whether the described lock
 * could be set.
 *
 * The local lock state is consulted first. Only if that cannot answer the
 * question is the server probed, by actually trying to take the lock and
 * releasing it again on success.
 *
 * In the mandatory-lock path the function returns 0 and conveys the result
 * through flock->fl_type (and, for locally found conflicts, the range/pid
 * fields). In the posix path the result of the local test or of
 * CIFSSMBPosixLock() is returned.
 */
static int
cifs_getlk(struct file *file, struct file_lock *flock, __u32 type,
	   bool wait_flag, bool posix_lck, unsigned int xid)
{
	int rc = 0;
	__u64 length = cifs_flock_len(flock);
	struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
	struct TCP_Server_Info *server = tcon->ses->server;
#ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
	__u16 netfid = cfile->fid.netfid;

	if (posix_lck) {
		int posix_lock_type;

		/* rc == 0: the local lock state already gave the answer. */
		rc = cifs_posix_lock_test(file, flock);
		if (!rc)
			return rc;

		if (type & server->vals->shared_lock_type)
			posix_lock_type = CIFS_RDLCK;
		else
			posix_lock_type = CIFS_WRLCK;
		rc = CIFSSMBPosixLock(xid, tcon, netfid,
				      hash_lockowner(flock->fl_owner),
				      flock->fl_start, length, flock,
				      posix_lock_type, wait_flag);
		return rc;
	}
#endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */

	/* rc == 0: answered from the locally recorded locks. */
	rc = cifs_lock_test(cfile, flock->fl_start, length, type, flock);
	if (!rc)
		return rc;

	/* Probe the server: try to take the requested lock ... */
	/* BB we could chain these into one lock request BB */
	rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length, type,
				    1, 0, false);
	if (rc == 0) {
		/* ... it worked, so release it again and report "no conflict". */
		rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
					    type, 0, 1, false);
		flock->fl_type = F_UNLCK;
		if (rc != 0)
			cifs_dbg(VFS, "Error unlocking previously locked range %d during test of lock\n",
				 rc);
		return 0;
	}

	/* A shared request was refused: report a conflicting write lock. */
	if (type & server->vals->shared_lock_type) {
		flock->fl_type = F_WRLCK;
		return 0;
	}

	/*
	 * An exclusive request was refused. Retry with a shared lock to find
	 * out which kind of lock is in the way.
	 */
	type &= ~server->vals->exclusive_lock_type;

	rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
				    type | server->vals->shared_lock_type,
				    1, 0, false);
	if (rc == 0) {
		/* Shared lock succeeded: release it and report a read lock. */
		rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
			type | server->vals->shared_lock_type, 0, 1, false);
		flock->fl_type = F_RDLCK;
		if (rc != 0)
			cifs_dbg(VFS, "Error unlocking previously locked range %d during test of lock\n",
				 rc);
	} else
		flock->fl_type = F_WRLCK;

	return 0;
}
1872 
1873 void
1874 cifs_move_llist(struct list_head *source, struct list_head *dest)
1875 {
1876 	struct list_head *li, *tmp;
1877 	list_for_each_safe(li, tmp, source)
1878 		list_move(li, dest);
1879 }
1880 
1881 void
1882 cifs_free_llist(struct list_head *llist)
1883 {
1884 	struct cifsLockInfo *li, *tmp;
1885 	list_for_each_entry_safe(li, tmp, llist, llist) {
1886 		cifs_del_lock_waiters(li);
1887 		list_del(&li->llist);
1888 		kfree(li);
1889 	}
1890 }
1891 
1892 #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
1893 int
1894 cifs_unlock_range(struct cifsFileInfo *cfile, struct file_lock *flock,
1895 		  unsigned int xid)
1896 {
1897 	int rc = 0, stored_rc;
1898 	static const int types[] = {
1899 		LOCKING_ANDX_LARGE_FILES,
1900 		LOCKING_ANDX_SHARED_LOCK | LOCKING_ANDX_LARGE_FILES
1901 	};
1902 	unsigned int i;
1903 	unsigned int max_num, num, max_buf;
1904 	LOCKING_ANDX_RANGE *buf, *cur;
1905 	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1906 	struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
1907 	struct cifsLockInfo *li, *tmp;
1908 	__u64 length = cifs_flock_len(flock);
1909 	struct list_head tmp_llist;
1910 
1911 	INIT_LIST_HEAD(&tmp_llist);
1912 
1913 	/*
1914 	 * Accessing maxBuf is racy with cifs_reconnect - need to store value
1915 	 * and check it before using.
1916 	 */
1917 	max_buf = tcon->ses->server->maxBuf;
1918 	if (max_buf < (sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE)))
1919 		return -EINVAL;
1920 
1921 	BUILD_BUG_ON(sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE) >
1922 		     PAGE_SIZE);
1923 	max_buf = min_t(unsigned int, max_buf - sizeof(struct smb_hdr),
1924 			PAGE_SIZE);
1925 	max_num = (max_buf - sizeof(struct smb_hdr)) /
1926 						sizeof(LOCKING_ANDX_RANGE);
1927 	buf = kcalloc(max_num, sizeof(LOCKING_ANDX_RANGE), GFP_KERNEL);
1928 	if (!buf)
1929 		return -ENOMEM;
1930 
1931 	cifs_down_write(&cinode->lock_sem);
1932 	for (i = 0; i < 2; i++) {
1933 		cur = buf;
1934 		num = 0;
1935 		list_for_each_entry_safe(li, tmp, &cfile->llist->locks, llist) {
1936 			if (flock->fl_start > li->offset ||
1937 			    (flock->fl_start + length) <
1938 			    (li->offset + li->length))
1939 				continue;
1940 			if (current->tgid != li->pid)
1941 				continue;
1942 			if (types[i] != li->type)
1943 				continue;
1944 			if (cinode->can_cache_brlcks) {
1945 				/*
1946 				 * We can cache brlock requests - simply remove
1947 				 * a lock from the file's list.
1948 				 */
1949 				list_del(&li->llist);
1950 				cifs_del_lock_waiters(li);
1951 				kfree(li);
1952 				continue;
1953 			}
1954 			cur->Pid = cpu_to_le16(li->pid);
1955 			cur->LengthLow = cpu_to_le32((u32)li->length);
1956 			cur->LengthHigh = cpu_to_le32((u32)(li->length>>32));
1957 			cur->OffsetLow = cpu_to_le32((u32)li->offset);
1958 			cur->OffsetHigh = cpu_to_le32((u32)(li->offset>>32));
1959 			/*
1960 			 * We need to save a lock here to let us add it again to
1961 			 * the file's list if the unlock range request fails on
1962 			 * the server.
1963 			 */
1964 			list_move(&li->llist, &tmp_llist);
1965 			if (++num == max_num) {
1966 				stored_rc = cifs_lockv(xid, tcon,
1967 						       cfile->fid.netfid,
1968 						       li->type, num, 0, buf);
1969 				if (stored_rc) {
1970 					/*
1971 					 * We failed on the unlock range
1972 					 * request - add all locks from the tmp
1973 					 * list to the head of the file's list.
1974 					 */
1975 					cifs_move_llist(&tmp_llist,
1976 							&cfile->llist->locks);
1977 					rc = stored_rc;
1978 				} else
1979 					/*
1980 					 * The unlock range request succeed -
1981 					 * free the tmp list.
1982 					 */
1983 					cifs_free_llist(&tmp_llist);
1984 				cur = buf;
1985 				num = 0;
1986 			} else
1987 				cur++;
1988 		}
1989 		if (num) {
1990 			stored_rc = cifs_lockv(xid, tcon, cfile->fid.netfid,
1991 					       types[i], num, 0, buf);
1992 			if (stored_rc) {
1993 				cifs_move_llist(&tmp_llist,
1994 						&cfile->llist->locks);
1995 				rc = stored_rc;
1996 			} else
1997 				cifs_free_llist(&tmp_llist);
1998 		}
1999 	}
2000 
2001 	up_write(&cinode->lock_sem);
2002 	kfree(buf);
2003 	return rc;
2004 }
2005 #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
2006 
2007 static int
2008 cifs_setlk(struct file *file, struct file_lock *flock, __u32 type,
2009 	   bool wait_flag, bool posix_lck, int lock, int unlock,
2010 	   unsigned int xid)
2011 {
2012 	int rc = 0;
2013 	__u64 length = cifs_flock_len(flock);
2014 	struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
2015 	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
2016 	struct TCP_Server_Info *server = tcon->ses->server;
2017 	struct inode *inode = d_inode(cfile->dentry);
2018 
2019 #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
2020 	if (posix_lck) {
2021 		int posix_lock_type;
2022 
2023 		rc = cifs_posix_lock_set(file, flock);
2024 		if (rc <= FILE_LOCK_DEFERRED)
2025 			return rc;
2026 
2027 		if (type & server->vals->shared_lock_type)
2028 			posix_lock_type = CIFS_RDLCK;
2029 		else
2030 			posix_lock_type = CIFS_WRLCK;
2031 
2032 		if (unlock == 1)
2033 			posix_lock_type = CIFS_UNLCK;
2034 
2035 		rc = CIFSSMBPosixLock(xid, tcon, cfile->fid.netfid,
2036 				      hash_lockowner(flock->fl_owner),
2037 				      flock->fl_start, length,
2038 				      NULL, posix_lock_type, wait_flag);
2039 		goto out;
2040 	}
2041 #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
2042 	if (lock) {
2043 		struct cifsLockInfo *lock;
2044 
2045 		lock = cifs_lock_init(flock->fl_start, length, type,
2046 				      flock->fl_flags);
2047 		if (!lock)
2048 			return -ENOMEM;
2049 
2050 		rc = cifs_lock_add_if(cfile, lock, wait_flag);
2051 		if (rc < 0) {
2052 			kfree(lock);
2053 			return rc;
2054 		}
2055 		if (!rc)
2056 			goto out;
2057 
2058 		/*
2059 		 * Windows 7 server can delay breaking lease from read to None
2060 		 * if we set a byte-range lock on a file - break it explicitly
2061 		 * before sending the lock to the server to be sure the next
2062 		 * read won't conflict with non-overlapted locks due to
2063 		 * pagereading.
2064 		 */
2065 		if (!CIFS_CACHE_WRITE(CIFS_I(inode)) &&
2066 					CIFS_CACHE_READ(CIFS_I(inode))) {
2067 			cifs_zap_mapping(inode);
2068 			cifs_dbg(FYI, "Set no oplock for inode=%p due to mand locks\n",
2069 				 inode);
2070 			CIFS_I(inode)->oplock = 0;
2071 		}
2072 
2073 		rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
2074 					    type, 1, 0, wait_flag);
2075 		if (rc) {
2076 			kfree(lock);
2077 			return rc;
2078 		}
2079 
2080 		cifs_lock_add(cfile, lock);
2081 	} else if (unlock)
2082 		rc = server->ops->mand_unlock_range(cfile, flock, xid);
2083 
2084 out:
2085 	if ((flock->fl_flags & FL_POSIX) || (flock->fl_flags & FL_FLOCK)) {
2086 		/*
2087 		 * If this is a request to remove all locks because we
2088 		 * are closing the file, it doesn't matter if the
2089 		 * unlocking failed as both cifs.ko and the SMB server
2090 		 * remove the lock on file close
2091 		 */
2092 		if (rc) {
2093 			cifs_dbg(VFS, "%s failed rc=%d\n", __func__, rc);
2094 			if (!(flock->fl_flags & FL_CLOSE))
2095 				return rc;
2096 		}
2097 		rc = locks_lock_file_wait(file, flock);
2098 	}
2099 	return rc;
2100 }
2101 
2102 int cifs_flock(struct file *file, int cmd, struct file_lock *fl)
2103 {
2104 	int rc, xid;
2105 	int lock = 0, unlock = 0;
2106 	bool wait_flag = false;
2107 	bool posix_lck = false;
2108 	struct cifs_sb_info *cifs_sb;
2109 	struct cifs_tcon *tcon;
2110 	struct cifsFileInfo *cfile;
2111 	__u32 type;
2112 
2113 	xid = get_xid();
2114 
2115 	if (!(fl->fl_flags & FL_FLOCK)) {
2116 		rc = -ENOLCK;
2117 		free_xid(xid);
2118 		return rc;
2119 	}
2120 
2121 	cfile = (struct cifsFileInfo *)file->private_data;
2122 	tcon = tlink_tcon(cfile->tlink);
2123 
2124 	cifs_read_flock(fl, &type, &lock, &unlock, &wait_flag,
2125 			tcon->ses->server);
2126 	cifs_sb = CIFS_FILE_SB(file);
2127 
2128 	if (cap_unix(tcon->ses) &&
2129 	    (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
2130 	    ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
2131 		posix_lck = true;
2132 
2133 	if (!lock && !unlock) {
2134 		/*
2135 		 * if no lock or unlock then nothing to do since we do not
2136 		 * know what it is
2137 		 */
2138 		rc = -EOPNOTSUPP;
2139 		free_xid(xid);
2140 		return rc;
2141 	}
2142 
2143 	rc = cifs_setlk(file, fl, type, wait_flag, posix_lck, lock, unlock,
2144 			xid);
2145 	free_xid(xid);
2146 	return rc;
2147 
2148 
2149 }
2150 
2151 int cifs_lock(struct file *file, int cmd, struct file_lock *flock)
2152 {
2153 	int rc, xid;
2154 	int lock = 0, unlock = 0;
2155 	bool wait_flag = false;
2156 	bool posix_lck = false;
2157 	struct cifs_sb_info *cifs_sb;
2158 	struct cifs_tcon *tcon;
2159 	struct cifsFileInfo *cfile;
2160 	__u32 type;
2161 
2162 	rc = -EACCES;
2163 	xid = get_xid();
2164 
2165 	cifs_dbg(FYI, "%s: %pD2 cmd=0x%x type=0x%x flags=0x%x r=%lld:%lld\n", __func__, file, cmd,
2166 		 flock->fl_flags, flock->fl_type, (long long)flock->fl_start,
2167 		 (long long)flock->fl_end);
2168 
2169 	cfile = (struct cifsFileInfo *)file->private_data;
2170 	tcon = tlink_tcon(cfile->tlink);
2171 
2172 	cifs_read_flock(flock, &type, &lock, &unlock, &wait_flag,
2173 			tcon->ses->server);
2174 	cifs_sb = CIFS_FILE_SB(file);
2175 	set_bit(CIFS_INO_CLOSE_ON_LOCK, &CIFS_I(d_inode(cfile->dentry))->flags);
2176 
2177 	if (cap_unix(tcon->ses) &&
2178 	    (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
2179 	    ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
2180 		posix_lck = true;
2181 	/*
2182 	 * BB add code here to normalize offset and length to account for
2183 	 * negative length which we can not accept over the wire.
2184 	 */
2185 	if (IS_GETLK(cmd)) {
2186 		rc = cifs_getlk(file, flock, type, wait_flag, posix_lck, xid);
2187 		free_xid(xid);
2188 		return rc;
2189 	}
2190 
2191 	if (!lock && !unlock) {
2192 		/*
2193 		 * if no lock or unlock then nothing to do since we do not
2194 		 * know what it is
2195 		 */
2196 		free_xid(xid);
2197 		return -EOPNOTSUPP;
2198 	}
2199 
2200 	rc = cifs_setlk(file, flock, type, wait_flag, posix_lck, lock, unlock,
2201 			xid);
2202 	free_xid(xid);
2203 	return rc;
2204 }
2205 
2206 /*
2207  * update the file size (if needed) after a write. Should be called with
2208  * the inode->i_lock held
2209  */
2210 void
2211 cifs_update_eof(struct cifsInodeInfo *cifsi, loff_t offset,
2212 		      unsigned int bytes_written)
2213 {
2214 	loff_t end_of_write = offset + bytes_written;
2215 
2216 	if (end_of_write > cifsi->server_eof)
2217 		cifsi->server_eof = end_of_write;
2218 }
2219 
2220 static ssize_t
2221 cifs_write(struct cifsFileInfo *open_file, __u32 pid, const char *write_data,
2222 	   size_t write_size, loff_t *offset)
2223 {
2224 	int rc = 0;
2225 	unsigned int bytes_written = 0;
2226 	unsigned int total_written;
2227 	struct cifs_tcon *tcon;
2228 	struct TCP_Server_Info *server;
2229 	unsigned int xid;
2230 	struct dentry *dentry = open_file->dentry;
2231 	struct cifsInodeInfo *cifsi = CIFS_I(d_inode(dentry));
2232 	struct cifs_io_parms io_parms = {0};
2233 
2234 	cifs_dbg(FYI, "write %zd bytes to offset %lld of %pd\n",
2235 		 write_size, *offset, dentry);
2236 
2237 	tcon = tlink_tcon(open_file->tlink);
2238 	server = tcon->ses->server;
2239 
2240 	if (!server->ops->sync_write)
2241 		return -ENOSYS;
2242 
2243 	xid = get_xid();
2244 
2245 	for (total_written = 0; write_size > total_written;
2246 	     total_written += bytes_written) {
2247 		rc = -EAGAIN;
2248 		while (rc == -EAGAIN) {
2249 			struct kvec iov[2];
2250 			unsigned int len;
2251 
2252 			if (open_file->invalidHandle) {
2253 				/* we could deadlock if we called
2254 				   filemap_fdatawait from here so tell
2255 				   reopen_file not to flush data to
2256 				   server now */
2257 				rc = cifs_reopen_file(open_file, false);
2258 				if (rc != 0)
2259 					break;
2260 			}
2261 
2262 			len = min(server->ops->wp_retry_size(d_inode(dentry)),
2263 				  (unsigned int)write_size - total_written);
2264 			/* iov[0] is reserved for smb header */
2265 			iov[1].iov_base = (char *)write_data + total_written;
2266 			iov[1].iov_len = len;
2267 			io_parms.pid = pid;
2268 			io_parms.tcon = tcon;
2269 			io_parms.offset = *offset;
2270 			io_parms.length = len;
2271 			rc = server->ops->sync_write(xid, &open_file->fid,
2272 					&io_parms, &bytes_written, iov, 1);
2273 		}
2274 		if (rc || (bytes_written == 0)) {
2275 			if (total_written)
2276 				break;
2277 			else {
2278 				free_xid(xid);
2279 				return rc;
2280 			}
2281 		} else {
2282 			spin_lock(&d_inode(dentry)->i_lock);
2283 			cifs_update_eof(cifsi, *offset, bytes_written);
2284 			spin_unlock(&d_inode(dentry)->i_lock);
2285 			*offset += bytes_written;
2286 		}
2287 	}
2288 
2289 	cifs_stats_bytes_written(tcon, total_written);
2290 
2291 	if (total_written > 0) {
2292 		spin_lock(&d_inode(dentry)->i_lock);
2293 		if (*offset > d_inode(dentry)->i_size) {
2294 			i_size_write(d_inode(dentry), *offset);
2295 			d_inode(dentry)->i_blocks = (512 - 1 + *offset) >> 9;
2296 		}
2297 		spin_unlock(&d_inode(dentry)->i_lock);
2298 	}
2299 	mark_inode_dirty_sync(d_inode(dentry));
2300 	free_xid(xid);
2301 	return total_written;
2302 }
2303 
2304 struct cifsFileInfo *find_readable_file(struct cifsInodeInfo *cifs_inode,
2305 					bool fsuid_only)
2306 {
2307 	struct cifsFileInfo *open_file = NULL;
2308 	struct cifs_sb_info *cifs_sb = CIFS_SB(cifs_inode->netfs.inode.i_sb);
2309 
2310 	/* only filter by fsuid on multiuser mounts */
2311 	if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER))
2312 		fsuid_only = false;
2313 
2314 	spin_lock(&cifs_inode->open_file_lock);
2315 	/* we could simply get the first_list_entry since write-only entries
2316 	   are always at the end of the list but since the first entry might
2317 	   have a close pending, we go through the whole list */
2318 	list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
2319 		if (fsuid_only && !uid_eq(open_file->uid, current_fsuid()))
2320 			continue;
2321 		if (OPEN_FMODE(open_file->f_flags) & FMODE_READ) {
2322 			if ((!open_file->invalidHandle)) {
2323 				/* found a good file */
2324 				/* lock it so it will not be closed on us */
2325 				cifsFileInfo_get(open_file);
2326 				spin_unlock(&cifs_inode->open_file_lock);
2327 				return open_file;
2328 			} /* else might as well continue, and look for
2329 			     another, or simply have the caller reopen it
2330 			     again rather than trying to fix this handle */
2331 		} else /* write only file */
2332 			break; /* write only files are last so must be done */
2333 	}
2334 	spin_unlock(&cifs_inode->open_file_lock);
2335 	return NULL;
2336 }
2337 
/*
 * Find an open handle on @cifs_inode that was opened for writing.
 *
 * @flags may contain FIND_WR_FSUID_ONLY (only consider handles owned by the
 * current fsuid; honoured on multiuser mounts only) and FIND_WR_WITH_DELETE
 * (only consider handles whose access mask includes DELETE).
 *
 * The open-file list is searched first for handles whose pid matches
 * current->tgid and then for any handle.  If only an invalid handle is found
 * it is reopened; if that fails the handle is moved to the tail of the list
 * and the search is repeated, up to MAX_REOPEN_ATT times.
 *
 * On success *@ret_file is set, cifsFileInfo_get() has been called on it, and
 * 0 is returned.  Return -EBADF if no handle is found and general rc
 * otherwise; *@ret_file is NULL in that case.
 */
int
cifs_get_writable_file(struct cifsInodeInfo *cifs_inode, int flags,
		       struct cifsFileInfo **ret_file)
{
	struct cifsFileInfo *open_file, *inv_file = NULL;
	struct cifs_sb_info *cifs_sb;
	bool any_available = false;
	int rc = -EBADF;
	unsigned int refind = 0;
	bool fsuid_only = flags & FIND_WR_FSUID_ONLY;
	bool with_delete = flags & FIND_WR_WITH_DELETE;
	*ret_file = NULL;

	/*
	 * Having a null inode here (because mapping->host was set to zero by
	 * the VFS or MM) should not happen but we had reports of an oops (due
	 * to it being zero) during stress testcases so we need to check for it
	 */

	if (cifs_inode == NULL) {
		cifs_dbg(VFS, "Null inode passed to cifs_writeable_file\n");
		dump_stack();
		return rc;
	}

	cifs_sb = CIFS_SB(cifs_inode->netfs.inode.i_sb);

	/* only filter by fsuid on multiuser mounts */
	if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER))
		fsuid_only = false;

	spin_lock(&cifs_inode->open_file_lock);
refind_writable:
	/* Entered with open_file_lock held; give up after too many reopens */
	if (refind > MAX_REOPEN_ATT) {
		spin_unlock(&cifs_inode->open_file_lock);
		return rc;
	}
	list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
		/* First pass: only handles opened by the calling process */
		if (!any_available && open_file->pid != current->tgid)
			continue;
		if (fsuid_only && !uid_eq(open_file->uid, current_fsuid()))
			continue;
		if (with_delete && !(open_file->fid.access & DELETE))
			continue;
		if (OPEN_FMODE(open_file->f_flags) & FMODE_WRITE) {
			if (!open_file->invalidHandle) {
				/* found a good writable file */
				cifsFileInfo_get(open_file);
				spin_unlock(&cifs_inode->open_file_lock);
				*ret_file = open_file;
				return 0;
			} else {
				/* Remember the first invalid candidate */
				if (!inv_file)
					inv_file = open_file;
			}
		}
	}
	/* couldn't find useable FH with same pid, try any available */
	if (!any_available) {
		any_available = true;
		goto refind_writable;
	}

	if (inv_file) {
		/* Restart from the same-pid pass if the reopen below fails */
		any_available = false;
		/* Pin the handle before the lock is dropped for the reopen */
		cifsFileInfo_get(inv_file);
	}

	spin_unlock(&cifs_inode->open_file_lock);

	if (inv_file) {
		rc = cifs_reopen_file(inv_file, false);
		if (!rc) {
			*ret_file = inv_file;
			return 0;
		}

		/*
		 * Reopen failed: move the handle to the end of the list so
		 * that other candidates are seen first on the next search.
		 */
		spin_lock(&cifs_inode->open_file_lock);
		list_move_tail(&inv_file->flist, &cifs_inode->openFileList);
		spin_unlock(&cifs_inode->open_file_lock);
		cifsFileInfo_put(inv_file);
		++refind;
		inv_file = NULL;
		spin_lock(&cifs_inode->open_file_lock);
		goto refind_writable;
	}

	return rc;
}
2428 
2429 struct cifsFileInfo *
2430 find_writable_file(struct cifsInodeInfo *cifs_inode, int flags)
2431 {
2432 	struct cifsFileInfo *cfile;
2433 	int rc;
2434 
2435 	rc = cifs_get_writable_file(cifs_inode, flags, &cfile);
2436 	if (rc)
2437 		cifs_dbg(FYI, "Couldn't find writable handle rc=%d\n", rc);
2438 
2439 	return cfile;
2440 }
2441 
2442 int
2443 cifs_get_writable_path(struct cifs_tcon *tcon, const char *name,
2444 		       int flags,
2445 		       struct cifsFileInfo **ret_file)
2446 {
2447 	struct cifsFileInfo *cfile;
2448 	void *page = alloc_dentry_path();
2449 
2450 	*ret_file = NULL;
2451 
2452 	spin_lock(&tcon->open_file_lock);
2453 	list_for_each_entry(cfile, &tcon->openFileList, tlist) {
2454 		struct cifsInodeInfo *cinode;
2455 		const char *full_path = build_path_from_dentry(cfile->dentry, page);
2456 		if (IS_ERR(full_path)) {
2457 			spin_unlock(&tcon->open_file_lock);
2458 			free_dentry_path(page);
2459 			return PTR_ERR(full_path);
2460 		}
2461 		if (strcmp(full_path, name))
2462 			continue;
2463 
2464 		cinode = CIFS_I(d_inode(cfile->dentry));
2465 		spin_unlock(&tcon->open_file_lock);
2466 		free_dentry_path(page);
2467 		return cifs_get_writable_file(cinode, flags, ret_file);
2468 	}
2469 
2470 	spin_unlock(&tcon->open_file_lock);
2471 	free_dentry_path(page);
2472 	return -ENOENT;
2473 }
2474 
2475 int
2476 cifs_get_readable_path(struct cifs_tcon *tcon, const char *name,
2477 		       struct cifsFileInfo **ret_file)
2478 {
2479 	struct cifsFileInfo *cfile;
2480 	void *page = alloc_dentry_path();
2481 
2482 	*ret_file = NULL;
2483 
2484 	spin_lock(&tcon->open_file_lock);
2485 	list_for_each_entry(cfile, &tcon->openFileList, tlist) {
2486 		struct cifsInodeInfo *cinode;
2487 		const char *full_path = build_path_from_dentry(cfile->dentry, page);
2488 		if (IS_ERR(full_path)) {
2489 			spin_unlock(&tcon->open_file_lock);
2490 			free_dentry_path(page);
2491 			return PTR_ERR(full_path);
2492 		}
2493 		if (strcmp(full_path, name))
2494 			continue;
2495 
2496 		cinode = CIFS_I(d_inode(cfile->dentry));
2497 		spin_unlock(&tcon->open_file_lock);
2498 		free_dentry_path(page);
2499 		*ret_file = find_readable_file(cinode, 0);
2500 		return *ret_file ? 0 : -ENOENT;
2501 	}
2502 
2503 	spin_unlock(&tcon->open_file_lock);
2504 	free_dentry_path(page);
2505 	return -ENOENT;
2506 }
2507 
2508 void
2509 cifs_writedata_release(struct kref *refcount)
2510 {
2511 	struct cifs_writedata *wdata = container_of(refcount,
2512 					struct cifs_writedata, refcount);
2513 #ifdef CONFIG_CIFS_SMB_DIRECT
2514 	if (wdata->mr) {
2515 		smbd_deregister_mr(wdata->mr);
2516 		wdata->mr = NULL;
2517 	}
2518 #endif
2519 
2520 	if (wdata->cfile)
2521 		cifsFileInfo_put(wdata->cfile);
2522 
2523 	kfree(wdata);
2524 }
2525 
/*
 * Write failed with a retryable error. Resend the write request. It's also
 * possible that the page was redirtied so re-clean the page.
 *
 * The original request is resent as one or more new cifs_writedata requests,
 * each no larger than the server's wp_retry_size().  Whatever part of the
 * range is not resubmitted is handed to cifs_pages_write_failed(), and a
 * non-retryable error is recorded on the mapping.  The reference on @wdata
 * is dropped before returning.
 */
static void
cifs_writev_requeue(struct cifs_writedata *wdata)
{
	int rc = 0;
	struct inode *inode = d_inode(wdata->cfile->dentry);
	struct TCP_Server_Info *server;
	unsigned int rest_len = wdata->bytes;
	loff_t fpos = wdata->offset;

	server = tlink_tcon(wdata->cfile->tlink)->ses->server;
	do {
		struct cifs_writedata *wdata2;
		unsigned int wsize, cur_len;

		/* Work out how much can be sent in this round */
		wsize = server->ops->wp_retry_size(inode);
		if (wsize < rest_len) {
			/* A split chunk must cover at least one whole page */
			if (wsize < PAGE_SIZE) {
				rc = -EOPNOTSUPP;
				break;
			}
			cur_len = min(round_down(wsize, PAGE_SIZE), rest_len);
		} else {
			cur_len = rest_len;
		}

		wdata2 = cifs_writedata_alloc(cifs_writev_complete);
		if (!wdata2) {
			rc = -ENOMEM;
			break;
		}

		wdata2->sync_mode = wdata->sync_mode;
		wdata2->offset	= fpos;
		wdata2->bytes	= cur_len;
		wdata2->iter	= wdata->iter;

		/* Narrow the copied iterator to just this chunk */
		iov_iter_advance(&wdata2->iter, fpos - wdata->offset);
		iov_iter_truncate(&wdata2->iter, wdata2->bytes);

		if (iov_iter_is_xarray(&wdata2->iter))
			/* Check for pages having been redirtied and clean
			 * them.  We can do this by walking the xarray.  If
			 * it's not an xarray, then it's a DIO and we shouldn't
			 * be mucking around with the page bits.
			 */
			cifs_undirty_folios(inode, fpos, cur_len);

		rc = cifs_get_writable_file(CIFS_I(inode), FIND_WR_ANY,
					    &wdata2->cfile);
		if (!wdata2->cfile) {
			cifs_dbg(VFS, "No writable handle to retry writepages rc=%d\n",
				 rc);
			if (!is_retryable_error(rc))
				rc = -EBADF;
		} else {
			wdata2->pid = wdata2->cfile->pid;
			rc = server->ops->async_writev(wdata2,
						       cifs_writedata_release);
		}

		/* Drop this function's reference on the new request */
		kref_put(&wdata2->refcount, cifs_writedata_release);
		if (rc) {
			/* Retryable: try the same chunk again */
			if (is_retryable_error(rc))
				continue;
			/* Hard failure: skip this chunk and stop */
			fpos += cur_len;
			rest_len -= cur_len;
			break;
		}

		fpos += cur_len;
		rest_len -= cur_len;
	} while (rest_len > 0);

	/* Clean up remaining pages from the original wdata */
	if (iov_iter_is_xarray(&wdata->iter))
		cifs_pages_write_failed(inode, fpos, rest_len);

	if (rc != 0 && !is_retryable_error(rc))
		mapping_set_error(inode->i_mapping, rc);
	kref_put(&wdata->refcount, cifs_writedata_release);
}
2611 
2612 void
2613 cifs_writev_complete(struct work_struct *work)
2614 {
2615 	struct cifs_writedata *wdata = container_of(work,
2616 						struct cifs_writedata, work);
2617 	struct inode *inode = d_inode(wdata->cfile->dentry);
2618 
2619 	if (wdata->result == 0) {
2620 		spin_lock(&inode->i_lock);
2621 		cifs_update_eof(CIFS_I(inode), wdata->offset, wdata->bytes);
2622 		spin_unlock(&inode->i_lock);
2623 		cifs_stats_bytes_written(tlink_tcon(wdata->cfile->tlink),
2624 					 wdata->bytes);
2625 	} else if (wdata->sync_mode == WB_SYNC_ALL && wdata->result == -EAGAIN)
2626 		return cifs_writev_requeue(wdata);
2627 
2628 	if (wdata->result == -EAGAIN)
2629 		cifs_pages_write_redirty(inode, wdata->offset, wdata->bytes);
2630 	else if (wdata->result < 0)
2631 		cifs_pages_write_failed(inode, wdata->offset, wdata->bytes);
2632 	else
2633 		cifs_pages_written_back(inode, wdata->offset, wdata->bytes);
2634 
2635 	if (wdata->result != -EAGAIN)
2636 		mapping_set_error(inode->i_mapping, wdata->result);
2637 	kref_put(&wdata->refcount, cifs_writedata_release);
2638 }
2639 
2640 struct cifs_writedata *cifs_writedata_alloc(work_func_t complete)
2641 {
2642 	struct cifs_writedata *wdata;
2643 
2644 	wdata = kzalloc(sizeof(*wdata), GFP_NOFS);
2645 	if (wdata != NULL) {
2646 		kref_init(&wdata->refcount);
2647 		INIT_LIST_HEAD(&wdata->list);
2648 		init_completion(&wdata->done);
2649 		INIT_WORK(&wdata->work, complete);
2650 	}
2651 	return wdata;
2652 }
2653 
2654 static int cifs_partialpagewrite(struct page *page, unsigned from, unsigned to)
2655 {
2656 	struct address_space *mapping = page->mapping;
2657 	loff_t offset = (loff_t)page->index << PAGE_SHIFT;
2658 	char *write_data;
2659 	int rc = -EFAULT;
2660 	int bytes_written = 0;
2661 	struct inode *inode;
2662 	struct cifsFileInfo *open_file;
2663 
2664 	if (!mapping || !mapping->host)
2665 		return -EFAULT;
2666 
2667 	inode = page->mapping->host;
2668 
2669 	offset += (loff_t)from;
2670 	write_data = kmap(page);
2671 	write_data += from;
2672 
2673 	if ((to > PAGE_SIZE) || (from > to)) {
2674 		kunmap(page);
2675 		return -EIO;
2676 	}
2677 
2678 	/* racing with truncate? */
2679 	if (offset > mapping->host->i_size) {
2680 		kunmap(page);
2681 		return 0; /* don't care */
2682 	}
2683 
2684 	/* check to make sure that we are not extending the file */
2685 	if (mapping->host->i_size - offset < (loff_t)to)
2686 		to = (unsigned)(mapping->host->i_size - offset);
2687 
2688 	rc = cifs_get_writable_file(CIFS_I(mapping->host), FIND_WR_ANY,
2689 				    &open_file);
2690 	if (!rc) {
2691 		bytes_written = cifs_write(open_file, open_file->pid,
2692 					   write_data, to - from, &offset);
2693 		cifsFileInfo_put(open_file);
2694 		/* Does mm or vfs already set times? */
2695 		simple_inode_init_ts(inode);
2696 		if ((bytes_written > 0) && (offset))
2697 			rc = 0;
2698 		else if (bytes_written < 0)
2699 			rc = bytes_written;
2700 		else
2701 			rc = -EFAULT;
2702 	} else {
2703 		cifs_dbg(FYI, "No writable handle for write page rc=%d\n", rc);
2704 		if (!is_retryable_error(rc))
2705 			rc = -EIO;
2706 	}
2707 
2708 	kunmap(page);
2709 	return rc;
2710 }
2711 
2712 /*
2713  * Extend the region to be written back to include subsequent contiguously
2714  * dirty pages if possible, but don't sleep while doing so.
2715  */
2716 static void cifs_extend_writeback(struct address_space *mapping,
2717 				  struct xa_state *xas,
2718 				  long *_count,
2719 				  loff_t start,
2720 				  int max_pages,
2721 				  loff_t max_len,
2722 				  size_t *_len)
2723 {
2724 	struct folio_batch batch;
2725 	struct folio *folio;
2726 	unsigned int nr_pages;
2727 	pgoff_t index = (start + *_len) / PAGE_SIZE;
2728 	size_t len;
2729 	bool stop = true;
2730 	unsigned int i;
2731 
2732 	folio_batch_init(&batch);
2733 
2734 	do {
2735 		/* Firstly, we gather up a batch of contiguous dirty pages
2736 		 * under the RCU read lock - but we can't clear the dirty flags
2737 		 * there if any of those pages are mapped.
2738 		 */
2739 		rcu_read_lock();
2740 
2741 		xas_for_each(xas, folio, ULONG_MAX) {
2742 			stop = true;
2743 			if (xas_retry(xas, folio))
2744 				continue;
2745 			if (xa_is_value(folio))
2746 				break;
2747 			if (folio->index != index) {
2748 				xas_reset(xas);
2749 				break;
2750 			}
2751 
2752 			if (!folio_try_get(folio)) {
2753 				xas_reset(xas);
2754 				continue;
2755 			}
2756 			nr_pages = folio_nr_pages(folio);
2757 			if (nr_pages > max_pages) {
2758 				xas_reset(xas);
2759 				break;
2760 			}
2761 
2762 			/* Has the page moved or been split? */
2763 			if (unlikely(folio != xas_reload(xas))) {
2764 				folio_put(folio);
2765 				xas_reset(xas);
2766 				break;
2767 			}
2768 
2769 			if (!folio_trylock(folio)) {
2770 				folio_put(folio);
2771 				xas_reset(xas);
2772 				break;
2773 			}
2774 			if (!folio_test_dirty(folio) ||
2775 			    folio_test_writeback(folio)) {
2776 				folio_unlock(folio);
2777 				folio_put(folio);
2778 				xas_reset(xas);
2779 				break;
2780 			}
2781 
2782 			max_pages -= nr_pages;
2783 			len = folio_size(folio);
2784 			stop = false;
2785 
2786 			index += nr_pages;
2787 			*_count -= nr_pages;
2788 			*_len += len;
2789 			if (max_pages <= 0 || *_len >= max_len || *_count <= 0)
2790 				stop = true;
2791 
2792 			if (!folio_batch_add(&batch, folio))
2793 				break;
2794 			if (stop)
2795 				break;
2796 		}
2797 
2798 		xas_pause(xas);
2799 		rcu_read_unlock();
2800 
2801 		/* Now, if we obtained any pages, we can shift them to being
2802 		 * writable and mark them for caching.
2803 		 */
2804 		if (!folio_batch_count(&batch))
2805 			break;
2806 
2807 		for (i = 0; i < folio_batch_count(&batch); i++) {
2808 			folio = batch.folios[i];
2809 			/* The folio should be locked, dirty and not undergoing
2810 			 * writeback from the loop above.
2811 			 */
2812 			if (!folio_clear_dirty_for_io(folio))
2813 				WARN_ON(1);
2814 			folio_start_writeback(folio);
2815 			folio_unlock(folio);
2816 		}
2817 
2818 		folio_batch_release(&batch);
2819 		cond_resched();
2820 	} while (!stop);
2821 }
2822 
2823 /*
2824  * Write back the locked page and any subsequent non-locked dirty pages.
2825  */
2826 static ssize_t cifs_write_back_from_locked_folio(struct address_space *mapping,
2827 						 struct writeback_control *wbc,
2828 						 struct xa_state *xas,
2829 						 struct folio *folio,
2830 						 unsigned long long start,
2831 						 unsigned long long end)
2832 {
2833 	struct inode *inode = mapping->host;
2834 	struct TCP_Server_Info *server;
2835 	struct cifs_writedata *wdata;
2836 	struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
2837 	struct cifs_credits credits_on_stack;
2838 	struct cifs_credits *credits = &credits_on_stack;
2839 	struct cifsFileInfo *cfile = NULL;
2840 	unsigned long long i_size = i_size_read(inode), max_len;
2841 	unsigned int xid, wsize;
2842 	size_t len = folio_size(folio);
2843 	long count = wbc->nr_to_write;
2844 	int rc;
2845 
2846 	/* The folio should be locked, dirty and not undergoing writeback. */
2847 	if (!folio_clear_dirty_for_io(folio))
2848 		WARN_ON_ONCE(1);
2849 	folio_start_writeback(folio);
2850 
2851 	count -= folio_nr_pages(folio);
2852 
2853 	xid = get_xid();
2854 	server = cifs_pick_channel(cifs_sb_master_tcon(cifs_sb)->ses);
2855 
2856 	rc = cifs_get_writable_file(CIFS_I(inode), FIND_WR_ANY, &cfile);
2857 	if (rc) {
2858 		cifs_dbg(VFS, "No writable handle in writepages rc=%d\n", rc);
2859 		goto err_xid;
2860 	}
2861 
2862 	rc = server->ops->wait_mtu_credits(server, cifs_sb->ctx->wsize,
2863 					   &wsize, credits);
2864 	if (rc != 0)
2865 		goto err_close;
2866 
2867 	wdata = cifs_writedata_alloc(cifs_writev_complete);
2868 	if (!wdata) {
2869 		rc = -ENOMEM;
2870 		goto err_uncredit;
2871 	}
2872 
2873 	wdata->sync_mode = wbc->sync_mode;
2874 	wdata->offset = folio_pos(folio);
2875 	wdata->pid = cfile->pid;
2876 	wdata->credits = credits_on_stack;
2877 	wdata->cfile = cfile;
2878 	wdata->server = server;
2879 	cfile = NULL;
2880 
2881 	/* Find all consecutive lockable dirty pages that have contiguous
2882 	 * written regions, stopping when we find a page that is not
2883 	 * immediately lockable, is not dirty or is missing, or we reach the
2884 	 * end of the range.
2885 	 */
2886 	if (start < i_size) {
2887 		/* Trim the write to the EOF; the extra data is ignored.  Also
2888 		 * put an upper limit on the size of a single storedata op.
2889 		 */
2890 		max_len = wsize;
2891 		max_len = min_t(unsigned long long, max_len, end - start + 1);
2892 		max_len = min_t(unsigned long long, max_len, i_size - start);
2893 
2894 		if (len < max_len) {
2895 			int max_pages = INT_MAX;
2896 
2897 #ifdef CONFIG_CIFS_SMB_DIRECT
2898 			if (server->smbd_conn)
2899 				max_pages = server->smbd_conn->max_frmr_depth;
2900 #endif
2901 			max_pages -= folio_nr_pages(folio);
2902 
2903 			if (max_pages > 0)
2904 				cifs_extend_writeback(mapping, xas, &count, start,
2905 						      max_pages, max_len, &len);
2906 		}
2907 	}
2908 	len = min_t(unsigned long long, len, i_size - start);
2909 
2910 	/* We now have a contiguous set of dirty pages, each with writeback
2911 	 * set; the first page is still locked at this point, but all the rest
2912 	 * have been unlocked.
2913 	 */
2914 	folio_unlock(folio);
2915 	wdata->bytes = len;
2916 
2917 	if (start < i_size) {
2918 		iov_iter_xarray(&wdata->iter, ITER_SOURCE, &mapping->i_pages,
2919 				start, len);
2920 
2921 		rc = adjust_credits(wdata->server, &wdata->credits, wdata->bytes);
2922 		if (rc)
2923 			goto err_wdata;
2924 
2925 		if (wdata->cfile->invalidHandle)
2926 			rc = -EAGAIN;
2927 		else
2928 			rc = wdata->server->ops->async_writev(wdata,
2929 							      cifs_writedata_release);
2930 		if (rc >= 0) {
2931 			kref_put(&wdata->refcount, cifs_writedata_release);
2932 			goto err_close;
2933 		}
2934 	} else {
2935 		/* The dirty region was entirely beyond the EOF. */
2936 		cifs_pages_written_back(inode, start, len);
2937 		rc = 0;
2938 	}
2939 
2940 err_wdata:
2941 	kref_put(&wdata->refcount, cifs_writedata_release);
2942 err_uncredit:
2943 	add_credits_and_wake_if(server, credits, 0);
2944 err_close:
2945 	if (cfile)
2946 		cifsFileInfo_put(cfile);
2947 err_xid:
2948 	free_xid(xid);
2949 	if (rc == 0) {
2950 		wbc->nr_to_write = count;
2951 		rc = len;
2952 	} else if (is_retryable_error(rc)) {
2953 		cifs_pages_write_redirty(inode, start, len);
2954 	} else {
2955 		cifs_pages_write_failed(inode, start, len);
2956 		mapping_set_error(mapping, rc);
2957 	}
2958 	/* Indication to update ctime and mtime as close is deferred */
2959 	set_bit(CIFS_INO_MODIFIED_ATTR, &CIFS_I(inode)->flags);
2960 	return rc;
2961 }
2962 
2963 /*
2964  * write a region of pages back to the server
2965  */
2966 static ssize_t cifs_writepages_begin(struct address_space *mapping,
2967 				     struct writeback_control *wbc,
2968 				     struct xa_state *xas,
2969 				     unsigned long long *_start,
2970 				     unsigned long long end)
2971 {
2972 	struct folio *folio;
2973 	unsigned long long start = *_start;
2974 	ssize_t ret;
2975 	int skips = 0;
2976 
2977 search_again:
2978 	/* Find the first dirty page. */
2979 	rcu_read_lock();
2980 
2981 	for (;;) {
2982 		folio = xas_find_marked(xas, end / PAGE_SIZE, PAGECACHE_TAG_DIRTY);
2983 		if (xas_retry(xas, folio) || xa_is_value(folio))
2984 			continue;
2985 		if (!folio)
2986 			break;
2987 
2988 		if (!folio_try_get(folio)) {
2989 			xas_reset(xas);
2990 			continue;
2991 		}
2992 
2993 		if (unlikely(folio != xas_reload(xas))) {
2994 			folio_put(folio);
2995 			xas_reset(xas);
2996 			continue;
2997 		}
2998 
2999 		xas_pause(xas);
3000 		break;
3001 	}
3002 	rcu_read_unlock();
3003 	if (!folio)
3004 		return 0;
3005 
3006 	start = folio_pos(folio); /* May regress with THPs */
3007 
3008 	/* At this point we hold neither the i_pages lock nor the page lock:
3009 	 * the page may be truncated or invalidated (changing page->mapping to
3010 	 * NULL), or even swizzled back from swapper_space to tmpfs file
3011 	 * mapping
3012 	 */
3013 lock_again:
3014 	if (wbc->sync_mode != WB_SYNC_NONE) {
3015 		ret = folio_lock_killable(folio);
3016 		if (ret < 0)
3017 			return ret;
3018 	} else {
3019 		if (!folio_trylock(folio))
3020 			goto search_again;
3021 	}
3022 
3023 	if (folio->mapping != mapping ||
3024 	    !folio_test_dirty(folio)) {
3025 		start += folio_size(folio);
3026 		folio_unlock(folio);
3027 		goto search_again;
3028 	}
3029 
3030 	if (folio_test_writeback(folio) ||
3031 	    folio_test_fscache(folio)) {
3032 		folio_unlock(folio);
3033 		if (wbc->sync_mode != WB_SYNC_NONE) {
3034 			folio_wait_writeback(folio);
3035 #ifdef CONFIG_CIFS_FSCACHE
3036 			folio_wait_fscache(folio);
3037 #endif
3038 			goto lock_again;
3039 		}
3040 
3041 		start += folio_size(folio);
3042 		if (wbc->sync_mode == WB_SYNC_NONE) {
3043 			if (skips >= 5 || need_resched()) {
3044 				ret = 0;
3045 				goto out;
3046 			}
3047 			skips++;
3048 		}
3049 		goto search_again;
3050 	}
3051 
3052 	ret = cifs_write_back_from_locked_folio(mapping, wbc, xas, folio, start, end);
3053 out:
3054 	if (ret > 0)
3055 		*_start = start + ret;
3056 	return ret;
3057 }
3058 
3059 /*
3060  * Write a region of pages back to the server
3061  */
3062 static int cifs_writepages_region(struct address_space *mapping,
3063 				  struct writeback_control *wbc,
3064 				  unsigned long long *_start,
3065 				  unsigned long long end)
3066 {
3067 	ssize_t ret;
3068 
3069 	XA_STATE(xas, &mapping->i_pages, *_start / PAGE_SIZE);
3070 
3071 	do {
3072 		ret = cifs_writepages_begin(mapping, wbc, &xas, _start, end);
3073 		if (ret > 0 && wbc->nr_to_write > 0)
3074 			cond_resched();
3075 	} while (ret > 0 && wbc->nr_to_write > 0);
3076 
3077 	return ret > 0 ? 0 : ret;
3078 }
3079 
3080 /*
3081  * Write some of the pending data back to the server
3082  */
3083 static int cifs_writepages(struct address_space *mapping,
3084 			   struct writeback_control *wbc)
3085 {
3086 	loff_t start, end;
3087 	int ret;
3088 
3089 	/* We have to be careful as we can end up racing with setattr()
3090 	 * truncating the pagecache since the caller doesn't take a lock here
3091 	 * to prevent it.
3092 	 */
3093 
3094 	if (wbc->range_cyclic && mapping->writeback_index) {
3095 		start = mapping->writeback_index * PAGE_SIZE;
3096 		ret = cifs_writepages_region(mapping, wbc, &start, LLONG_MAX);
3097 		if (ret < 0)
3098 			goto out;
3099 
3100 		if (wbc->nr_to_write <= 0) {
3101 			mapping->writeback_index = start / PAGE_SIZE;
3102 			goto out;
3103 		}
3104 
3105 		start = 0;
3106 		end = mapping->writeback_index * PAGE_SIZE;
3107 		mapping->writeback_index = 0;
3108 		ret = cifs_writepages_region(mapping, wbc, &start, end);
3109 		if (ret == 0)
3110 			mapping->writeback_index = start / PAGE_SIZE;
3111 	} else if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX) {
3112 		start = 0;
3113 		ret = cifs_writepages_region(mapping, wbc, &start, LLONG_MAX);
3114 		if (wbc->nr_to_write > 0 && ret == 0)
3115 			mapping->writeback_index = start / PAGE_SIZE;
3116 	} else {
3117 		start = wbc->range_start;
3118 		ret = cifs_writepages_region(mapping, wbc, &start, wbc->range_end);
3119 	}
3120 
3121 out:
3122 	return ret;
3123 }
3124 
3125 static int
3126 cifs_writepage_locked(struct page *page, struct writeback_control *wbc)
3127 {
3128 	int rc;
3129 	unsigned int xid;
3130 
3131 	xid = get_xid();
3132 /* BB add check for wbc flags */
3133 	get_page(page);
3134 	if (!PageUptodate(page))
3135 		cifs_dbg(FYI, "ppw - page not up to date\n");
3136 
3137 	/*
3138 	 * Set the "writeback" flag, and clear "dirty" in the radix tree.
3139 	 *
3140 	 * A writepage() implementation always needs to do either this,
3141 	 * or re-dirty the page with "redirty_page_for_writepage()" in
3142 	 * the case of a failure.
3143 	 *
3144 	 * Just unlocking the page will cause the radix tree tag-bits
3145 	 * to fail to update with the state of the page correctly.
3146 	 */
3147 	set_page_writeback(page);
3148 retry_write:
3149 	rc = cifs_partialpagewrite(page, 0, PAGE_SIZE);
3150 	if (is_retryable_error(rc)) {
3151 		if (wbc->sync_mode == WB_SYNC_ALL && rc == -EAGAIN)
3152 			goto retry_write;
3153 		redirty_page_for_writepage(wbc, page);
3154 	} else if (rc != 0) {
3155 		SetPageError(page);
3156 		mapping_set_error(page->mapping, rc);
3157 	} else {
3158 		SetPageUptodate(page);
3159 	}
3160 	end_page_writeback(page);
3161 	put_page(page);
3162 	free_xid(xid);
3163 	return rc;
3164 }
3165 
3166 static int cifs_write_end(struct file *file, struct address_space *mapping,
3167 			loff_t pos, unsigned len, unsigned copied,
3168 			struct page *page, void *fsdata)
3169 {
3170 	int rc;
3171 	struct inode *inode = mapping->host;
3172 	struct cifsFileInfo *cfile = file->private_data;
3173 	struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
3174 	struct folio *folio = page_folio(page);
3175 	__u32 pid;
3176 
3177 	if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
3178 		pid = cfile->pid;
3179 	else
3180 		pid = current->tgid;
3181 
3182 	cifs_dbg(FYI, "write_end for page %p from pos %lld with %d bytes\n",
3183 		 page, pos, copied);
3184 
3185 	if (folio_test_checked(folio)) {
3186 		if (copied == len)
3187 			folio_mark_uptodate(folio);
3188 		folio_clear_checked(folio);
3189 	} else if (!folio_test_uptodate(folio) && copied == PAGE_SIZE)
3190 		folio_mark_uptodate(folio);
3191 
3192 	if (!folio_test_uptodate(folio)) {
3193 		char *page_data;
3194 		unsigned offset = pos & (PAGE_SIZE - 1);
3195 		unsigned int xid;
3196 
3197 		xid = get_xid();
3198 		/* this is probably better than directly calling
3199 		   partialpage_write since in this function the file handle is
3200 		   known which we might as well	leverage */
3201 		/* BB check if anything else missing out of ppw
3202 		   such as updating last write time */
3203 		page_data = kmap(page);
3204 		rc = cifs_write(cfile, pid, page_data + offset, copied, &pos);
3205 		/* if (rc < 0) should we set writebehind rc? */
3206 		kunmap(page);
3207 
3208 		free_xid(xid);
3209 	} else {
3210 		rc = copied;
3211 		pos += copied;
3212 		set_page_dirty(page);
3213 	}
3214 
3215 	if (rc > 0) {
3216 		spin_lock(&inode->i_lock);
3217 		if (pos > inode->i_size) {
3218 			loff_t additional_blocks = (512 - 1 + copied) >> 9;
3219 
3220 			i_size_write(inode, pos);
3221 			/*
3222 			 * Estimate new allocation size based on the amount written.
3223 			 * This will be updated from server on close (and on queryinfo)
3224 			 */
3225 			inode->i_blocks = min_t(blkcnt_t, (512 - 1 + pos) >> 9,
3226 						inode->i_blocks + additional_blocks);
3227 		}
3228 		spin_unlock(&inode->i_lock);
3229 	}
3230 
3231 	unlock_page(page);
3232 	put_page(page);
3233 	/* Indication to update ctime and mtime as close is deferred */
3234 	set_bit(CIFS_INO_MODIFIED_ATTR, &CIFS_I(inode)->flags);
3235 
3236 	return rc;
3237 }
3238 
3239 int cifs_strict_fsync(struct file *file, loff_t start, loff_t end,
3240 		      int datasync)
3241 {
3242 	unsigned int xid;
3243 	int rc = 0;
3244 	struct cifs_tcon *tcon;
3245 	struct TCP_Server_Info *server;
3246 	struct cifsFileInfo *smbfile = file->private_data;
3247 	struct inode *inode = file_inode(file);
3248 	struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
3249 
3250 	rc = file_write_and_wait_range(file, start, end);
3251 	if (rc) {
3252 		trace_cifs_fsync_err(inode->i_ino, rc);
3253 		return rc;
3254 	}
3255 
3256 	xid = get_xid();
3257 
3258 	cifs_dbg(FYI, "Sync file - name: %pD datasync: 0x%x\n",
3259 		 file, datasync);
3260 
3261 	if (!CIFS_CACHE_READ(CIFS_I(inode))) {
3262 		rc = cifs_zap_mapping(inode);
3263 		if (rc) {
3264 			cifs_dbg(FYI, "rc: %d during invalidate phase\n", rc);
3265 			rc = 0; /* don't care about it in fsync */
3266 		}
3267 	}
3268 
3269 	tcon = tlink_tcon(smbfile->tlink);
3270 	if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC)) {
3271 		server = tcon->ses->server;
3272 		if (server->ops->flush == NULL) {
3273 			rc = -ENOSYS;
3274 			goto strict_fsync_exit;
3275 		}
3276 
3277 		if ((OPEN_FMODE(smbfile->f_flags) & FMODE_WRITE) == 0) {
3278 			smbfile = find_writable_file(CIFS_I(inode), FIND_WR_ANY);
3279 			if (smbfile) {
3280 				rc = server->ops->flush(xid, tcon, &smbfile->fid);
3281 				cifsFileInfo_put(smbfile);
3282 			} else
3283 				cifs_dbg(FYI, "ignore fsync for file not open for write\n");
3284 		} else
3285 			rc = server->ops->flush(xid, tcon, &smbfile->fid);
3286 	}
3287 
3288 strict_fsync_exit:
3289 	free_xid(xid);
3290 	return rc;
3291 }
3292 
3293 int cifs_fsync(struct file *file, loff_t start, loff_t end, int datasync)
3294 {
3295 	unsigned int xid;
3296 	int rc = 0;
3297 	struct cifs_tcon *tcon;
3298 	struct TCP_Server_Info *server;
3299 	struct cifsFileInfo *smbfile = file->private_data;
3300 	struct inode *inode = file_inode(file);
3301 	struct cifs_sb_info *cifs_sb = CIFS_FILE_SB(file);
3302 
3303 	rc = file_write_and_wait_range(file, start, end);
3304 	if (rc) {
3305 		trace_cifs_fsync_err(file_inode(file)->i_ino, rc);
3306 		return rc;
3307 	}
3308 
3309 	xid = get_xid();
3310 
3311 	cifs_dbg(FYI, "Sync file - name: %pD datasync: 0x%x\n",
3312 		 file, datasync);
3313 
3314 	tcon = tlink_tcon(smbfile->tlink);
3315 	if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC)) {
3316 		server = tcon->ses->server;
3317 		if (server->ops->flush == NULL) {
3318 			rc = -ENOSYS;
3319 			goto fsync_exit;
3320 		}
3321 
3322 		if ((OPEN_FMODE(smbfile->f_flags) & FMODE_WRITE) == 0) {
3323 			smbfile = find_writable_file(CIFS_I(inode), FIND_WR_ANY);
3324 			if (smbfile) {
3325 				rc = server->ops->flush(xid, tcon, &smbfile->fid);
3326 				cifsFileInfo_put(smbfile);
3327 			} else
3328 				cifs_dbg(FYI, "ignore fsync for file not open for write\n");
3329 		} else
3330 			rc = server->ops->flush(xid, tcon, &smbfile->fid);
3331 	}
3332 
3333 fsync_exit:
3334 	free_xid(xid);
3335 	return rc;
3336 }
3337 
3338 /*
3339  * As file closes, flush all cached write data for this inode checking
3340  * for write behind errors.
3341  */
3342 int cifs_flush(struct file *file, fl_owner_t id)
3343 {
3344 	struct inode *inode = file_inode(file);
3345 	int rc = 0;
3346 
3347 	if (file->f_mode & FMODE_WRITE)
3348 		rc = filemap_write_and_wait(inode->i_mapping);
3349 
3350 	cifs_dbg(FYI, "Flush inode %p file %p rc %d\n", inode, file, rc);
3351 	if (rc) {
3352 		/* get more nuanced writeback errors */
3353 		rc = filemap_check_wb_err(file->f_mapping, 0);
3354 		trace_cifs_flush_err(inode->i_ino, rc);
3355 	}
3356 	return rc;
3357 }
3358 
3359 static void
3360 cifs_uncached_writedata_release(struct kref *refcount)
3361 {
3362 	struct cifs_writedata *wdata = container_of(refcount,
3363 					struct cifs_writedata, refcount);
3364 
3365 	kref_put(&wdata->ctx->refcount, cifs_aio_ctx_release);
3366 	cifs_writedata_release(refcount);
3367 }
3368 
3369 static void collect_uncached_write_data(struct cifs_aio_ctx *ctx);
3370 
3371 static void
3372 cifs_uncached_writev_complete(struct work_struct *work)
3373 {
3374 	struct cifs_writedata *wdata = container_of(work,
3375 					struct cifs_writedata, work);
3376 	struct inode *inode = d_inode(wdata->cfile->dentry);
3377 	struct cifsInodeInfo *cifsi = CIFS_I(inode);
3378 
3379 	spin_lock(&inode->i_lock);
3380 	cifs_update_eof(cifsi, wdata->offset, wdata->bytes);
3381 	if (cifsi->server_eof > inode->i_size)
3382 		i_size_write(inode, cifsi->server_eof);
3383 	spin_unlock(&inode->i_lock);
3384 
3385 	complete(&wdata->done);
3386 	collect_uncached_write_data(wdata->ctx);
3387 	/* the below call can possibly free the last ref to aio ctx */
3388 	kref_put(&wdata->refcount, cifs_uncached_writedata_release);
3389 }
3390 
/*
 * Send an existing uncached write request again, as one piece.
 *
 * @wdata:      the write request to resend
 * @wdata_list: list that @wdata is appended to once it has been sent
 * @ctx:        the aio context (not referenced by this function)
 *
 * The attempt is repeated for as long as it fails with -EAGAIN.
 *
 * Returns 0 if the request was passed to ->async_writev() and queued on
 * @wdata_list.  On failure a reference on @wdata is dropped and the error
 * code is returned.
 */
static int
cifs_resend_wdata(struct cifs_writedata *wdata, struct list_head *wdata_list,
	struct cifs_aio_ctx *ctx)
{
	unsigned int wsize;
	struct cifs_credits credits;
	int rc;
	struct TCP_Server_Info *server = wdata->server;

	do {
		if (wdata->cfile->invalidHandle) {
			rc = cifs_reopen_file(wdata->cfile, false);
			/* "continue" re-tests the loop condition, so -EAGAIN retries */
			if (rc == -EAGAIN)
				continue;
			else if (rc)
				break;
		}


		/*
		 * Wait for credits to resend this wdata.
		 * Note: we are attempting to resend the whole wdata not in
		 * segments
		 */
		do {
			rc = server->ops->wait_mtu_credits(server, wdata->bytes,
						&wsize, &credits);
			if (rc)
				goto fail;

			/* Not enough for the whole request: give them back and wait */
			if (wsize < wdata->bytes) {
				add_credits_and_wake_if(server, &credits, 0);
				msleep(1000);
			}
		} while (wsize < wdata->bytes);
		wdata->credits = credits;

		rc = adjust_credits(server, &wdata->credits, wdata->bytes);

		if (!rc) {
			/* The handle may have gone stale while we waited for credits */
			if (wdata->cfile->invalidHandle)
				rc = -EAGAIN;
			else {
				wdata->replay = true;
#ifdef CONFIG_CIFS_SMB_DIRECT
				/* Drop the memory registration of the previous attempt */
				if (wdata->mr) {
					wdata->mr->need_invalidate = true;
					smbd_deregister_mr(wdata->mr);
					wdata->mr = NULL;
				}
#endif
				rc = server->ops->async_writev(wdata,
					cifs_uncached_writedata_release);
			}
		}

		/* If the write was successfully sent, we are done */
		if (!rc) {
			list_add_tail(&wdata->list, wdata_list);
			return 0;
		}

		/* Roll back credits and retry if needed */
		add_credits_and_wake_if(server, &wdata->credits, 0);
	} while (rc == -EAGAIN);

fail:
	kref_put(&wdata->refcount, cifs_uncached_writedata_release);
	return rc;
}
3461 
3462 /*
3463  * Select span of a bvec iterator we're going to use.  Limit it by both maximum
3464  * size and maximum number of segments.
3465  */
3466 static size_t cifs_limit_bvec_subset(const struct iov_iter *iter, size_t max_size,
3467 				     size_t max_segs, unsigned int *_nsegs)
3468 {
3469 	const struct bio_vec *bvecs = iter->bvec;
3470 	unsigned int nbv = iter->nr_segs, ix = 0, nsegs = 0;
3471 	size_t len, span = 0, n = iter->count;
3472 	size_t skip = iter->iov_offset;
3473 
3474 	if (WARN_ON(!iov_iter_is_bvec(iter)) || n == 0)
3475 		return 0;
3476 
3477 	while (n && ix < nbv && skip) {
3478 		len = bvecs[ix].bv_len;
3479 		if (skip < len)
3480 			break;
3481 		skip -= len;
3482 		n -= len;
3483 		ix++;
3484 	}
3485 
3486 	while (n && ix < nbv) {
3487 		len = min3(n, bvecs[ix].bv_len - skip, max_size);
3488 		span += len;
3489 		max_size -= len;
3490 		nsegs++;
3491 		ix++;
3492 		if (max_size == 0 || nsegs >= max_segs)
3493 			break;
3494 		skip = 0;
3495 		n -= len;
3496 	}
3497 
3498 	*_nsegs = nsegs;
3499 	return span;
3500 }
3501 
/*
 * Split an uncached write into requests and send them asynchronously.
 *
 * @fpos:       file position of the first byte to write
 * @len:        number of bytes to write
 * @from:       source iterator; advanced past each chunk that has been sent
 * @open_file:  open file handle to write through
 * @cifs_sb:    superblock info supplying the mount flags and write size
 * @wdata_list: list that each successfully sent request is appended to
 * @ctx:        aio context; every request takes a reference on it
 *
 * Each pass of the loop obtains credits, picks a chunk limited by the
 * granted write size and by the segment limit, builds a cifs_writedata for
 * it and hands it to ->async_writev().  A request that fails with -EAGAIN
 * is dropped and the same chunk is attempted again.
 *
 * Returns 0 if everything was sent, otherwise the error that stopped the
 * loop; requests already queued on @wdata_list stay there.
 */
static int
cifs_write_from_iter(loff_t fpos, size_t len, struct iov_iter *from,
		     struct cifsFileInfo *open_file,
		     struct cifs_sb_info *cifs_sb, struct list_head *wdata_list,
		     struct cifs_aio_ctx *ctx)
{
	int rc = 0;
	size_t cur_len, max_len;
	struct cifs_writedata *wdata;
	pid_t pid;
	struct TCP_Server_Info *server;
	unsigned int xid, max_segs = INT_MAX;

	if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
		pid = open_file->pid;
	else
		pid = current->tgid;

	server = cifs_pick_channel(tlink_tcon(open_file->tlink)->ses);
	xid = get_xid();

#ifdef CONFIG_CIFS_SMB_DIRECT
	/* With SMB Direct the segment count per request is capped */
	if (server->smbd_conn)
		max_segs = server->smbd_conn->max_frmr_depth;
#endif

	do {
		struct cifs_credits credits_on_stack;
		struct cifs_credits *credits = &credits_on_stack;
		unsigned int wsize, nsegs = 0;

		if (signal_pending(current)) {
			rc = -EINTR;
			break;
		}

		if (open_file->invalidHandle) {
			rc = cifs_reopen_file(open_file, false);
			/* "continue" re-tests len > 0, so the reopen is retried */
			if (rc == -EAGAIN)
				continue;
			else if (rc)
				break;
		}

		rc = server->ops->wait_mtu_credits(server, cifs_sb->ctx->wsize,
						   &wsize, credits);
		if (rc)
			break;

		max_len = min_t(const size_t, len, wsize);
		if (!max_len) {
			rc = -EAGAIN;
			add_credits_and_wake_if(server, credits, 0);
			break;
		}

		/* Work out how much of the iterator this request can carry */
		cur_len = cifs_limit_bvec_subset(from, max_len, max_segs, &nsegs);
		cifs_dbg(FYI, "write_from_iter len=%zx/%zx nsegs=%u/%lu/%u\n",
			 cur_len, max_len, nsegs, from->nr_segs, max_segs);
		if (cur_len == 0) {
			rc = -EIO;
			add_credits_and_wake_if(server, credits, 0);
			break;
		}

		wdata = cifs_writedata_alloc(cifs_uncached_writev_complete);
		if (!wdata) {
			rc = -ENOMEM;
			add_credits_and_wake_if(server, credits, 0);
			break;
		}

		wdata->sync_mode = WB_SYNC_ALL;
		wdata->offset	= (__u64)fpos;
		wdata->cfile	= cifsFileInfo_get(open_file);
		wdata->server	= server;
		wdata->pid	= pid;
		wdata->bytes	= cur_len;
		wdata->credits	= credits_on_stack;
		wdata->iter	= *from;
		wdata->ctx	= ctx;
		kref_get(&ctx->refcount);

		/* The request's private copy of the iterator covers this chunk only */
		iov_iter_truncate(&wdata->iter, cur_len);

		rc = adjust_credits(server, &wdata->credits, wdata->bytes);

		if (!rc) {
			if (wdata->cfile->invalidHandle)
				rc = -EAGAIN;
			else
				rc = server->ops->async_writev(wdata,
					cifs_uncached_writedata_release);
		}

		if (rc) {
			/* Give the credits back and drop the unsent request */
			add_credits_and_wake_if(server, &wdata->credits, 0);
			kref_put(&wdata->refcount,
				 cifs_uncached_writedata_release);
			if (rc == -EAGAIN)
				continue;
			break;
		}

		/* Sent: queue it and move on to the next chunk */
		list_add_tail(&wdata->list, wdata_list);
		iov_iter_advance(from, cur_len);
		fpos += cur_len;
		len -= cur_len;
	} while (len > 0);

	free_xid(xid);
	return rc;
}
3615 
/*
 * Gather the results of the uncached write requests queued on @ctx.
 *
 * Runs under ctx->aio_mutex.  Returns early, leaving the list in place, if
 * the list is empty or if a request that must be examined has not completed
 * yet.  Once every request has been dealt with, the outcome is stored in
 * ctx->rc (bytes written, or an error) and the issuer is notified either
 * through the iocb's ki_complete callback or by completing ctx->done.
 */
static void collect_uncached_write_data(struct cifs_aio_ctx *ctx)
{
	struct cifs_writedata *wdata, *tmp;
	struct cifs_tcon *tcon;
	struct cifs_sb_info *cifs_sb;
	struct dentry *dentry = ctx->cfile->dentry;
	ssize_t rc;

	tcon = tlink_tcon(ctx->cfile->tlink);
	cifs_sb = CIFS_SB(dentry->d_sb);

	mutex_lock(&ctx->aio_mutex);

	if (list_empty(&ctx->list)) {
		mutex_unlock(&ctx->aio_mutex);
		return;
	}

	rc = ctx->rc;
	/*
	 * Wait for and collect replies for any successful sends in order of
	 * increasing offset. Once an error is hit, then return without waiting
	 * for any more replies.
	 */
restart_loop:
	list_for_each_entry_safe(wdata, tmp, &ctx->list, list) {
		if (!rc) {
			/* Not finished yet: a later completion will call us again */
			if (!try_wait_for_completion(&wdata->done)) {
				mutex_unlock(&ctx->aio_mutex);
				return;
			}

			if (wdata->result)
				rc = wdata->result;
			else
				ctx->total_len += wdata->bytes;

			/* resend call if it's a retryable error */
			if (rc == -EAGAIN) {
				struct list_head tmp_list;
				struct iov_iter tmp_from = ctx->iter;

				INIT_LIST_HEAD(&tmp_list);
				list_del_init(&wdata->list);

				if (ctx->direct_io)
					/* Direct I/O sends the same wdata again */
					rc = cifs_resend_wdata(
						wdata, &tmp_list, ctx);
				else {
					/* Position a copy of the iterator at this request's data */
					iov_iter_advance(&tmp_from,
						 wdata->offset - ctx->pos);

					rc = cifs_write_from_iter(wdata->offset,
						wdata->bytes, &tmp_from,
						ctx->cfile, cifs_sb, &tmp_list,
						ctx);

					/* New requests replace the old one */
					kref_put(&wdata->refcount,
						cifs_uncached_writedata_release);
				}

				/* Put the resent requests on the list and rescan it */
				list_splice(&tmp_list, &ctx->list);
				goto restart_loop;
			}
		}
		/* Done with this request (or giving up after an error) */
		list_del_init(&wdata->list);
		kref_put(&wdata->refcount, cifs_uncached_writedata_release);
	}

	cifs_stats_bytes_written(tcon, ctx->total_len);
	set_bit(CIFS_INO_INVALID_MAPPING, &CIFS_I(dentry->d_inode)->flags);

	ctx->rc = (rc == 0) ? ctx->total_len : rc;

	mutex_unlock(&ctx->aio_mutex);

	if (ctx->iocb && ctx->iocb->ki_complete)
		ctx->iocb->ki_complete(ctx->iocb, ctx->rc);
	else
		complete(&ctx->done);
}
3697 
/*
 * Common implementation of the uncached and direct write paths.
 *
 * @iocb:   the I/O control block describing the file and position
 * @from:   the data to write
 * @direct: stored in the aio context as its direct_io flag
 *
 * Sets up an aio context, sends the data with cifs_write_from_iter() and,
 * for a synchronous iocb, waits for the requests to be collected.
 *
 * Returns the number of bytes written (advancing iocb->ki_pos by that
 * amount) for a synchronous iocb, -EIOCBQUEUED for an asynchronous one once
 * at least one request has been sent, or a negative error code.  A result
 * of zero or less from generic_write_checks() is returned unchanged.
 */
static ssize_t __cifs_writev(
	struct kiocb *iocb, struct iov_iter *from, bool direct)
{
	struct file *file = iocb->ki_filp;
	ssize_t total_written = 0;
	struct cifsFileInfo *cfile;
	struct cifs_tcon *tcon;
	struct cifs_sb_info *cifs_sb;
	struct cifs_aio_ctx *ctx;
	int rc;

	rc = generic_write_checks(iocb, from);
	if (rc <= 0)
		return rc;

	cifs_sb = CIFS_FILE_SB(file);
	cfile = file->private_data;
	tcon = tlink_tcon(cfile->tlink);

	if (!tcon->ses->server->ops->async_writev)
		return -ENOSYS;

	ctx = cifs_aio_ctx_alloc();
	if (!ctx)
		return -ENOMEM;

	ctx->cfile = cifsFileInfo_get(cfile);

	/* ctx->iocb is only set for asynchronous requests */
	if (!is_sync_kiocb(iocb))
		ctx->iocb = iocb;

	ctx->pos = iocb->ki_pos;
	ctx->direct_io = direct;
	ctx->nr_pinned_pages = 0;

	if (user_backed_iter(from)) {
		/*
		 * Extract IOVEC/UBUF-type iterators to a BVEC-type iterator as
		 * they contain references to the calling process's virtual
		 * memory layout which won't be available in an async worker
		 * thread.  This also takes a pin on every folio involved.
		 */
		rc = netfs_extract_user_iter(from, iov_iter_count(from),
					     &ctx->iter, 0);
		if (rc < 0) {
			kref_put(&ctx->refcount, cifs_aio_ctx_release);
			return rc;
		}

		ctx->nr_pinned_pages = rc;
		ctx->bv = (void *)ctx->iter.bvec;
		ctx->bv_need_unpin = iov_iter_extract_will_pin(from);
	} else if ((iov_iter_is_bvec(from) || iov_iter_is_kvec(from)) &&
		   !is_sync_kiocb(iocb)) {
		/*
		 * If the op is asynchronous, we need to copy the list attached
		 * to a BVEC/KVEC-type iterator, but we assume that the storage
		 * will be pinned by the caller; in any case, we may or may not
		 * be able to pin the pages, so we don't try.
		 */
		ctx->bv = (void *)dup_iter(&ctx->iter, from, GFP_KERNEL);
		if (!ctx->bv) {
			kref_put(&ctx->refcount, cifs_aio_ctx_release);
			return -ENOMEM;
		}
	} else {
		/*
		 * Otherwise, we just pass the iterator down as-is and rely on
		 * the caller to make sure the pages referred to by the
		 * iterator don't evaporate.
		 */
		ctx->iter = *from;
	}

	ctx->len = iov_iter_count(&ctx->iter);

	/*
	 * Take the lock while sending: the write completion path
	 * (collect_uncached_write_data()) takes the same mutex before it
	 * looks at ctx.
	 */
	mutex_lock(&ctx->aio_mutex);

	rc = cifs_write_from_iter(iocb->ki_pos, ctx->len, &ctx->iter,
				  cfile, cifs_sb, &ctx->list, ctx);

	/*
	 * If at least one write was successfully sent, then discard any rc
	 * value from the later writes. If the other write succeeds, then
	 * we'll end up returning whatever was written. If it fails, then
	 * we'll get a new rc value from that.
	 */
	if (!list_empty(&ctx->list))
		rc = 0;

	mutex_unlock(&ctx->aio_mutex);

	/* Nothing was sent at all: report the error directly */
	if (rc) {
		kref_put(&ctx->refcount, cifs_aio_ctx_release);
		return rc;
	}

	/* Asynchronous: the result is delivered later through ki_complete */
	if (!is_sync_kiocb(iocb)) {
		kref_put(&ctx->refcount, cifs_aio_ctx_release);
		return -EIOCBQUEUED;
	}

	rc = wait_for_completion_killable(&ctx->done);
	if (rc) {
		/* Interrupted while waiting: record -EINTR under the mutex */
		mutex_lock(&ctx->aio_mutex);
		ctx->rc = rc = -EINTR;
		total_written = ctx->total_len;
		mutex_unlock(&ctx->aio_mutex);
	} else {
		rc = ctx->rc;
		total_written = ctx->total_len;
	}

	kref_put(&ctx->refcount, cifs_aio_ctx_release);

	/* Report the error only if no data at all was written */
	if (unlikely(!total_written))
		return rc;

	iocb->ki_pos += total_written;
	return total_written;
}
3820 
3821 ssize_t cifs_direct_writev(struct kiocb *iocb, struct iov_iter *from)
3822 {
3823 	struct file *file = iocb->ki_filp;
3824 
3825 	cifs_revalidate_mapping(file->f_inode);
3826 	return __cifs_writev(iocb, from, true);
3827 }
3828 
3829 ssize_t cifs_user_writev(struct kiocb *iocb, struct iov_iter *from)
3830 {
3831 	return __cifs_writev(iocb, from, false);
3832 }
3833 
3834 static ssize_t
3835 cifs_writev(struct kiocb *iocb, struct iov_iter *from)
3836 {
3837 	struct file *file = iocb->ki_filp;
3838 	struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
3839 	struct inode *inode = file->f_mapping->host;
3840 	struct cifsInodeInfo *cinode = CIFS_I(inode);
3841 	struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;
3842 	ssize_t rc;
3843 
3844 	inode_lock(inode);
3845 	/*
3846 	 * We need to hold the sem to be sure nobody modifies lock list
3847 	 * with a brlock that prevents writing.
3848 	 */
3849 	down_read(&cinode->lock_sem);
3850 
3851 	rc = generic_write_checks(iocb, from);
3852 	if (rc <= 0)
3853 		goto out;
3854 
3855 	if (!cifs_find_lock_conflict(cfile, iocb->ki_pos, iov_iter_count(from),
3856 				     server->vals->exclusive_lock_type, 0,
3857 				     NULL, CIFS_WRITE_OP))
3858 		rc = __generic_file_write_iter(iocb, from);
3859 	else
3860 		rc = -EACCES;
3861 out:
3862 	up_read(&cinode->lock_sem);
3863 	inode_unlock(inode);
3864 
3865 	if (rc > 0)
3866 		rc = generic_write_sync(iocb, rc);
3867 	return rc;
3868 }
3869 
3870 ssize_t
3871 cifs_strict_writev(struct kiocb *iocb, struct iov_iter *from)
3872 {
3873 	struct inode *inode = file_inode(iocb->ki_filp);
3874 	struct cifsInodeInfo *cinode = CIFS_I(inode);
3875 	struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
3876 	struct cifsFileInfo *cfile = (struct cifsFileInfo *)
3877 						iocb->ki_filp->private_data;
3878 	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
3879 	ssize_t written;
3880 
3881 	written = cifs_get_writer(cinode);
3882 	if (written)
3883 		return written;
3884 
3885 	if (CIFS_CACHE_WRITE(cinode)) {
3886 		if (cap_unix(tcon->ses) &&
3887 		(CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability))
3888 		  && ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0)) {
3889 			written = generic_file_write_iter(iocb, from);
3890 			goto out;
3891 		}
3892 		written = cifs_writev(iocb, from);
3893 		goto out;
3894 	}
3895 	/*
3896 	 * For non-oplocked files in strict cache mode we need to write the data
3897 	 * to the server exactly from the pos to pos+len-1 rather than flush all
3898 	 * affected pages because it may cause a error with mandatory locks on
3899 	 * these pages but not on the region from pos to ppos+len-1.
3900 	 */
3901 	written = cifs_user_writev(iocb, from);
3902 	if (CIFS_CACHE_READ(cinode)) {
3903 		/*
3904 		 * We have read level caching and we have just sent a write
3905 		 * request to the server thus making data in the cache stale.
3906 		 * Zap the cache and set oplock/lease level to NONE to avoid
3907 		 * reading stale data from the cache. All subsequent read
3908 		 * operations will read new data from the server.
3909 		 */
3910 		cifs_zap_mapping(inode);
3911 		cifs_dbg(FYI, "Set Oplock/Lease to NONE for inode=%p after write\n",
3912 			 inode);
3913 		cinode->oplock = 0;
3914 	}
3915 out:
3916 	cifs_put_writer(cinode);
3917 	return written;
3918 }
3919 
3920 static struct cifs_readdata *cifs_readdata_alloc(work_func_t complete)
3921 {
3922 	struct cifs_readdata *rdata;
3923 
3924 	rdata = kzalloc(sizeof(*rdata), GFP_KERNEL);
3925 	if (rdata) {
3926 		kref_init(&rdata->refcount);
3927 		INIT_LIST_HEAD(&rdata->list);
3928 		init_completion(&rdata->done);
3929 		INIT_WORK(&rdata->work, complete);
3930 	}
3931 
3932 	return rdata;
3933 }
3934 
3935 void
3936 cifs_readdata_release(struct kref *refcount)
3937 {
3938 	struct cifs_readdata *rdata = container_of(refcount,
3939 					struct cifs_readdata, refcount);
3940 
3941 	if (rdata->ctx)
3942 		kref_put(&rdata->ctx->refcount, cifs_aio_ctx_release);
3943 #ifdef CONFIG_CIFS_SMB_DIRECT
3944 	if (rdata->mr) {
3945 		smbd_deregister_mr(rdata->mr);
3946 		rdata->mr = NULL;
3947 	}
3948 #endif
3949 	if (rdata->cfile)
3950 		cifsFileInfo_put(rdata->cfile);
3951 
3952 	kfree(rdata);
3953 }
3954 
3955 static void collect_uncached_read_data(struct cifs_aio_ctx *ctx);
3956 
3957 static void
3958 cifs_uncached_readv_complete(struct work_struct *work)
3959 {
3960 	struct cifs_readdata *rdata = container_of(work,
3961 						struct cifs_readdata, work);
3962 
3963 	complete(&rdata->done);
3964 	collect_uncached_read_data(rdata->ctx);
3965 	/* the below call can possibly free the last ref to aio ctx */
3966 	kref_put(&rdata->refcount, cifs_readdata_release);
3967 }
3968 
/*
 * Send an existing uncached read request again, as one piece.
 *
 * @rdata:      the read request to resend
 * @rdata_list: list that @rdata is appended to once it has been sent
 * @ctx:        the aio context (not referenced by this function)
 *
 * The attempt is repeated for as long as it fails with -EAGAIN.
 *
 * Returns 0 if the request was passed to ->async_readv() and queued on
 * @rdata_list.  On failure a reference on @rdata is dropped and the error
 * code is returned.
 */
static int cifs_resend_rdata(struct cifs_readdata *rdata,
			struct list_head *rdata_list,
			struct cifs_aio_ctx *ctx)
{
	unsigned int rsize;
	struct cifs_credits credits;
	int rc;
	struct TCP_Server_Info *server;

	/* XXX: should we pick a new channel here? */
	server = rdata->server;

	do {
		if (rdata->cfile->invalidHandle) {
			rc = cifs_reopen_file(rdata->cfile, true);
			/* "continue" re-tests the loop condition, so -EAGAIN retries */
			if (rc == -EAGAIN)
				continue;
			else if (rc)
				break;
		}

		/*
		 * Wait for credits to resend this rdata.
		 * Note: we are attempting to resend the whole rdata not in
		 * segments
		 */
		do {
			rc = server->ops->wait_mtu_credits(server, rdata->bytes,
						&rsize, &credits);

			if (rc)
				goto fail;

			/* Not enough for the whole request: give them back and wait */
			if (rsize < rdata->bytes) {
				add_credits_and_wake_if(server, &credits, 0);
				msleep(1000);
			}
		} while (rsize < rdata->bytes);
		rdata->credits = credits;

		rc = adjust_credits(server, &rdata->credits, rdata->bytes);
		if (!rc) {
			/* The handle may have gone stale while we waited for credits */
			if (rdata->cfile->invalidHandle)
				rc = -EAGAIN;
			else {
#ifdef CONFIG_CIFS_SMB_DIRECT
				/* Drop the memory registration of the previous attempt */
				if (rdata->mr) {
					rdata->mr->need_invalidate = true;
					smbd_deregister_mr(rdata->mr);
					rdata->mr = NULL;
				}
#endif
				rc = server->ops->async_readv(rdata);
			}
		}

		/* If the read was successfully sent, we are done */
		if (!rc) {
			/* Add to aio pending list */
			list_add_tail(&rdata->list, rdata_list);
			return 0;
		}

		/* Roll back credits and retry if needed */
		add_credits_and_wake_if(server, &rdata->credits, 0);
	} while (rc == -EAGAIN);

fail:
	kref_put(&rdata->refcount, cifs_readdata_release);
	return rc;
}
4040 
4041 static int
4042 cifs_send_async_read(loff_t fpos, size_t len, struct cifsFileInfo *open_file,
4043 		     struct cifs_sb_info *cifs_sb, struct list_head *rdata_list,
4044 		     struct cifs_aio_ctx *ctx)
4045 {
4046 	struct cifs_readdata *rdata;
4047 	unsigned int rsize, nsegs, max_segs = INT_MAX;
4048 	struct cifs_credits credits_on_stack;
4049 	struct cifs_credits *credits = &credits_on_stack;
4050 	size_t cur_len, max_len;
4051 	int rc;
4052 	pid_t pid;
4053 	struct TCP_Server_Info *server;
4054 
4055 	server = cifs_pick_channel(tlink_tcon(open_file->tlink)->ses);
4056 
4057 #ifdef CONFIG_CIFS_SMB_DIRECT
4058 	if (server->smbd_conn)
4059 		max_segs = server->smbd_conn->max_frmr_depth;
4060 #endif
4061 
4062 	if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
4063 		pid = open_file->pid;
4064 	else
4065 		pid = current->tgid;
4066 
4067 	do {
4068 		if (open_file->invalidHandle) {
4069 			rc = cifs_reopen_file(open_file, true);
4070 			if (rc == -EAGAIN)
4071 				continue;
4072 			else if (rc)
4073 				break;
4074 		}
4075 
4076 		if (cifs_sb->ctx->rsize == 0)
4077 			cifs_sb->ctx->rsize =
4078 				server->ops->negotiate_rsize(tlink_tcon(open_file->tlink),
4079 							     cifs_sb->ctx);
4080 
4081 		rc = server->ops->wait_mtu_credits(server, cifs_sb->ctx->rsize,
4082 						   &rsize, credits);
4083 		if (rc)
4084 			break;
4085 
4086 		max_len = min_t(size_t, len, rsize);
4087 
4088 		cur_len = cifs_limit_bvec_subset(&ctx->iter, max_len,
4089 						 max_segs, &nsegs);
4090 		cifs_dbg(FYI, "read-to-iter len=%zx/%zx nsegs=%u/%lu/%u\n",
4091 			 cur_len, max_len, nsegs, ctx->iter.nr_segs, max_segs);
4092 		if (cur_len == 0) {
4093 			rc = -EIO;
4094 			add_credits_and_wake_if(server, credits, 0);
4095 			break;
4096 		}
4097 
4098 		rdata = cifs_readdata_alloc(cifs_uncached_readv_complete);
4099 		if (!rdata) {
4100 			add_credits_and_wake_if(server, credits, 0);
4101 			rc = -ENOMEM;
4102 			break;
4103 		}
4104 
4105 		rdata->server	= server;
4106 		rdata->cfile	= cifsFileInfo_get(open_file);
4107 		rdata->offset	= fpos;
4108 		rdata->bytes	= cur_len;
4109 		rdata->pid	= pid;
4110 		rdata->credits	= credits_on_stack;
4111 		rdata->ctx	= ctx;
4112 		kref_get(&ctx->refcount);
4113 
4114 		rdata->iter	= ctx->iter;
4115 		iov_iter_truncate(&rdata->iter, cur_len);
4116 
4117 		rc = adjust_credits(server, &rdata->credits, rdata->bytes);
4118 
4119 		if (!rc) {
4120 			if (rdata->cfile->invalidHandle)
4121 				rc = -EAGAIN;
4122 			else
4123 				rc = server->ops->async_readv(rdata);
4124 		}
4125 
4126 		if (rc) {
4127 			add_credits_and_wake_if(server, &rdata->credits, 0);
4128 			kref_put(&rdata->refcount, cifs_readdata_release);
4129 			if (rc == -EAGAIN)
4130 				continue;
4131 			break;
4132 		}
4133 
4134 		list_add_tail(&rdata->list, rdata_list);
4135 		iov_iter_advance(&ctx->iter, cur_len);
4136 		fpos += cur_len;
4137 		len -= cur_len;
4138 	} while (len > 0);
4139 
4140 	return rc;
4141 }
4142 
4143 static void
4144 collect_uncached_read_data(struct cifs_aio_ctx *ctx)
4145 {
4146 	struct cifs_readdata *rdata, *tmp;
4147 	struct cifs_sb_info *cifs_sb;
4148 	int rc;
4149 
4150 	cifs_sb = CIFS_SB(ctx->cfile->dentry->d_sb);
4151 
4152 	mutex_lock(&ctx->aio_mutex);
4153 
4154 	if (list_empty(&ctx->list)) {
4155 		mutex_unlock(&ctx->aio_mutex);
4156 		return;
4157 	}
4158 
4159 	rc = ctx->rc;
4160 	/* the loop below should proceed in the order of increasing offsets */
4161 again:
4162 	list_for_each_entry_safe(rdata, tmp, &ctx->list, list) {
4163 		if (!rc) {
4164 			if (!try_wait_for_completion(&rdata->done)) {
4165 				mutex_unlock(&ctx->aio_mutex);
4166 				return;
4167 			}
4168 
4169 			if (rdata->result == -EAGAIN) {
4170 				/* resend call if it's a retryable error */
4171 				struct list_head tmp_list;
4172 				unsigned int got_bytes = rdata->got_bytes;
4173 
4174 				list_del_init(&rdata->list);
4175 				INIT_LIST_HEAD(&tmp_list);
4176 
4177 				if (ctx->direct_io) {
4178 					/*
4179 					 * Re-use rdata as this is a
4180 					 * direct I/O
4181 					 */
4182 					rc = cifs_resend_rdata(
4183 						rdata,
4184 						&tmp_list, ctx);
4185 				} else {
4186 					rc = cifs_send_async_read(
4187 						rdata->offset + got_bytes,
4188 						rdata->bytes - got_bytes,
4189 						rdata->cfile, cifs_sb,
4190 						&tmp_list, ctx);
4191 
4192 					kref_put(&rdata->refcount,
4193 						cifs_readdata_release);
4194 				}
4195 
4196 				list_splice(&tmp_list, &ctx->list);
4197 
4198 				goto again;
4199 			} else if (rdata->result)
4200 				rc = rdata->result;
4201 
4202 			/* if there was a short read -- discard anything left */
4203 			if (rdata->got_bytes && rdata->got_bytes < rdata->bytes)
4204 				rc = -ENODATA;
4205 
4206 			ctx->total_len += rdata->got_bytes;
4207 		}
4208 		list_del_init(&rdata->list);
4209 		kref_put(&rdata->refcount, cifs_readdata_release);
4210 	}
4211 
4212 	/* mask nodata case */
4213 	if (rc == -ENODATA)
4214 		rc = 0;
4215 
4216 	ctx->rc = (rc == 0) ? (ssize_t)ctx->total_len : rc;
4217 
4218 	mutex_unlock(&ctx->aio_mutex);
4219 
4220 	if (ctx->iocb && ctx->iocb->ki_complete)
4221 		ctx->iocb->ki_complete(ctx->iocb, ctx->rc);
4222 	else
4223 		complete(&ctx->done);
4224 }
4225 
4226 static ssize_t __cifs_readv(
4227 	struct kiocb *iocb, struct iov_iter *to, bool direct)
4228 {
4229 	size_t len;
4230 	struct file *file = iocb->ki_filp;
4231 	struct cifs_sb_info *cifs_sb;
4232 	struct cifsFileInfo *cfile;
4233 	struct cifs_tcon *tcon;
4234 	ssize_t rc, total_read = 0;
4235 	loff_t offset = iocb->ki_pos;
4236 	struct cifs_aio_ctx *ctx;
4237 
4238 	len = iov_iter_count(to);
4239 	if (!len)
4240 		return 0;
4241 
4242 	cifs_sb = CIFS_FILE_SB(file);
4243 	cfile = file->private_data;
4244 	tcon = tlink_tcon(cfile->tlink);
4245 
4246 	if (!tcon->ses->server->ops->async_readv)
4247 		return -ENOSYS;
4248 
4249 	if ((file->f_flags & O_ACCMODE) == O_WRONLY)
4250 		cifs_dbg(FYI, "attempting read on write only file instance\n");
4251 
4252 	ctx = cifs_aio_ctx_alloc();
4253 	if (!ctx)
4254 		return -ENOMEM;
4255 
4256 	ctx->pos	= offset;
4257 	ctx->direct_io	= direct;
4258 	ctx->len	= len;
4259 	ctx->cfile	= cifsFileInfo_get(cfile);
4260 	ctx->nr_pinned_pages = 0;
4261 
4262 	if (!is_sync_kiocb(iocb))
4263 		ctx->iocb = iocb;
4264 
4265 	if (user_backed_iter(to)) {
4266 		/*
4267 		 * Extract IOVEC/UBUF-type iterators to a BVEC-type iterator as
4268 		 * they contain references to the calling process's virtual
4269 		 * memory layout which won't be available in an async worker
4270 		 * thread.  This also takes a pin on every folio involved.
4271 		 */
4272 		rc = netfs_extract_user_iter(to, iov_iter_count(to),
4273 					     &ctx->iter, 0);
4274 		if (rc < 0) {
4275 			kref_put(&ctx->refcount, cifs_aio_ctx_release);
4276 			return rc;
4277 		}
4278 
4279 		ctx->nr_pinned_pages = rc;
4280 		ctx->bv = (void *)ctx->iter.bvec;
4281 		ctx->bv_need_unpin = iov_iter_extract_will_pin(to);
4282 		ctx->should_dirty = true;
4283 	} else if ((iov_iter_is_bvec(to) || iov_iter_is_kvec(to)) &&
4284 		   !is_sync_kiocb(iocb)) {
4285 		/*
4286 		 * If the op is asynchronous, we need to copy the list attached
4287 		 * to a BVEC/KVEC-type iterator, but we assume that the storage
4288 		 * will be retained by the caller; in any case, we may or may
4289 		 * not be able to pin the pages, so we don't try.
4290 		 */
4291 		ctx->bv = (void *)dup_iter(&ctx->iter, to, GFP_KERNEL);
4292 		if (!ctx->bv) {
4293 			kref_put(&ctx->refcount, cifs_aio_ctx_release);
4294 			return -ENOMEM;
4295 		}
4296 	} else {
4297 		/*
4298 		 * Otherwise, we just pass the iterator down as-is and rely on
4299 		 * the caller to make sure the pages referred to by the
4300 		 * iterator don't evaporate.
4301 		 */
4302 		ctx->iter = *to;
4303 	}
4304 
4305 	if (direct) {
4306 		rc = filemap_write_and_wait_range(file->f_inode->i_mapping,
4307 						  offset, offset + len - 1);
4308 		if (rc) {
4309 			kref_put(&ctx->refcount, cifs_aio_ctx_release);
4310 			return -EAGAIN;
4311 		}
4312 	}
4313 
4314 	/* grab a lock here due to read response handlers can access ctx */
4315 	mutex_lock(&ctx->aio_mutex);
4316 
4317 	rc = cifs_send_async_read(offset, len, cfile, cifs_sb, &ctx->list, ctx);
4318 
4319 	/* if at least one read request send succeeded, then reset rc */
4320 	if (!list_empty(&ctx->list))
4321 		rc = 0;
4322 
4323 	mutex_unlock(&ctx->aio_mutex);
4324 
4325 	if (rc) {
4326 		kref_put(&ctx->refcount, cifs_aio_ctx_release);
4327 		return rc;
4328 	}
4329 
4330 	if (!is_sync_kiocb(iocb)) {
4331 		kref_put(&ctx->refcount, cifs_aio_ctx_release);
4332 		return -EIOCBQUEUED;
4333 	}
4334 
4335 	rc = wait_for_completion_killable(&ctx->done);
4336 	if (rc) {
4337 		mutex_lock(&ctx->aio_mutex);
4338 		ctx->rc = rc = -EINTR;
4339 		total_read = ctx->total_len;
4340 		mutex_unlock(&ctx->aio_mutex);
4341 	} else {
4342 		rc = ctx->rc;
4343 		total_read = ctx->total_len;
4344 	}
4345 
4346 	kref_put(&ctx->refcount, cifs_aio_ctx_release);
4347 
4348 	if (total_read) {
4349 		iocb->ki_pos += total_read;
4350 		return total_read;
4351 	}
4352 	return rc;
4353 }
4354 
4355 ssize_t cifs_direct_readv(struct kiocb *iocb, struct iov_iter *to)
4356 {
4357 	return __cifs_readv(iocb, to, true);
4358 }
4359 
4360 ssize_t cifs_user_readv(struct kiocb *iocb, struct iov_iter *to)
4361 {
4362 	return __cifs_readv(iocb, to, false);
4363 }
4364 
4365 ssize_t
4366 cifs_strict_readv(struct kiocb *iocb, struct iov_iter *to)
4367 {
4368 	struct inode *inode = file_inode(iocb->ki_filp);
4369 	struct cifsInodeInfo *cinode = CIFS_I(inode);
4370 	struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
4371 	struct cifsFileInfo *cfile = (struct cifsFileInfo *)
4372 						iocb->ki_filp->private_data;
4373 	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
4374 	int rc = -EACCES;
4375 
4376 	/*
4377 	 * In strict cache mode we need to read from the server all the time
4378 	 * if we don't have level II oplock because the server can delay mtime
4379 	 * change - so we can't make a decision about inode invalidating.
4380 	 * And we can also fail with pagereading if there are mandatory locks
4381 	 * on pages affected by this read but not on the region from pos to
4382 	 * pos+len-1.
4383 	 */
4384 	if (!CIFS_CACHE_READ(cinode))
4385 		return cifs_user_readv(iocb, to);
4386 
4387 	if (cap_unix(tcon->ses) &&
4388 	    (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
4389 	    ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
4390 		return generic_file_read_iter(iocb, to);
4391 
4392 	/*
4393 	 * We need to hold the sem to be sure nobody modifies lock list
4394 	 * with a brlock that prevents reading.
4395 	 */
4396 	down_read(&cinode->lock_sem);
4397 	if (!cifs_find_lock_conflict(cfile, iocb->ki_pos, iov_iter_count(to),
4398 				     tcon->ses->server->vals->shared_lock_type,
4399 				     0, NULL, CIFS_READ_OP))
4400 		rc = generic_file_read_iter(iocb, to);
4401 	up_read(&cinode->lock_sem);
4402 	return rc;
4403 }
4404 
4405 static ssize_t
4406 cifs_read(struct file *file, char *read_data, size_t read_size, loff_t *offset)
4407 {
4408 	int rc = -EACCES;
4409 	unsigned int bytes_read = 0;
4410 	unsigned int total_read;
4411 	unsigned int current_read_size;
4412 	unsigned int rsize;
4413 	struct cifs_sb_info *cifs_sb;
4414 	struct cifs_tcon *tcon;
4415 	struct TCP_Server_Info *server;
4416 	unsigned int xid;
4417 	char *cur_offset;
4418 	struct cifsFileInfo *open_file;
4419 	struct cifs_io_parms io_parms = {0};
4420 	int buf_type = CIFS_NO_BUFFER;
4421 	__u32 pid;
4422 
4423 	xid = get_xid();
4424 	cifs_sb = CIFS_FILE_SB(file);
4425 
4426 	/* FIXME: set up handlers for larger reads and/or convert to async */
4427 	rsize = min_t(unsigned int, cifs_sb->ctx->rsize, CIFSMaxBufSize);
4428 
4429 	if (file->private_data == NULL) {
4430 		rc = -EBADF;
4431 		free_xid(xid);
4432 		return rc;
4433 	}
4434 	open_file = file->private_data;
4435 	tcon = tlink_tcon(open_file->tlink);
4436 	server = cifs_pick_channel(tcon->ses);
4437 
4438 	if (!server->ops->sync_read) {
4439 		free_xid(xid);
4440 		return -ENOSYS;
4441 	}
4442 
4443 	if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
4444 		pid = open_file->pid;
4445 	else
4446 		pid = current->tgid;
4447 
4448 	if ((file->f_flags & O_ACCMODE) == O_WRONLY)
4449 		cifs_dbg(FYI, "attempting read on write only file instance\n");
4450 
4451 	for (total_read = 0, cur_offset = read_data; read_size > total_read;
4452 	     total_read += bytes_read, cur_offset += bytes_read) {
4453 		do {
4454 			current_read_size = min_t(uint, read_size - total_read,
4455 						  rsize);
4456 			/*
4457 			 * For windows me and 9x we do not want to request more
4458 			 * than it negotiated since it will refuse the read
4459 			 * then.
4460 			 */
4461 			if (!(tcon->ses->capabilities &
4462 				tcon->ses->server->vals->cap_large_files)) {
4463 				current_read_size = min_t(uint,
4464 					current_read_size, CIFSMaxBufSize);
4465 			}
4466 			if (open_file->invalidHandle) {
4467 				rc = cifs_reopen_file(open_file, true);
4468 				if (rc != 0)
4469 					break;
4470 			}
4471 			io_parms.pid = pid;
4472 			io_parms.tcon = tcon;
4473 			io_parms.offset = *offset;
4474 			io_parms.length = current_read_size;
4475 			io_parms.server = server;
4476 			rc = server->ops->sync_read(xid, &open_file->fid, &io_parms,
4477 						    &bytes_read, &cur_offset,
4478 						    &buf_type);
4479 		} while (rc == -EAGAIN);
4480 
4481 		if (rc || (bytes_read == 0)) {
4482 			if (total_read) {
4483 				break;
4484 			} else {
4485 				free_xid(xid);
4486 				return rc;
4487 			}
4488 		} else {
4489 			cifs_stats_bytes_read(tcon, total_read);
4490 			*offset += bytes_read;
4491 		}
4492 	}
4493 	free_xid(xid);
4494 	return total_read;
4495 }
4496 
4497 /*
4498  * If the page is mmap'ed into a process' page tables, then we need to make
4499  * sure that it doesn't change while being written back.
4500  */
4501 static vm_fault_t cifs_page_mkwrite(struct vm_fault *vmf)
4502 {
4503 	struct folio *folio = page_folio(vmf->page);
4504 
4505 	/* Wait for the folio to be written to the cache before we allow it to
4506 	 * be modified.  We then assume the entire folio will need writing back.
4507 	 */
4508 #ifdef CONFIG_CIFS_FSCACHE
4509 	if (folio_test_fscache(folio) &&
4510 	    folio_wait_fscache_killable(folio) < 0)
4511 		return VM_FAULT_RETRY;
4512 #endif
4513 
4514 	folio_wait_writeback(folio);
4515 
4516 	if (folio_lock_killable(folio) < 0)
4517 		return VM_FAULT_RETRY;
4518 	return VM_FAULT_LOCKED;
4519 }
4520 
4521 static const struct vm_operations_struct cifs_file_vm_ops = {
4522 	.fault = filemap_fault,
4523 	.map_pages = filemap_map_pages,
4524 	.page_mkwrite = cifs_page_mkwrite,
4525 };
4526 
4527 int cifs_file_strict_mmap(struct file *file, struct vm_area_struct *vma)
4528 {
4529 	int xid, rc = 0;
4530 	struct inode *inode = file_inode(file);
4531 
4532 	xid = get_xid();
4533 
4534 	if (!CIFS_CACHE_READ(CIFS_I(inode)))
4535 		rc = cifs_zap_mapping(inode);
4536 	if (!rc)
4537 		rc = generic_file_mmap(file, vma);
4538 	if (!rc)
4539 		vma->vm_ops = &cifs_file_vm_ops;
4540 
4541 	free_xid(xid);
4542 	return rc;
4543 }
4544 
4545 int cifs_file_mmap(struct file *file, struct vm_area_struct *vma)
4546 {
4547 	int rc, xid;
4548 
4549 	xid = get_xid();
4550 
4551 	rc = cifs_revalidate_file(file);
4552 	if (rc)
4553 		cifs_dbg(FYI, "Validation prior to mmap failed, error=%d\n",
4554 			 rc);
4555 	if (!rc)
4556 		rc = generic_file_mmap(file, vma);
4557 	if (!rc)
4558 		vma->vm_ops = &cifs_file_vm_ops;
4559 
4560 	free_xid(xid);
4561 	return rc;
4562 }
4563 
4564 /*
4565  * Unlock a bunch of folios in the pagecache.
4566  */
4567 static void cifs_unlock_folios(struct address_space *mapping, pgoff_t first, pgoff_t last)
4568 {
4569 	struct folio *folio;
4570 	XA_STATE(xas, &mapping->i_pages, first);
4571 
4572 	rcu_read_lock();
4573 	xas_for_each(&xas, folio, last) {
4574 		folio_unlock(folio);
4575 	}
4576 	rcu_read_unlock();
4577 }
4578 
4579 static void cifs_readahead_complete(struct work_struct *work)
4580 {
4581 	struct cifs_readdata *rdata = container_of(work,
4582 						   struct cifs_readdata, work);
4583 	struct folio *folio;
4584 	pgoff_t last;
4585 	bool good = rdata->result == 0 || (rdata->result == -EAGAIN && rdata->got_bytes);
4586 
4587 	XA_STATE(xas, &rdata->mapping->i_pages, rdata->offset / PAGE_SIZE);
4588 
4589 	if (good)
4590 		cifs_readahead_to_fscache(rdata->mapping->host,
4591 					  rdata->offset, rdata->bytes);
4592 
4593 	if (iov_iter_count(&rdata->iter) > 0)
4594 		iov_iter_zero(iov_iter_count(&rdata->iter), &rdata->iter);
4595 
4596 	last = (rdata->offset + rdata->bytes - 1) / PAGE_SIZE;
4597 
4598 	rcu_read_lock();
4599 	xas_for_each(&xas, folio, last) {
4600 		if (good) {
4601 			flush_dcache_folio(folio);
4602 			folio_mark_uptodate(folio);
4603 		}
4604 		folio_unlock(folio);
4605 	}
4606 	rcu_read_unlock();
4607 
4608 	kref_put(&rdata->refcount, cifs_readdata_release);
4609 }
4610 
4611 static void cifs_readahead(struct readahead_control *ractl)
4612 {
4613 	struct cifsFileInfo *open_file = ractl->file->private_data;
4614 	struct cifs_sb_info *cifs_sb = CIFS_FILE_SB(ractl->file);
4615 	struct TCP_Server_Info *server;
4616 	unsigned int xid, nr_pages, cache_nr_pages = 0;
4617 	unsigned int ra_pages;
4618 	pgoff_t next_cached = ULONG_MAX, ra_index;
4619 	bool caching = fscache_cookie_enabled(cifs_inode_cookie(ractl->mapping->host)) &&
4620 		cifs_inode_cookie(ractl->mapping->host)->cache_priv;
4621 	bool check_cache = caching;
4622 	pid_t pid;
4623 	int rc = 0;
4624 
4625 	/* Note that readahead_count() lags behind our dequeuing of pages from
4626 	 * the ractl, wo we have to keep track for ourselves.
4627 	 */
4628 	ra_pages = readahead_count(ractl);
4629 	ra_index = readahead_index(ractl);
4630 
4631 	xid = get_xid();
4632 
4633 	if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
4634 		pid = open_file->pid;
4635 	else
4636 		pid = current->tgid;
4637 
4638 	server = cifs_pick_channel(tlink_tcon(open_file->tlink)->ses);
4639 
4640 	cifs_dbg(FYI, "%s: file=%p mapping=%p num_pages=%u\n",
4641 		 __func__, ractl->file, ractl->mapping, ra_pages);
4642 
4643 	/*
4644 	 * Chop the readahead request up into rsize-sized read requests.
4645 	 */
4646 	while ((nr_pages = ra_pages)) {
4647 		unsigned int i, rsize;
4648 		struct cifs_readdata *rdata;
4649 		struct cifs_credits credits_on_stack;
4650 		struct cifs_credits *credits = &credits_on_stack;
4651 		struct folio *folio;
4652 		pgoff_t fsize;
4653 
4654 		/*
4655 		 * Find out if we have anything cached in the range of
4656 		 * interest, and if so, where the next chunk of cached data is.
4657 		 */
4658 		if (caching) {
4659 			if (check_cache) {
4660 				rc = cifs_fscache_query_occupancy(
4661 					ractl->mapping->host, ra_index, nr_pages,
4662 					&next_cached, &cache_nr_pages);
4663 				if (rc < 0)
4664 					caching = false;
4665 				check_cache = false;
4666 			}
4667 
4668 			if (ra_index == next_cached) {
4669 				/*
4670 				 * TODO: Send a whole batch of pages to be read
4671 				 * by the cache.
4672 				 */
4673 				folio = readahead_folio(ractl);
4674 				fsize = folio_nr_pages(folio);
4675 				ra_pages -= fsize;
4676 				ra_index += fsize;
4677 				if (cifs_readpage_from_fscache(ractl->mapping->host,
4678 							       &folio->page) < 0) {
4679 					/*
4680 					 * TODO: Deal with cache read failure
4681 					 * here, but for the moment, delegate
4682 					 * that to readpage.
4683 					 */
4684 					caching = false;
4685 				}
4686 				folio_unlock(folio);
4687 				next_cached += fsize;
4688 				cache_nr_pages -= fsize;
4689 				if (cache_nr_pages == 0)
4690 					check_cache = true;
4691 				continue;
4692 			}
4693 		}
4694 
4695 		if (open_file->invalidHandle) {
4696 			rc = cifs_reopen_file(open_file, true);
4697 			if (rc) {
4698 				if (rc == -EAGAIN)
4699 					continue;
4700 				break;
4701 			}
4702 		}
4703 
4704 		if (cifs_sb->ctx->rsize == 0)
4705 			cifs_sb->ctx->rsize =
4706 				server->ops->negotiate_rsize(tlink_tcon(open_file->tlink),
4707 							     cifs_sb->ctx);
4708 
4709 		rc = server->ops->wait_mtu_credits(server, cifs_sb->ctx->rsize,
4710 						   &rsize, credits);
4711 		if (rc)
4712 			break;
4713 		nr_pages = min_t(size_t, rsize / PAGE_SIZE, ra_pages);
4714 		if (next_cached != ULONG_MAX)
4715 			nr_pages = min_t(size_t, nr_pages, next_cached - ra_index);
4716 
4717 		/*
4718 		 * Give up immediately if rsize is too small to read an entire
4719 		 * page. The VFS will fall back to readpage. We should never
4720 		 * reach this point however since we set ra_pages to 0 when the
4721 		 * rsize is smaller than a cache page.
4722 		 */
4723 		if (unlikely(!nr_pages)) {
4724 			add_credits_and_wake_if(server, credits, 0);
4725 			break;
4726 		}
4727 
4728 		rdata = cifs_readdata_alloc(cifs_readahead_complete);
4729 		if (!rdata) {
4730 			/* best to give up if we're out of mem */
4731 			add_credits_and_wake_if(server, credits, 0);
4732 			break;
4733 		}
4734 
4735 		rdata->offset	= ra_index * PAGE_SIZE;
4736 		rdata->bytes	= nr_pages * PAGE_SIZE;
4737 		rdata->cfile	= cifsFileInfo_get(open_file);
4738 		rdata->server	= server;
4739 		rdata->mapping	= ractl->mapping;
4740 		rdata->pid	= pid;
4741 		rdata->credits	= credits_on_stack;
4742 
4743 		for (i = 0; i < nr_pages; i++) {
4744 			if (!readahead_folio(ractl))
4745 				WARN_ON(1);
4746 		}
4747 		ra_pages -= nr_pages;
4748 		ra_index += nr_pages;
4749 
4750 		iov_iter_xarray(&rdata->iter, ITER_DEST, &rdata->mapping->i_pages,
4751 				rdata->offset, rdata->bytes);
4752 
4753 		rc = adjust_credits(server, &rdata->credits, rdata->bytes);
4754 		if (!rc) {
4755 			if (rdata->cfile->invalidHandle)
4756 				rc = -EAGAIN;
4757 			else
4758 				rc = server->ops->async_readv(rdata);
4759 		}
4760 
4761 		if (rc) {
4762 			add_credits_and_wake_if(server, &rdata->credits, 0);
4763 			cifs_unlock_folios(rdata->mapping,
4764 					   rdata->offset / PAGE_SIZE,
4765 					   (rdata->offset + rdata->bytes - 1) / PAGE_SIZE);
4766 			/* Fallback to the readpage in error/reconnect cases */
4767 			kref_put(&rdata->refcount, cifs_readdata_release);
4768 			break;
4769 		}
4770 
4771 		kref_put(&rdata->refcount, cifs_readdata_release);
4772 	}
4773 
4774 	free_xid(xid);
4775 }
4776 
4777 /*
4778  * cifs_readpage_worker must be called with the page pinned
4779  */
4780 static int cifs_readpage_worker(struct file *file, struct page *page,
4781 	loff_t *poffset)
4782 {
4783 	struct inode *inode = file_inode(file);
4784 	struct timespec64 atime, mtime;
4785 	char *read_data;
4786 	int rc;
4787 
4788 	/* Is the page cached? */
4789 	rc = cifs_readpage_from_fscache(inode, page);
4790 	if (rc == 0)
4791 		goto read_complete;
4792 
4793 	read_data = kmap(page);
4794 	/* for reads over a certain size could initiate async read ahead */
4795 
4796 	rc = cifs_read(file, read_data, PAGE_SIZE, poffset);
4797 
4798 	if (rc < 0)
4799 		goto io_error;
4800 	else
4801 		cifs_dbg(FYI, "Bytes read %d\n", rc);
4802 
4803 	/* we do not want atime to be less than mtime, it broke some apps */
4804 	atime = inode_set_atime_to_ts(inode, current_time(inode));
4805 	mtime = inode_get_mtime(inode);
4806 	if (timespec64_compare(&atime, &mtime) < 0)
4807 		inode_set_atime_to_ts(inode, inode_get_mtime(inode));
4808 
4809 	if (PAGE_SIZE > rc)
4810 		memset(read_data + rc, 0, PAGE_SIZE - rc);
4811 
4812 	flush_dcache_page(page);
4813 	SetPageUptodate(page);
4814 	rc = 0;
4815 
4816 io_error:
4817 	kunmap(page);
4818 
4819 read_complete:
4820 	unlock_page(page);
4821 	return rc;
4822 }
4823 
4824 static int cifs_read_folio(struct file *file, struct folio *folio)
4825 {
4826 	struct page *page = &folio->page;
4827 	loff_t offset = page_file_offset(page);
4828 	int rc = -EACCES;
4829 	unsigned int xid;
4830 
4831 	xid = get_xid();
4832 
4833 	if (file->private_data == NULL) {
4834 		rc = -EBADF;
4835 		free_xid(xid);
4836 		return rc;
4837 	}
4838 
4839 	cifs_dbg(FYI, "read_folio %p at offset %d 0x%x\n",
4840 		 page, (int)offset, (int)offset);
4841 
4842 	rc = cifs_readpage_worker(file, page, &offset);
4843 
4844 	free_xid(xid);
4845 	return rc;
4846 }
4847 
4848 static int is_inode_writable(struct cifsInodeInfo *cifs_inode)
4849 {
4850 	struct cifsFileInfo *open_file;
4851 
4852 	spin_lock(&cifs_inode->open_file_lock);
4853 	list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
4854 		if (OPEN_FMODE(open_file->f_flags) & FMODE_WRITE) {
4855 			spin_unlock(&cifs_inode->open_file_lock);
4856 			return 1;
4857 		}
4858 	}
4859 	spin_unlock(&cifs_inode->open_file_lock);
4860 	return 0;
4861 }
4862 
4863 /* We do not want to update the file size from server for inodes
4864    open for write - to avoid races with writepage extending
4865    the file - in the future we could consider allowing
4866    refreshing the inode only on increases in the file size
4867    but this is tricky to do without racing with writebehind
4868    page caching in the current Linux kernel design */
4869 bool is_size_safe_to_change(struct cifsInodeInfo *cifsInode, __u64 end_of_file,
4870 			    bool from_readdir)
4871 {
4872 	if (!cifsInode)
4873 		return true;
4874 
4875 	if (is_inode_writable(cifsInode) ||
4876 		((cifsInode->oplock & CIFS_CACHE_RW_FLG) != 0 && from_readdir)) {
4877 		/* This inode is open for write at least once */
4878 		struct cifs_sb_info *cifs_sb;
4879 
4880 		cifs_sb = CIFS_SB(cifsInode->netfs.inode.i_sb);
4881 		if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DIRECT_IO) {
4882 			/* since no page cache to corrupt on directio
4883 			we can change size safely */
4884 			return true;
4885 		}
4886 
4887 		if (i_size_read(&cifsInode->netfs.inode) < end_of_file)
4888 			return true;
4889 
4890 		return false;
4891 	} else
4892 		return true;
4893 }
4894 
4895 static int cifs_write_begin(struct file *file, struct address_space *mapping,
4896 			loff_t pos, unsigned len,
4897 			struct page **pagep, void **fsdata)
4898 {
4899 	int oncethru = 0;
4900 	pgoff_t index = pos >> PAGE_SHIFT;
4901 	loff_t offset = pos & (PAGE_SIZE - 1);
4902 	loff_t page_start = pos & PAGE_MASK;
4903 	loff_t i_size;
4904 	struct page *page;
4905 	int rc = 0;
4906 
4907 	cifs_dbg(FYI, "write_begin from %lld len %d\n", (long long)pos, len);
4908 
4909 start:
4910 	page = grab_cache_page_write_begin(mapping, index);
4911 	if (!page) {
4912 		rc = -ENOMEM;
4913 		goto out;
4914 	}
4915 
4916 	if (PageUptodate(page))
4917 		goto out;
4918 
4919 	/*
4920 	 * If we write a full page it will be up to date, no need to read from
4921 	 * the server. If the write is short, we'll end up doing a sync write
4922 	 * instead.
4923 	 */
4924 	if (len == PAGE_SIZE)
4925 		goto out;
4926 
4927 	/*
4928 	 * optimize away the read when we have an oplock, and we're not
4929 	 * expecting to use any of the data we'd be reading in. That
4930 	 * is, when the page lies beyond the EOF, or straddles the EOF
4931 	 * and the write will cover all of the existing data.
4932 	 */
4933 	if (CIFS_CACHE_READ(CIFS_I(mapping->host))) {
4934 		i_size = i_size_read(mapping->host);
4935 		if (page_start >= i_size ||
4936 		    (offset == 0 && (pos + len) >= i_size)) {
4937 			zero_user_segments(page, 0, offset,
4938 					   offset + len,
4939 					   PAGE_SIZE);
4940 			/*
4941 			 * PageChecked means that the parts of the page
4942 			 * to which we're not writing are considered up
4943 			 * to date. Once the data is copied to the
4944 			 * page, it can be set uptodate.
4945 			 */
4946 			SetPageChecked(page);
4947 			goto out;
4948 		}
4949 	}
4950 
4951 	if ((file->f_flags & O_ACCMODE) != O_WRONLY && !oncethru) {
4952 		/*
4953 		 * might as well read a page, it is fast enough. If we get
4954 		 * an error, we don't need to return it. cifs_write_end will
4955 		 * do a sync write instead since PG_uptodate isn't set.
4956 		 */
4957 		cifs_readpage_worker(file, page, &page_start);
4958 		put_page(page);
4959 		oncethru = 1;
4960 		goto start;
4961 	} else {
4962 		/* we could try using another file handle if there is one -
4963 		   but how would we lock it to prevent close of that handle
4964 		   racing with this read? In any case
4965 		   this will be written out by write_end so is fine */
4966 	}
4967 out:
4968 	*pagep = page;
4969 	return rc;
4970 }
4971 
4972 static bool cifs_release_folio(struct folio *folio, gfp_t gfp)
4973 {
4974 	if (folio_test_private(folio))
4975 		return 0;
4976 	if (folio_test_fscache(folio)) {
4977 		if (current_is_kswapd() || !(gfp & __GFP_FS))
4978 			return false;
4979 		folio_wait_fscache(folio);
4980 	}
4981 	fscache_note_page_release(cifs_inode_cookie(folio->mapping->host));
4982 	return true;
4983 }
4984 
4985 static void cifs_invalidate_folio(struct folio *folio, size_t offset,
4986 				 size_t length)
4987 {
4988 	folio_wait_fscache(folio);
4989 }
4990 
4991 static int cifs_launder_folio(struct folio *folio)
4992 {
4993 	int rc = 0;
4994 	loff_t range_start = folio_pos(folio);
4995 	loff_t range_end = range_start + folio_size(folio);
4996 	struct writeback_control wbc = {
4997 		.sync_mode = WB_SYNC_ALL,
4998 		.nr_to_write = 0,
4999 		.range_start = range_start,
5000 		.range_end = range_end,
5001 	};
5002 
5003 	cifs_dbg(FYI, "Launder page: %lu\n", folio->index);
5004 
5005 	if (folio_clear_dirty_for_io(folio))
5006 		rc = cifs_writepage_locked(&folio->page, &wbc);
5007 
5008 	folio_wait_fscache(folio);
5009 	return rc;
5010 }
5011 
5012 void cifs_oplock_break(struct work_struct *work)
5013 {
5014 	struct cifsFileInfo *cfile = container_of(work, struct cifsFileInfo,
5015 						  oplock_break);
5016 	struct inode *inode = d_inode(cfile->dentry);
5017 	struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
5018 	struct cifsInodeInfo *cinode = CIFS_I(inode);
5019 	struct cifs_tcon *tcon;
5020 	struct TCP_Server_Info *server;
5021 	struct tcon_link *tlink;
5022 	int rc = 0;
5023 	bool purge_cache = false, oplock_break_cancelled;
5024 	__u64 persistent_fid, volatile_fid;
5025 	__u16 net_fid;
5026 
5027 	wait_on_bit(&cinode->flags, CIFS_INODE_PENDING_WRITERS,
5028 			TASK_UNINTERRUPTIBLE);
5029 
5030 	tlink = cifs_sb_tlink(cifs_sb);
5031 	if (IS_ERR(tlink))
5032 		goto out;
5033 	tcon = tlink_tcon(tlink);
5034 	server = tcon->ses->server;
5035 
5036 	server->ops->downgrade_oplock(server, cinode, cfile->oplock_level,
5037 				      cfile->oplock_epoch, &purge_cache);
5038 
5039 	if (!CIFS_CACHE_WRITE(cinode) && CIFS_CACHE_READ(cinode) &&
5040 						cifs_has_mand_locks(cinode)) {
5041 		cifs_dbg(FYI, "Reset oplock to None for inode=%p due to mand locks\n",
5042 			 inode);
5043 		cinode->oplock = 0;
5044 	}
5045 
5046 	if (inode && S_ISREG(inode->i_mode)) {
5047 		if (CIFS_CACHE_READ(cinode))
5048 			break_lease(inode, O_RDONLY);
5049 		else
5050 			break_lease(inode, O_WRONLY);
5051 		rc = filemap_fdatawrite(inode->i_mapping);
5052 		if (!CIFS_CACHE_READ(cinode) || purge_cache) {
5053 			rc = filemap_fdatawait(inode->i_mapping);
5054 			mapping_set_error(inode->i_mapping, rc);
5055 			cifs_zap_mapping(inode);
5056 		}
5057 		cifs_dbg(FYI, "Oplock flush inode %p rc %d\n", inode, rc);
5058 		if (CIFS_CACHE_WRITE(cinode))
5059 			goto oplock_break_ack;
5060 	}
5061 
5062 	rc = cifs_push_locks(cfile);
5063 	if (rc)
5064 		cifs_dbg(VFS, "Push locks rc = %d\n", rc);
5065 
5066 oplock_break_ack:
5067 	/*
5068 	 * When oplock break is received and there are no active
5069 	 * file handles but cached, then schedule deferred close immediately.
5070 	 * So, new open will not use cached handle.
5071 	 */
5072 
5073 	if (!CIFS_CACHE_HANDLE(cinode) && !list_empty(&cinode->deferred_closes))
5074 		cifs_close_deferred_file(cinode);
5075 
5076 	persistent_fid = cfile->fid.persistent_fid;
5077 	volatile_fid = cfile->fid.volatile_fid;
5078 	net_fid = cfile->fid.netfid;
5079 	oplock_break_cancelled = cfile->oplock_break_cancelled;
5080 
5081 	_cifsFileInfo_put(cfile, false /* do not wait for ourself */, false);
5082 	/*
5083 	 * MS-SMB2 3.2.5.19.1 and 3.2.5.19.2 (and MS-CIFS 3.2.5.42) do not require
5084 	 * an acknowledgment to be sent when the file has already been closed.
5085 	 */
5086 	spin_lock(&cinode->open_file_lock);
5087 	/* check list empty since can race with kill_sb calling tree disconnect */
5088 	if (!oplock_break_cancelled && !list_empty(&cinode->openFileList)) {
5089 		spin_unlock(&cinode->open_file_lock);
5090 		rc = server->ops->oplock_response(tcon, persistent_fid,
5091 						  volatile_fid, net_fid, cinode);
5092 		cifs_dbg(FYI, "Oplock release rc = %d\n", rc);
5093 	} else
5094 		spin_unlock(&cinode->open_file_lock);
5095 
5096 	cifs_put_tlink(tlink);
5097 out:
5098 	cifs_done_oplock_break(cinode);
5099 }
5100 
/*
 * The presence of cifs_direct_io() in the address space ops vector
 * allows open() O_DIRECT flags which would have failed otherwise.
 *
 * In the non-cached mode (mount with cache=none), we shunt off direct read
 * and write requests so this method should never be called.
 *
 * Direct IO is not yet supported in the cached mode, so this always returns
 * -EINVAL.
 */
static ssize_t
cifs_direct_io(struct kiocb *iocb, struct iov_iter *iter)
{
	/*
	 * FIXME
	 * Eventually need to support direct IO for non forcedirectio mounts
	 */
	return -EINVAL;
}
5119 
5120 static int cifs_swap_activate(struct swap_info_struct *sis,
5121 			      struct file *swap_file, sector_t *span)
5122 {
5123 	struct cifsFileInfo *cfile = swap_file->private_data;
5124 	struct inode *inode = swap_file->f_mapping->host;
5125 	unsigned long blocks;
5126 	long long isize;
5127 
5128 	cifs_dbg(FYI, "swap activate\n");
5129 
5130 	if (!swap_file->f_mapping->a_ops->swap_rw)
5131 		/* Cannot support swap */
5132 		return -EINVAL;
5133 
5134 	spin_lock(&inode->i_lock);
5135 	blocks = inode->i_blocks;
5136 	isize = inode->i_size;
5137 	spin_unlock(&inode->i_lock);
5138 	if (blocks*512 < isize) {
5139 		pr_warn("swap activate: swapfile has holes\n");
5140 		return -EINVAL;
5141 	}
5142 	*span = sis->pages;
5143 
5144 	pr_warn_once("Swap support over SMB3 is experimental\n");
5145 
5146 	/*
5147 	 * TODO: consider adding ACL (or documenting how) to prevent other
5148 	 * users (on this or other systems) from reading it
5149 	 */
5150 
5151 
5152 	/* TODO: add sk_set_memalloc(inet) or similar */
5153 
5154 	if (cfile)
5155 		cfile->swapfile = true;
5156 	/*
5157 	 * TODO: Since file already open, we can't open with DENY_ALL here
5158 	 * but we could add call to grab a byte range lock to prevent others
5159 	 * from reading or writing the file
5160 	 */
5161 
5162 	sis->flags |= SWP_FS_OPS;
5163 	return add_swap_extent(sis, 0, sis->max, 0);
5164 }
5165 
5166 static void cifs_swap_deactivate(struct file *file)
5167 {
5168 	struct cifsFileInfo *cfile = file->private_data;
5169 
5170 	cifs_dbg(FYI, "swap deactivate\n");
5171 
5172 	/* TODO: undo sk_set_memalloc(inet) will eventually be needed */
5173 
5174 	if (cfile)
5175 		cfile->swapfile = false;
5176 
5177 	/* do we need to unpin (or unlock) the file */
5178 }
5179 
/*
 * Flag a folio as dirty so that it will be written back.  When fscache
 * support is built in, the owning inode's cache cookie is passed along to
 * fscache_dirty_folio() so the cache object to write back to can be pinned;
 * otherwise the generic filemap helper is used as-is.
 */
#ifdef CONFIG_CIFS_FSCACHE
static bool cifs_dirty_folio(struct address_space *mapping, struct folio *folio)
{
	struct inode *inode = mapping->host;

	return fscache_dirty_folio(mapping, folio, cifs_inode_cookie(inode));
}
#else
#define cifs_dirty_folio filemap_dirty_folio
#endif
5193 
5194 const struct address_space_operations cifs_addr_ops = {
5195 	.read_folio = cifs_read_folio,
5196 	.readahead = cifs_readahead,
5197 	.writepages = cifs_writepages,
5198 	.write_begin = cifs_write_begin,
5199 	.write_end = cifs_write_end,
5200 	.dirty_folio = cifs_dirty_folio,
5201 	.release_folio = cifs_release_folio,
5202 	.direct_IO = cifs_direct_io,
5203 	.invalidate_folio = cifs_invalidate_folio,
5204 	.launder_folio = cifs_launder_folio,
5205 	.migrate_folio = filemap_migrate_folio,
5206 	/*
5207 	 * TODO: investigate and if useful we could add an is_dirty_writeback
5208 	 * helper if needed
5209 	 */
5210 	.swap_activate = cifs_swap_activate,
5211 	.swap_deactivate = cifs_swap_deactivate,
5212 };
5213 
5214 /*
5215  * cifs_readahead requires the server to support a buffer large enough to
5216  * contain the header plus one complete page of data.  Otherwise, we need
5217  * to leave cifs_readahead out of the address space operations.
5218  */
5219 const struct address_space_operations cifs_addr_ops_smallbuf = {
5220 	.read_folio = cifs_read_folio,
5221 	.writepages = cifs_writepages,
5222 	.write_begin = cifs_write_begin,
5223 	.write_end = cifs_write_end,
5224 	.dirty_folio = cifs_dirty_folio,
5225 	.release_folio = cifs_release_folio,
5226 	.invalidate_folio = cifs_invalidate_folio,
5227 	.launder_folio = cifs_launder_folio,
5228 	.migrate_folio = filemap_migrate_folio,
5229 };
5230