xref: /openbmc/linux/fs/smb/client/file.c (revision 94b00cd6)
1 // SPDX-License-Identifier: LGPL-2.1
2 /*
3  *
4  *   vfs operations that deal with files
5  *
6  *   Copyright (C) International Business Machines  Corp., 2002,2010
7  *   Author(s): Steve French (sfrench@us.ibm.com)
8  *              Jeremy Allison (jra@samba.org)
9  *
10  */
11 #include <linux/fs.h>
12 #include <linux/filelock.h>
13 #include <linux/backing-dev.h>
14 #include <linux/stat.h>
15 #include <linux/fcntl.h>
16 #include <linux/pagemap.h>
17 #include <linux/pagevec.h>
18 #include <linux/writeback.h>
19 #include <linux/task_io_accounting_ops.h>
20 #include <linux/delay.h>
21 #include <linux/mount.h>
22 #include <linux/slab.h>
23 #include <linux/swap.h>
24 #include <linux/mm.h>
25 #include <asm/div64.h>
26 #include "cifsfs.h"
27 #include "cifspdu.h"
28 #include "cifsglob.h"
29 #include "cifsproto.h"
30 #include "smb2proto.h"
31 #include "cifs_unicode.h"
32 #include "cifs_debug.h"
33 #include "cifs_fs_sb.h"
34 #include "fscache.h"
35 #include "smbdirect.h"
36 #include "fs_context.h"
37 #include "cifs_ioctl.h"
38 #include "cached_dir.h"
39 
40 /*
41  * Remove the dirty flags from a span of pages.
42  */
43 static void cifs_undirty_folios(struct inode *inode, loff_t start, unsigned int len)
44 {
45 	struct address_space *mapping = inode->i_mapping;
46 	struct folio *folio;
47 	pgoff_t end;
48 
49 	XA_STATE(xas, &mapping->i_pages, start / PAGE_SIZE);
50 
51 	rcu_read_lock();
52 
53 	end = (start + len - 1) / PAGE_SIZE;
54 	xas_for_each_marked(&xas, folio, end, PAGECACHE_TAG_DIRTY) {
55 		if (xas_retry(&xas, folio))
56 			continue;
57 		xas_pause(&xas);
58 		rcu_read_unlock();
59 		folio_lock(folio);
60 		folio_clear_dirty_for_io(folio);
61 		folio_unlock(folio);
62 		rcu_read_lock();
63 	}
64 
65 	rcu_read_unlock();
66 }
67 
68 /*
69  * Completion of write to server.
70  */
71 void cifs_pages_written_back(struct inode *inode, loff_t start, unsigned int len)
72 {
73 	struct address_space *mapping = inode->i_mapping;
74 	struct folio *folio;
75 	pgoff_t end;
76 
77 	XA_STATE(xas, &mapping->i_pages, start / PAGE_SIZE);
78 
79 	if (!len)
80 		return;
81 
82 	rcu_read_lock();
83 
84 	end = (start + len - 1) / PAGE_SIZE;
85 	xas_for_each(&xas, folio, end) {
86 		if (xas_retry(&xas, folio))
87 			continue;
88 		if (!folio_test_writeback(folio)) {
89 			WARN_ONCE(1, "bad %x @%llx page %lx %lx\n",
90 				  len, start, folio->index, end);
91 			continue;
92 		}
93 
94 		folio_detach_private(folio);
95 		folio_end_writeback(folio);
96 	}
97 
98 	rcu_read_unlock();
99 }
100 
101 /*
102  * Failure of write to server.
103  */
104 void cifs_pages_write_failed(struct inode *inode, loff_t start, unsigned int len)
105 {
106 	struct address_space *mapping = inode->i_mapping;
107 	struct folio *folio;
108 	pgoff_t end;
109 
110 	XA_STATE(xas, &mapping->i_pages, start / PAGE_SIZE);
111 
112 	if (!len)
113 		return;
114 
115 	rcu_read_lock();
116 
117 	end = (start + len - 1) / PAGE_SIZE;
118 	xas_for_each(&xas, folio, end) {
119 		if (xas_retry(&xas, folio))
120 			continue;
121 		if (!folio_test_writeback(folio)) {
122 			WARN_ONCE(1, "bad %x @%llx page %lx %lx\n",
123 				  len, start, folio->index, end);
124 			continue;
125 		}
126 
127 		folio_set_error(folio);
128 		folio_end_writeback(folio);
129 	}
130 
131 	rcu_read_unlock();
132 }
133 
134 /*
135  * Redirty pages after a temporary failure.
136  */
137 void cifs_pages_write_redirty(struct inode *inode, loff_t start, unsigned int len)
138 {
139 	struct address_space *mapping = inode->i_mapping;
140 	struct folio *folio;
141 	pgoff_t end;
142 
143 	XA_STATE(xas, &mapping->i_pages, start / PAGE_SIZE);
144 
145 	if (!len)
146 		return;
147 
148 	rcu_read_lock();
149 
150 	end = (start + len - 1) / PAGE_SIZE;
151 	xas_for_each(&xas, folio, end) {
152 		if (!folio_test_writeback(folio)) {
153 			WARN_ONCE(1, "bad %x @%llx page %lx %lx\n",
154 				  len, start, folio->index, end);
155 			continue;
156 		}
157 
158 		filemap_dirty_folio(folio->mapping, folio);
159 		folio_end_writeback(folio);
160 	}
161 
162 	rcu_read_unlock();
163 }
164 
165 /*
166  * Mark all open files on tree connections as invalid, since they
167  * were closed when the session to the server was lost.
168  */
169 void
170 cifs_mark_open_files_invalid(struct cifs_tcon *tcon)
171 {
172 	struct cifsFileInfo *open_file = NULL;
173 	struct list_head *tmp;
174 	struct list_head *tmp1;
175 
176 	/* only send once per connect */
177 	spin_lock(&tcon->tc_lock);
178 	if (tcon->need_reconnect)
179 		tcon->status = TID_NEED_RECON;
180 
181 	if (tcon->status != TID_NEED_RECON) {
182 		spin_unlock(&tcon->tc_lock);
183 		return;
184 	}
185 	tcon->status = TID_IN_FILES_INVALIDATE;
186 	spin_unlock(&tcon->tc_lock);
187 
188 	/* list all files open on tree connection and mark them invalid */
189 	spin_lock(&tcon->open_file_lock);
190 	list_for_each_safe(tmp, tmp1, &tcon->openFileList) {
191 		open_file = list_entry(tmp, struct cifsFileInfo, tlist);
192 		open_file->invalidHandle = true;
193 		open_file->oplock_break_cancelled = true;
194 	}
195 	spin_unlock(&tcon->open_file_lock);
196 
197 	invalidate_all_cached_dirs(tcon);
198 	spin_lock(&tcon->tc_lock);
199 	if (tcon->status == TID_IN_FILES_INVALIDATE)
200 		tcon->status = TID_NEED_TCON;
201 	spin_unlock(&tcon->tc_lock);
202 
203 	/*
204 	 * BB Add call to invalidate_inodes(sb) for all superblocks mounted
205 	 * to this tcon.
206 	 */
207 }
208 
209 static inline int cifs_convert_flags(unsigned int flags, int rdwr_for_fscache)
210 {
211 	if ((flags & O_ACCMODE) == O_RDONLY)
212 		return GENERIC_READ;
213 	else if ((flags & O_ACCMODE) == O_WRONLY)
214 		return rdwr_for_fscache == 1 ? (GENERIC_READ | GENERIC_WRITE) : GENERIC_WRITE;
215 	else if ((flags & O_ACCMODE) == O_RDWR) {
216 		/* GENERIC_ALL is too much permission to request; it can
217 		   cause unnecessary access-denied errors on create */
218 		/* return GENERIC_ALL; */
219 		return (GENERIC_READ | GENERIC_WRITE);
220 	}
221 
222 	return (READ_CONTROL | FILE_WRITE_ATTRIBUTES | FILE_READ_ATTRIBUTES |
223 		FILE_WRITE_EA | FILE_APPEND_DATA | FILE_WRITE_DATA |
224 		FILE_READ_DATA);
225 }
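
/*
 * A quick illustration of the mapping above: an O_RDONLY open requests
 * GENERIC_READ, an O_WRONLY open requests GENERIC_WRITE (widened to
 * GENERIC_READ | GENERIC_WRITE when rdwr_for_fscache == 1 so the local
 * cache can be filled in around partial writes), and an O_RDWR open
 * requests GENERIC_READ | GENERIC_WRITE.
 */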
226 
227 #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
228 static u32 cifs_posix_convert_flags(unsigned int flags)
229 {
230 	u32 posix_flags = 0;
231 
232 	if ((flags & O_ACCMODE) == O_RDONLY)
233 		posix_flags = SMB_O_RDONLY;
234 	else if ((flags & O_ACCMODE) == O_WRONLY)
235 		posix_flags = SMB_O_WRONLY;
236 	else if ((flags & O_ACCMODE) == O_RDWR)
237 		posix_flags = SMB_O_RDWR;
238 
239 	if (flags & O_CREAT) {
240 		posix_flags |= SMB_O_CREAT;
241 		if (flags & O_EXCL)
242 			posix_flags |= SMB_O_EXCL;
243 	} else if (flags & O_EXCL)
244 		cifs_dbg(FYI, "Application %s pid %d has incorrectly set O_EXCL flag but not O_CREAT on file open. Ignoring O_EXCL\n",
245 			 current->comm, current->tgid);
246 
247 	if (flags & O_TRUNC)
248 		posix_flags |= SMB_O_TRUNC;
249 	/* be safe and imply O_SYNC for O_DSYNC */
250 	if (flags & O_DSYNC)
251 		posix_flags |= SMB_O_SYNC;
252 	if (flags & O_DIRECTORY)
253 		posix_flags |= SMB_O_DIRECTORY;
254 	if (flags & O_NOFOLLOW)
255 		posix_flags |= SMB_O_NOFOLLOW;
256 	if (flags & O_DIRECT)
257 		posix_flags |= SMB_O_DIRECT;
258 
259 	return posix_flags;
260 }
261 #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
262 
263 static inline int cifs_get_disposition(unsigned int flags)
264 {
265 	if ((flags & (O_CREAT | O_EXCL)) == (O_CREAT | O_EXCL))
266 		return FILE_CREATE;
267 	else if ((flags & (O_CREAT | O_TRUNC)) == (O_CREAT | O_TRUNC))
268 		return FILE_OVERWRITE_IF;
269 	else if ((flags & O_CREAT) == O_CREAT)
270 		return FILE_OPEN_IF;
271 	else if ((flags & O_TRUNC) == O_TRUNC)
272 		return FILE_OVERWRITE;
273 	else
274 		return FILE_OPEN;
275 }
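
/*
 * For example: O_CREAT | O_EXCL maps to FILE_CREATE, O_CREAT | O_TRUNC
 * to FILE_OVERWRITE_IF, a bare O_CREAT to FILE_OPEN_IF, a bare O_TRUNC
 * to FILE_OVERWRITE, and anything else to a plain FILE_OPEN.
 */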
276 
277 #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
278 int cifs_posix_open(const char *full_path, struct inode **pinode,
279 			struct super_block *sb, int mode, unsigned int f_flags,
280 			__u32 *poplock, __u16 *pnetfid, unsigned int xid)
281 {
282 	int rc;
283 	FILE_UNIX_BASIC_INFO *presp_data;
284 	__u32 posix_flags = 0;
285 	struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
286 	struct cifs_fattr fattr;
287 	struct tcon_link *tlink;
288 	struct cifs_tcon *tcon;
289 
290 	cifs_dbg(FYI, "posix open %s\n", full_path);
291 
292 	presp_data = kzalloc(sizeof(FILE_UNIX_BASIC_INFO), GFP_KERNEL);
293 	if (presp_data == NULL)
294 		return -ENOMEM;
295 
296 	tlink = cifs_sb_tlink(cifs_sb);
297 	if (IS_ERR(tlink)) {
298 		rc = PTR_ERR(tlink);
299 		goto posix_open_ret;
300 	}
301 
302 	tcon = tlink_tcon(tlink);
303 	mode &= ~current_umask();
304 
305 	posix_flags = cifs_posix_convert_flags(f_flags);
306 	rc = CIFSPOSIXCreate(xid, tcon, posix_flags, mode, pnetfid, presp_data,
307 			     poplock, full_path, cifs_sb->local_nls,
308 			     cifs_remap(cifs_sb));
309 	cifs_put_tlink(tlink);
310 
311 	if (rc)
312 		goto posix_open_ret;
313 
314 	if (presp_data->Type == cpu_to_le32(-1))
315 		goto posix_open_ret; /* open ok, caller does qpathinfo */
316 
317 	if (!pinode)
318 		goto posix_open_ret; /* caller does not need info */
319 
320 	cifs_unix_basic_to_fattr(&fattr, presp_data, cifs_sb);
321 
322 	/* get new inode and set it up */
323 	if (*pinode == NULL) {
324 		cifs_fill_uniqueid(sb, &fattr);
325 		*pinode = cifs_iget(sb, &fattr);
326 		if (!*pinode) {
327 			rc = -ENOMEM;
328 			goto posix_open_ret;
329 		}
330 	} else {
331 		cifs_revalidate_mapping(*pinode);
332 		rc = cifs_fattr_to_inode(*pinode, &fattr, false);
333 	}
334 
335 posix_open_ret:
336 	kfree(presp_data);
337 	return rc;
338 }
339 #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
340 
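/*
 * Open @full_path on the server via the regular (non-POSIX) path:
 * translate the VFS open flags into desired access, disposition and
 * create options, issue the open, then refresh the inode from the
 * returned metadata.  If the post-open inode query fails, the handle
 * is closed again and -ESTALE is mapped to -EOPENSTALE.
 */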
341 static int cifs_nt_open(const char *full_path, struct inode *inode, struct cifs_sb_info *cifs_sb,
342 			struct cifs_tcon *tcon, unsigned int f_flags, __u32 *oplock,
343 			struct cifs_fid *fid, unsigned int xid, struct cifs_open_info_data *buf)
344 {
345 	int rc;
346 	int desired_access;
347 	int disposition;
348 	int create_options = CREATE_NOT_DIR;
349 	struct TCP_Server_Info *server = tcon->ses->server;
350 	struct cifs_open_parms oparms;
351 	int rdwr_for_fscache = 0;
352 
353 	if (!server->ops->open)
354 		return -ENOSYS;
355 
356 	/* If we're caching, we need to be able to fill in around partial writes. */
357 	if (cifs_fscache_enabled(inode) && (f_flags & O_ACCMODE) == O_WRONLY)
358 		rdwr_for_fscache = 1;
359 
360 	desired_access = cifs_convert_flags(f_flags, rdwr_for_fscache);
361 
362 /*********************************************************************
363  *  open flag mapping table:
364  *
365  *	POSIX Flag            CIFS Disposition
366  *	----------            ----------------
367  *	O_CREAT               FILE_OPEN_IF
368  *	O_CREAT | O_EXCL      FILE_CREATE
369  *	O_CREAT | O_TRUNC     FILE_OVERWRITE_IF
370  *	O_TRUNC               FILE_OVERWRITE
371  *	none of the above     FILE_OPEN
372  *
373  *	Note that no POSIX open flag maps directly to the
374  *	FILE_SUPERSEDE disposition (create whether or not the file
375  *	exists).  O_CREAT | O_TRUNC is similar, but it truncates an
376  *	existing file rather than replacing it as FILE_SUPERSEDE does
377  *	(which uses the attributes / metadata passed in on the open).
378  *
379  *	O_SYNC is a reasonable match to the CIFS writethrough flag,
380  *	and the read/write flags match reasonably.  O_LARGEFILE is
381  *	irrelevant because largefile support is always used by this
382  *	client.  O_APPEND, O_DIRECT, O_DIRECTORY, O_FASYNC,
383  *	O_NOFOLLOW and O_NONBLOCK need further investigation.
384  *********************************************************************/
385 
386 	disposition = cifs_get_disposition(f_flags);
387 
388 	/* BB pass O_SYNC flag through on file attributes .. BB */
389 
390 	/* O_SYNC also has bit for O_DSYNC so following check picks up either */
391 	if (f_flags & O_SYNC)
392 		create_options |= CREATE_WRITE_THROUGH;
393 
394 	if (f_flags & O_DIRECT)
395 		create_options |= CREATE_NO_BUFFER;
396 
397 retry_open:
398 	oparms = (struct cifs_open_parms) {
399 		.tcon = tcon,
400 		.cifs_sb = cifs_sb,
401 		.desired_access = desired_access,
402 		.create_options = cifs_create_options(cifs_sb, create_options),
403 		.disposition = disposition,
404 		.path = full_path,
405 		.fid = fid,
406 	};
407 
408 	rc = server->ops->open(xid, &oparms, oplock, buf);
409 	if (rc) {
410 		if (rc == -EACCES && rdwr_for_fscache == 1) {
411 			desired_access = cifs_convert_flags(f_flags, 0);
412 			rdwr_for_fscache = 2;
413 			goto retry_open;
414 		}
415 		return rc;
416 	}
417 	if (rdwr_for_fscache == 2)
418 		cifs_invalidate_cache(inode, FSCACHE_INVAL_DIO_WRITE);
419 
420 	/* TODO: Add support for calling posix query info but with passing in fid */
421 	if (tcon->unix_ext)
422 		rc = cifs_get_inode_info_unix(&inode, full_path, inode->i_sb,
423 					      xid);
424 	else
425 		rc = cifs_get_inode_info(&inode, full_path, buf, inode->i_sb,
426 					 xid, fid);
427 
428 	if (rc) {
429 		server->ops->close(xid, tcon, fid);
430 		if (rc == -ESTALE)
431 			rc = -EOPENSTALE;
432 	}
433 
434 	return rc;
435 }
436 
437 static bool
438 cifs_has_mand_locks(struct cifsInodeInfo *cinode)
439 {
440 	struct cifs_fid_locks *cur;
441 	bool has_locks = false;
442 
443 	down_read(&cinode->lock_sem);
444 	list_for_each_entry(cur, &cinode->llist, llist) {
445 		if (!list_empty(&cur->locks)) {
446 			has_locks = true;
447 			break;
448 		}
449 	}
450 	up_read(&cinode->lock_sem);
451 	return has_locks;
452 }
453 
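/*
 * Take @sem for writing by polling: retry down_write_trylock() every
 * 10ms until it succeeds instead of sleeping in down_write() directly.
 */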
454 void
455 cifs_down_write(struct rw_semaphore *sem)
456 {
457 	while (!down_write_trylock(sem))
458 		msleep(10);
459 }
460 
461 static void cifsFileInfo_put_work(struct work_struct *work);
462 void serverclose_work(struct work_struct *work);
463 
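/*
 * Allocate and initialise the cifsFileInfo for a freshly opened handle:
 * link it into the inode's and tcon's open-file lists, apply any oplock
 * update that arrived while the open was pending, and store it in
 * file->private_data.  Returns NULL on allocation failure.
 */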
464 struct cifsFileInfo *cifs_new_fileinfo(struct cifs_fid *fid, struct file *file,
465 				       struct tcon_link *tlink, __u32 oplock,
466 				       const char *symlink_target)
467 {
468 	struct dentry *dentry = file_dentry(file);
469 	struct inode *inode = d_inode(dentry);
470 	struct cifsInodeInfo *cinode = CIFS_I(inode);
471 	struct cifsFileInfo *cfile;
472 	struct cifs_fid_locks *fdlocks;
473 	struct cifs_tcon *tcon = tlink_tcon(tlink);
474 	struct TCP_Server_Info *server = tcon->ses->server;
475 
476 	cfile = kzalloc(sizeof(struct cifsFileInfo), GFP_KERNEL);
477 	if (cfile == NULL)
478 		return cfile;
479 
480 	fdlocks = kzalloc(sizeof(struct cifs_fid_locks), GFP_KERNEL);
481 	if (!fdlocks) {
482 		kfree(cfile);
483 		return NULL;
484 	}
485 
486 	if (symlink_target) {
487 		cfile->symlink_target = kstrdup(symlink_target, GFP_KERNEL);
488 		if (!cfile->symlink_target) {
489 			kfree(fdlocks);
490 			kfree(cfile);
491 			return NULL;
492 		}
493 	}
494 
495 	INIT_LIST_HEAD(&fdlocks->locks);
496 	fdlocks->cfile = cfile;
497 	cfile->llist = fdlocks;
498 
499 	cfile->count = 1;
500 	cfile->pid = current->tgid;
501 	cfile->uid = current_fsuid();
502 	cfile->dentry = dget(dentry);
503 	cfile->f_flags = file->f_flags;
504 	cfile->invalidHandle = false;
505 	cfile->deferred_close_scheduled = false;
506 	cfile->tlink = cifs_get_tlink(tlink);
507 	INIT_WORK(&cfile->oplock_break, cifs_oplock_break);
508 	INIT_WORK(&cfile->put, cifsFileInfo_put_work);
509 	INIT_WORK(&cfile->serverclose, serverclose_work);
510 	INIT_DELAYED_WORK(&cfile->deferred, smb2_deferred_work_close);
511 	mutex_init(&cfile->fh_mutex);
512 	spin_lock_init(&cfile->file_info_lock);
513 
514 	cifs_sb_active(inode->i_sb);
515 
516 	/*
517 	 * If the server returned a read oplock and we have mandatory brlocks,
518 	 * set oplock level to None.
519 	 */
520 	if (server->ops->is_read_op(oplock) && cifs_has_mand_locks(cinode)) {
521 		cifs_dbg(FYI, "Reset oplock val from read to None due to mand locks\n");
522 		oplock = 0;
523 	}
524 
525 	cifs_down_write(&cinode->lock_sem);
526 	list_add(&fdlocks->llist, &cinode->llist);
527 	up_write(&cinode->lock_sem);
528 
529 	spin_lock(&tcon->open_file_lock);
530 	if (fid->pending_open->oplock != CIFS_OPLOCK_NO_CHANGE && oplock)
531 		oplock = fid->pending_open->oplock;
532 	list_del(&fid->pending_open->olist);
533 
534 	fid->purge_cache = false;
535 	server->ops->set_fid(cfile, fid, oplock);
536 
537 	list_add(&cfile->tlist, &tcon->openFileList);
538 	atomic_inc(&tcon->num_local_opens);
539 
540 	/* if this is a readable file instance, put it first in the list */
541 	spin_lock(&cinode->open_file_lock);
542 	if (file->f_mode & FMODE_READ)
543 		list_add(&cfile->flist, &cinode->openFileList);
544 	else
545 		list_add_tail(&cfile->flist, &cinode->openFileList);
546 	spin_unlock(&cinode->open_file_lock);
547 	spin_unlock(&tcon->open_file_lock);
548 
549 	if (fid->purge_cache)
550 		cifs_zap_mapping(inode);
551 
552 	file->private_data = cfile;
553 	return cfile;
554 }
555 
556 struct cifsFileInfo *
557 cifsFileInfo_get(struct cifsFileInfo *cifs_file)
558 {
559 	spin_lock(&cifs_file->file_info_lock);
560 	cifsFileInfo_get_locked(cifs_file);
561 	spin_unlock(&cifs_file->file_info_lock);
562 	return cifs_file;
563 }
564 
565 static void cifsFileInfo_put_final(struct cifsFileInfo *cifs_file)
566 {
567 	struct inode *inode = d_inode(cifs_file->dentry);
568 	struct cifsInodeInfo *cifsi = CIFS_I(inode);
569 	struct cifsLockInfo *li, *tmp;
570 	struct super_block *sb = inode->i_sb;
571 
572 	/*
573 	 * Delete any outstanding lock records. We'll lose them when the file
574 	 * is closed anyway.
575 	 */
576 	cifs_down_write(&cifsi->lock_sem);
577 	list_for_each_entry_safe(li, tmp, &cifs_file->llist->locks, llist) {
578 		list_del(&li->llist);
579 		cifs_del_lock_waiters(li);
580 		kfree(li);
581 	}
582 	list_del(&cifs_file->llist->llist);
583 	kfree(cifs_file->llist);
584 	up_write(&cifsi->lock_sem);
585 
586 	cifs_put_tlink(cifs_file->tlink);
587 	dput(cifs_file->dentry);
588 	cifs_sb_deactive(sb);
589 	kfree(cifs_file->symlink_target);
590 	kfree(cifs_file);
591 }
592 
593 static void cifsFileInfo_put_work(struct work_struct *work)
594 {
595 	struct cifsFileInfo *cifs_file = container_of(work,
596 			struct cifsFileInfo, put);
597 
598 	cifsFileInfo_put_final(cifs_file);
599 }
600 
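/*
 * Worker used when closing the handle on the server returned -EBUSY or
 * -EAGAIN: retry the close (close_getattr where available) up to four
 * times with a 250ms pause, then release the cifsFileInfo either
 * inline or via fileinfo_put_wq depending on ->offload.
 */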
601 void serverclose_work(struct work_struct *work)
602 {
603 	struct cifsFileInfo *cifs_file = container_of(work,
604 			struct cifsFileInfo, serverclose);
605 
606 	struct cifs_tcon *tcon = tlink_tcon(cifs_file->tlink);
607 
608 	struct TCP_Server_Info *server = tcon->ses->server;
609 	int rc = 0;
610 	int retries = 0;
611 	int MAX_RETRIES = 4;
612 
613 	do {
614 		if (server->ops->close_getattr)
615 			rc = server->ops->close_getattr(0, tcon, cifs_file);
616 		else if (server->ops->close)
617 			rc = server->ops->close(0, tcon, &cifs_file->fid);
618 
619 		if (rc == -EBUSY || rc == -EAGAIN) {
620 			retries++;
621 			msleep(250);
622 		}
623 	} while ((rc == -EBUSY || rc == -EAGAIN) && (retries < MAX_RETRIES)
624 	);
625 
626 	if (retries == MAX_RETRIES)
627 		pr_warn("Serverclose failed %d times, giving up\n", MAX_RETRIES);
628 
629 	if (cifs_file->offload)
630 		queue_work(fileinfo_put_wq, &cifs_file->put);
631 	else
632 		cifsFileInfo_put_final(cifs_file);
633 }
634 
635 /**
636  * cifsFileInfo_put - release a reference to file private data
637  *
638  * Always potentially wait for oplock handler. See _cifsFileInfo_put().
639  *
640  * @cifs_file:	cifs/smb3 specific info (eg refcounts) for an open file
641  */
642 void cifsFileInfo_put(struct cifsFileInfo *cifs_file)
643 {
644 	_cifsFileInfo_put(cifs_file, true, true);
645 }
646 
647 /**
648  * _cifsFileInfo_put - release a reference to file private data
649  *
650  * This may involve closing the filehandle @cifs_file out on the
651  * server. Must be called without holding tcon->open_file_lock,
652  * cinode->open_file_lock and cifs_file->file_info_lock.
653  *
654  * If @wait_for_oplock_handler is true and we are releasing the last
655  * reference, wait for any running oplock break handler of the file
656  * and cancel any pending one.
657  *
658  * @cifs_file:	cifs/smb3 specific info (eg refcounts) for an open file
659  * @wait_oplock_handler: must be false if called from oplock_break_handler
660  * @offload:	if true, queue the final put to a workqueue; close and oplock break paths pass false
661  *
662  */
663 void _cifsFileInfo_put(struct cifsFileInfo *cifs_file,
664 		       bool wait_oplock_handler, bool offload)
665 {
666 	struct inode *inode = d_inode(cifs_file->dentry);
667 	struct cifs_tcon *tcon = tlink_tcon(cifs_file->tlink);
668 	struct TCP_Server_Info *server = tcon->ses->server;
669 	struct cifsInodeInfo *cifsi = CIFS_I(inode);
670 	struct super_block *sb = inode->i_sb;
671 	struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
672 	struct cifs_fid fid = {};
673 	struct cifs_pending_open open;
674 	bool oplock_break_cancelled;
675 	bool serverclose_offloaded = false;
676 
677 	spin_lock(&tcon->open_file_lock);
678 	spin_lock(&cifsi->open_file_lock);
679 	spin_lock(&cifs_file->file_info_lock);
680 
681 	cifs_file->offload = offload;
682 	if (--cifs_file->count > 0) {
683 		spin_unlock(&cifs_file->file_info_lock);
684 		spin_unlock(&cifsi->open_file_lock);
685 		spin_unlock(&tcon->open_file_lock);
686 		return;
687 	}
688 	spin_unlock(&cifs_file->file_info_lock);
689 
690 	if (server->ops->get_lease_key)
691 		server->ops->get_lease_key(inode, &fid);
692 
693 	/* store open in pending opens to make sure we don't miss lease break */
694 	cifs_add_pending_open_locked(&fid, cifs_file->tlink, &open);
695 
696 	/* remove it from the lists */
697 	list_del(&cifs_file->flist);
698 	list_del(&cifs_file->tlist);
699 	atomic_dec(&tcon->num_local_opens);
700 
701 	if (list_empty(&cifsi->openFileList)) {
702 		cifs_dbg(FYI, "closing last open instance for inode %p\n",
703 			 d_inode(cifs_file->dentry));
704 		/*
705 		 * In strict cache mode we need to invalidate the mapping on the
706 		 * last close because it may cause an error when we open this
707 		 * file again and get at least a level II oplock.
708 		 */
709 		if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_STRICT_IO)
710 			set_bit(CIFS_INO_INVALID_MAPPING, &cifsi->flags);
711 		cifs_set_oplock_level(cifsi, 0);
712 	}
713 
714 	spin_unlock(&cifsi->open_file_lock);
715 	spin_unlock(&tcon->open_file_lock);
716 
717 	oplock_break_cancelled = wait_oplock_handler ?
718 		cancel_work_sync(&cifs_file->oplock_break) : false;
719 
720 	if (!tcon->need_reconnect && !cifs_file->invalidHandle) {
721 		struct TCP_Server_Info *server = tcon->ses->server;
722 		unsigned int xid;
723 		int rc = 0;
724 
725 		xid = get_xid();
726 		if (server->ops->close_getattr)
727 			rc = server->ops->close_getattr(xid, tcon, cifs_file);
728 		else if (server->ops->close)
729 			rc = server->ops->close(xid, tcon, &cifs_file->fid);
730 		_free_xid(xid);
731 
732 		if (rc == -EBUSY || rc == -EAGAIN) {
733 			// Server close failed, hence offloading it as an async op
734 			queue_work(serverclose_wq, &cifs_file->serverclose);
735 			serverclose_offloaded = true;
736 		}
737 	}
738 
739 	if (oplock_break_cancelled)
740 		cifs_done_oplock_break(cifsi);
741 
742 	cifs_del_pending_open(&open);
743 
744 	// If serverclose has been offloaded to the wq (on failure), it will
745 	// handle offloading the put as well. If serverclose was not offloaded,
746 	// we need to handle offloading the put here.
747 	if (!serverclose_offloaded) {
748 		if (offload)
749 			queue_work(fileinfo_put_wq, &cifs_file->put);
750 		else
751 			cifsFileInfo_put_final(cifs_file);
752 	}
753 }
754 
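/*
 * ->open() for regular files: reuse a cached handle with a pending
 * deferred close if the flags match, otherwise try a POSIX open where
 * the server supports it and fall back to the regular NT open.  The
 * resulting handle is wrapped in a cifsFileInfo and attached to
 * file->private_data.
 */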
755 int cifs_open(struct inode *inode, struct file *file)
756 
757 {
758 	int rc = -EACCES;
759 	unsigned int xid;
760 	__u32 oplock;
761 	struct cifs_sb_info *cifs_sb;
762 	struct TCP_Server_Info *server;
763 	struct cifs_tcon *tcon;
764 	struct tcon_link *tlink;
765 	struct cifsFileInfo *cfile = NULL;
766 	void *page;
767 	const char *full_path;
768 	bool posix_open_ok = false;
769 	struct cifs_fid fid = {};
770 	struct cifs_pending_open open;
771 	struct cifs_open_info_data data = {};
772 
773 	xid = get_xid();
774 
775 	cifs_sb = CIFS_SB(inode->i_sb);
776 	if (unlikely(cifs_forced_shutdown(cifs_sb))) {
777 		free_xid(xid);
778 		return -EIO;
779 	}
780 
781 	tlink = cifs_sb_tlink(cifs_sb);
782 	if (IS_ERR(tlink)) {
783 		free_xid(xid);
784 		return PTR_ERR(tlink);
785 	}
786 	tcon = tlink_tcon(tlink);
787 	server = tcon->ses->server;
788 
789 	page = alloc_dentry_path();
790 	full_path = build_path_from_dentry(file_dentry(file), page);
791 	if (IS_ERR(full_path)) {
792 		rc = PTR_ERR(full_path);
793 		goto out;
794 	}
795 
796 	cifs_dbg(FYI, "inode = 0x%p file flags are 0x%x for %s\n",
797 		 inode, file->f_flags, full_path);
798 
799 	if (file->f_flags & O_DIRECT &&
800 	    cifs_sb->mnt_cifs_flags & CIFS_MOUNT_STRICT_IO) {
801 		if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_BRL)
802 			file->f_op = &cifs_file_direct_nobrl_ops;
803 		else
804 			file->f_op = &cifs_file_direct_ops;
805 	}
806 
807 	/* Get the cached handle as SMB2 close is deferred */
808 	rc = cifs_get_readable_path(tcon, full_path, &cfile);
809 	if (rc == 0) {
810 		if (file->f_flags == cfile->f_flags) {
811 			file->private_data = cfile;
812 			spin_lock(&CIFS_I(inode)->deferred_lock);
813 			cifs_del_deferred_close(cfile);
814 			spin_unlock(&CIFS_I(inode)->deferred_lock);
815 			goto use_cache;
816 		} else {
817 			_cifsFileInfo_put(cfile, true, false);
818 		}
819 	}
820 
821 	if (server->oplocks)
822 		oplock = REQ_OPLOCK;
823 	else
824 		oplock = 0;
825 
826 #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
827 	if (!tcon->broken_posix_open && tcon->unix_ext &&
828 	    cap_unix(tcon->ses) && (CIFS_UNIX_POSIX_PATH_OPS_CAP &
829 				le64_to_cpu(tcon->fsUnixInfo.Capability))) {
830 		/* can not refresh inode info since size could be stale */
831 		rc = cifs_posix_open(full_path, &inode, inode->i_sb,
832 				cifs_sb->ctx->file_mode /* ignored */,
833 				file->f_flags, &oplock, &fid.netfid, xid);
834 		if (rc == 0) {
835 			cifs_dbg(FYI, "posix open succeeded\n");
836 			posix_open_ok = true;
837 		} else if ((rc == -EINVAL) || (rc == -EOPNOTSUPP)) {
838 			if (tcon->ses->serverNOS)
839 				cifs_dbg(VFS, "server %s of type %s returned unexpected error on SMB posix open, disabling posix open support. Check if server update available.\n",
840 					 tcon->ses->ip_addr,
841 					 tcon->ses->serverNOS);
842 			tcon->broken_posix_open = true;
843 		} else if ((rc != -EIO) && (rc != -EREMOTE) &&
844 			 (rc != -EOPNOTSUPP)) /* path not found or net err */
845 			goto out;
846 		/*
847 		 * Else fall through to retry the open the old way on network i/o
848 		 * or DFS errors.
849 		 */
850 	}
851 #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
852 
853 	if (server->ops->get_lease_key)
854 		server->ops->get_lease_key(inode, &fid);
855 
856 	cifs_add_pending_open(&fid, tlink, &open);
857 
858 	if (!posix_open_ok) {
859 		if (server->ops->get_lease_key)
860 			server->ops->get_lease_key(inode, &fid);
861 
862 		rc = cifs_nt_open(full_path, inode, cifs_sb, tcon, file->f_flags, &oplock, &fid,
863 				  xid, &data);
864 		if (rc) {
865 			cifs_del_pending_open(&open);
866 			goto out;
867 		}
868 	}
869 
870 	cfile = cifs_new_fileinfo(&fid, file, tlink, oplock, data.symlink_target);
871 	if (cfile == NULL) {
872 		if (server->ops->close)
873 			server->ops->close(xid, tcon, &fid);
874 		cifs_del_pending_open(&open);
875 		rc = -ENOMEM;
876 		goto out;
877 	}
878 
879 #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
880 	if ((oplock & CIFS_CREATE_ACTION) && !posix_open_ok && tcon->unix_ext) {
881 		/*
882 		 * Time to set mode which we can not set earlier due to
883 		 * problems creating new read-only files.
884 		 */
885 		struct cifs_unix_set_info_args args = {
886 			.mode	= inode->i_mode,
887 			.uid	= INVALID_UID, /* no change */
888 			.gid	= INVALID_GID, /* no change */
889 			.ctime	= NO_CHANGE_64,
890 			.atime	= NO_CHANGE_64,
891 			.mtime	= NO_CHANGE_64,
892 			.device	= 0,
893 		};
894 		CIFSSMBUnixSetFileInfo(xid, tcon, &args, fid.netfid,
895 				       cfile->pid);
896 	}
897 #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
898 
899 use_cache:
900 	fscache_use_cookie(cifs_inode_cookie(file_inode(file)),
901 			   file->f_mode & FMODE_WRITE);
902 	if (!(file->f_flags & O_DIRECT))
903 		goto out;
904 	if ((file->f_flags & (O_ACCMODE | O_APPEND)) == O_RDONLY)
905 		goto out;
906 	cifs_invalidate_cache(file_inode(file), FSCACHE_INVAL_DIO_WRITE);
907 
908 out:
909 	free_dentry_path(page);
910 	free_xid(xid);
911 	cifs_put_tlink(tlink);
912 	cifs_free_open_info(&data);
913 	return rc;
914 }
915 
916 #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
917 static int cifs_push_posix_locks(struct cifsFileInfo *cfile);
918 #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
919 
920 /*
921  * Try to reacquire byte range locks that were released when session
922  * to server was lost.
923  */
924 static int
925 cifs_relock_file(struct cifsFileInfo *cfile)
926 {
927 	struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
928 	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
929 	int rc = 0;
930 #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
931 	struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
932 #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
933 
934 	down_read_nested(&cinode->lock_sem, SINGLE_DEPTH_NESTING);
935 	if (cinode->can_cache_brlcks) {
936 		/* can cache locks - no need to relock */
937 		up_read(&cinode->lock_sem);
938 		return rc;
939 	}
940 
941 #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
942 	if (cap_unix(tcon->ses) &&
943 	    (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
944 	    ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
945 		rc = cifs_push_posix_locks(cfile);
946 	else
947 #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
948 		rc = tcon->ses->server->ops->push_mand_locks(cfile);
949 
950 	up_read(&cinode->lock_sem);
951 	return rc;
952 }
953 
954 static int
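/*
 * Reopen a handle that was invalidated (typically by a reconnect).
 * The POSIX reopen path is tried first where available; otherwise the
 * file is reopened with disposition FILE_OPEN, data is optionally
 * flushed and the inode refreshed, and cached byte-range locks are
 * pushed back to the server when needed.
 */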
955 cifs_reopen_file(struct cifsFileInfo *cfile, bool can_flush)
956 {
957 	int rc = -EACCES;
958 	unsigned int xid;
959 	__u32 oplock;
960 	struct cifs_sb_info *cifs_sb;
961 	struct cifs_tcon *tcon;
962 	struct TCP_Server_Info *server;
963 	struct cifsInodeInfo *cinode;
964 	struct inode *inode;
965 	void *page;
966 	const char *full_path;
967 	int desired_access;
968 	int disposition = FILE_OPEN;
969 	int create_options = CREATE_NOT_DIR;
970 	struct cifs_open_parms oparms;
971 	int rdwr_for_fscache = 0;
972 
973 	xid = get_xid();
974 	mutex_lock(&cfile->fh_mutex);
975 	if (!cfile->invalidHandle) {
976 		mutex_unlock(&cfile->fh_mutex);
977 		free_xid(xid);
978 		return 0;
979 	}
980 
981 	inode = d_inode(cfile->dentry);
982 	cifs_sb = CIFS_SB(inode->i_sb);
983 	tcon = tlink_tcon(cfile->tlink);
984 	server = tcon->ses->server;
985 
986 	/*
987 	 * Cannot grab the rename sem here: various ops, including those that
988 	 * already hold the rename sem, can end up causing writepage to get
989 	 * called, and if the server was down that means we end up here and
990 	 * can never tell whether the caller already holds the rename_sem.
991 	 */
992 	page = alloc_dentry_path();
993 	full_path = build_path_from_dentry(cfile->dentry, page);
994 	if (IS_ERR(full_path)) {
995 		mutex_unlock(&cfile->fh_mutex);
996 		free_dentry_path(page);
997 		free_xid(xid);
998 		return PTR_ERR(full_path);
999 	}
1000 
1001 	cifs_dbg(FYI, "inode = 0x%p file flags 0x%x for %s\n",
1002 		 inode, cfile->f_flags, full_path);
1003 
1004 	if (tcon->ses->server->oplocks)
1005 		oplock = REQ_OPLOCK;
1006 	else
1007 		oplock = 0;
1008 
1009 #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
1010 	if (tcon->unix_ext && cap_unix(tcon->ses) &&
1011 	    (CIFS_UNIX_POSIX_PATH_OPS_CAP &
1012 				le64_to_cpu(tcon->fsUnixInfo.Capability))) {
1013 		/*
1014 		 * O_CREAT, O_EXCL and O_TRUNC already had their effect on the
1015 		 * original open. Must mask them off for a reopen.
1016 		 */
1017 		unsigned int oflags = cfile->f_flags &
1018 						~(O_CREAT | O_EXCL | O_TRUNC);
1019 
1020 		rc = cifs_posix_open(full_path, NULL, inode->i_sb,
1021 				     cifs_sb->ctx->file_mode /* ignored */,
1022 				     oflags, &oplock, &cfile->fid.netfid, xid);
1023 		if (rc == 0) {
1024 			cifs_dbg(FYI, "posix reopen succeeded\n");
1025 			oparms.reconnect = true;
1026 			goto reopen_success;
1027 		}
1028 		/*
1029 		 * Fall through to retry the open the old way on errors; in the
1030 		 * reconnect path especially, it is important to retry hard.
1031 		 */
1032 	}
1033 #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
1034 
1035 	/* If we're caching, we need to be able to fill in around partial writes. */
1036 	if (cifs_fscache_enabled(inode) && (cfile->f_flags & O_ACCMODE) == O_WRONLY)
1037 		rdwr_for_fscache = 1;
1038 
1039 	desired_access = cifs_convert_flags(cfile->f_flags, rdwr_for_fscache);
1040 
1041 	/* O_SYNC also has bit for O_DSYNC so following check picks up either */
1042 	if (cfile->f_flags & O_SYNC)
1043 		create_options |= CREATE_WRITE_THROUGH;
1044 
1045 	if (cfile->f_flags & O_DIRECT)
1046 		create_options |= CREATE_NO_BUFFER;
1047 
1048 	if (server->ops->get_lease_key)
1049 		server->ops->get_lease_key(inode, &cfile->fid);
1050 
1051 retry_open:
1052 	oparms = (struct cifs_open_parms) {
1053 		.tcon = tcon,
1054 		.cifs_sb = cifs_sb,
1055 		.desired_access = desired_access,
1056 		.create_options = cifs_create_options(cifs_sb, create_options),
1057 		.disposition = disposition,
1058 		.path = full_path,
1059 		.fid = &cfile->fid,
1060 		.reconnect = true,
1061 	};
1062 
1063 	/*
1064 	 * Can not refresh inode by passing in file_info buf to be returned by
1065 	 * ops->open and then calling get_inode_info with returned buf since
1066 	 * file might have write behind data that needs to be flushed and server
1067 	 * version of file size can be stale. If we knew for sure that inode was
1068 	 * not dirty locally we could do this.
1069 	 */
1070 	rc = server->ops->open(xid, &oparms, &oplock, NULL);
1071 	if (rc == -ENOENT && oparms.reconnect == false) {
1072 		/* durable handle timeout is expired - open the file again */
1073 		rc = server->ops->open(xid, &oparms, &oplock, NULL);
1074 		/* indicate that we need to relock the file */
1075 		oparms.reconnect = true;
1076 	}
1077 	if (rc == -EACCES && rdwr_for_fscache == 1) {
1078 		desired_access = cifs_convert_flags(cfile->f_flags, 0);
1079 		rdwr_for_fscache = 2;
1080 		goto retry_open;
1081 	}
1082 
1083 	if (rc) {
1084 		mutex_unlock(&cfile->fh_mutex);
1085 		cifs_dbg(FYI, "cifs_reopen returned 0x%x\n", rc);
1086 		cifs_dbg(FYI, "oplock: %d\n", oplock);
1087 		goto reopen_error_exit;
1088 	}
1089 
1090 	if (rdwr_for_fscache == 2)
1091 		cifs_invalidate_cache(inode, FSCACHE_INVAL_DIO_WRITE);
1092 
1093 #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
1094 reopen_success:
1095 #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
1096 	cfile->invalidHandle = false;
1097 	mutex_unlock(&cfile->fh_mutex);
1098 	cinode = CIFS_I(inode);
1099 
1100 	if (can_flush) {
1101 		rc = filemap_write_and_wait(inode->i_mapping);
1102 		if (!is_interrupt_error(rc))
1103 			mapping_set_error(inode->i_mapping, rc);
1104 
1105 		if (tcon->posix_extensions)
1106 			rc = smb311_posix_get_inode_info(&inode, full_path, inode->i_sb, xid);
1107 		else if (tcon->unix_ext)
1108 			rc = cifs_get_inode_info_unix(&inode, full_path,
1109 						      inode->i_sb, xid);
1110 		else
1111 			rc = cifs_get_inode_info(&inode, full_path, NULL,
1112 						 inode->i_sb, xid, NULL);
1113 	}
1114 	/*
1115 	 * Else we are writing out data to server already and could deadlock if
1116 	 * we tried to flush data, and since we do not know if we have data that
1117 	 * would invalidate the current end of file on the server we can not go
1118 	 * to the server to get the new inode info.
1119 	 */
1120 
1121 	/*
1122 	 * If the server returned a read oplock and we have mandatory brlocks,
1123 	 * set oplock level to None.
1124 	 */
1125 	if (server->ops->is_read_op(oplock) && cifs_has_mand_locks(cinode)) {
1126 		cifs_dbg(FYI, "Reset oplock val from read to None due to mand locks\n");
1127 		oplock = 0;
1128 	}
1129 
1130 	server->ops->set_fid(cfile, &cfile->fid, oplock);
1131 	if (oparms.reconnect)
1132 		cifs_relock_file(cfile);
1133 
1134 reopen_error_exit:
1135 	free_dentry_path(page);
1136 	free_xid(xid);
1137 	return rc;
1138 }
1139 
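/*
 * Deferred-close worker: runs after the close timeout has elapsed,
 * removes the deferred-close record and drops the reference kept by
 * cifs_close(), which lets the handle be closed on the server.
 */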
1140 void smb2_deferred_work_close(struct work_struct *work)
1141 {
1142 	struct cifsFileInfo *cfile = container_of(work,
1143 			struct cifsFileInfo, deferred.work);
1144 
1145 	spin_lock(&CIFS_I(d_inode(cfile->dentry))->deferred_lock);
1146 	cifs_del_deferred_close(cfile);
1147 	cfile->deferred_close_scheduled = false;
1148 	spin_unlock(&CIFS_I(d_inode(cfile->dentry))->deferred_lock);
1149 	_cifsFileInfo_put(cfile, true, false);
1150 }
1151 
1152 int cifs_close(struct inode *inode, struct file *file)
1153 {
1154 	struct cifsFileInfo *cfile;
1155 	struct cifsInodeInfo *cinode = CIFS_I(inode);
1156 	struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
1157 	struct cifs_deferred_close *dclose;
1158 
1159 	cifs_fscache_unuse_inode_cookie(inode, file->f_mode & FMODE_WRITE);
1160 
1161 	if (file->private_data != NULL) {
1162 		cfile = file->private_data;
1163 		file->private_data = NULL;
1164 		dclose = kmalloc(sizeof(struct cifs_deferred_close), GFP_KERNEL);
1165 		if ((cifs_sb->ctx->closetimeo && cinode->oplock == CIFS_CACHE_RHW_FLG)
1166 		    && cinode->lease_granted &&
1167 		    !test_bit(CIFS_INO_CLOSE_ON_LOCK, &cinode->flags) &&
1168 		    dclose) {
1169 			if (test_and_clear_bit(CIFS_INO_MODIFIED_ATTR, &cinode->flags)) {
1170 				inode_set_mtime_to_ts(inode,
1171 						      inode_set_ctime_current(inode));
1172 			}
1173 			spin_lock(&cinode->deferred_lock);
1174 			cifs_add_deferred_close(cfile, dclose);
1175 			if (cfile->deferred_close_scheduled &&
1176 			    delayed_work_pending(&cfile->deferred)) {
1177 				/*
1178 				 * If there is no pending work, mod_delayed_work queues new work.
1179 				 * So increase the ref count to avoid a use-after-free.
1180 				 */
1181 				if (!mod_delayed_work(deferredclose_wq,
1182 						&cfile->deferred, cifs_sb->ctx->closetimeo))
1183 					cifsFileInfo_get(cfile);
1184 			} else {
1185 				/* Deferred close for files */
1186 				queue_delayed_work(deferredclose_wq,
1187 						&cfile->deferred, cifs_sb->ctx->closetimeo);
1188 				cfile->deferred_close_scheduled = true;
1189 				spin_unlock(&cinode->deferred_lock);
1190 				return 0;
1191 			}
1192 			spin_unlock(&cinode->deferred_lock);
1193 			_cifsFileInfo_put(cfile, true, false);
1194 		} else {
1195 			_cifsFileInfo_put(cfile, true, false);
1196 			kfree(dclose);
1197 		}
1198 	}
1199 
1200 	/* return code from the ->release op is always ignored */
1201 	return 0;
1202 }
1203 
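/*
 * Walk the tcon's open file list and reopen every handle that was
 * invalidated, which matters when persistent handles are in use; any
 * reopen failure sets need_reopen_files again so the pass is retried.
 */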
1204 void
1205 cifs_reopen_persistent_handles(struct cifs_tcon *tcon)
1206 {
1207 	struct cifsFileInfo *open_file, *tmp;
1208 	struct list_head tmp_list;
1209 
1210 	if (!tcon->use_persistent || !tcon->need_reopen_files)
1211 		return;
1212 
1213 	tcon->need_reopen_files = false;
1214 
1215 	cifs_dbg(FYI, "Reopen persistent handles\n");
1216 	INIT_LIST_HEAD(&tmp_list);
1217 
1218 	/* list all files open on tree connection, reopen resilient handles  */
1219 	spin_lock(&tcon->open_file_lock);
1220 	list_for_each_entry(open_file, &tcon->openFileList, tlist) {
1221 		if (!open_file->invalidHandle)
1222 			continue;
1223 		cifsFileInfo_get(open_file);
1224 		list_add_tail(&open_file->rlist, &tmp_list);
1225 	}
1226 	spin_unlock(&tcon->open_file_lock);
1227 
1228 	list_for_each_entry_safe(open_file, tmp, &tmp_list, rlist) {
1229 		if (cifs_reopen_file(open_file, false /* do not flush */))
1230 			tcon->need_reopen_files = true;
1231 		list_del_init(&open_file->rlist);
1232 		cifsFileInfo_put(open_file);
1233 	}
1234 }
1235 
1236 int cifs_closedir(struct inode *inode, struct file *file)
1237 {
1238 	int rc = 0;
1239 	unsigned int xid;
1240 	struct cifsFileInfo *cfile = file->private_data;
1241 	struct cifs_tcon *tcon;
1242 	struct TCP_Server_Info *server;
1243 	char *buf;
1244 
1245 	cifs_dbg(FYI, "Closedir inode = 0x%p\n", inode);
1246 
1247 	if (cfile == NULL)
1248 		return rc;
1249 
1250 	xid = get_xid();
1251 	tcon = tlink_tcon(cfile->tlink);
1252 	server = tcon->ses->server;
1253 
1254 	cifs_dbg(FYI, "Freeing private data in close dir\n");
1255 	spin_lock(&cfile->file_info_lock);
1256 	if (server->ops->dir_needs_close(cfile)) {
1257 		cfile->invalidHandle = true;
1258 		spin_unlock(&cfile->file_info_lock);
1259 		if (server->ops->close_dir)
1260 			rc = server->ops->close_dir(xid, tcon, &cfile->fid);
1261 		else
1262 			rc = -ENOSYS;
1263 		cifs_dbg(FYI, "Closing uncompleted readdir with rc %d\n", rc);
1264 		/* not much we can do if it fails anyway, ignore rc */
1265 		rc = 0;
1266 	} else
1267 		spin_unlock(&cfile->file_info_lock);
1268 
1269 	buf = cfile->srch_inf.ntwrk_buf_start;
1270 	if (buf) {
1271 		cifs_dbg(FYI, "closedir free smb buf in srch struct\n");
1272 		cfile->srch_inf.ntwrk_buf_start = NULL;
1273 		if (cfile->srch_inf.smallBuf)
1274 			cifs_small_buf_release(buf);
1275 		else
1276 			cifs_buf_release(buf);
1277 	}
1278 
1279 	cifs_put_tlink(cfile->tlink);
1280 	kfree(file->private_data);
1281 	file->private_data = NULL;
1282 	/* BB can we lock the filestruct while this is going on? */
1283 	free_xid(xid);
1284 	return rc;
1285 }
1286 
1287 static struct cifsLockInfo *
1288 cifs_lock_init(__u64 offset, __u64 length, __u8 type, __u16 flags)
1289 {
1290 	struct cifsLockInfo *lock =
1291 		kmalloc(sizeof(struct cifsLockInfo), GFP_KERNEL);
1292 	if (!lock)
1293 		return lock;
1294 	lock->offset = offset;
1295 	lock->length = length;
1296 	lock->type = type;
1297 	lock->pid = current->tgid;
1298 	lock->flags = flags;
1299 	INIT_LIST_HEAD(&lock->blist);
1300 	init_waitqueue_head(&lock->block_q);
1301 	return lock;
1302 }
1303 
1304 void
1305 cifs_del_lock_waiters(struct cifsLockInfo *lock)
1306 {
1307 	struct cifsLockInfo *li, *tmp;
1308 	list_for_each_entry_safe(li, tmp, &lock->blist, blist) {
1309 		list_del_init(&li->blist);
1310 		wake_up(&li->block_q);
1311 	}
1312 }
1313 
1314 #define CIFS_LOCK_OP	0
1315 #define CIFS_READ_OP	1
1316 #define CIFS_WRITE_OP	2
1317 
1318 /* @rw_check : 0 - no op, 1 - read, 2 - write */
1319 static bool
1320 cifs_find_fid_lock_conflict(struct cifs_fid_locks *fdlocks, __u64 offset,
1321 			    __u64 length, __u8 type, __u16 flags,
1322 			    struct cifsFileInfo *cfile,
1323 			    struct cifsLockInfo **conf_lock, int rw_check)
1324 {
1325 	struct cifsLockInfo *li;
1326 	struct cifsFileInfo *cur_cfile = fdlocks->cfile;
1327 	struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;
1328 
1329 	list_for_each_entry(li, &fdlocks->locks, llist) {
1330 		if (offset + length <= li->offset ||
1331 		    offset >= li->offset + li->length)
1332 			continue;
1333 		if (rw_check != CIFS_LOCK_OP && current->tgid == li->pid &&
1334 		    server->ops->compare_fids(cfile, cur_cfile)) {
1335 			/* shared lock prevents write op through the same fid */
1336 			if (!(li->type & server->vals->shared_lock_type) ||
1337 			    rw_check != CIFS_WRITE_OP)
1338 				continue;
1339 		}
1340 		if ((type & server->vals->shared_lock_type) &&
1341 		    ((server->ops->compare_fids(cfile, cur_cfile) &&
1342 		     current->tgid == li->pid) || type == li->type))
1343 			continue;
1344 		if (rw_check == CIFS_LOCK_OP &&
1345 		    (flags & FL_OFDLCK) && (li->flags & FL_OFDLCK) &&
1346 		    server->ops->compare_fids(cfile, cur_cfile))
1347 			continue;
1348 		if (conf_lock)
1349 			*conf_lock = li;
1350 		return true;
1351 	}
1352 	return false;
1353 }
1354 
1355 bool
1356 cifs_find_lock_conflict(struct cifsFileInfo *cfile, __u64 offset, __u64 length,
1357 			__u8 type, __u16 flags,
1358 			struct cifsLockInfo **conf_lock, int rw_check)
1359 {
1360 	bool rc = false;
1361 	struct cifs_fid_locks *cur;
1362 	struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
1363 
1364 	list_for_each_entry(cur, &cinode->llist, llist) {
1365 		rc = cifs_find_fid_lock_conflict(cur, offset, length, type,
1366 						 flags, cfile, conf_lock,
1367 						 rw_check);
1368 		if (rc)
1369 			break;
1370 	}
1371 
1372 	return rc;
1373 }
1374 
1375 /*
1376  * Check if there is another lock that prevents us from setting the lock
1377  * (mandatory style). If such a lock exists, update the flock structure with
1378  * its properties. Otherwise, set the flock type to F_UNLCK if we can cache
1379  * brlocks, or leave it unchanged if we can't. Returns 0 if we don't need to
1380  * ask the server, or 1 otherwise.
1381  */
1382 static int
1383 cifs_lock_test(struct cifsFileInfo *cfile, __u64 offset, __u64 length,
1384 	       __u8 type, struct file_lock *flock)
1385 {
1386 	int rc = 0;
1387 	struct cifsLockInfo *conf_lock;
1388 	struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
1389 	struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;
1390 	bool exist;
1391 
1392 	down_read(&cinode->lock_sem);
1393 
1394 	exist = cifs_find_lock_conflict(cfile, offset, length, type,
1395 					flock->fl_flags, &conf_lock,
1396 					CIFS_LOCK_OP);
1397 	if (exist) {
1398 		flock->fl_start = conf_lock->offset;
1399 		flock->fl_end = conf_lock->offset + conf_lock->length - 1;
1400 		flock->fl_pid = conf_lock->pid;
1401 		if (conf_lock->type & server->vals->shared_lock_type)
1402 			flock->fl_type = F_RDLCK;
1403 		else
1404 			flock->fl_type = F_WRLCK;
1405 	} else if (!cinode->can_cache_brlcks)
1406 		rc = 1;
1407 	else
1408 		flock->fl_type = F_UNLCK;
1409 
1410 	up_read(&cinode->lock_sem);
1411 	return rc;
1412 }
1413 
1414 static void
1415 cifs_lock_add(struct cifsFileInfo *cfile, struct cifsLockInfo *lock)
1416 {
1417 	struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
1418 	cifs_down_write(&cinode->lock_sem);
1419 	list_add_tail(&lock->llist, &cfile->llist->locks);
1420 	up_write(&cinode->lock_sem);
1421 }
1422 
1423 /*
1424  * Set the byte-range lock (mandatory style). Returns:
1425  * 1) 0, if we set the lock and don't need to request to the server;
1426  * 2) 1, if no locks prevent us but we need to request to the server;
1427  * 3) -EACCES, if there is a lock that prevents us and wait is false.
1428  */
1429 static int
1430 cifs_lock_add_if(struct cifsFileInfo *cfile, struct cifsLockInfo *lock,
1431 		 bool wait)
1432 {
1433 	struct cifsLockInfo *conf_lock;
1434 	struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
1435 	bool exist;
1436 	int rc = 0;
1437 
1438 try_again:
1439 	exist = false;
1440 	cifs_down_write(&cinode->lock_sem);
1441 
1442 	exist = cifs_find_lock_conflict(cfile, lock->offset, lock->length,
1443 					lock->type, lock->flags, &conf_lock,
1444 					CIFS_LOCK_OP);
1445 	if (!exist && cinode->can_cache_brlcks) {
1446 		list_add_tail(&lock->llist, &cfile->llist->locks);
1447 		up_write(&cinode->lock_sem);
1448 		return rc;
1449 	}
1450 
1451 	if (!exist)
1452 		rc = 1;
1453 	else if (!wait)
1454 		rc = -EACCES;
1455 	else {
1456 		list_add_tail(&lock->blist, &conf_lock->blist);
1457 		up_write(&cinode->lock_sem);
1458 		rc = wait_event_interruptible(lock->block_q,
1459 					(lock->blist.prev == &lock->blist) &&
1460 					(lock->blist.next == &lock->blist));
1461 		if (!rc)
1462 			goto try_again;
1463 		cifs_down_write(&cinode->lock_sem);
1464 		list_del_init(&lock->blist);
1465 	}
1466 
1467 	up_write(&cinode->lock_sem);
1468 	return rc;
1469 }
1470 
1471 #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
1472 /*
1473  * Check if there is another lock that prevents us from setting the lock
1474  * (posix style). If such a lock exists, update the flock structure with its
1475  * properties. Otherwise, set the flock type to F_UNLCK if we can cache
1476  * brlocks, or leave it unchanged if we can't. Returns 0 if we don't need to
1477  * ask the server, or 1 otherwise.
1478  */
1479 static int
1480 cifs_posix_lock_test(struct file *file, struct file_lock *flock)
1481 {
1482 	int rc = 0;
1483 	struct cifsInodeInfo *cinode = CIFS_I(file_inode(file));
1484 	unsigned char saved_type = flock->fl_type;
1485 
1486 	if ((flock->fl_flags & FL_POSIX) == 0)
1487 		return 1;
1488 
1489 	down_read(&cinode->lock_sem);
1490 	posix_test_lock(file, flock);
1491 
1492 	if (flock->fl_type == F_UNLCK && !cinode->can_cache_brlcks) {
1493 		flock->fl_type = saved_type;
1494 		rc = 1;
1495 	}
1496 
1497 	up_read(&cinode->lock_sem);
1498 	return rc;
1499 }
1500 
1501 /*
1502  * Set the byte-range lock (posix style). Returns:
1503  * 1) <0, if the error occurs while setting the lock;
1504  * 2) 0, if we set the lock and don't need to request to the server;
1505  * 3) FILE_LOCK_DEFERRED, if we will wait for some other file_lock;
1506  * 4) FILE_LOCK_DEFERRED + 1, if we need to request to the server.
1507  */
1508 static int
1509 cifs_posix_lock_set(struct file *file, struct file_lock *flock)
1510 {
1511 	struct cifsInodeInfo *cinode = CIFS_I(file_inode(file));
1512 	int rc = FILE_LOCK_DEFERRED + 1;
1513 
1514 	if ((flock->fl_flags & FL_POSIX) == 0)
1515 		return rc;
1516 
1517 	cifs_down_write(&cinode->lock_sem);
1518 	if (!cinode->can_cache_brlcks) {
1519 		up_write(&cinode->lock_sem);
1520 		return rc;
1521 	}
1522 
1523 	rc = posix_lock_file(file, flock, NULL);
1524 	up_write(&cinode->lock_sem);
1525 	return rc;
1526 }
1527 
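/*
 * Send all cached byte-range locks for @cfile to the server in
 * LOCKING_ANDX batches: locks are grouped by type (exclusive, then
 * shared) and sent max_num ranges at a time, bounded by the
 * negotiated maxBuf.
 */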
1528 int
1529 cifs_push_mandatory_locks(struct cifsFileInfo *cfile)
1530 {
1531 	unsigned int xid;
1532 	int rc = 0, stored_rc;
1533 	struct cifsLockInfo *li, *tmp;
1534 	struct cifs_tcon *tcon;
1535 	unsigned int num, max_num, max_buf;
1536 	LOCKING_ANDX_RANGE *buf, *cur;
1537 	static const int types[] = {
1538 		LOCKING_ANDX_LARGE_FILES,
1539 		LOCKING_ANDX_SHARED_LOCK | LOCKING_ANDX_LARGE_FILES
1540 	};
1541 	int i;
1542 
1543 	xid = get_xid();
1544 	tcon = tlink_tcon(cfile->tlink);
1545 
1546 	/*
1547 	 * Accessing maxBuf is racy with cifs_reconnect - need to store value
1548 	 * and check it before using.
1549 	 */
1550 	max_buf = tcon->ses->server->maxBuf;
1551 	if (max_buf < (sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE))) {
1552 		free_xid(xid);
1553 		return -EINVAL;
1554 	}
1555 
1556 	BUILD_BUG_ON(sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE) >
1557 		     PAGE_SIZE);
1558 	max_buf = min_t(unsigned int, max_buf - sizeof(struct smb_hdr),
1559 			PAGE_SIZE);
1560 	max_num = (max_buf - sizeof(struct smb_hdr)) /
1561 						sizeof(LOCKING_ANDX_RANGE);
1562 	buf = kcalloc(max_num, sizeof(LOCKING_ANDX_RANGE), GFP_KERNEL);
1563 	if (!buf) {
1564 		free_xid(xid);
1565 		return -ENOMEM;
1566 	}
1567 
1568 	for (i = 0; i < 2; i++) {
1569 		cur = buf;
1570 		num = 0;
1571 		list_for_each_entry_safe(li, tmp, &cfile->llist->locks, llist) {
1572 			if (li->type != types[i])
1573 				continue;
1574 			cur->Pid = cpu_to_le16(li->pid);
1575 			cur->LengthLow = cpu_to_le32((u32)li->length);
1576 			cur->LengthHigh = cpu_to_le32((u32)(li->length>>32));
1577 			cur->OffsetLow = cpu_to_le32((u32)li->offset);
1578 			cur->OffsetHigh = cpu_to_le32((u32)(li->offset>>32));
1579 			if (++num == max_num) {
1580 				stored_rc = cifs_lockv(xid, tcon,
1581 						       cfile->fid.netfid,
1582 						       (__u8)li->type, 0, num,
1583 						       buf);
1584 				if (stored_rc)
1585 					rc = stored_rc;
1586 				cur = buf;
1587 				num = 0;
1588 			} else
1589 				cur++;
1590 		}
1591 
1592 		if (num) {
1593 			stored_rc = cifs_lockv(xid, tcon, cfile->fid.netfid,
1594 					       (__u8)types[i], 0, num, buf);
1595 			if (stored_rc)
1596 				rc = stored_rc;
1597 		}
1598 	}
1599 
1600 	kfree(buf);
1601 	free_xid(xid);
1602 	return rc;
1603 }
1604 
1605 static __u32
1606 hash_lockowner(fl_owner_t owner)
1607 {
1608 	return cifs_lock_secret ^ hash32_ptr((const void *)owner);
1609 }
1610 #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
1611 
1612 struct lock_to_push {
1613 	struct list_head llist;
1614 	__u64 offset;
1615 	__u64 length;
1616 	__u32 pid;
1617 	__u16 netfid;
1618 	__u8 type;
1619 };
1620 
1621 #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
1622 static int
1623 cifs_push_posix_locks(struct cifsFileInfo *cfile)
1624 {
1625 	struct inode *inode = d_inode(cfile->dentry);
1626 	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1627 	struct file_lock *flock;
1628 	struct file_lock_context *flctx = locks_inode_context(inode);
1629 	unsigned int count = 0, i;
1630 	int rc = 0, xid, type;
1631 	struct list_head locks_to_send, *el;
1632 	struct lock_to_push *lck, *tmp;
1633 	__u64 length;
1634 
1635 	xid = get_xid();
1636 
1637 	if (!flctx)
1638 		goto out;
1639 
1640 	spin_lock(&flctx->flc_lock);
1641 	list_for_each(el, &flctx->flc_posix) {
1642 		count++;
1643 	}
1644 	spin_unlock(&flctx->flc_lock);
1645 
1646 	INIT_LIST_HEAD(&locks_to_send);
1647 
1648 	/*
1649 	 * Allocating count locks is enough because no FL_POSIX locks can be
1650 	 * added to the list while we are holding cinode->lock_sem that
1651 	 * protects locking operations of this inode.
1652 	 */
1653 	for (i = 0; i < count; i++) {
1654 		lck = kmalloc(sizeof(struct lock_to_push), GFP_KERNEL);
1655 		if (!lck) {
1656 			rc = -ENOMEM;
1657 			goto err_out;
1658 		}
1659 		list_add_tail(&lck->llist, &locks_to_send);
1660 	}
1661 
1662 	el = locks_to_send.next;
1663 	spin_lock(&flctx->flc_lock);
1664 	list_for_each_entry(flock, &flctx->flc_posix, fl_list) {
1665 		if (el == &locks_to_send) {
1666 			/*
1667 			 * The list ended. We don't have enough allocated
1668 			 * structures - something is really wrong.
1669 			 */
1670 			cifs_dbg(VFS, "Can't push all brlocks!\n");
1671 			break;
1672 		}
1673 		length = cifs_flock_len(flock);
1674 		if (flock->fl_type == F_RDLCK || flock->fl_type == F_SHLCK)
1675 			type = CIFS_RDLCK;
1676 		else
1677 			type = CIFS_WRLCK;
1678 		lck = list_entry(el, struct lock_to_push, llist);
1679 		lck->pid = hash_lockowner(flock->fl_owner);
1680 		lck->netfid = cfile->fid.netfid;
1681 		lck->length = length;
1682 		lck->type = type;
1683 		lck->offset = flock->fl_start;
1684 	}
1685 	spin_unlock(&flctx->flc_lock);
1686 
1687 	list_for_each_entry_safe(lck, tmp, &locks_to_send, llist) {
1688 		int stored_rc;
1689 
1690 		stored_rc = CIFSSMBPosixLock(xid, tcon, lck->netfid, lck->pid,
1691 					     lck->offset, lck->length, NULL,
1692 					     lck->type, 0);
1693 		if (stored_rc)
1694 			rc = stored_rc;
1695 		list_del(&lck->llist);
1696 		kfree(lck);
1697 	}
1698 
1699 out:
1700 	free_xid(xid);
1701 	return rc;
1702 err_out:
1703 	list_for_each_entry_safe(lck, tmp, &locks_to_send, llist) {
1704 		list_del(&lck->llist);
1705 		kfree(lck);
1706 	}
1707 	goto out;
1708 }
1709 #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
1710 
1711 static int
1712 cifs_push_locks(struct cifsFileInfo *cfile)
1713 {
1714 	struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
1715 	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1716 	int rc = 0;
1717 #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
1718 	struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
1719 #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
1720 
1721 	/* we are going to update can_cache_brlcks here - need a write access */
1722 	cifs_down_write(&cinode->lock_sem);
1723 	if (!cinode->can_cache_brlcks) {
1724 		up_write(&cinode->lock_sem);
1725 		return rc;
1726 	}
1727 
1728 #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
1729 	if (cap_unix(tcon->ses) &&
1730 	    (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
1731 	    ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
1732 		rc = cifs_push_posix_locks(cfile);
1733 	else
1734 #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
1735 		rc = tcon->ses->server->ops->push_mand_locks(cfile);
1736 
1737 	cinode->can_cache_brlcks = false;
1738 	up_write(&cinode->lock_sem);
1739 	return rc;
1740 }
1741 
1742 static void
1743 cifs_read_flock(struct file_lock *flock, __u32 *type, int *lock, int *unlock,
1744 		bool *wait_flag, struct TCP_Server_Info *server)
1745 {
1746 	if (flock->fl_flags & FL_POSIX)
1747 		cifs_dbg(FYI, "Posix\n");
1748 	if (flock->fl_flags & FL_FLOCK)
1749 		cifs_dbg(FYI, "Flock\n");
1750 	if (flock->fl_flags & FL_SLEEP) {
1751 		cifs_dbg(FYI, "Blocking lock\n");
1752 		*wait_flag = true;
1753 	}
1754 	if (flock->fl_flags & FL_ACCESS)
1755 		cifs_dbg(FYI, "Process suspended by mandatory locking - not implemented yet\n");
1756 	if (flock->fl_flags & FL_LEASE)
1757 		cifs_dbg(FYI, "Lease on file - not implemented yet\n");
1758 	if (flock->fl_flags &
1759 	    (~(FL_POSIX | FL_FLOCK | FL_SLEEP |
1760 	       FL_ACCESS | FL_LEASE | FL_CLOSE | FL_OFDLCK)))
1761 		cifs_dbg(FYI, "Unknown lock flags 0x%x\n", flock->fl_flags);
1762 
1763 	*type = server->vals->large_lock_type;
1764 	if (flock->fl_type == F_WRLCK) {
1765 		cifs_dbg(FYI, "F_WRLCK\n");
1766 		*type |= server->vals->exclusive_lock_type;
1767 		*lock = 1;
1768 	} else if (flock->fl_type == F_UNLCK) {
1769 		cifs_dbg(FYI, "F_UNLCK\n");
1770 		*type |= server->vals->unlock_lock_type;
1771 		*unlock = 1;
1772 		/* Check if unlock includes more than one lock range */
1773 	} else if (flock->fl_type == F_RDLCK) {
1774 		cifs_dbg(FYI, "F_RDLCK\n");
1775 		*type |= server->vals->shared_lock_type;
1776 		*lock = 1;
1777 	} else if (flock->fl_type == F_EXLCK) {
1778 		cifs_dbg(FYI, "F_EXLCK\n");
1779 		*type |= server->vals->exclusive_lock_type;
1780 		*lock = 1;
1781 	} else if (flock->fl_type == F_SHLCK) {
1782 		cifs_dbg(FYI, "F_SHLCK\n");
1783 		*type |= server->vals->shared_lock_type;
1784 		*lock = 1;
1785 	} else
1786 		cifs_dbg(FYI, "Unknown type of lock\n");
1787 }
1788 
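/*
 * Handle a lock query (F_GETLK): check the locally cached locks first,
 * then probe the server by briefly acquiring and releasing the range to
 * discover whether a conflicting lock is held elsewhere.
 */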
1789 static int
1790 cifs_getlk(struct file *file, struct file_lock *flock, __u32 type,
1791 	   bool wait_flag, bool posix_lck, unsigned int xid)
1792 {
1793 	int rc = 0;
1794 	__u64 length = cifs_flock_len(flock);
1795 	struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
1796 	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1797 	struct TCP_Server_Info *server = tcon->ses->server;
1798 #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
1799 	__u16 netfid = cfile->fid.netfid;
1800 
1801 	if (posix_lck) {
1802 		int posix_lock_type;
1803 
1804 		rc = cifs_posix_lock_test(file, flock);
1805 		if (!rc)
1806 			return rc;
1807 
1808 		if (type & server->vals->shared_lock_type)
1809 			posix_lock_type = CIFS_RDLCK;
1810 		else
1811 			posix_lock_type = CIFS_WRLCK;
1812 		rc = CIFSSMBPosixLock(xid, tcon, netfid,
1813 				      hash_lockowner(flock->fl_owner),
1814 				      flock->fl_start, length, flock,
1815 				      posix_lock_type, wait_flag);
1816 		return rc;
1817 	}
1818 #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
1819 
1820 	rc = cifs_lock_test(cfile, flock->fl_start, length, type, flock);
1821 	if (!rc)
1822 		return rc;
1823 
1824 	/* BB we could chain these into one lock request BB */
1825 	rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length, type,
1826 				    1, 0, false);
1827 	if (rc == 0) {
1828 		rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
1829 					    type, 0, 1, false);
1830 		flock->fl_type = F_UNLCK;
1831 		if (rc != 0)
1832 			cifs_dbg(VFS, "Error unlocking previously locked range %d during test of lock\n",
1833 				 rc);
1834 		return 0;
1835 	}
1836 
1837 	if (type & server->vals->shared_lock_type) {
1838 		flock->fl_type = F_WRLCK;
1839 		return 0;
1840 	}
1841 
1842 	type &= ~server->vals->exclusive_lock_type;
1843 
1844 	rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
1845 				    type | server->vals->shared_lock_type,
1846 				    1, 0, false);
1847 	if (rc == 0) {
1848 		rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
1849 			type | server->vals->shared_lock_type, 0, 1, false);
1850 		flock->fl_type = F_RDLCK;
1851 		if (rc != 0)
1852 			cifs_dbg(VFS, "Error unlocking previously locked range %d during test of lock\n",
1853 				 rc);
1854 	} else
1855 		flock->fl_type = F_WRLCK;
1856 
1857 	return 0;
1858 }
1859 
1860 void
1861 cifs_move_llist(struct list_head *source, struct list_head *dest)
1862 {
1863 	struct list_head *li, *tmp;
1864 	list_for_each_safe(li, tmp, source)
1865 		list_move(li, dest);
1866 }
1867 
1868 void
1869 cifs_free_llist(struct list_head *llist)
1870 {
1871 	struct cifsLockInfo *li, *tmp;
1872 	list_for_each_entry_safe(li, tmp, llist, llist) {
1873 		cifs_del_lock_waiters(li);
1874 		list_del(&li->llist);
1875 		kfree(li);
1876 	}
1877 }
1878 
1879 #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
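/*
 * Remove the cached byte-range locks covered by an unlock request and send
 * the corresponding LOCKING_ANDX unlock ranges to the server, batching as
 * many ranges per request as the server's buffer size allows.  Locks are
 * put back on the file's list if the server rejects the unlock.
 */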
1880 int
1881 cifs_unlock_range(struct cifsFileInfo *cfile, struct file_lock *flock,
1882 		  unsigned int xid)
1883 {
1884 	int rc = 0, stored_rc;
1885 	static const int types[] = {
1886 		LOCKING_ANDX_LARGE_FILES,
1887 		LOCKING_ANDX_SHARED_LOCK | LOCKING_ANDX_LARGE_FILES
1888 	};
1889 	unsigned int i;
1890 	unsigned int max_num, num, max_buf;
1891 	LOCKING_ANDX_RANGE *buf, *cur;
1892 	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1893 	struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
1894 	struct cifsLockInfo *li, *tmp;
1895 	__u64 length = cifs_flock_len(flock);
1896 	struct list_head tmp_llist;
1897 
1898 	INIT_LIST_HEAD(&tmp_llist);
1899 
1900 	/*
1901 	 * Accessing maxBuf is racy with cifs_reconnect - need to store value
1902 	 * and check it before using.
1903 	 */
1904 	max_buf = tcon->ses->server->maxBuf;
1905 	if (max_buf < (sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE)))
1906 		return -EINVAL;
1907 
1908 	BUILD_BUG_ON(sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE) >
1909 		     PAGE_SIZE);
1910 	max_buf = min_t(unsigned int, max_buf - sizeof(struct smb_hdr),
1911 			PAGE_SIZE);
1912 	max_num = (max_buf - sizeof(struct smb_hdr)) /
1913 						sizeof(LOCKING_ANDX_RANGE);
1914 	buf = kcalloc(max_num, sizeof(LOCKING_ANDX_RANGE), GFP_KERNEL);
1915 	if (!buf)
1916 		return -ENOMEM;
1917 
1918 	cifs_down_write(&cinode->lock_sem);
1919 	for (i = 0; i < 2; i++) {
1920 		cur = buf;
1921 		num = 0;
1922 		list_for_each_entry_safe(li, tmp, &cfile->llist->locks, llist) {
1923 			if (flock->fl_start > li->offset ||
1924 			    (flock->fl_start + length) <
1925 			    (li->offset + li->length))
1926 				continue;
1927 			if (current->tgid != li->pid)
1928 				continue;
1929 			if (types[i] != li->type)
1930 				continue;
1931 			if (cinode->can_cache_brlcks) {
1932 				/*
1933 				 * We can cache brlock requests - simply remove
1934 				 * a lock from the file's list.
1935 				 */
1936 				list_del(&li->llist);
1937 				cifs_del_lock_waiters(li);
1938 				kfree(li);
1939 				continue;
1940 			}
1941 			cur->Pid = cpu_to_le16(li->pid);
1942 			cur->LengthLow = cpu_to_le32((u32)li->length);
1943 			cur->LengthHigh = cpu_to_le32((u32)(li->length>>32));
1944 			cur->OffsetLow = cpu_to_le32((u32)li->offset);
1945 			cur->OffsetHigh = cpu_to_le32((u32)(li->offset>>32));
1946 			/*
1947 			 * We need to save a lock here to let us add it again to
1948 			 * the file's list if the unlock range request fails on
1949 			 * the server.
1950 			 */
1951 			list_move(&li->llist, &tmp_llist);
1952 			if (++num == max_num) {
1953 				stored_rc = cifs_lockv(xid, tcon,
1954 						       cfile->fid.netfid,
1955 						       li->type, num, 0, buf);
1956 				if (stored_rc) {
1957 					/*
1958 					 * We failed on the unlock range
1959 					 * request - add all locks from the tmp
1960 					 * list to the head of the file's list.
1961 					 */
1962 					cifs_move_llist(&tmp_llist,
1963 							&cfile->llist->locks);
1964 					rc = stored_rc;
1965 				} else
1966 					/*
1967 					 * The unlock range request succeeded -
1968 					 * free the tmp list.
1969 					 */
1970 					cifs_free_llist(&tmp_llist);
1971 				cur = buf;
1972 				num = 0;
1973 			} else
1974 				cur++;
1975 		}
1976 		if (num) {
1977 			stored_rc = cifs_lockv(xid, tcon, cfile->fid.netfid,
1978 					       types[i], num, 0, buf);
1979 			if (stored_rc) {
1980 				cifs_move_llist(&tmp_llist,
1981 						&cfile->llist->locks);
1982 				rc = stored_rc;
1983 			} else
1984 				cifs_free_llist(&tmp_llist);
1985 		}
1986 	}
1987 
1988 	up_write(&cinode->lock_sem);
1989 	kfree(buf);
1990 	return rc;
1991 }
1992 #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
1993 
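/*
 * Set or clear a byte-range lock.  POSIX-capable mounts go through the
 * POSIX lock call; otherwise the lock is recorded locally and, unless it
 * can still be cached, sent to the server as a mandatory lock before the
 * local VFS lock state is updated.
 */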
1994 static int
1995 cifs_setlk(struct file *file, struct file_lock *flock, __u32 type,
1996 	   bool wait_flag, bool posix_lck, int lock, int unlock,
1997 	   unsigned int xid)
1998 {
1999 	int rc = 0;
2000 	__u64 length = cifs_flock_len(flock);
2001 	struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
2002 	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
2003 	struct TCP_Server_Info *server = tcon->ses->server;
2004 	struct inode *inode = d_inode(cfile->dentry);
2005 
2006 #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
2007 	if (posix_lck) {
2008 		int posix_lock_type;
2009 
2010 		rc = cifs_posix_lock_set(file, flock);
2011 		if (rc <= FILE_LOCK_DEFERRED)
2012 			return rc;
2013 
2014 		if (type & server->vals->shared_lock_type)
2015 			posix_lock_type = CIFS_RDLCK;
2016 		else
2017 			posix_lock_type = CIFS_WRLCK;
2018 
2019 		if (unlock == 1)
2020 			posix_lock_type = CIFS_UNLCK;
2021 
2022 		rc = CIFSSMBPosixLock(xid, tcon, cfile->fid.netfid,
2023 				      hash_lockowner(flock->fl_owner),
2024 				      flock->fl_start, length,
2025 				      NULL, posix_lock_type, wait_flag);
2026 		goto out;
2027 	}
2028 #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
2029 	if (lock) {
2030 		struct cifsLockInfo *lock;
2031 
2032 		lock = cifs_lock_init(flock->fl_start, length, type,
2033 				      flock->fl_flags);
2034 		if (!lock)
2035 			return -ENOMEM;
2036 
2037 		rc = cifs_lock_add_if(cfile, lock, wait_flag);
2038 		if (rc < 0) {
2039 			kfree(lock);
2040 			return rc;
2041 		}
2042 		if (!rc)
2043 			goto out;
2044 
2045 		/*
2046 		 * Windows 7 server can delay breaking lease from read to None
2047 		 * if we set a byte-range lock on a file - break it explicitly
2048 		 * before sending the lock to the server to be sure the next
2049 		 * read won't conflict with non-overlapping locks due to
2050 		 * page reading.
2051 		 */
2052 		if (!CIFS_CACHE_WRITE(CIFS_I(inode)) &&
2053 					CIFS_CACHE_READ(CIFS_I(inode))) {
2054 			cifs_zap_mapping(inode);
2055 			cifs_dbg(FYI, "Set no oplock for inode=%p due to mand locks\n",
2056 				 inode);
2057 			CIFS_I(inode)->oplock = 0;
2058 		}
2059 
2060 		rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
2061 					    type, 1, 0, wait_flag);
2062 		if (rc) {
2063 			kfree(lock);
2064 			return rc;
2065 		}
2066 
2067 		cifs_lock_add(cfile, lock);
2068 	} else if (unlock)
2069 		rc = server->ops->mand_unlock_range(cfile, flock, xid);
2070 
2071 out:
2072 	if ((flock->fl_flags & FL_POSIX) || (flock->fl_flags & FL_FLOCK)) {
2073 		/*
2074 		 * If this is a request to remove all locks because we
2075 		 * are closing the file, it doesn't matter if the
2076 		 * unlocking failed as both cifs.ko and the SMB server
2077 		 * remove the lock on file close
2078 		 */
2079 		if (rc) {
2080 			cifs_dbg(VFS, "%s failed rc=%d\n", __func__, rc);
2081 			if (!(flock->fl_flags & FL_CLOSE))
2082 				return rc;
2083 		}
2084 		rc = locks_lock_file_wait(file, flock);
2085 	}
2086 	return rc;
2087 }
2088 
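/*
 * flock(2) entry point.  Only whole-file FL_FLOCK requests are accepted
 * here; they are applied through the same cifs_setlk() machinery as
 * fcntl byte-range locks.
 */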
2089 int cifs_flock(struct file *file, int cmd, struct file_lock *fl)
2090 {
2091 	int rc, xid;
2092 	int lock = 0, unlock = 0;
2093 	bool wait_flag = false;
2094 	bool posix_lck = false;
2095 	struct cifs_sb_info *cifs_sb;
2096 	struct cifs_tcon *tcon;
2097 	struct cifsFileInfo *cfile;
2098 	__u32 type;
2099 
2100 	xid = get_xid();
2101 
2102 	if (!(fl->fl_flags & FL_FLOCK)) {
2103 		rc = -ENOLCK;
2104 		free_xid(xid);
2105 		return rc;
2106 	}
2107 
2108 	cfile = (struct cifsFileInfo *)file->private_data;
2109 	tcon = tlink_tcon(cfile->tlink);
2110 
2111 	cifs_read_flock(fl, &type, &lock, &unlock, &wait_flag,
2112 			tcon->ses->server);
2113 	cifs_sb = CIFS_FILE_SB(file);
2114 
2115 	if (cap_unix(tcon->ses) &&
2116 	    (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
2117 	    ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
2118 		posix_lck = true;
2119 
2120 	if (!lock && !unlock) {
2121 		/*
2122 		 * if this is neither a lock nor an unlock request, there is
2123 		 * nothing to do since we do not know what it is
2124 		 */
2125 		rc = -EOPNOTSUPP;
2126 		free_xid(xid);
2127 		return rc;
2128 	}
2129 
2130 	rc = cifs_setlk(file, fl, type, wait_flag, posix_lck, lock, unlock,
2131 			xid);
2132 	free_xid(xid);
2133 	return rc;
2136 }
2137 
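/*
 * fcntl(2) byte-range lock entry point, handling F_GETLK queries as well
 * as F_SETLK/F_SETLKW lock and unlock requests.
 */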
2138 int cifs_lock(struct file *file, int cmd, struct file_lock *flock)
2139 {
2140 	int rc, xid;
2141 	int lock = 0, unlock = 0;
2142 	bool wait_flag = false;
2143 	bool posix_lck = false;
2144 	struct cifs_sb_info *cifs_sb;
2145 	struct cifs_tcon *tcon;
2146 	struct cifsFileInfo *cfile;
2147 	__u32 type;
2148 
2149 	rc = -EACCES;
2150 	xid = get_xid();
2151 
2152 	cifs_dbg(FYI, "%s: %pD2 cmd=0x%x type=0x%x flags=0x%x r=%lld:%lld\n", __func__, file, cmd,
2153 		 flock->fl_flags, flock->fl_type, (long long)flock->fl_start,
2154 		 (long long)flock->fl_end);
2155 
2156 	cfile = (struct cifsFileInfo *)file->private_data;
2157 	tcon = tlink_tcon(cfile->tlink);
2158 
2159 	cifs_read_flock(flock, &type, &lock, &unlock, &wait_flag,
2160 			tcon->ses->server);
2161 	cifs_sb = CIFS_FILE_SB(file);
2162 	set_bit(CIFS_INO_CLOSE_ON_LOCK, &CIFS_I(d_inode(cfile->dentry))->flags);
2163 
2164 	if (cap_unix(tcon->ses) &&
2165 	    (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
2166 	    ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
2167 		posix_lck = true;
2168 	/*
2169 	 * BB add code here to normalize offset and length to account for
2170 	 * negative length which we can not accept over the wire.
2171 	 */
2172 	if (IS_GETLK(cmd)) {
2173 		rc = cifs_getlk(file, flock, type, wait_flag, posix_lck, xid);
2174 		free_xid(xid);
2175 		return rc;
2176 	}
2177 
2178 	if (!lock && !unlock) {
2179 		/*
2180 		 * if this is neither a lock nor an unlock request, there is
2181 		 * nothing to do since we do not know what it is
2182 		 */
2183 		free_xid(xid);
2184 		return -EOPNOTSUPP;
2185 	}
2186 
2187 	rc = cifs_setlk(file, flock, type, wait_flag, posix_lck, lock, unlock,
2188 			xid);
2189 	free_xid(xid);
2190 	return rc;
2191 }
2192 
2193 /*
2194  * Update the file size (if needed) after a write. Should be called with
2195  * the inode->i_lock held.
2196  */
2197 void
2198 cifs_update_eof(struct cifsInodeInfo *cifsi, loff_t offset,
2199 		      unsigned int bytes_written)
2200 {
2201 	loff_t end_of_write = offset + bytes_written;
2202 
2203 	if (end_of_write > cifsi->server_eof)
2204 		cifsi->server_eof = end_of_write;
2205 }
2206 
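/*
 * Write data to the server synchronously, looping until the whole buffer
 * has been written or a non-retryable error occurs, and updating the
 * inode size and server EOF as bytes are accepted.
 */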
2207 static ssize_t
2208 cifs_write(struct cifsFileInfo *open_file, __u32 pid, const char *write_data,
2209 	   size_t write_size, loff_t *offset)
2210 {
2211 	int rc = 0;
2212 	unsigned int bytes_written = 0;
2213 	unsigned int total_written;
2214 	struct cifs_tcon *tcon;
2215 	struct TCP_Server_Info *server;
2216 	unsigned int xid;
2217 	struct dentry *dentry = open_file->dentry;
2218 	struct cifsInodeInfo *cifsi = CIFS_I(d_inode(dentry));
2219 	struct cifs_io_parms io_parms = {0};
2220 
2221 	cifs_dbg(FYI, "write %zd bytes to offset %lld of %pd\n",
2222 		 write_size, *offset, dentry);
2223 
2224 	tcon = tlink_tcon(open_file->tlink);
2225 	server = tcon->ses->server;
2226 
2227 	if (!server->ops->sync_write)
2228 		return -ENOSYS;
2229 
2230 	xid = get_xid();
2231 
2232 	for (total_written = 0; write_size > total_written;
2233 	     total_written += bytes_written) {
2234 		rc = -EAGAIN;
2235 		while (rc == -EAGAIN) {
2236 			struct kvec iov[2];
2237 			unsigned int len;
2238 
2239 			if (open_file->invalidHandle) {
2240 				/* we could deadlock if we called
2241 				   filemap_fdatawait from here so tell
2242 				   reopen_file not to flush data to
2243 				   server now */
2244 				rc = cifs_reopen_file(open_file, false);
2245 				if (rc != 0)
2246 					break;
2247 			}
2248 
2249 			len = min(server->ops->wp_retry_size(d_inode(dentry)),
2250 				  (unsigned int)write_size - total_written);
2251 			/* iov[0] is reserved for smb header */
2252 			iov[1].iov_base = (char *)write_data + total_written;
2253 			iov[1].iov_len = len;
2254 			io_parms.pid = pid;
2255 			io_parms.tcon = tcon;
2256 			io_parms.offset = *offset;
2257 			io_parms.length = len;
2258 			rc = server->ops->sync_write(xid, &open_file->fid,
2259 					&io_parms, &bytes_written, iov, 1);
2260 		}
2261 		if (rc || (bytes_written == 0)) {
2262 			if (total_written)
2263 				break;
2264 			else {
2265 				free_xid(xid);
2266 				return rc;
2267 			}
2268 		} else {
2269 			spin_lock(&d_inode(dentry)->i_lock);
2270 			cifs_update_eof(cifsi, *offset, bytes_written);
2271 			spin_unlock(&d_inode(dentry)->i_lock);
2272 			*offset += bytes_written;
2273 		}
2274 	}
2275 
2276 	cifs_stats_bytes_written(tcon, total_written);
2277 
2278 	if (total_written > 0) {
2279 		spin_lock(&d_inode(dentry)->i_lock);
2280 		if (*offset > d_inode(dentry)->i_size) {
2281 			i_size_write(d_inode(dentry), *offset);
2282 			d_inode(dentry)->i_blocks = (512 - 1 + *offset) >> 9;
2283 		}
2284 		spin_unlock(&d_inode(dentry)->i_lock);
2285 	}
2286 	mark_inode_dirty_sync(d_inode(dentry));
2287 	free_xid(xid);
2288 	return total_written;
2289 }
2290 
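/*
 * Find an open handle on this inode that is usable for reading and take a
 * reference on it so it cannot be closed underneath us; returns NULL if
 * none is found.
 */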
2291 struct cifsFileInfo *find_readable_file(struct cifsInodeInfo *cifs_inode,
2292 					bool fsuid_only)
2293 {
2294 	struct cifsFileInfo *open_file = NULL;
2295 	struct cifs_sb_info *cifs_sb = CIFS_SB(cifs_inode->netfs.inode.i_sb);
2296 
2297 	/* only filter by fsuid on multiuser mounts */
2298 	if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER))
2299 		fsuid_only = false;
2300 
2301 	spin_lock(&cifs_inode->open_file_lock);
2302 	/* We could simply take the first list entry since write-only entries
2303 	   are always at the end of the list, but the first entry might have a
2304 	   close pending, so we walk the whole list */
2305 	list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
2306 		if (fsuid_only && !uid_eq(open_file->uid, current_fsuid()))
2307 			continue;
2308 		if (OPEN_FMODE(open_file->f_flags) & FMODE_READ) {
2309 			if ((!open_file->invalidHandle)) {
2310 				/* found a good file */
2311 				/* lock it so it will not be closed on us */
2312 				cifsFileInfo_get(open_file);
2313 				spin_unlock(&cifs_inode->open_file_lock);
2314 				return open_file;
2315 			} /* else might as well continue, and look for
2316 			     another, or simply have the caller reopen it
2317 			     again rather than trying to fix this handle */
2318 		} else /* write only file */
2319 			break; /* write only files are last so must be done */
2320 	}
2321 	spin_unlock(&cifs_inode->open_file_lock);
2322 	return NULL;
2323 }
2324 
2325 /* Return -EBADF if no handle is found and general rc otherwise */
2326 int
2327 cifs_get_writable_file(struct cifsInodeInfo *cifs_inode, int flags,
2328 		       struct cifsFileInfo **ret_file)
2329 {
2330 	struct cifsFileInfo *open_file, *inv_file = NULL;
2331 	struct cifs_sb_info *cifs_sb;
2332 	bool any_available = false;
2333 	int rc = -EBADF;
2334 	unsigned int refind = 0;
2335 	bool fsuid_only = flags & FIND_WR_FSUID_ONLY;
2336 	bool with_delete = flags & FIND_WR_WITH_DELETE;
2337 	*ret_file = NULL;
2338 
2339 	/*
2340 	 * Having a null inode here (because mapping->host was set to zero by
2341 	 * the VFS or MM) should not happen but we had reports of an oops (due
2342 	 * to it being zero) during stress testcases, so we need to check for it
2343 	 */
2344 
2345 	if (cifs_inode == NULL) {
2346 		cifs_dbg(VFS, "Null inode passed to cifs_writeable_file\n");
2347 		dump_stack();
2348 		return rc;
2349 	}
2350 
2351 	cifs_sb = CIFS_SB(cifs_inode->netfs.inode.i_sb);
2352 
2353 	/* only filter by fsuid on multiuser mounts */
2354 	if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER))
2355 		fsuid_only = false;
2356 
2357 	spin_lock(&cifs_inode->open_file_lock);
2358 refind_writable:
2359 	if (refind > MAX_REOPEN_ATT) {
2360 		spin_unlock(&cifs_inode->open_file_lock);
2361 		return rc;
2362 	}
2363 	list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
2364 		if (!any_available && open_file->pid != current->tgid)
2365 			continue;
2366 		if (fsuid_only && !uid_eq(open_file->uid, current_fsuid()))
2367 			continue;
2368 		if (with_delete && !(open_file->fid.access & DELETE))
2369 			continue;
2370 		if (OPEN_FMODE(open_file->f_flags) & FMODE_WRITE) {
2371 			if (!open_file->invalidHandle) {
2372 				/* found a good writable file */
2373 				cifsFileInfo_get(open_file);
2374 				spin_unlock(&cifs_inode->open_file_lock);
2375 				*ret_file = open_file;
2376 				return 0;
2377 			} else {
2378 				if (!inv_file)
2379 					inv_file = open_file;
2380 			}
2381 		}
2382 	}
2383 	/* couldn't find useable FH with same pid, try any available */
2384 	if (!any_available) {
2385 		any_available = true;
2386 		goto refind_writable;
2387 	}
2388 
2389 	if (inv_file) {
2390 		any_available = false;
2391 		cifsFileInfo_get(inv_file);
2392 	}
2393 
2394 	spin_unlock(&cifs_inode->open_file_lock);
2395 
2396 	if (inv_file) {
2397 		rc = cifs_reopen_file(inv_file, false);
2398 		if (!rc) {
2399 			*ret_file = inv_file;
2400 			return 0;
2401 		}
2402 
2403 		spin_lock(&cifs_inode->open_file_lock);
2404 		list_move_tail(&inv_file->flist, &cifs_inode->openFileList);
2405 		spin_unlock(&cifs_inode->open_file_lock);
2406 		cifsFileInfo_put(inv_file);
2407 		++refind;
2408 		inv_file = NULL;
2409 		spin_lock(&cifs_inode->open_file_lock);
2410 		goto refind_writable;
2411 	}
2412 
2413 	return rc;
2414 }
2415 
2416 struct cifsFileInfo *
2417 find_writable_file(struct cifsInodeInfo *cifs_inode, int flags)
2418 {
2419 	struct cifsFileInfo *cfile;
2420 	int rc;
2421 
2422 	rc = cifs_get_writable_file(cifs_inode, flags, &cfile);
2423 	if (rc)
2424 		cifs_dbg(FYI, "Couldn't find writable handle rc=%d\n", rc);
2425 
2426 	return cfile;
2427 }
2428 
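/*
 * Look up an already-open file on this tcon by pathname and return a
 * writable handle for it; -ENOENT if no open file matches the path.
 */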
2429 int
2430 cifs_get_writable_path(struct cifs_tcon *tcon, const char *name,
2431 		       int flags,
2432 		       struct cifsFileInfo **ret_file)
2433 {
2434 	struct cifsFileInfo *cfile;
2435 	void *page = alloc_dentry_path();
2436 
2437 	*ret_file = NULL;
2438 
2439 	spin_lock(&tcon->open_file_lock);
2440 	list_for_each_entry(cfile, &tcon->openFileList, tlist) {
2441 		struct cifsInodeInfo *cinode;
2442 		const char *full_path = build_path_from_dentry(cfile->dentry, page);
2443 		if (IS_ERR(full_path)) {
2444 			spin_unlock(&tcon->open_file_lock);
2445 			free_dentry_path(page);
2446 			return PTR_ERR(full_path);
2447 		}
2448 		if (strcmp(full_path, name))
2449 			continue;
2450 
2451 		cinode = CIFS_I(d_inode(cfile->dentry));
2452 		spin_unlock(&tcon->open_file_lock);
2453 		free_dentry_path(page);
2454 		return cifs_get_writable_file(cinode, flags, ret_file);
2455 	}
2456 
2457 	spin_unlock(&tcon->open_file_lock);
2458 	free_dentry_path(page);
2459 	return -ENOENT;
2460 }
2461 
2462 int
2463 cifs_get_readable_path(struct cifs_tcon *tcon, const char *name,
2464 		       struct cifsFileInfo **ret_file)
2465 {
2466 	struct cifsFileInfo *cfile;
2467 	void *page = alloc_dentry_path();
2468 
2469 	*ret_file = NULL;
2470 
2471 	spin_lock(&tcon->open_file_lock);
2472 	list_for_each_entry(cfile, &tcon->openFileList, tlist) {
2473 		struct cifsInodeInfo *cinode;
2474 		const char *full_path = build_path_from_dentry(cfile->dentry, page);
2475 		if (IS_ERR(full_path)) {
2476 			spin_unlock(&tcon->open_file_lock);
2477 			free_dentry_path(page);
2478 			return PTR_ERR(full_path);
2479 		}
2480 		if (strcmp(full_path, name))
2481 			continue;
2482 
2483 		cinode = CIFS_I(d_inode(cfile->dentry));
2484 		spin_unlock(&tcon->open_file_lock);
2485 		free_dentry_path(page);
2486 		*ret_file = find_readable_file(cinode, 0);
2487 		return *ret_file ? 0 : -ENOENT;
2488 	}
2489 
2490 	spin_unlock(&tcon->open_file_lock);
2491 	free_dentry_path(page);
2492 	return -ENOENT;
2493 }
2494 
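/*
 * Final kref release for a writeback request: drop the SMB Direct memory
 * registration and the file handle reference, then free the structure.
 */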
2495 void
2496 cifs_writedata_release(struct kref *refcount)
2497 {
2498 	struct cifs_writedata *wdata = container_of(refcount,
2499 					struct cifs_writedata, refcount);
2500 #ifdef CONFIG_CIFS_SMB_DIRECT
2501 	if (wdata->mr) {
2502 		smbd_deregister_mr(wdata->mr);
2503 		wdata->mr = NULL;
2504 	}
2505 #endif
2506 
2507 	if (wdata->cfile)
2508 		cifsFileInfo_put(wdata->cfile);
2509 
2510 	kfree(wdata);
2511 }
2512 
2513 /*
2514  * Write failed with a retryable error. Resend the write request. It's also
2515  * possible that the page was redirtied so re-clean the page.
2516  */
2517 static void
2518 cifs_writev_requeue(struct cifs_writedata *wdata)
2519 {
2520 	int rc = 0;
2521 	struct inode *inode = d_inode(wdata->cfile->dentry);
2522 	struct TCP_Server_Info *server;
2523 	unsigned int rest_len = wdata->bytes;
2524 	loff_t fpos = wdata->offset;
2525 
2526 	server = tlink_tcon(wdata->cfile->tlink)->ses->server;
2527 	do {
2528 		struct cifs_writedata *wdata2;
2529 		unsigned int wsize, cur_len;
2530 
2531 		wsize = server->ops->wp_retry_size(inode);
2532 		if (wsize < rest_len) {
2533 			if (wsize < PAGE_SIZE) {
2534 				rc = -EOPNOTSUPP;
2535 				break;
2536 			}
2537 			cur_len = min(round_down(wsize, PAGE_SIZE), rest_len);
2538 		} else {
2539 			cur_len = rest_len;
2540 		}
2541 
2542 		wdata2 = cifs_writedata_alloc(cifs_writev_complete);
2543 		if (!wdata2) {
2544 			rc = -ENOMEM;
2545 			break;
2546 		}
2547 
2548 		wdata2->sync_mode = wdata->sync_mode;
2549 		wdata2->offset	= fpos;
2550 		wdata2->bytes	= cur_len;
2551 		wdata2->iter	= wdata->iter;
2552 
2553 		iov_iter_advance(&wdata2->iter, fpos - wdata->offset);
2554 		iov_iter_truncate(&wdata2->iter, wdata2->bytes);
2555 
2556 		if (iov_iter_is_xarray(&wdata2->iter))
2557 			/* Check for pages having been redirtied and clean
2558 			 * them.  We can do this by walking the xarray.  If
2559 			 * it's not an xarray, then it's a DIO and we shouldn't
2560 			 * be mucking around with the page bits.
2561 			 */
2562 			cifs_undirty_folios(inode, fpos, cur_len);
2563 
2564 		rc = cifs_get_writable_file(CIFS_I(inode), FIND_WR_ANY,
2565 					    &wdata2->cfile);
2566 		if (!wdata2->cfile) {
2567 			cifs_dbg(VFS, "No writable handle to retry writepages rc=%d\n",
2568 				 rc);
2569 			if (!is_retryable_error(rc))
2570 				rc = -EBADF;
2571 		} else {
2572 			wdata2->pid = wdata2->cfile->pid;
2573 			rc = server->ops->async_writev(wdata2,
2574 						       cifs_writedata_release);
2575 		}
2576 
2577 		kref_put(&wdata2->refcount, cifs_writedata_release);
2578 		if (rc) {
2579 			if (is_retryable_error(rc))
2580 				continue;
2581 			fpos += cur_len;
2582 			rest_len -= cur_len;
2583 			break;
2584 		}
2585 
2586 		fpos += cur_len;
2587 		rest_len -= cur_len;
2588 	} while (rest_len > 0);
2589 
2590 	/* Clean up remaining pages from the original wdata */
2591 	if (iov_iter_is_xarray(&wdata->iter))
2592 		cifs_pages_write_failed(inode, fpos, rest_len);
2593 
2594 	if (rc != 0 && !is_retryable_error(rc))
2595 		mapping_set_error(inode->i_mapping, rc);
2596 	kref_put(&wdata->refcount, cifs_writedata_release);
2597 }
2598 
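/*
 * Completion work for an asynchronous writeback request: on success update
 * the server EOF and write statistics; on a retryable error requeue or
 * redirty the pages; otherwise mark the pages as failed.
 */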
2599 void
2600 cifs_writev_complete(struct work_struct *work)
2601 {
2602 	struct cifs_writedata *wdata = container_of(work,
2603 						struct cifs_writedata, work);
2604 	struct inode *inode = d_inode(wdata->cfile->dentry);
2605 
2606 	if (wdata->result == 0) {
2607 		spin_lock(&inode->i_lock);
2608 		cifs_update_eof(CIFS_I(inode), wdata->offset, wdata->bytes);
2609 		spin_unlock(&inode->i_lock);
2610 		cifs_stats_bytes_written(tlink_tcon(wdata->cfile->tlink),
2611 					 wdata->bytes);
2612 	} else if (wdata->sync_mode == WB_SYNC_ALL && wdata->result == -EAGAIN)
2613 		return cifs_writev_requeue(wdata);
2614 
2615 	if (wdata->result == -EAGAIN)
2616 		cifs_pages_write_redirty(inode, wdata->offset, wdata->bytes);
2617 	else if (wdata->result < 0)
2618 		cifs_pages_write_failed(inode, wdata->offset, wdata->bytes);
2619 	else
2620 		cifs_pages_written_back(inode, wdata->offset, wdata->bytes);
2621 
2622 	if (wdata->result != -EAGAIN)
2623 		mapping_set_error(inode->i_mapping, wdata->result);
2624 	kref_put(&wdata->refcount, cifs_writedata_release);
2625 }
2626 
2627 struct cifs_writedata *cifs_writedata_alloc(work_func_t complete)
2628 {
2629 	struct cifs_writedata *wdata;
2630 
2631 	wdata = kzalloc(sizeof(*wdata), GFP_NOFS);
2632 	if (wdata != NULL) {
2633 		kref_init(&wdata->refcount);
2634 		INIT_LIST_HEAD(&wdata->list);
2635 		init_completion(&wdata->done);
2636 		INIT_WORK(&wdata->work, complete);
2637 	}
2638 	return wdata;
2639 }
2640 
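/*
 * Synchronously write part of a single page back to the server using any
 * available writable handle, trimming the range so that the file is not
 * extended beyond its current size.
 */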
2641 static int cifs_partialpagewrite(struct page *page, unsigned from, unsigned to)
2642 {
2643 	struct address_space *mapping = page->mapping;
2644 	loff_t offset = (loff_t)page->index << PAGE_SHIFT;
2645 	char *write_data;
2646 	int rc = -EFAULT;
2647 	int bytes_written = 0;
2648 	struct inode *inode;
2649 	struct cifsFileInfo *open_file;
2650 
2651 	if (!mapping || !mapping->host)
2652 		return -EFAULT;
2653 
2654 	inode = page->mapping->host;
2655 
2656 	offset += (loff_t)from;
2657 	write_data = kmap(page);
2658 	write_data += from;
2659 
2660 	if ((to > PAGE_SIZE) || (from > to)) {
2661 		kunmap(page);
2662 		return -EIO;
2663 	}
2664 
2665 	/* racing with truncate? */
2666 	if (offset > mapping->host->i_size) {
2667 		kunmap(page);
2668 		return 0; /* don't care */
2669 	}
2670 
2671 	/* check to make sure that we are not extending the file */
2672 	if (mapping->host->i_size - offset < (loff_t)to)
2673 		to = (unsigned)(mapping->host->i_size - offset);
2674 
2675 	rc = cifs_get_writable_file(CIFS_I(mapping->host), FIND_WR_ANY,
2676 				    &open_file);
2677 	if (!rc) {
2678 		bytes_written = cifs_write(open_file, open_file->pid,
2679 					   write_data, to - from, &offset);
2680 		cifsFileInfo_put(open_file);
2681 		/* Does mm or vfs already set times? */
2682 		simple_inode_init_ts(inode);
2683 		if ((bytes_written > 0) && (offset))
2684 			rc = 0;
2685 		else if (bytes_written < 0)
2686 			rc = bytes_written;
2687 		else
2688 			rc = -EFAULT;
2689 	} else {
2690 		cifs_dbg(FYI, "No writable handle for write page rc=%d\n", rc);
2691 		if (!is_retryable_error(rc))
2692 			rc = -EIO;
2693 	}
2694 
2695 	kunmap(page);
2696 	return rc;
2697 }
2698 
2699 /*
2700  * Extend the region to be written back to include subsequent contiguously
2701  * dirty pages if possible, but don't sleep while doing so.
2702  */
2703 static void cifs_extend_writeback(struct address_space *mapping,
2704 				  struct xa_state *xas,
2705 				  long *_count,
2706 				  loff_t start,
2707 				  int max_pages,
2708 				  loff_t max_len,
2709 				  size_t *_len)
2710 {
2711 	struct folio_batch batch;
2712 	struct folio *folio;
2713 	unsigned int nr_pages;
2714 	pgoff_t index = (start + *_len) / PAGE_SIZE;
2715 	size_t len;
2716 	bool stop = true;
2717 	unsigned int i;
2718 
2719 	folio_batch_init(&batch);
2720 
2721 	do {
2722 		/* Firstly, we gather up a batch of contiguous dirty pages
2723 		 * under the RCU read lock - but we can't clear the dirty flags
2724 		 * there if any of those pages are mapped.
2725 		 */
2726 		rcu_read_lock();
2727 
2728 		xas_for_each(xas, folio, ULONG_MAX) {
2729 			stop = true;
2730 			if (xas_retry(xas, folio))
2731 				continue;
2732 			if (xa_is_value(folio))
2733 				break;
2734 			if (folio->index != index) {
2735 				xas_reset(xas);
2736 				break;
2737 			}
2738 
2739 			if (!folio_try_get_rcu(folio)) {
2740 				xas_reset(xas);
2741 				continue;
2742 			}
2743 			nr_pages = folio_nr_pages(folio);
2744 			if (nr_pages > max_pages) {
2745 				xas_reset(xas);
2746 				break;
2747 			}
2748 
2749 			/* Has the page moved or been split? */
2750 			if (unlikely(folio != xas_reload(xas))) {
2751 				folio_put(folio);
2752 				xas_reset(xas);
2753 				break;
2754 			}
2755 
2756 			if (!folio_trylock(folio)) {
2757 				folio_put(folio);
2758 				xas_reset(xas);
2759 				break;
2760 			}
2761 			if (!folio_test_dirty(folio) ||
2762 			    folio_test_writeback(folio)) {
2763 				folio_unlock(folio);
2764 				folio_put(folio);
2765 				xas_reset(xas);
2766 				break;
2767 			}
2768 
2769 			max_pages -= nr_pages;
2770 			len = folio_size(folio);
2771 			stop = false;
2772 
2773 			index += nr_pages;
2774 			*_count -= nr_pages;
2775 			*_len += len;
2776 			if (max_pages <= 0 || *_len >= max_len || *_count <= 0)
2777 				stop = true;
2778 
2779 			if (!folio_batch_add(&batch, folio))
2780 				break;
2781 			if (stop)
2782 				break;
2783 		}
2784 
2785 		xas_pause(xas);
2786 		rcu_read_unlock();
2787 
2788 		/* Now, if we obtained any pages, we can shift them to being
2789 		 * writable and mark them for caching.
2790 		 */
2791 		if (!folio_batch_count(&batch))
2792 			break;
2793 
2794 		for (i = 0; i < folio_batch_count(&batch); i++) {
2795 			folio = batch.folios[i];
2796 			/* The folio should be locked, dirty and not undergoing
2797 			 * writeback from the loop above.
2798 			 */
2799 			if (!folio_clear_dirty_for_io(folio))
2800 				WARN_ON(1);
2801 			folio_start_writeback(folio);
2802 			folio_unlock(folio);
2803 		}
2804 
2805 		folio_batch_release(&batch);
2806 		cond_resched();
2807 	} while (!stop);
2808 }
2809 
2810 /*
2811  * Write back the locked page and any subsequent non-locked dirty pages.
2812  */
2813 static ssize_t cifs_write_back_from_locked_folio(struct address_space *mapping,
2814 						 struct writeback_control *wbc,
2815 						 struct xa_state *xas,
2816 						 struct folio *folio,
2817 						 unsigned long long start,
2818 						 unsigned long long end)
2819 {
2820 	struct inode *inode = mapping->host;
2821 	struct TCP_Server_Info *server;
2822 	struct cifs_writedata *wdata;
2823 	struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
2824 	struct cifs_credits credits_on_stack;
2825 	struct cifs_credits *credits = &credits_on_stack;
2826 	struct cifsFileInfo *cfile = NULL;
2827 	unsigned long long i_size = i_size_read(inode), max_len;
2828 	unsigned int xid, wsize;
2829 	size_t len = folio_size(folio);
2830 	long count = wbc->nr_to_write;
2831 	int rc;
2832 
2833 	/* The folio should be locked, dirty and not undergoing writeback. */
2834 	if (!folio_clear_dirty_for_io(folio))
2835 		WARN_ON_ONCE(1);
2836 	folio_start_writeback(folio);
2837 
2838 	count -= folio_nr_pages(folio);
2839 
2840 	xid = get_xid();
2841 	server = cifs_pick_channel(cifs_sb_master_tcon(cifs_sb)->ses);
2842 
2843 	rc = cifs_get_writable_file(CIFS_I(inode), FIND_WR_ANY, &cfile);
2844 	if (rc) {
2845 		cifs_dbg(VFS, "No writable handle in writepages rc=%d\n", rc);
2846 		goto err_xid;
2847 	}
2848 
2849 	rc = server->ops->wait_mtu_credits(server, cifs_sb->ctx->wsize,
2850 					   &wsize, credits);
2851 	if (rc != 0)
2852 		goto err_close;
2853 
2854 	wdata = cifs_writedata_alloc(cifs_writev_complete);
2855 	if (!wdata) {
2856 		rc = -ENOMEM;
2857 		goto err_uncredit;
2858 	}
2859 
2860 	wdata->sync_mode = wbc->sync_mode;
2861 	wdata->offset = folio_pos(folio);
2862 	wdata->pid = cfile->pid;
2863 	wdata->credits = credits_on_stack;
2864 	wdata->cfile = cfile;
2865 	wdata->server = server;
2866 	cfile = NULL;
2867 
2868 	/* Find all consecutive lockable dirty pages that have contiguous
2869 	 * written regions, stopping when we find a page that is not
2870 	 * immediately lockable, is not dirty or is missing, or we reach the
2871 	 * end of the range.
2872 	 */
2873 	if (start < i_size) {
2874 		/* Trim the write to the EOF; the extra data is ignored.  Also
2875 		 * put an upper limit on the size of a single storedata op.
2876 		 */
2877 		max_len = wsize;
2878 		max_len = min_t(unsigned long long, max_len, end - start + 1);
2879 		max_len = min_t(unsigned long long, max_len, i_size - start);
2880 
2881 		if (len < max_len) {
2882 			int max_pages = INT_MAX;
2883 
2884 #ifdef CONFIG_CIFS_SMB_DIRECT
2885 			if (server->smbd_conn)
2886 				max_pages = server->smbd_conn->max_frmr_depth;
2887 #endif
2888 			max_pages -= folio_nr_pages(folio);
2889 
2890 			if (max_pages > 0)
2891 				cifs_extend_writeback(mapping, xas, &count, start,
2892 						      max_pages, max_len, &len);
2893 		}
2894 	}
2895 	len = min_t(unsigned long long, len, i_size - start);
2896 
2897 	/* We now have a contiguous set of dirty pages, each with writeback
2898 	 * set; the first page is still locked at this point, but all the rest
2899 	 * have been unlocked.
2900 	 */
2901 	folio_unlock(folio);
2902 	wdata->bytes = len;
2903 
2904 	if (start < i_size) {
2905 		iov_iter_xarray(&wdata->iter, ITER_SOURCE, &mapping->i_pages,
2906 				start, len);
2907 
2908 		rc = adjust_credits(wdata->server, &wdata->credits, wdata->bytes);
2909 		if (rc)
2910 			goto err_wdata;
2911 
2912 		if (wdata->cfile->invalidHandle)
2913 			rc = -EAGAIN;
2914 		else
2915 			rc = wdata->server->ops->async_writev(wdata,
2916 							      cifs_writedata_release);
2917 		if (rc >= 0) {
2918 			kref_put(&wdata->refcount, cifs_writedata_release);
2919 			goto err_close;
2920 		}
2921 	} else {
2922 		/* The dirty region was entirely beyond the EOF. */
2923 		cifs_pages_written_back(inode, start, len);
2924 		rc = 0;
2925 	}
2926 
2927 err_wdata:
2928 	kref_put(&wdata->refcount, cifs_writedata_release);
2929 err_uncredit:
2930 	add_credits_and_wake_if(server, credits, 0);
2931 err_close:
2932 	if (cfile)
2933 		cifsFileInfo_put(cfile);
2934 err_xid:
2935 	free_xid(xid);
2936 	if (rc == 0) {
2937 		wbc->nr_to_write = count;
2938 		rc = len;
2939 	} else if (is_retryable_error(rc)) {
2940 		cifs_pages_write_redirty(inode, start, len);
2941 	} else {
2942 		cifs_pages_write_failed(inode, start, len);
2943 		mapping_set_error(mapping, rc);
2944 	}
2945 	/* Indication to update ctime and mtime as close is deferred */
2946 	set_bit(CIFS_INO_MODIFIED_ATTR, &CIFS_I(inode)->flags);
2947 	return rc;
2948 }
2949 
2950 /*
2951  * Find the next dirty folio in the range, lock it and start writeback from it
2952  */
2953 static ssize_t cifs_writepages_begin(struct address_space *mapping,
2954 				     struct writeback_control *wbc,
2955 				     struct xa_state *xas,
2956 				     unsigned long long *_start,
2957 				     unsigned long long end)
2958 {
2959 	struct folio *folio;
2960 	unsigned long long start = *_start;
2961 	ssize_t ret;
2962 	int skips = 0;
2963 
2964 search_again:
2965 	/* Find the first dirty page. */
2966 	rcu_read_lock();
2967 
2968 	for (;;) {
2969 		folio = xas_find_marked(xas, end / PAGE_SIZE, PAGECACHE_TAG_DIRTY);
2970 		if (xas_retry(xas, folio) || xa_is_value(folio))
2971 			continue;
2972 		if (!folio)
2973 			break;
2974 
2975 		if (!folio_try_get_rcu(folio)) {
2976 			xas_reset(xas);
2977 			continue;
2978 		}
2979 
2980 		if (unlikely(folio != xas_reload(xas))) {
2981 			folio_put(folio);
2982 			xas_reset(xas);
2983 			continue;
2984 		}
2985 
2986 		xas_pause(xas);
2987 		break;
2988 	}
2989 	rcu_read_unlock();
2990 	if (!folio)
2991 		return 0;
2992 
2993 	start = folio_pos(folio); /* May regress with THPs */
2994 
2995 	/* At this point we hold neither the i_pages lock nor the page lock:
2996 	 * the page may be truncated or invalidated (changing page->mapping to
2997 	 * NULL), or even swizzled back from swapper_space to tmpfs file
2998 	 * mapping
2999 	 */
3000 lock_again:
3001 	if (wbc->sync_mode != WB_SYNC_NONE) {
3002 		ret = folio_lock_killable(folio);
3003 		if (ret < 0)
3004 			return ret;
3005 	} else {
3006 		if (!folio_trylock(folio))
3007 			goto search_again;
3008 	}
3009 
3010 	if (folio->mapping != mapping ||
3011 	    !folio_test_dirty(folio)) {
3012 		start += folio_size(folio);
3013 		folio_unlock(folio);
3014 		goto search_again;
3015 	}
3016 
3017 	if (folio_test_writeback(folio) ||
3018 	    folio_test_fscache(folio)) {
3019 		folio_unlock(folio);
3020 		if (wbc->sync_mode != WB_SYNC_NONE) {
3021 			folio_wait_writeback(folio);
3022 #ifdef CONFIG_CIFS_FSCACHE
3023 			folio_wait_fscache(folio);
3024 #endif
3025 			goto lock_again;
3026 		}
3027 
3028 		start += folio_size(folio);
3029 		if (wbc->sync_mode == WB_SYNC_NONE) {
3030 			if (skips >= 5 || need_resched()) {
3031 				ret = 0;
3032 				goto out;
3033 			}
3034 			skips++;
3035 		}
3036 		goto search_again;
3037 	}
3038 
3039 	ret = cifs_write_back_from_locked_folio(mapping, wbc, xas, folio, start, end);
3040 out:
3041 	if (ret > 0)
3042 		*_start = start + ret;
3043 	return ret;
3044 }
3045 
3046 /*
3047  * Write a region of pages back to the server
3048  */
3049 static int cifs_writepages_region(struct address_space *mapping,
3050 				  struct writeback_control *wbc,
3051 				  unsigned long long *_start,
3052 				  unsigned long long end)
3053 {
3054 	ssize_t ret;
3055 
3056 	XA_STATE(xas, &mapping->i_pages, *_start / PAGE_SIZE);
3057 
3058 	do {
3059 		ret = cifs_writepages_begin(mapping, wbc, &xas, _start, end);
3060 		if (ret > 0 && wbc->nr_to_write > 0)
3061 			cond_resched();
3062 	} while (ret > 0 && wbc->nr_to_write > 0);
3063 
3064 	return ret > 0 ? 0 : ret;
3065 }
3066 
3067 /*
3068  * Write some of the pending data back to the server
3069  */
3070 static int cifs_writepages(struct address_space *mapping,
3071 			   struct writeback_control *wbc)
3072 {
3073 	loff_t start, end;
3074 	int ret;
3075 
3076 	/* We have to be careful as we can end up racing with setattr()
3077 	 * truncating the pagecache since the caller doesn't take a lock here
3078 	 * to prevent it.
3079 	 */
3080 
3081 	if (wbc->range_cyclic && mapping->writeback_index) {
3082 		start = mapping->writeback_index * PAGE_SIZE;
3083 		ret = cifs_writepages_region(mapping, wbc, &start, LLONG_MAX);
3084 		if (ret < 0)
3085 			goto out;
3086 
3087 		if (wbc->nr_to_write <= 0) {
3088 			mapping->writeback_index = start / PAGE_SIZE;
3089 			goto out;
3090 		}
3091 
3092 		start = 0;
3093 		end = mapping->writeback_index * PAGE_SIZE;
3094 		mapping->writeback_index = 0;
3095 		ret = cifs_writepages_region(mapping, wbc, &start, end);
3096 		if (ret == 0)
3097 			mapping->writeback_index = start / PAGE_SIZE;
3098 	} else if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX) {
3099 		start = 0;
3100 		ret = cifs_writepages_region(mapping, wbc, &start, LLONG_MAX);
3101 		if (wbc->nr_to_write > 0 && ret == 0)
3102 			mapping->writeback_index = start / PAGE_SIZE;
3103 	} else {
3104 		start = wbc->range_start;
3105 		ret = cifs_writepages_region(mapping, wbc, &start, wbc->range_end);
3106 	}
3107 
3108 out:
3109 	return ret;
3110 }
3111 
3112 static int
3113 cifs_writepage_locked(struct page *page, struct writeback_control *wbc)
3114 {
3115 	int rc;
3116 	unsigned int xid;
3117 
3118 	xid = get_xid();
3119 /* BB add check for wbc flags */
3120 	get_page(page);
3121 	if (!PageUptodate(page))
3122 		cifs_dbg(FYI, "ppw - page not up to date\n");
3123 
3124 	/*
3125 	 * Set the "writeback" flag, and clear "dirty" in the radix tree.
3126 	 *
3127 	 * A writepage() implementation always needs to do either this,
3128 	 * or re-dirty the page with "redirty_page_for_writepage()" in
3129 	 * the case of a failure.
3130 	 *
3131 	 * Just unlocking the page will cause the radix tree tag-bits
3132 	 * to fail to update with the state of the page correctly.
3133 	 */
3134 	set_page_writeback(page);
3135 retry_write:
3136 	rc = cifs_partialpagewrite(page, 0, PAGE_SIZE);
3137 	if (is_retryable_error(rc)) {
3138 		if (wbc->sync_mode == WB_SYNC_ALL && rc == -EAGAIN)
3139 			goto retry_write;
3140 		redirty_page_for_writepage(wbc, page);
3141 	} else if (rc != 0) {
3142 		SetPageError(page);
3143 		mapping_set_error(page->mapping, rc);
3144 	} else {
3145 		SetPageUptodate(page);
3146 	}
3147 	end_page_writeback(page);
3148 	put_page(page);
3149 	free_xid(xid);
3150 	return rc;
3151 }
3152 
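/*
 * Complete a buffered write: if the folio is not up to date, write the
 * copied data straight through to the server; otherwise just dirty the
 * folio for later writeback.  The inode size is extended if the write
 * went past the old EOF.
 */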
3153 static int cifs_write_end(struct file *file, struct address_space *mapping,
3154 			loff_t pos, unsigned len, unsigned copied,
3155 			struct page *page, void *fsdata)
3156 {
3157 	int rc;
3158 	struct inode *inode = mapping->host;
3159 	struct cifsFileInfo *cfile = file->private_data;
3160 	struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
3161 	struct folio *folio = page_folio(page);
3162 	__u32 pid;
3163 
3164 	if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
3165 		pid = cfile->pid;
3166 	else
3167 		pid = current->tgid;
3168 
3169 	cifs_dbg(FYI, "write_end for page %p from pos %lld with %d bytes\n",
3170 		 page, pos, copied);
3171 
3172 	if (folio_test_checked(folio)) {
3173 		if (copied == len)
3174 			folio_mark_uptodate(folio);
3175 		folio_clear_checked(folio);
3176 	} else if (!folio_test_uptodate(folio) && copied == PAGE_SIZE)
3177 		folio_mark_uptodate(folio);
3178 
3179 	if (!folio_test_uptodate(folio)) {
3180 		char *page_data;
3181 		unsigned offset = pos & (PAGE_SIZE - 1);
3182 		unsigned int xid;
3183 
3184 		xid = get_xid();
3185 		/* this is probably better than directly calling
3186 		   cifs_partialpagewrite() since here the file handle is
3187 		   known, which we might as well leverage */
3188 		/* BB check if anything else is missing from ppw,
3189 		   such as updating the last write time */
3190 		page_data = kmap(page);
3191 		rc = cifs_write(cfile, pid, page_data + offset, copied, &pos);
3192 		/* if (rc < 0) should we set writebehind rc? */
3193 		kunmap(page);
3194 
3195 		free_xid(xid);
3196 	} else {
3197 		rc = copied;
3198 		pos += copied;
3199 		set_page_dirty(page);
3200 	}
3201 
3202 	if (rc > 0) {
3203 		spin_lock(&inode->i_lock);
3204 		if (pos > inode->i_size) {
3205 			i_size_write(inode, pos);
3206 			inode->i_blocks = (512 - 1 + pos) >> 9;
3207 		}
3208 		spin_unlock(&inode->i_lock);
3209 	}
3210 
3211 	unlock_page(page);
3212 	put_page(page);
3213 	/* Indication to update ctime and mtime as close is deferred */
3214 	set_bit(CIFS_INO_MODIFIED_ATTR, &CIFS_I(inode)->flags);
3215 
3216 	return rc;
3217 }
3218 
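/*
 * Strict fsync: write out and wait on dirty pagecache, invalidate the
 * local cache if we no longer hold a read lease/oplock, then ask the
 * server to flush the file unless server flushing is disabled for the
 * mount.
 */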
3219 int cifs_strict_fsync(struct file *file, loff_t start, loff_t end,
3220 		      int datasync)
3221 {
3222 	unsigned int xid;
3223 	int rc = 0;
3224 	struct cifs_tcon *tcon;
3225 	struct TCP_Server_Info *server;
3226 	struct cifsFileInfo *smbfile = file->private_data;
3227 	struct inode *inode = file_inode(file);
3228 	struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
3229 
3230 	rc = file_write_and_wait_range(file, start, end);
3231 	if (rc) {
3232 		trace_cifs_fsync_err(inode->i_ino, rc);
3233 		return rc;
3234 	}
3235 
3236 	xid = get_xid();
3237 
3238 	cifs_dbg(FYI, "Sync file - name: %pD datasync: 0x%x\n",
3239 		 file, datasync);
3240 
3241 	if (!CIFS_CACHE_READ(CIFS_I(inode))) {
3242 		rc = cifs_zap_mapping(inode);
3243 		if (rc) {
3244 			cifs_dbg(FYI, "rc: %d during invalidate phase\n", rc);
3245 			rc = 0; /* don't care about it in fsync */
3246 		}
3247 	}
3248 
3249 	tcon = tlink_tcon(smbfile->tlink);
3250 	if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC)) {
3251 		server = tcon->ses->server;
3252 		if (server->ops->flush == NULL) {
3253 			rc = -ENOSYS;
3254 			goto strict_fsync_exit;
3255 		}
3256 
3257 		if ((OPEN_FMODE(smbfile->f_flags) & FMODE_WRITE) == 0) {
3258 			smbfile = find_writable_file(CIFS_I(inode), FIND_WR_ANY);
3259 			if (smbfile) {
3260 				rc = server->ops->flush(xid, tcon, &smbfile->fid);
3261 				cifsFileInfo_put(smbfile);
3262 			} else
3263 				cifs_dbg(FYI, "ignore fsync for file not open for write\n");
3264 		} else
3265 			rc = server->ops->flush(xid, tcon, &smbfile->fid);
3266 	}
3267 
3268 strict_fsync_exit:
3269 	free_xid(xid);
3270 	return rc;
3271 }
3272 
3273 int cifs_fsync(struct file *file, loff_t start, loff_t end, int datasync)
3274 {
3275 	unsigned int xid;
3276 	int rc = 0;
3277 	struct cifs_tcon *tcon;
3278 	struct TCP_Server_Info *server;
3279 	struct cifsFileInfo *smbfile = file->private_data;
3280 	struct inode *inode = file_inode(file);
3281 	struct cifs_sb_info *cifs_sb = CIFS_FILE_SB(file);
3282 
3283 	rc = file_write_and_wait_range(file, start, end);
3284 	if (rc) {
3285 		trace_cifs_fsync_err(file_inode(file)->i_ino, rc);
3286 		return rc;
3287 	}
3288 
3289 	xid = get_xid();
3290 
3291 	cifs_dbg(FYI, "Sync file - name: %pD datasync: 0x%x\n",
3292 		 file, datasync);
3293 
3294 	tcon = tlink_tcon(smbfile->tlink);
3295 	if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC)) {
3296 		server = tcon->ses->server;
3297 		if (server->ops->flush == NULL) {
3298 			rc = -ENOSYS;
3299 			goto fsync_exit;
3300 		}
3301 
3302 		if ((OPEN_FMODE(smbfile->f_flags) & FMODE_WRITE) == 0) {
3303 			smbfile = find_writable_file(CIFS_I(inode), FIND_WR_ANY);
3304 			if (smbfile) {
3305 				rc = server->ops->flush(xid, tcon, &smbfile->fid);
3306 				cifsFileInfo_put(smbfile);
3307 			} else
3308 				cifs_dbg(FYI, "ignore fsync for file not open for write\n");
3309 		} else
3310 			rc = server->ops->flush(xid, tcon, &smbfile->fid);
3311 	}
3312 
3313 fsync_exit:
3314 	free_xid(xid);
3315 	return rc;
3316 }
3317 
3318 /*
3319  * As the file closes, flush all cached write data for this inode, checking
3320  * for write-behind errors.
3321  */
3322 int cifs_flush(struct file *file, fl_owner_t id)
3323 {
3324 	struct inode *inode = file_inode(file);
3325 	int rc = 0;
3326 
3327 	if (file->f_mode & FMODE_WRITE)
3328 		rc = filemap_write_and_wait(inode->i_mapping);
3329 
3330 	cifs_dbg(FYI, "Flush inode %p file %p rc %d\n", inode, file, rc);
3331 	if (rc) {
3332 		/* get more nuanced writeback errors */
3333 		rc = filemap_check_wb_err(file->f_mapping, 0);
3334 		trace_cifs_flush_err(inode->i_ino, rc);
3335 	}
3336 	return rc;
3337 }
3338 
3339 static void
3340 cifs_uncached_writedata_release(struct kref *refcount)
3341 {
3342 	struct cifs_writedata *wdata = container_of(refcount,
3343 					struct cifs_writedata, refcount);
3344 
3345 	kref_put(&wdata->ctx->refcount, cifs_aio_ctx_release);
3346 	cifs_writedata_release(refcount);
3347 }
3348 
3349 static void collect_uncached_write_data(struct cifs_aio_ctx *ctx);
3350 
3351 static void
3352 cifs_uncached_writev_complete(struct work_struct *work)
3353 {
3354 	struct cifs_writedata *wdata = container_of(work,
3355 					struct cifs_writedata, work);
3356 	struct inode *inode = d_inode(wdata->cfile->dentry);
3357 	struct cifsInodeInfo *cifsi = CIFS_I(inode);
3358 
3359 	spin_lock(&inode->i_lock);
3360 	cifs_update_eof(cifsi, wdata->offset, wdata->bytes);
3361 	if (cifsi->server_eof > inode->i_size)
3362 		i_size_write(inode, cifsi->server_eof);
3363 	spin_unlock(&inode->i_lock);
3364 
3365 	complete(&wdata->done);
3366 	collect_uncached_write_data(wdata->ctx);
3367 	/* the below call can possibly free the last ref to aio ctx */
3368 	kref_put(&wdata->refcount, cifs_uncached_writedata_release);
3369 }
3370 
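/*
 * Resend a failed uncached write as a single request: wait until the
 * server grants enough credits for the whole wdata, then reissue the
 * asynchronous write, retrying on -EAGAIN.
 */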
3371 static int
3372 cifs_resend_wdata(struct cifs_writedata *wdata, struct list_head *wdata_list,
3373 	struct cifs_aio_ctx *ctx)
3374 {
3375 	unsigned int wsize;
3376 	struct cifs_credits credits;
3377 	int rc;
3378 	struct TCP_Server_Info *server = wdata->server;
3379 
3380 	do {
3381 		if (wdata->cfile->invalidHandle) {
3382 			rc = cifs_reopen_file(wdata->cfile, false);
3383 			if (rc == -EAGAIN)
3384 				continue;
3385 			else if (rc)
3386 				break;
3387 		}
3388 
3390 		/*
3391 		 * Wait for credits to resend this wdata.
3392 		 * Note: we attempt to resend the whole wdata rather than in
3393 		 * segments.
3394 		 */
3395 		do {
3396 			rc = server->ops->wait_mtu_credits(server, wdata->bytes,
3397 						&wsize, &credits);
3398 			if (rc)
3399 				goto fail;
3400 
3401 			if (wsize < wdata->bytes) {
3402 				add_credits_and_wake_if(server, &credits, 0);
3403 				msleep(1000);
3404 			}
3405 		} while (wsize < wdata->bytes);
3406 		wdata->credits = credits;
3407 
3408 		rc = adjust_credits(server, &wdata->credits, wdata->bytes);
3409 
3410 		if (!rc) {
3411 			if (wdata->cfile->invalidHandle)
3412 				rc = -EAGAIN;
3413 			else {
3414 #ifdef CONFIG_CIFS_SMB_DIRECT
3415 				if (wdata->mr) {
3416 					wdata->mr->need_invalidate = true;
3417 					smbd_deregister_mr(wdata->mr);
3418 					wdata->mr = NULL;
3419 				}
3420 #endif
3421 				rc = server->ops->async_writev(wdata,
3422 					cifs_uncached_writedata_release);
3423 			}
3424 		}
3425 
3426 		/* If the write was successfully sent, we are done */
3427 		if (!rc) {
3428 			list_add_tail(&wdata->list, wdata_list);
3429 			return 0;
3430 		}
3431 
3432 		/* Roll back credits and retry if needed */
3433 		add_credits_and_wake_if(server, &wdata->credits, 0);
3434 	} while (rc == -EAGAIN);
3435 
3436 fail:
3437 	kref_put(&wdata->refcount, cifs_uncached_writedata_release);
3438 	return rc;
3439 }
3440 
3441 /*
3442  * Select span of a bvec iterator we're going to use.  Limit it by both maximum
3443  * size and maximum number of segments.
3444  */
3445 static size_t cifs_limit_bvec_subset(const struct iov_iter *iter, size_t max_size,
3446 				     size_t max_segs, unsigned int *_nsegs)
3447 {
3448 	const struct bio_vec *bvecs = iter->bvec;
3449 	unsigned int nbv = iter->nr_segs, ix = 0, nsegs = 0;
3450 	size_t len, span = 0, n = iter->count;
3451 	size_t skip = iter->iov_offset;
3452 
3453 	if (WARN_ON(!iov_iter_is_bvec(iter)) || n == 0)
3454 		return 0;
3455 
3456 	while (n && ix < nbv && skip) {
3457 		len = bvecs[ix].bv_len;
3458 		if (skip < len)
3459 			break;
3460 		skip -= len;
3461 		n -= len;
3462 		ix++;
3463 	}
3464 
3465 	while (n && ix < nbv) {
3466 		len = min3(n, bvecs[ix].bv_len - skip, max_size);
3467 		span += len;
3468 		max_size -= len;
3469 		nsegs++;
3470 		ix++;
3471 		if (max_size == 0 || nsegs >= max_segs)
3472 			break;
3473 		skip = 0;
3474 		n -= len;
3475 	}
3476 
3477 	*_nsegs = nsegs;
3478 	return span;
3479 }
3480 
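/*
 * Carve the source iterator into chunks bounded by the negotiated wsize
 * (and, on SMB Direct, the maximum number of segments) and issue an
 * asynchronous write for each, queuing the resulting wdata structures on
 * wdata_list for later collection.
 */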
3481 static int
3482 cifs_write_from_iter(loff_t fpos, size_t len, struct iov_iter *from,
3483 		     struct cifsFileInfo *open_file,
3484 		     struct cifs_sb_info *cifs_sb, struct list_head *wdata_list,
3485 		     struct cifs_aio_ctx *ctx)
3486 {
3487 	int rc = 0;
3488 	size_t cur_len, max_len;
3489 	struct cifs_writedata *wdata;
3490 	pid_t pid;
3491 	struct TCP_Server_Info *server;
3492 	unsigned int xid, max_segs = INT_MAX;
3493 
3494 	if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
3495 		pid = open_file->pid;
3496 	else
3497 		pid = current->tgid;
3498 
3499 	server = cifs_pick_channel(tlink_tcon(open_file->tlink)->ses);
3500 	xid = get_xid();
3501 
3502 #ifdef CONFIG_CIFS_SMB_DIRECT
3503 	if (server->smbd_conn)
3504 		max_segs = server->smbd_conn->max_frmr_depth;
3505 #endif
3506 
3507 	do {
3508 		struct cifs_credits credits_on_stack;
3509 		struct cifs_credits *credits = &credits_on_stack;
3510 		unsigned int wsize, nsegs = 0;
3511 
3512 		if (signal_pending(current)) {
3513 			rc = -EINTR;
3514 			break;
3515 		}
3516 
3517 		if (open_file->invalidHandle) {
3518 			rc = cifs_reopen_file(open_file, false);
3519 			if (rc == -EAGAIN)
3520 				continue;
3521 			else if (rc)
3522 				break;
3523 		}
3524 
3525 		rc = server->ops->wait_mtu_credits(server, cifs_sb->ctx->wsize,
3526 						   &wsize, credits);
3527 		if (rc)
3528 			break;
3529 
3530 		max_len = min_t(const size_t, len, wsize);
3531 		if (!max_len) {
3532 			rc = -EAGAIN;
3533 			add_credits_and_wake_if(server, credits, 0);
3534 			break;
3535 		}
3536 
3537 		cur_len = cifs_limit_bvec_subset(from, max_len, max_segs, &nsegs);
3538 		cifs_dbg(FYI, "write_from_iter len=%zx/%zx nsegs=%u/%lu/%u\n",
3539 			 cur_len, max_len, nsegs, from->nr_segs, max_segs);
3540 		if (cur_len == 0) {
3541 			rc = -EIO;
3542 			add_credits_and_wake_if(server, credits, 0);
3543 			break;
3544 		}
3545 
3546 		wdata = cifs_writedata_alloc(cifs_uncached_writev_complete);
3547 		if (!wdata) {
3548 			rc = -ENOMEM;
3549 			add_credits_and_wake_if(server, credits, 0);
3550 			break;
3551 		}
3552 
3553 		wdata->sync_mode = WB_SYNC_ALL;
3554 		wdata->offset	= (__u64)fpos;
3555 		wdata->cfile	= cifsFileInfo_get(open_file);
3556 		wdata->server	= server;
3557 		wdata->pid	= pid;
3558 		wdata->bytes	= cur_len;
3559 		wdata->credits	= credits_on_stack;
3560 		wdata->iter	= *from;
3561 		wdata->ctx	= ctx;
3562 		kref_get(&ctx->refcount);
3563 
3564 		iov_iter_truncate(&wdata->iter, cur_len);
3565 
3566 		rc = adjust_credits(server, &wdata->credits, wdata->bytes);
3567 
3568 		if (!rc) {
3569 			if (wdata->cfile->invalidHandle)
3570 				rc = -EAGAIN;
3571 			else
3572 				rc = server->ops->async_writev(wdata,
3573 					cifs_uncached_writedata_release);
3574 		}
3575 
3576 		if (rc) {
3577 			add_credits_and_wake_if(server, &wdata->credits, 0);
3578 			kref_put(&wdata->refcount,
3579 				 cifs_uncached_writedata_release);
3580 			if (rc == -EAGAIN)
3581 				continue;
3582 			break;
3583 		}
3584 
3585 		list_add_tail(&wdata->list, wdata_list);
3586 		iov_iter_advance(from, cur_len);
3587 		fpos += cur_len;
3588 		len -= cur_len;
3589 	} while (len > 0);
3590 
3591 	free_xid(xid);
3592 	return rc;
3593 }
3594 
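/*
 * Collect the results of all outstanding uncached writes for this aio
 * context, resending any that failed with a retryable error, then
 * complete the originating iocb with the total bytes written or the
 * first error encountered.
 */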
3595 static void collect_uncached_write_data(struct cifs_aio_ctx *ctx)
3596 {
3597 	struct cifs_writedata *wdata, *tmp;
3598 	struct cifs_tcon *tcon;
3599 	struct cifs_sb_info *cifs_sb;
3600 	struct dentry *dentry = ctx->cfile->dentry;
3601 	ssize_t rc;
3602 
3603 	tcon = tlink_tcon(ctx->cfile->tlink);
3604 	cifs_sb = CIFS_SB(dentry->d_sb);
3605 
3606 	mutex_lock(&ctx->aio_mutex);
3607 
3608 	if (list_empty(&ctx->list)) {
3609 		mutex_unlock(&ctx->aio_mutex);
3610 		return;
3611 	}
3612 
3613 	rc = ctx->rc;
3614 	/*
3615 	 * Wait for and collect replies for any successful sends in order of
3616 	 * increasing offset. Once an error is hit, then return without waiting
3617 	 * for any more replies.
3618 	 */
3619 restart_loop:
3620 	list_for_each_entry_safe(wdata, tmp, &ctx->list, list) {
3621 		if (!rc) {
3622 			if (!try_wait_for_completion(&wdata->done)) {
3623 				mutex_unlock(&ctx->aio_mutex);
3624 				return;
3625 			}
3626 
3627 			if (wdata->result)
3628 				rc = wdata->result;
3629 			else
3630 				ctx->total_len += wdata->bytes;
3631 
3632 			/* resend call if it's a retryable error */
3633 			if (rc == -EAGAIN) {
3634 				struct list_head tmp_list;
3635 				struct iov_iter tmp_from = ctx->iter;
3636 
3637 				INIT_LIST_HEAD(&tmp_list);
3638 				list_del_init(&wdata->list);
3639 
3640 				if (ctx->direct_io)
3641 					rc = cifs_resend_wdata(
3642 						wdata, &tmp_list, ctx);
3643 				else {
3644 					iov_iter_advance(&tmp_from,
3645 						 wdata->offset - ctx->pos);
3646 
3647 					rc = cifs_write_from_iter(wdata->offset,
3648 						wdata->bytes, &tmp_from,
3649 						ctx->cfile, cifs_sb, &tmp_list,
3650 						ctx);
3651 
3652 					kref_put(&wdata->refcount,
3653 						cifs_uncached_writedata_release);
3654 				}
3655 
3656 				list_splice(&tmp_list, &ctx->list);
3657 				goto restart_loop;
3658 			}
3659 		}
3660 		list_del_init(&wdata->list);
3661 		kref_put(&wdata->refcount, cifs_uncached_writedata_release);
3662 	}
3663 
3664 	cifs_stats_bytes_written(tcon, ctx->total_len);
3665 	set_bit(CIFS_INO_INVALID_MAPPING, &CIFS_I(dentry->d_inode)->flags);
3666 
3667 	ctx->rc = (rc == 0) ? ctx->total_len : rc;
3668 
3669 	mutex_unlock(&ctx->aio_mutex);
3670 
3671 	if (ctx->iocb && ctx->iocb->ki_complete)
3672 		ctx->iocb->ki_complete(ctx->iocb, ctx->rc);
3673 	else
3674 		complete(&ctx->done);
3675 }
3676 
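/*
 * Common helper for uncached and direct writes: validate the request, set up
 * a cifs_aio_ctx describing the source data, issue the asynchronous writes
 * and, for a synchronous kiocb, wait for them to be collected.
 */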
3677 static ssize_t __cifs_writev(
3678 	struct kiocb *iocb, struct iov_iter *from, bool direct)
3679 {
3680 	struct file *file = iocb->ki_filp;
3681 	ssize_t total_written = 0;
3682 	struct cifsFileInfo *cfile;
3683 	struct cifs_tcon *tcon;
3684 	struct cifs_sb_info *cifs_sb;
3685 	struct cifs_aio_ctx *ctx;
3686 	int rc;
3687 
3688 	rc = generic_write_checks(iocb, from);
3689 	if (rc <= 0)
3690 		return rc;
3691 
3692 	cifs_sb = CIFS_FILE_SB(file);
3693 	cfile = file->private_data;
3694 	tcon = tlink_tcon(cfile->tlink);
3695 
3696 	if (!tcon->ses->server->ops->async_writev)
3697 		return -ENOSYS;
3698 
3699 	ctx = cifs_aio_ctx_alloc();
3700 	if (!ctx)
3701 		return -ENOMEM;
3702 
3703 	ctx->cfile = cifsFileInfo_get(cfile);
3704 
3705 	if (!is_sync_kiocb(iocb))
3706 		ctx->iocb = iocb;
3707 
3708 	ctx->pos = iocb->ki_pos;
3709 	ctx->direct_io = direct;
3710 	ctx->nr_pinned_pages = 0;
3711 
3712 	if (user_backed_iter(from)) {
3713 		/*
3714 		 * Extract IOVEC/UBUF-type iterators to a BVEC-type iterator as
3715 		 * they contain references to the calling process's virtual
3716 		 * memory layout which won't be available in an async worker
3717 		 * thread.  This also takes a pin on every folio involved.
3718 		 */
3719 		rc = netfs_extract_user_iter(from, iov_iter_count(from),
3720 					     &ctx->iter, 0);
3721 		if (rc < 0) {
3722 			kref_put(&ctx->refcount, cifs_aio_ctx_release);
3723 			return rc;
3724 		}
3725 
3726 		ctx->nr_pinned_pages = rc;
3727 		ctx->bv = (void *)ctx->iter.bvec;
3728 		ctx->bv_need_unpin = iov_iter_extract_will_pin(from);
3729 	} else if ((iov_iter_is_bvec(from) || iov_iter_is_kvec(from)) &&
3730 		   !is_sync_kiocb(iocb)) {
3731 		/*
3732 		 * If the op is asynchronous, we need to copy the list attached
3733 		 * to a BVEC/KVEC-type iterator, but we assume that the storage
3734 		 * will be pinned by the caller; in any case, we may or may not
3735 		 * be able to pin the pages, so we don't try.
3736 		 */
3737 		ctx->bv = (void *)dup_iter(&ctx->iter, from, GFP_KERNEL);
3738 		if (!ctx->bv) {
3739 			kref_put(&ctx->refcount, cifs_aio_ctx_release);
3740 			return -ENOMEM;
3741 		}
3742 	} else {
3743 		/*
3744 		 * Otherwise, we just pass the iterator down as-is and rely on
3745 		 * the caller to make sure the pages referred to by the
3746 		 * iterator don't evaporate.
3747 		 */
3748 		ctx->iter = *from;
3749 	}
3750 
3751 	ctx->len = iov_iter_count(&ctx->iter);
3752 
3753 	/* grab a lock here because the write response handlers can access ctx */
3754 	mutex_lock(&ctx->aio_mutex);
3755 
3756 	rc = cifs_write_from_iter(iocb->ki_pos, ctx->len, &ctx->iter,
3757 				  cfile, cifs_sb, &ctx->list, ctx);
3758 
3759 	/*
3760 	 * If at least one write was successfully sent, then discard any rc
3761 	 * value from the later writes. If the later writes succeed, then
3762 	 * we'll end up returning whatever was written. If they fail, then
3763 	 * we'll get a new rc value from them.
3764 	 */
3765 	if (!list_empty(&ctx->list))
3766 		rc = 0;
3767 
3768 	mutex_unlock(&ctx->aio_mutex);
3769 
3770 	if (rc) {
3771 		kref_put(&ctx->refcount, cifs_aio_ctx_release);
3772 		return rc;
3773 	}
3774 
3775 	if (!is_sync_kiocb(iocb)) {
3776 		kref_put(&ctx->refcount, cifs_aio_ctx_release);
3777 		return -EIOCBQUEUED;
3778 	}
3779 
3780 	rc = wait_for_completion_killable(&ctx->done);
3781 	if (rc) {
3782 		mutex_lock(&ctx->aio_mutex);
3783 		ctx->rc = rc = -EINTR;
3784 		total_written = ctx->total_len;
3785 		mutex_unlock(&ctx->aio_mutex);
3786 	} else {
3787 		rc = ctx->rc;
3788 		total_written = ctx->total_len;
3789 	}
3790 
3791 	kref_put(&ctx->refcount, cifs_aio_ctx_release);
3792 
3793 	if (unlikely(!total_written))
3794 		return rc;
3795 
3796 	iocb->ki_pos += total_written;
3797 	return total_written;
3798 }
3799 
3800 ssize_t cifs_direct_writev(struct kiocb *iocb, struct iov_iter *from)
3801 {
3802 	struct file *file = iocb->ki_filp;
3803 
3804 	cifs_revalidate_mapping(file->f_inode);
3805 	return __cifs_writev(iocb, from, true);
3806 }
3807 
3808 ssize_t cifs_user_writev(struct kiocb *iocb, struct iov_iter *from)
3809 {
3810 	return __cifs_writev(iocb, from, false);
3811 }
3812 
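/*
 * Write through the page cache while holding the inode lock and checking for
 * conflicting mandatory byte-range locks.  Used when the client is allowed to
 * cache writes, i.e. when it holds a suitable oplock or lease.
 */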
3813 static ssize_t
3814 cifs_writev(struct kiocb *iocb, struct iov_iter *from)
3815 {
3816 	struct file *file = iocb->ki_filp;
3817 	struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
3818 	struct inode *inode = file->f_mapping->host;
3819 	struct cifsInodeInfo *cinode = CIFS_I(inode);
3820 	struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;
3821 	ssize_t rc;
3822 
3823 	inode_lock(inode);
3824 	/*
3825 	 * We need to hold the sem to be sure nobody modifies lock list
3826 	 * with a brlock that prevents writing.
3827 	 */
3828 	down_read(&cinode->lock_sem);
3829 
3830 	rc = generic_write_checks(iocb, from);
3831 	if (rc <= 0)
3832 		goto out;
3833 
3834 	if (!cifs_find_lock_conflict(cfile, iocb->ki_pos, iov_iter_count(from),
3835 				     server->vals->exclusive_lock_type, 0,
3836 				     NULL, CIFS_WRITE_OP))
3837 		rc = __generic_file_write_iter(iocb, from);
3838 	else
3839 		rc = -EACCES;
3840 out:
3841 	up_read(&cinode->lock_sem);
3842 	inode_unlock(inode);
3843 
3844 	if (rc > 0)
3845 		rc = generic_write_sync(iocb, rc);
3846 	return rc;
3847 }
3848 
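/*
 * Entry point for writes in strict cache mode: write through the page cache
 * when write caching is permitted, otherwise send the data straight to the
 * server and, if reads were being cached, zap the now-stale pagecache.
 */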
3849 ssize_t
3850 cifs_strict_writev(struct kiocb *iocb, struct iov_iter *from)
3851 {
3852 	struct inode *inode = file_inode(iocb->ki_filp);
3853 	struct cifsInodeInfo *cinode = CIFS_I(inode);
3854 	struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
3855 	struct cifsFileInfo *cfile = (struct cifsFileInfo *)
3856 						iocb->ki_filp->private_data;
3857 	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
3858 	ssize_t written;
3859 
3860 	written = cifs_get_writer(cinode);
3861 	if (written)
3862 		return written;
3863 
3864 	if (CIFS_CACHE_WRITE(cinode)) {
3865 		if (cap_unix(tcon->ses) &&
3866 		(CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability))
3867 		  && ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0)) {
3868 			written = generic_file_write_iter(iocb, from);
3869 			goto out;
3870 		}
3871 		written = cifs_writev(iocb, from);
3872 		goto out;
3873 	}
3874 	/*
3875 	 * For non-oplocked files in strict cache mode we need to write the data
3876 	 * to the server exactly from the pos to pos+len-1 rather than flush all
3877 	 * affected pages because it may cause an error with mandatory locks on
3878 	 * these pages but not on the region from pos to pos+len-1.
3879 	 */
3880 	written = cifs_user_writev(iocb, from);
3881 	if (CIFS_CACHE_READ(cinode)) {
3882 		/*
3883 		 * We have read level caching and we have just sent a write
3884 		 * request to the server thus making data in the cache stale.
3885 		 * Zap the cache and set oplock/lease level to NONE to avoid
3886 		 * reading stale data from the cache. All subsequent read
3887 		 * operations will read new data from the server.
3888 		 */
3889 		cifs_zap_mapping(inode);
3890 		cifs_dbg(FYI, "Set Oplock/Lease to NONE for inode=%p after write\n",
3891 			 inode);
3892 		cinode->oplock = 0;
3893 	}
3894 out:
3895 	cifs_put_writer(cinode);
3896 	return written;
3897 }
3898 
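/*
 * Allocate a read descriptor and initialise its refcount, list, completion
 * and work item.
 */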
3899 static struct cifs_readdata *cifs_readdata_alloc(work_func_t complete)
3900 {
3901 	struct cifs_readdata *rdata;
3902 
3903 	rdata = kzalloc(sizeof(*rdata), GFP_KERNEL);
3904 	if (rdata) {
3905 		kref_init(&rdata->refcount);
3906 		INIT_LIST_HEAD(&rdata->list);
3907 		init_completion(&rdata->done);
3908 		INIT_WORK(&rdata->work, complete);
3909 	}
3910 
3911 	return rdata;
3912 }
3913 
3914 void
3915 cifs_readdata_release(struct kref *refcount)
3916 {
3917 	struct cifs_readdata *rdata = container_of(refcount,
3918 					struct cifs_readdata, refcount);
3919 
3920 	if (rdata->ctx)
3921 		kref_put(&rdata->ctx->refcount, cifs_aio_ctx_release);
3922 #ifdef CONFIG_CIFS_SMB_DIRECT
3923 	if (rdata->mr) {
3924 		smbd_deregister_mr(rdata->mr);
3925 		rdata->mr = NULL;
3926 	}
3927 #endif
3928 	if (rdata->cfile)
3929 		cifsFileInfo_put(rdata->cfile);
3930 
3931 	kfree(rdata);
3932 }
3933 
3934 static void collect_uncached_read_data(struct cifs_aio_ctx *ctx);
3935 
3936 static void
3937 cifs_uncached_readv_complete(struct work_struct *work)
3938 {
3939 	struct cifs_readdata *rdata = container_of(work,
3940 						struct cifs_readdata, work);
3941 
3942 	complete(&rdata->done);
3943 	collect_uncached_read_data(rdata->ctx);
3944 	/* the below call can possibly free the last ref to aio ctx */
3945 	kref_put(&rdata->refcount, cifs_readdata_release);
3946 }
3947 
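/*
 * Resend a read request that previously failed.  Reopen an invalidated file
 * handle if necessary, wait until enough credits are available to cover the
 * whole request and then reissue the asynchronous read.
 */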
3948 static int cifs_resend_rdata(struct cifs_readdata *rdata,
3949 			struct list_head *rdata_list,
3950 			struct cifs_aio_ctx *ctx)
3951 {
3952 	unsigned int rsize;
3953 	struct cifs_credits credits;
3954 	int rc;
3955 	struct TCP_Server_Info *server;
3956 
3957 	/* XXX: should we pick a new channel here? */
3958 	server = rdata->server;
3959 
3960 	do {
3961 		if (rdata->cfile->invalidHandle) {
3962 			rc = cifs_reopen_file(rdata->cfile, true);
3963 			if (rc == -EAGAIN)
3964 				continue;
3965 			else if (rc)
3966 				break;
3967 		}
3968 
3969 		/*
3970 		 * Wait for credits to resend this rdata.
3971 		 * Note: we are attempting to resend the whole rdata not in
3972 		 * Note: we are attempting to resend the whole rdata rather
3973 		 * than in segments.
3974 		do {
3975 			rc = server->ops->wait_mtu_credits(server, rdata->bytes,
3976 						&rsize, &credits);
3977 
3978 			if (rc)
3979 				goto fail;
3980 
3981 			if (rsize < rdata->bytes) {
3982 				add_credits_and_wake_if(server, &credits, 0);
3983 				msleep(1000);
3984 			}
3985 		} while (rsize < rdata->bytes);
3986 		rdata->credits = credits;
3987 
3988 		rc = adjust_credits(server, &rdata->credits, rdata->bytes);
3989 		if (!rc) {
3990 			if (rdata->cfile->invalidHandle)
3991 				rc = -EAGAIN;
3992 			else {
3993 #ifdef CONFIG_CIFS_SMB_DIRECT
3994 				if (rdata->mr) {
3995 					rdata->mr->need_invalidate = true;
3996 					smbd_deregister_mr(rdata->mr);
3997 					rdata->mr = NULL;
3998 				}
3999 #endif
4000 				rc = server->ops->async_readv(rdata);
4001 			}
4002 		}
4003 
4004 		/* If the read was successfully sent, we are done */
4005 		if (!rc) {
4006 			/* Add to aio pending list */
4007 			list_add_tail(&rdata->list, rdata_list);
4008 			return 0;
4009 		}
4010 
4011 		/* Roll back credits and retry if needed */
4012 		add_credits_and_wake_if(server, &rdata->credits, 0);
4013 	} while (rc == -EAGAIN);
4014 
4015 fail:
4016 	kref_put(&rdata->refcount, cifs_readdata_release);
4017 	return rc;
4018 }
4019 
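/*
 * Split an uncached read into rsize-limited asynchronous read requests and
 * queue them on @rdata_list.  The destination iterator lives in @ctx and is
 * advanced as each request is issued.
 */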
4020 static int
4021 cifs_send_async_read(loff_t fpos, size_t len, struct cifsFileInfo *open_file,
4022 		     struct cifs_sb_info *cifs_sb, struct list_head *rdata_list,
4023 		     struct cifs_aio_ctx *ctx)
4024 {
4025 	struct cifs_readdata *rdata;
4026 	unsigned int rsize, nsegs, max_segs = INT_MAX;
4027 	struct cifs_credits credits_on_stack;
4028 	struct cifs_credits *credits = &credits_on_stack;
4029 	size_t cur_len, max_len;
4030 	int rc;
4031 	pid_t pid;
4032 	struct TCP_Server_Info *server;
4033 
4034 	server = cifs_pick_channel(tlink_tcon(open_file->tlink)->ses);
4035 
4036 #ifdef CONFIG_CIFS_SMB_DIRECT
4037 	if (server->smbd_conn)
4038 		max_segs = server->smbd_conn->max_frmr_depth;
4039 #endif
4040 
4041 	if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
4042 		pid = open_file->pid;
4043 	else
4044 		pid = current->tgid;
4045 
4046 	do {
4047 		if (open_file->invalidHandle) {
4048 			rc = cifs_reopen_file(open_file, true);
4049 			if (rc == -EAGAIN)
4050 				continue;
4051 			else if (rc)
4052 				break;
4053 		}
4054 
4055 		if (cifs_sb->ctx->rsize == 0)
4056 			cifs_sb->ctx->rsize =
4057 				server->ops->negotiate_rsize(tlink_tcon(open_file->tlink),
4058 							     cifs_sb->ctx);
4059 
4060 		rc = server->ops->wait_mtu_credits(server, cifs_sb->ctx->rsize,
4061 						   &rsize, credits);
4062 		if (rc)
4063 			break;
4064 
4065 		max_len = min_t(size_t, len, rsize);
4066 
4067 		cur_len = cifs_limit_bvec_subset(&ctx->iter, max_len,
4068 						 max_segs, &nsegs);
4069 		cifs_dbg(FYI, "read-to-iter len=%zx/%zx nsegs=%u/%lu/%u\n",
4070 			 cur_len, max_len, nsegs, ctx->iter.nr_segs, max_segs);
4071 		if (cur_len == 0) {
4072 			rc = -EIO;
4073 			add_credits_and_wake_if(server, credits, 0);
4074 			break;
4075 		}
4076 
4077 		rdata = cifs_readdata_alloc(cifs_uncached_readv_complete);
4078 		if (!rdata) {
4079 			add_credits_and_wake_if(server, credits, 0);
4080 			rc = -ENOMEM;
4081 			break;
4082 		}
4083 
4084 		rdata->server	= server;
4085 		rdata->cfile	= cifsFileInfo_get(open_file);
4086 		rdata->offset	= fpos;
4087 		rdata->bytes	= cur_len;
4088 		rdata->pid	= pid;
4089 		rdata->credits	= credits_on_stack;
4090 		rdata->ctx	= ctx;
4091 		kref_get(&ctx->refcount);
4092 
4093 		rdata->iter	= ctx->iter;
4094 		iov_iter_truncate(&rdata->iter, cur_len);
4095 
4096 		rc = adjust_credits(server, &rdata->credits, rdata->bytes);
4097 
4098 		if (!rc) {
4099 			if (rdata->cfile->invalidHandle)
4100 				rc = -EAGAIN;
4101 			else
4102 				rc = server->ops->async_readv(rdata);
4103 		}
4104 
4105 		if (rc) {
4106 			add_credits_and_wake_if(server, &rdata->credits, 0);
4107 			kref_put(&rdata->refcount, cifs_readdata_release);
4108 			if (rc == -EAGAIN)
4109 				continue;
4110 			break;
4111 		}
4112 
4113 		list_add_tail(&rdata->list, rdata_list);
4114 		iov_iter_advance(&ctx->iter, cur_len);
4115 		fpos += cur_len;
4116 		len -= cur_len;
4117 	} while (len > 0);
4118 
4119 	return rc;
4120 }
4121 
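/*
 * Collect the results of the uncached read requests queued on ctx->list in
 * order of increasing offset, resending any request that failed with -EAGAIN,
 * and then signal completion through the iocb or ctx->done.
 */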
4122 static void
4123 collect_uncached_read_data(struct cifs_aio_ctx *ctx)
4124 {
4125 	struct cifs_readdata *rdata, *tmp;
4126 	struct cifs_sb_info *cifs_sb;
4127 	int rc;
4128 
4129 	cifs_sb = CIFS_SB(ctx->cfile->dentry->d_sb);
4130 
4131 	mutex_lock(&ctx->aio_mutex);
4132 
4133 	if (list_empty(&ctx->list)) {
4134 		mutex_unlock(&ctx->aio_mutex);
4135 		return;
4136 	}
4137 
4138 	rc = ctx->rc;
4139 	/* the loop below should proceed in the order of increasing offsets */
4140 again:
4141 	list_for_each_entry_safe(rdata, tmp, &ctx->list, list) {
4142 		if (!rc) {
4143 			if (!try_wait_for_completion(&rdata->done)) {
4144 				mutex_unlock(&ctx->aio_mutex);
4145 				return;
4146 			}
4147 
4148 			if (rdata->result == -EAGAIN) {
4149 				/* resend call if it's a retryable error */
4150 				struct list_head tmp_list;
4151 				unsigned int got_bytes = rdata->got_bytes;
4152 
4153 				list_del_init(&rdata->list);
4154 				INIT_LIST_HEAD(&tmp_list);
4155 
4156 				if (ctx->direct_io) {
4157 					/*
4158 					 * Re-use rdata as this is a
4159 					 * direct I/O
4160 					 */
4161 					rc = cifs_resend_rdata(
4162 						rdata,
4163 						&tmp_list, ctx);
4164 				} else {
4165 					rc = cifs_send_async_read(
4166 						rdata->offset + got_bytes,
4167 						rdata->bytes - got_bytes,
4168 						rdata->cfile, cifs_sb,
4169 						&tmp_list, ctx);
4170 
4171 					kref_put(&rdata->refcount,
4172 						cifs_readdata_release);
4173 				}
4174 
4175 				list_splice(&tmp_list, &ctx->list);
4176 
4177 				goto again;
4178 			} else if (rdata->result)
4179 				rc = rdata->result;
4180 
4181 			/* if there was a short read -- discard anything left */
4182 			if (rdata->got_bytes && rdata->got_bytes < rdata->bytes)
4183 				rc = -ENODATA;
4184 
4185 			ctx->total_len += rdata->got_bytes;
4186 		}
4187 		list_del_init(&rdata->list);
4188 		kref_put(&rdata->refcount, cifs_readdata_release);
4189 	}
4190 
4191 	/* mask nodata case */
4192 	if (rc == -ENODATA)
4193 		rc = 0;
4194 
4195 	ctx->rc = (rc == 0) ? (ssize_t)ctx->total_len : rc;
4196 
4197 	mutex_unlock(&ctx->aio_mutex);
4198 
4199 	if (ctx->iocb && ctx->iocb->ki_complete)
4200 		ctx->iocb->ki_complete(ctx->iocb, ctx->rc);
4201 	else
4202 		complete(&ctx->done);
4203 }
4204 
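/*
 * Common helper for uncached and direct reads: set up a cifs_aio_ctx
 * describing the destination buffer, issue the asynchronous reads and, for a
 * synchronous kiocb, wait for the data to be collected.
 */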
4205 static ssize_t __cifs_readv(
4206 	struct kiocb *iocb, struct iov_iter *to, bool direct)
4207 {
4208 	size_t len;
4209 	struct file *file = iocb->ki_filp;
4210 	struct cifs_sb_info *cifs_sb;
4211 	struct cifsFileInfo *cfile;
4212 	struct cifs_tcon *tcon;
4213 	ssize_t rc, total_read = 0;
4214 	loff_t offset = iocb->ki_pos;
4215 	struct cifs_aio_ctx *ctx;
4216 
4217 	len = iov_iter_count(to);
4218 	if (!len)
4219 		return 0;
4220 
4221 	cifs_sb = CIFS_FILE_SB(file);
4222 	cfile = file->private_data;
4223 	tcon = tlink_tcon(cfile->tlink);
4224 
4225 	if (!tcon->ses->server->ops->async_readv)
4226 		return -ENOSYS;
4227 
4228 	if ((file->f_flags & O_ACCMODE) == O_WRONLY)
4229 		cifs_dbg(FYI, "attempting read on write only file instance\n");
4230 
4231 	ctx = cifs_aio_ctx_alloc();
4232 	if (!ctx)
4233 		return -ENOMEM;
4234 
4235 	ctx->pos	= offset;
4236 	ctx->direct_io	= direct;
4237 	ctx->len	= len;
4238 	ctx->cfile	= cifsFileInfo_get(cfile);
4239 	ctx->nr_pinned_pages = 0;
4240 
4241 	if (!is_sync_kiocb(iocb))
4242 		ctx->iocb = iocb;
4243 
4244 	if (user_backed_iter(to)) {
4245 		/*
4246 		 * Extract IOVEC/UBUF-type iterators to a BVEC-type iterator as
4247 		 * they contain references to the calling process's virtual
4248 		 * memory layout which won't be available in an async worker
4249 		 * thread.  This also takes a pin on every folio involved.
4250 		 */
4251 		rc = netfs_extract_user_iter(to, iov_iter_count(to),
4252 					     &ctx->iter, 0);
4253 		if (rc < 0) {
4254 			kref_put(&ctx->refcount, cifs_aio_ctx_release);
4255 			return rc;
4256 		}
4257 
4258 		ctx->nr_pinned_pages = rc;
4259 		ctx->bv = (void *)ctx->iter.bvec;
4260 		ctx->bv_need_unpin = iov_iter_extract_will_pin(to);
4261 		ctx->should_dirty = true;
4262 	} else if ((iov_iter_is_bvec(to) || iov_iter_is_kvec(to)) &&
4263 		   !is_sync_kiocb(iocb)) {
4264 		/*
4265 		 * If the op is asynchronous, we need to copy the list attached
4266 		 * to a BVEC/KVEC-type iterator, but we assume that the storage
4267 		 * will be retained by the caller; in any case, we may or may
4268 		 * not be able to pin the pages, so we don't try.
4269 		 */
4270 		ctx->bv = (void *)dup_iter(&ctx->iter, to, GFP_KERNEL);
4271 		if (!ctx->bv) {
4272 			kref_put(&ctx->refcount, cifs_aio_ctx_release);
4273 			return -ENOMEM;
4274 		}
4275 	} else {
4276 		/*
4277 		 * Otherwise, we just pass the iterator down as-is and rely on
4278 		 * the caller to make sure the pages referred to by the
4279 		 * iterator don't evaporate.
4280 		 */
4281 		ctx->iter = *to;
4282 	}
4283 
4284 	if (direct) {
4285 		rc = filemap_write_and_wait_range(file->f_inode->i_mapping,
4286 						  offset, offset + len - 1);
4287 		if (rc) {
4288 			kref_put(&ctx->refcount, cifs_aio_ctx_release);
4289 			return -EAGAIN;
4290 		}
4291 	}
4292 
4293 	/* grab a lock here because the read response handlers can access ctx */
4294 	mutex_lock(&ctx->aio_mutex);
4295 
4296 	rc = cifs_send_async_read(offset, len, cfile, cifs_sb, &ctx->list, ctx);
4297 
4298 	/* if at least one read request send succeeded, then reset rc */
4299 	if (!list_empty(&ctx->list))
4300 		rc = 0;
4301 
4302 	mutex_unlock(&ctx->aio_mutex);
4303 
4304 	if (rc) {
4305 		kref_put(&ctx->refcount, cifs_aio_ctx_release);
4306 		return rc;
4307 	}
4308 
4309 	if (!is_sync_kiocb(iocb)) {
4310 		kref_put(&ctx->refcount, cifs_aio_ctx_release);
4311 		return -EIOCBQUEUED;
4312 	}
4313 
4314 	rc = wait_for_completion_killable(&ctx->done);
4315 	if (rc) {
4316 		mutex_lock(&ctx->aio_mutex);
4317 		ctx->rc = rc = -EINTR;
4318 		total_read = ctx->total_len;
4319 		mutex_unlock(&ctx->aio_mutex);
4320 	} else {
4321 		rc = ctx->rc;
4322 		total_read = ctx->total_len;
4323 	}
4324 
4325 	kref_put(&ctx->refcount, cifs_aio_ctx_release);
4326 
4327 	if (total_read) {
4328 		iocb->ki_pos += total_read;
4329 		return total_read;
4330 	}
4331 	return rc;
4332 }
4333 
4334 ssize_t cifs_direct_readv(struct kiocb *iocb, struct iov_iter *to)
4335 {
4336 	return __cifs_readv(iocb, to, true);
4337 }
4338 
4339 ssize_t cifs_user_readv(struct kiocb *iocb, struct iov_iter *to)
4340 {
4341 	return __cifs_readv(iocb, to, false);
4342 }
4343 
4344 ssize_t
4345 cifs_strict_readv(struct kiocb *iocb, struct iov_iter *to)
4346 {
4347 	struct inode *inode = file_inode(iocb->ki_filp);
4348 	struct cifsInodeInfo *cinode = CIFS_I(inode);
4349 	struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
4350 	struct cifsFileInfo *cfile = (struct cifsFileInfo *)
4351 						iocb->ki_filp->private_data;
4352 	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
4353 	int rc = -EACCES;
4354 
4355 	/*
4356 	 * In strict cache mode we need to read from the server all the time
4357 	 * if we don't have level II oplock because the server can delay mtime
4358 	 * change - so we can't make a decision about invalidating the inode.
4359 	 * We can also fail with page reading if there are mandatory locks
4360 	 * on pages affected by this read but not on the region from pos to
4361 	 * pos+len-1.
4362 	 */
4363 	if (!CIFS_CACHE_READ(cinode))
4364 		return cifs_user_readv(iocb, to);
4365 
4366 	if (cap_unix(tcon->ses) &&
4367 	    (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
4368 	    ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
4369 		return generic_file_read_iter(iocb, to);
4370 
4371 	/*
4372 	 * We need to hold the sem to be sure nobody modifies lock list
4373 	 * with a brlock that prevents reading.
4374 	 */
4375 	down_read(&cinode->lock_sem);
4376 	if (!cifs_find_lock_conflict(cfile, iocb->ki_pos, iov_iter_count(to),
4377 				     tcon->ses->server->vals->shared_lock_type,
4378 				     0, NULL, CIFS_READ_OP))
4379 		rc = generic_file_read_iter(iocb, to);
4380 	up_read(&cinode->lock_sem);
4381 	return rc;
4382 }
4383 
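/*
 * Synchronous read helper: issue sync_read requests of at most rsize bytes
 * each until the requested amount has been read or an error occurs.
 */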
4384 static ssize_t
4385 cifs_read(struct file *file, char *read_data, size_t read_size, loff_t *offset)
4386 {
4387 	int rc = -EACCES;
4388 	unsigned int bytes_read = 0;
4389 	unsigned int total_read;
4390 	unsigned int current_read_size;
4391 	unsigned int rsize;
4392 	struct cifs_sb_info *cifs_sb;
4393 	struct cifs_tcon *tcon;
4394 	struct TCP_Server_Info *server;
4395 	unsigned int xid;
4396 	char *cur_offset;
4397 	struct cifsFileInfo *open_file;
4398 	struct cifs_io_parms io_parms = {0};
4399 	int buf_type = CIFS_NO_BUFFER;
4400 	__u32 pid;
4401 
4402 	xid = get_xid();
4403 	cifs_sb = CIFS_FILE_SB(file);
4404 
4405 	/* FIXME: set up handlers for larger reads and/or convert to async */
4406 	rsize = min_t(unsigned int, cifs_sb->ctx->rsize, CIFSMaxBufSize);
4407 
4408 	if (file->private_data == NULL) {
4409 		rc = -EBADF;
4410 		free_xid(xid);
4411 		return rc;
4412 	}
4413 	open_file = file->private_data;
4414 	tcon = tlink_tcon(open_file->tlink);
4415 	server = cifs_pick_channel(tcon->ses);
4416 
4417 	if (!server->ops->sync_read) {
4418 		free_xid(xid);
4419 		return -ENOSYS;
4420 	}
4421 
4422 	if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
4423 		pid = open_file->pid;
4424 	else
4425 		pid = current->tgid;
4426 
4427 	if ((file->f_flags & O_ACCMODE) == O_WRONLY)
4428 		cifs_dbg(FYI, "attempting read on write only file instance\n");
4429 
4430 	for (total_read = 0, cur_offset = read_data; read_size > total_read;
4431 	     total_read += bytes_read, cur_offset += bytes_read) {
4432 		do {
4433 			current_read_size = min_t(uint, read_size - total_read,
4434 						  rsize);
4435 			/*
4436 			 * For Windows ME and 9x we do not want to request more
4437 			 * than the server negotiated since it would refuse the
4438 			 * read in that case.
4439 			 */
4440 			if (!(tcon->ses->capabilities &
4441 				tcon->ses->server->vals->cap_large_files)) {
4442 				current_read_size = min_t(uint,
4443 					current_read_size, CIFSMaxBufSize);
4444 			}
4445 			if (open_file->invalidHandle) {
4446 				rc = cifs_reopen_file(open_file, true);
4447 				if (rc != 0)
4448 					break;
4449 			}
4450 			io_parms.pid = pid;
4451 			io_parms.tcon = tcon;
4452 			io_parms.offset = *offset;
4453 			io_parms.length = current_read_size;
4454 			io_parms.server = server;
4455 			rc = server->ops->sync_read(xid, &open_file->fid, &io_parms,
4456 						    &bytes_read, &cur_offset,
4457 						    &buf_type);
4458 		} while (rc == -EAGAIN);
4459 
4460 		if (rc || (bytes_read == 0)) {
4461 			if (total_read) {
4462 				break;
4463 			} else {
4464 				free_xid(xid);
4465 				return rc;
4466 			}
4467 		} else {
4468 			cifs_stats_bytes_read(tcon, total_read);
4469 			*offset += bytes_read;
4470 		}
4471 	}
4472 	free_xid(xid);
4473 	return total_read;
4474 }
4475 
4476 /*
4477  * If the page is mmap'ed into a process' page tables, then we need to make
4478  * sure that it doesn't change while being written back.
4479  */
4480 static vm_fault_t cifs_page_mkwrite(struct vm_fault *vmf)
4481 {
4482 	struct folio *folio = page_folio(vmf->page);
4483 
4484 	/* Wait for the folio to be written to the cache before we allow it to
4485 	 * be modified.  We then assume the entire folio will need writing back.
4486 	 */
4487 #ifdef CONFIG_CIFS_FSCACHE
4488 	if (folio_test_fscache(folio) &&
4489 	    folio_wait_fscache_killable(folio) < 0)
4490 		return VM_FAULT_RETRY;
4491 #endif
4492 
4493 	folio_wait_writeback(folio);
4494 
4495 	if (folio_lock_killable(folio) < 0)
4496 		return VM_FAULT_RETRY;
4497 	return VM_FAULT_LOCKED;
4498 }
4499 
4500 static const struct vm_operations_struct cifs_file_vm_ops = {
4501 	.fault = filemap_fault,
4502 	.map_pages = filemap_map_pages,
4503 	.page_mkwrite = cifs_page_mkwrite,
4504 };
4505 
4506 int cifs_file_strict_mmap(struct file *file, struct vm_area_struct *vma)
4507 {
4508 	int xid, rc = 0;
4509 	struct inode *inode = file_inode(file);
4510 
4511 	xid = get_xid();
4512 
4513 	if (!CIFS_CACHE_READ(CIFS_I(inode)))
4514 		rc = cifs_zap_mapping(inode);
4515 	if (!rc)
4516 		rc = generic_file_mmap(file, vma);
4517 	if (!rc)
4518 		vma->vm_ops = &cifs_file_vm_ops;
4519 
4520 	free_xid(xid);
4521 	return rc;
4522 }
4523 
4524 int cifs_file_mmap(struct file *file, struct vm_area_struct *vma)
4525 {
4526 	int rc, xid;
4527 
4528 	xid = get_xid();
4529 
4530 	rc = cifs_revalidate_file(file);
4531 	if (rc)
4532 		cifs_dbg(FYI, "Validation prior to mmap failed, error=%d\n",
4533 			 rc);
4534 	if (!rc)
4535 		rc = generic_file_mmap(file, vma);
4536 	if (!rc)
4537 		vma->vm_ops = &cifs_file_vm_ops;
4538 
4539 	free_xid(xid);
4540 	return rc;
4541 }
4542 
4543 /*
4544  * Unlock a bunch of folios in the pagecache.
4545  */
4546 static void cifs_unlock_folios(struct address_space *mapping, pgoff_t first, pgoff_t last)
4547 {
4548 	struct folio *folio;
4549 	XA_STATE(xas, &mapping->i_pages, first);
4550 
4551 	rcu_read_lock();
4552 	xas_for_each(&xas, folio, last) {
4553 		folio_unlock(folio);
4554 	}
4555 	rcu_read_unlock();
4556 }
4557 
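/*
 * Completion work for a readahead request: copy the data to the cache if the
 * read succeeded, zero any shortfall, then mark the folios uptodate and
 * unlock them.
 */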
4558 static void cifs_readahead_complete(struct work_struct *work)
4559 {
4560 	struct cifs_readdata *rdata = container_of(work,
4561 						   struct cifs_readdata, work);
4562 	struct folio *folio;
4563 	pgoff_t last;
4564 	bool good = rdata->result == 0 || (rdata->result == -EAGAIN && rdata->got_bytes);
4565 
4566 	XA_STATE(xas, &rdata->mapping->i_pages, rdata->offset / PAGE_SIZE);
4567 
4568 	if (good)
4569 		cifs_readahead_to_fscache(rdata->mapping->host,
4570 					  rdata->offset, rdata->bytes);
4571 
4572 	if (iov_iter_count(&rdata->iter) > 0)
4573 		iov_iter_zero(iov_iter_count(&rdata->iter), &rdata->iter);
4574 
4575 	last = (rdata->offset + rdata->bytes - 1) / PAGE_SIZE;
4576 
4577 	rcu_read_lock();
4578 	xas_for_each(&xas, folio, last) {
4579 		if (good) {
4580 			flush_dcache_folio(folio);
4581 			folio_mark_uptodate(folio);
4582 		}
4583 		folio_unlock(folio);
4584 	}
4585 	rcu_read_unlock();
4586 
4587 	kref_put(&rdata->refcount, cifs_readdata_release);
4588 }
4589 
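/*
 * Issue readahead against the server, pulling folios from fscache where
 * possible and otherwise chopping the window into rsize-sized asynchronous
 * read requests.
 */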
4590 static void cifs_readahead(struct readahead_control *ractl)
4591 {
4592 	struct cifsFileInfo *open_file = ractl->file->private_data;
4593 	struct cifs_sb_info *cifs_sb = CIFS_FILE_SB(ractl->file);
4594 	struct TCP_Server_Info *server;
4595 	unsigned int xid, nr_pages, cache_nr_pages = 0;
4596 	unsigned int ra_pages;
4597 	pgoff_t next_cached = ULONG_MAX, ra_index;
4598 	bool caching = fscache_cookie_enabled(cifs_inode_cookie(ractl->mapping->host)) &&
4599 		cifs_inode_cookie(ractl->mapping->host)->cache_priv;
4600 	bool check_cache = caching;
4601 	pid_t pid;
4602 	int rc = 0;
4603 
4604 	/* Note that readahead_count() lags behind our dequeuing of pages from
4605 	 * the ractl, so we have to keep track for ourselves.
4606 	 */
4607 	ra_pages = readahead_count(ractl);
4608 	ra_index = readahead_index(ractl);
4609 
4610 	xid = get_xid();
4611 
4612 	if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
4613 		pid = open_file->pid;
4614 	else
4615 		pid = current->tgid;
4616 
4617 	server = cifs_pick_channel(tlink_tcon(open_file->tlink)->ses);
4618 
4619 	cifs_dbg(FYI, "%s: file=%p mapping=%p num_pages=%u\n",
4620 		 __func__, ractl->file, ractl->mapping, ra_pages);
4621 
4622 	/*
4623 	 * Chop the readahead request up into rsize-sized read requests.
4624 	 */
4625 	while ((nr_pages = ra_pages)) {
4626 		unsigned int i, rsize;
4627 		struct cifs_readdata *rdata;
4628 		struct cifs_credits credits_on_stack;
4629 		struct cifs_credits *credits = &credits_on_stack;
4630 		struct folio *folio;
4631 		pgoff_t fsize;
4632 
4633 		/*
4634 		 * Find out if we have anything cached in the range of
4635 		 * interest, and if so, where the next chunk of cached data is.
4636 		 */
4637 		if (caching) {
4638 			if (check_cache) {
4639 				rc = cifs_fscache_query_occupancy(
4640 					ractl->mapping->host, ra_index, nr_pages,
4641 					&next_cached, &cache_nr_pages);
4642 				if (rc < 0)
4643 					caching = false;
4644 				check_cache = false;
4645 			}
4646 
4647 			if (ra_index == next_cached) {
4648 				/*
4649 				 * TODO: Send a whole batch of pages to be read
4650 				 * by the cache.
4651 				 */
4652 				folio = readahead_folio(ractl);
4653 				fsize = folio_nr_pages(folio);
4654 				ra_pages -= fsize;
4655 				ra_index += fsize;
4656 				if (cifs_readpage_from_fscache(ractl->mapping->host,
4657 							       &folio->page) < 0) {
4658 					/*
4659 					 * TODO: Deal with cache read failure
4660 					 * here, but for the moment, delegate
4661 					 * that to readpage.
4662 					 */
4663 					caching = false;
4664 				}
4665 				folio_unlock(folio);
4666 				next_cached += fsize;
4667 				cache_nr_pages -= fsize;
4668 				if (cache_nr_pages == 0)
4669 					check_cache = true;
4670 				continue;
4671 			}
4672 		}
4673 
4674 		if (open_file->invalidHandle) {
4675 			rc = cifs_reopen_file(open_file, true);
4676 			if (rc) {
4677 				if (rc == -EAGAIN)
4678 					continue;
4679 				break;
4680 			}
4681 		}
4682 
4683 		if (cifs_sb->ctx->rsize == 0)
4684 			cifs_sb->ctx->rsize =
4685 				server->ops->negotiate_rsize(tlink_tcon(open_file->tlink),
4686 							     cifs_sb->ctx);
4687 
4688 		rc = server->ops->wait_mtu_credits(server, cifs_sb->ctx->rsize,
4689 						   &rsize, credits);
4690 		if (rc)
4691 			break;
4692 		nr_pages = min_t(size_t, rsize / PAGE_SIZE, ra_pages);
4693 		if (next_cached != ULONG_MAX)
4694 			nr_pages = min_t(size_t, nr_pages, next_cached - ra_index);
4695 
4696 		/*
4697 		 * Give up immediately if rsize is too small to read an entire
4698 		 * page. The VFS will fall back to readpage. We should never
4699 		 * reach this point however since we set ra_pages to 0 when the
4700 		 * rsize is smaller than a cache page.
4701 		 */
4702 		if (unlikely(!nr_pages)) {
4703 			add_credits_and_wake_if(server, credits, 0);
4704 			break;
4705 		}
4706 
4707 		rdata = cifs_readdata_alloc(cifs_readahead_complete);
4708 		if (!rdata) {
4709 			/* best to give up if we're out of mem */
4710 			add_credits_and_wake_if(server, credits, 0);
4711 			break;
4712 		}
4713 
4714 		rdata->offset	= ra_index * PAGE_SIZE;
4715 		rdata->bytes	= nr_pages * PAGE_SIZE;
4716 		rdata->cfile	= cifsFileInfo_get(open_file);
4717 		rdata->server	= server;
4718 		rdata->mapping	= ractl->mapping;
4719 		rdata->pid	= pid;
4720 		rdata->credits	= credits_on_stack;
4721 
4722 		for (i = 0; i < nr_pages; i++) {
4723 			if (!readahead_folio(ractl))
4724 				WARN_ON(1);
4725 		}
4726 		ra_pages -= nr_pages;
4727 		ra_index += nr_pages;
4728 
4729 		iov_iter_xarray(&rdata->iter, ITER_DEST, &rdata->mapping->i_pages,
4730 				rdata->offset, rdata->bytes);
4731 
4732 		rc = adjust_credits(server, &rdata->credits, rdata->bytes);
4733 		if (!rc) {
4734 			if (rdata->cfile->invalidHandle)
4735 				rc = -EAGAIN;
4736 			else
4737 				rc = server->ops->async_readv(rdata);
4738 		}
4739 
4740 		if (rc) {
4741 			add_credits_and_wake_if(server, &rdata->credits, 0);
4742 			cifs_unlock_folios(rdata->mapping,
4743 					   rdata->offset / PAGE_SIZE,
4744 					   (rdata->offset + rdata->bytes - 1) / PAGE_SIZE);
4745 			/* Fall back to readpage in error/reconnect cases */
4746 			kref_put(&rdata->refcount, cifs_readdata_release);
4747 			break;
4748 		}
4749 
4750 		kref_put(&rdata->refcount, cifs_readdata_release);
4751 	}
4752 
4753 	free_xid(xid);
4754 }
4755 
4756 /*
4757  * cifs_readpage_worker must be called with the page pinned
4758  */
4759 static int cifs_readpage_worker(struct file *file, struct page *page,
4760 	loff_t *poffset)
4761 {
4762 	struct inode *inode = file_inode(file);
4763 	struct timespec64 atime, mtime;
4764 	char *read_data;
4765 	int rc;
4766 
4767 	/* Is the page cached? */
4768 	rc = cifs_readpage_from_fscache(inode, page);
4769 	if (rc == 0)
4770 		goto read_complete;
4771 
4772 	read_data = kmap(page);
4773 	/* for reads over a certain size we could initiate async read ahead */
4774 
4775 	rc = cifs_read(file, read_data, PAGE_SIZE, poffset);
4776 
4777 	if (rc < 0)
4778 		goto io_error;
4779 	else
4780 		cifs_dbg(FYI, "Bytes read %d\n", rc);
4781 
4782 	/* we do not want atime to be less than mtime, it broke some apps */
4783 	atime = inode_set_atime_to_ts(inode, current_time(inode));
4784 	mtime = inode_get_mtime(inode);
4785 	if (timespec64_compare(&atime, &mtime) < 0)
4786 		inode_set_atime_to_ts(inode, inode_get_mtime(inode));
4787 
4788 	if (PAGE_SIZE > rc)
4789 		memset(read_data + rc, 0, PAGE_SIZE - rc);
4790 
4791 	flush_dcache_page(page);
4792 	SetPageUptodate(page);
4793 	rc = 0;
4794 
4795 io_error:
4796 	kunmap(page);
4797 
4798 read_complete:
4799 	unlock_page(page);
4800 	return rc;
4801 }
4802 
4803 static int cifs_read_folio(struct file *file, struct folio *folio)
4804 {
4805 	struct page *page = &folio->page;
4806 	loff_t offset = page_file_offset(page);
4807 	int rc = -EACCES;
4808 	unsigned int xid;
4809 
4810 	xid = get_xid();
4811 
4812 	if (file->private_data == NULL) {
4813 		rc = -EBADF;
4814 		free_xid(xid);
4815 		return rc;
4816 	}
4817 
4818 	cifs_dbg(FYI, "read_folio %p at offset %d 0x%x\n",
4819 		 page, (int)offset, (int)offset);
4820 
4821 	rc = cifs_readpage_worker(file, page, &offset);
4822 
4823 	free_xid(xid);
4824 	return rc;
4825 }
4826 
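/* Does the inode have at least one open file handle with write access? */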
4827 static int is_inode_writable(struct cifsInodeInfo *cifs_inode)
4828 {
4829 	struct cifsFileInfo *open_file;
4830 
4831 	spin_lock(&cifs_inode->open_file_lock);
4832 	list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
4833 		if (OPEN_FMODE(open_file->f_flags) & FMODE_WRITE) {
4834 			spin_unlock(&cifs_inode->open_file_lock);
4835 			return 1;
4836 		}
4837 	}
4838 	spin_unlock(&cifs_inode->open_file_lock);
4839 	return 0;
4840 }
4841 
4842 /* We do not want to update the file size from the server for inodes
4843    open for write, to avoid races with writepage extending the file.
4844    In the future we could consider allowing the inode to be refreshed
4845    only on increases in the file size, but this is tricky to do without
4846    racing with writebehind page caching in the current Linux kernel
4847    design */
4848 bool is_size_safe_to_change(struct cifsInodeInfo *cifsInode, __u64 end_of_file,
4849 			    bool from_readdir)
4850 {
4851 	if (!cifsInode)
4852 		return true;
4853 
4854 	if (is_inode_writable(cifsInode) ||
4855 		((cifsInode->oplock & CIFS_CACHE_RW_FLG) != 0 && from_readdir)) {
4856 		/* This inode is open for write at least once */
4857 		struct cifs_sb_info *cifs_sb;
4858 
4859 		cifs_sb = CIFS_SB(cifsInode->netfs.inode.i_sb);
4860 		if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DIRECT_IO) {
4861 			/* since there is no page cache to corrupt on directio
4862 			   we can change size safely */
4863 			return true;
4864 		}
4865 
4866 		if (i_size_read(&cifsInode->netfs.inode) < end_of_file)
4867 			return true;
4868 
4869 		return false;
4870 	} else
4871 		return true;
4872 }
4873 
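/*
 * Prepare a page for a buffered write: grab it from the page cache and,
 * where the existing contents may still be needed, bring it uptodate first.
 */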
4874 static int cifs_write_begin(struct file *file, struct address_space *mapping,
4875 			loff_t pos, unsigned len,
4876 			struct page **pagep, void **fsdata)
4877 {
4878 	int oncethru = 0;
4879 	pgoff_t index = pos >> PAGE_SHIFT;
4880 	loff_t offset = pos & (PAGE_SIZE - 1);
4881 	loff_t page_start = pos & PAGE_MASK;
4882 	loff_t i_size;
4883 	struct page *page;
4884 	int rc = 0;
4885 
4886 	cifs_dbg(FYI, "write_begin from %lld len %d\n", (long long)pos, len);
4887 
4888 start:
4889 	page = grab_cache_page_write_begin(mapping, index);
4890 	if (!page) {
4891 		rc = -ENOMEM;
4892 		goto out;
4893 	}
4894 
4895 	if (PageUptodate(page))
4896 		goto out;
4897 
4898 	/*
4899 	 * If we write a full page it will be up to date, no need to read from
4900 	 * the server. If the write is short, we'll end up doing a sync write
4901 	 * instead.
4902 	 */
4903 	if (len == PAGE_SIZE)
4904 		goto out;
4905 
4906 	/*
4907 	 * optimize away the read when we have an oplock, and we're not
4908 	 * expecting to use any of the data we'd be reading in. That
4909 	 * is, when the page lies beyond the EOF, or straddles the EOF
4910 	 * and the write will cover all of the existing data.
4911 	 */
4912 	if (CIFS_CACHE_READ(CIFS_I(mapping->host))) {
4913 		i_size = i_size_read(mapping->host);
4914 		if (page_start >= i_size ||
4915 		    (offset == 0 && (pos + len) >= i_size)) {
4916 			zero_user_segments(page, 0, offset,
4917 					   offset + len,
4918 					   PAGE_SIZE);
4919 			/*
4920 			 * PageChecked means that the parts of the page
4921 			 * to which we're not writing are considered up
4922 			 * to date. Once the data is copied to the
4923 			 * page, it can be set uptodate.
4924 			 */
4925 			SetPageChecked(page);
4926 			goto out;
4927 		}
4928 	}
4929 
4930 	if ((file->f_flags & O_ACCMODE) != O_WRONLY && !oncethru) {
4931 		/*
4932 		 * might as well read a page, it is fast enough. If we get
4933 		 * an error, we don't need to return it. cifs_write_end will
4934 		 * do a sync write instead since PG_uptodate isn't set.
4935 		 */
4936 		cifs_readpage_worker(file, page, &page_start);
4937 		put_page(page);
4938 		oncethru = 1;
4939 		goto start;
4940 	} else {
4941 		/* we could try using another file handle if there is one -
4942 		   but how would we lock it to prevent close of that handle
4943 		   racing with this read? In any case
4944 		   this will be written out by write_end so is fine */
4945 	}
4946 out:
4947 	*pagep = page;
4948 	return rc;
4949 }
4950 
4951 static bool cifs_release_folio(struct folio *folio, gfp_t gfp)
4952 {
4953 	if (folio_test_private(folio))
4954 		return 0;
4955 	if (folio_test_fscache(folio)) {
4956 		if (current_is_kswapd() || !(gfp & __GFP_FS))
4957 			return false;
4958 		folio_wait_fscache(folio);
4959 	}
4960 	fscache_note_page_release(cifs_inode_cookie(folio->mapping->host));
4961 	return true;
4962 }
4963 
4964 static void cifs_invalidate_folio(struct folio *folio, size_t offset,
4965 				 size_t length)
4966 {
4967 	folio_wait_fscache(folio);
4968 }
4969 
4970 static int cifs_launder_folio(struct folio *folio)
4971 {
4972 	int rc = 0;
4973 	loff_t range_start = folio_pos(folio);
4974 	loff_t range_end = range_start + folio_size(folio);
4975 	struct writeback_control wbc = {
4976 		.sync_mode = WB_SYNC_ALL,
4977 		.nr_to_write = 0,
4978 		.range_start = range_start,
4979 		.range_end = range_end,
4980 	};
4981 
4982 	cifs_dbg(FYI, "Launder page: %lu\n", folio->index);
4983 
4984 	if (folio_clear_dirty_for_io(folio))
4985 		rc = cifs_writepage_locked(&folio->page, &wbc);
4986 
4987 	folio_wait_fscache(folio);
4988 	return rc;
4989 }
4990 
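/*
 * Work item run when the server breaks an oplock or lease: downgrade the
 * cached state, flush and possibly invalidate the pagecache, push cached
 * byte-range locks to the server and, unless the break was cancelled or the
 * file has already been closed, acknowledge the break.
 */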
4991 void cifs_oplock_break(struct work_struct *work)
4992 {
4993 	struct cifsFileInfo *cfile = container_of(work, struct cifsFileInfo,
4994 						  oplock_break);
4995 	struct inode *inode = d_inode(cfile->dentry);
4996 	struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
4997 	struct cifsInodeInfo *cinode = CIFS_I(inode);
4998 	struct cifs_tcon *tcon;
4999 	struct TCP_Server_Info *server;
5000 	struct tcon_link *tlink;
5001 	int rc = 0;
5002 	bool purge_cache = false, oplock_break_cancelled;
5003 	__u64 persistent_fid, volatile_fid;
5004 	__u16 net_fid;
5005 
5006 	wait_on_bit(&cinode->flags, CIFS_INODE_PENDING_WRITERS,
5007 			TASK_UNINTERRUPTIBLE);
5008 
5009 	tlink = cifs_sb_tlink(cifs_sb);
5010 	if (IS_ERR(tlink))
5011 		goto out;
5012 	tcon = tlink_tcon(tlink);
5013 	server = tcon->ses->server;
5014 
5015 	server->ops->downgrade_oplock(server, cinode, cfile->oplock_level,
5016 				      cfile->oplock_epoch, &purge_cache);
5017 
5018 	if (!CIFS_CACHE_WRITE(cinode) && CIFS_CACHE_READ(cinode) &&
5019 						cifs_has_mand_locks(cinode)) {
5020 		cifs_dbg(FYI, "Reset oplock to None for inode=%p due to mand locks\n",
5021 			 inode);
5022 		cinode->oplock = 0;
5023 	}
5024 
5025 	if (inode && S_ISREG(inode->i_mode)) {
5026 		if (CIFS_CACHE_READ(cinode))
5027 			break_lease(inode, O_RDONLY);
5028 		else
5029 			break_lease(inode, O_WRONLY);
5030 		rc = filemap_fdatawrite(inode->i_mapping);
5031 		if (!CIFS_CACHE_READ(cinode) || purge_cache) {
5032 			rc = filemap_fdatawait(inode->i_mapping);
5033 			mapping_set_error(inode->i_mapping, rc);
5034 			cifs_zap_mapping(inode);
5035 		}
5036 		cifs_dbg(FYI, "Oplock flush inode %p rc %d\n", inode, rc);
5037 		if (CIFS_CACHE_WRITE(cinode))
5038 			goto oplock_break_ack;
5039 	}
5040 
5041 	rc = cifs_push_locks(cfile);
5042 	if (rc)
5043 		cifs_dbg(VFS, "Push locks rc = %d\n", rc);
5044 
5045 oplock_break_ack:
5046 	/*
5047 	 * When an oplock break is received and there are no active
5048 	 * file handles, only cached ones, schedule the deferred close
5049 	 * immediately so that a new open will not use a cached handle.
5050 	 */
5051 
5052 	if (!CIFS_CACHE_HANDLE(cinode) && !list_empty(&cinode->deferred_closes))
5053 		cifs_close_deferred_file(cinode);
5054 
5055 	persistent_fid = cfile->fid.persistent_fid;
5056 	volatile_fid = cfile->fid.volatile_fid;
5057 	net_fid = cfile->fid.netfid;
5058 	oplock_break_cancelled = cfile->oplock_break_cancelled;
5059 
5060 	_cifsFileInfo_put(cfile, false /* do not wait for ourself */, false);
5061 	/*
5062 	 * MS-SMB2 3.2.5.19.1 and 3.2.5.19.2 (and MS-CIFS 3.2.5.42) do not require
5063 	 * an acknowledgment to be sent when the file has already been closed.
5064 	 */
5065 	spin_lock(&cinode->open_file_lock);
5066 	/* check list empty since can race with kill_sb calling tree disconnect */
5067 	if (!oplock_break_cancelled && !list_empty(&cinode->openFileList)) {
5068 		spin_unlock(&cinode->open_file_lock);
5069 		rc = server->ops->oplock_response(tcon, persistent_fid,
5070 						  volatile_fid, net_fid, cinode);
5071 		cifs_dbg(FYI, "Oplock release rc = %d\n", rc);
5072 	} else
5073 		spin_unlock(&cinode->open_file_lock);
5074 
5075 	cifs_put_tlink(tlink);
5076 out:
5077 	cifs_done_oplock_break(cinode);
5078 }
5079 
5080 /*
5081  * The presence of cifs_direct_io() in the address space ops vector
5082  * allows open() O_DIRECT flags which would have failed otherwise.
5083  *
5084  * In the non-cached mode (mount with cache=none), we shunt off direct read and write requests
5085  * so this method should never be called.
5086  *
5087  * Direct IO is not yet supported in the cached mode.
5088  */
5089 static ssize_t
5090 cifs_direct_io(struct kiocb *iocb, struct iov_iter *iter)
5091 {
5092 	/*
5093 	 * FIXME
5094 	 * Eventually need to support direct IO for non forcedirectio mounts
5095 	 */
5096 	return -EINVAL;
5097 }
5098 
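/*
 * Allow the file to be used as a swapfile: reject files with holes, mark the
 * open file as a swapfile and add a single swap extent covering it.
 */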
5099 static int cifs_swap_activate(struct swap_info_struct *sis,
5100 			      struct file *swap_file, sector_t *span)
5101 {
5102 	struct cifsFileInfo *cfile = swap_file->private_data;
5103 	struct inode *inode = swap_file->f_mapping->host;
5104 	unsigned long blocks;
5105 	long long isize;
5106 
5107 	cifs_dbg(FYI, "swap activate\n");
5108 
5109 	if (!swap_file->f_mapping->a_ops->swap_rw)
5110 		/* Cannot support swap */
5111 		return -EINVAL;
5112 
5113 	spin_lock(&inode->i_lock);
5114 	blocks = inode->i_blocks;
5115 	isize = inode->i_size;
5116 	spin_unlock(&inode->i_lock);
5117 	if (blocks*512 < isize) {
5118 		pr_warn("swap activate: swapfile has holes\n");
5119 		return -EINVAL;
5120 	}
5121 	*span = sis->pages;
5122 
5123 	pr_warn_once("Swap support over SMB3 is experimental\n");
5124 
5125 	/*
5126 	 * TODO: consider adding ACL (or documenting how) to prevent other
5127 	 * users (on this or other systems) from reading it
5128 	 */
5129 
5130 
5131 	/* TODO: add sk_set_memalloc(inet) or similar */
5132 
5133 	if (cfile)
5134 		cfile->swapfile = true;
5135 	/*
5136 	 * TODO: Since file already open, we can't open with DENY_ALL here
5137 	 * but we could add call to grab a byte range lock to prevent others
5138 	 * from reading or writing the file
5139 	 */
5140 
5141 	sis->flags |= SWP_FS_OPS;
5142 	return add_swap_extent(sis, 0, sis->max, 0);
5143 }
5144 
5145 static void cifs_swap_deactivate(struct file *file)
5146 {
5147 	struct cifsFileInfo *cfile = file->private_data;
5148 
5149 	cifs_dbg(FYI, "swap deactivate\n");
5150 
5151 	/* TODO: undo sk_set_memalloc(inet) will eventually be needed */
5152 
5153 	if (cfile)
5154 		cfile->swapfile = false;
5155 
5156 	/* do we need to unpin (or unlock) the file? */
5157 }
5158 
5159 /*
5160  * Mark a page as having been made dirty and thus needing writeback.  We also
5161  * need to pin the cache object to write back to.
5162  */
5163 #ifdef CONFIG_CIFS_FSCACHE
5164 static bool cifs_dirty_folio(struct address_space *mapping, struct folio *folio)
5165 {
5166 	return fscache_dirty_folio(mapping, folio,
5167 					cifs_inode_cookie(mapping->host));
5168 }
5169 #else
5170 #define cifs_dirty_folio filemap_dirty_folio
5171 #endif
5172 
5173 const struct address_space_operations cifs_addr_ops = {
5174 	.read_folio = cifs_read_folio,
5175 	.readahead = cifs_readahead,
5176 	.writepages = cifs_writepages,
5177 	.write_begin = cifs_write_begin,
5178 	.write_end = cifs_write_end,
5179 	.dirty_folio = cifs_dirty_folio,
5180 	.release_folio = cifs_release_folio,
5181 	.direct_IO = cifs_direct_io,
5182 	.invalidate_folio = cifs_invalidate_folio,
5183 	.launder_folio = cifs_launder_folio,
5184 	.migrate_folio = filemap_migrate_folio,
5185 	/*
5186 	 * TODO: investigate and if useful we could add an is_dirty_writeback
5187 	 * helper if needed
5188 	 */
5189 	.swap_activate = cifs_swap_activate,
5190 	.swap_deactivate = cifs_swap_deactivate,
5191 };
5192 
5193 /*
5194  * cifs_readahead requires the server to support a buffer large enough to
5195  * contain the header plus one complete page of data.  Otherwise, we need
5196  * to leave cifs_readahead out of the address space operations.
5197  */
5198 const struct address_space_operations cifs_addr_ops_smallbuf = {
5199 	.read_folio = cifs_read_folio,
5200 	.writepages = cifs_writepages,
5201 	.write_begin = cifs_write_begin,
5202 	.write_end = cifs_write_end,
5203 	.dirty_folio = cifs_dirty_folio,
5204 	.release_folio = cifs_release_folio,
5205 	.invalidate_folio = cifs_invalidate_folio,
5206 	.launder_folio = cifs_launder_folio,
5207 	.migrate_folio = filemap_migrate_folio,
5208 };
5209